WARNING: THIS SITE IS A MIRROR OF GITHUB.COM / YOU CANNOT LOG IN OR REGISTER ACCOUNTS HERE / THE CONTENTS ARE PROVIDED AS-IS / THIS SITE ASSUMES NO RESPONSIBILITY FOR ANY DISPLAYED CONTENT OR LINKS / IF YOU FIND SOMETHING THAT MAY NOT BE SUITABLE FOR EVERYONE, CONTACT THE ADMIN AT ilovescratch@foxmail.com
Skip to content

Commit d88fc3b

Browse files
dsharletg, xnnpack-bot
authored and committed
Clean up SIMD headers and tests
- Be more precise/specific about the mapping from architecture to header
- Add more test coverage by giving up on using gtest to instantiate the tests
- Fix bugs that motivated these changes: AVX512 s32 multiply was wrong, sse41 intrinsics were enabled from sse2 headers
- Refactor arm_neon_xf16.h to be unnecessary; multi_vec.h can do this `extract` now.

PiperOrigin-RevId: 840525070
1 parent 149835f commit d88fc3b

30 files changed

+823
-601
lines changed

ynnpack/base/simd/BUILD

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -18,11 +18,12 @@ cc_library(
1818
"//ynnpack:x86": [
1919
"x86_avx.h",
2020
"x86_avx2.h",
21-
"x86_avx512.h",
22-
"x86_sse.h",
21+
"x86_avx512bw.h",
22+
"x86_avx512f.h",
23+
"x86_sse2.h",
2324
"x86_sse41.h",
2425
],
25-
"//ynnpack:arm": ["arm.h"],
26+
"//ynnpack:arm": ["arm_neon.h"],
2627
"//conditions:default": [],
2728
}),
2829
compatible_with = _COMPATIBLE_WITH,
File renamed without changes.

ynnpack/base/simd/multi_vec.h

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -124,6 +124,12 @@ YNN_ALWAYS_INLINE void store(typename Vec::value_type* ptr,
124124
}
125125
}
126126

127+
template <int Index, typename Vec>
128+
Vec extract(Vec x, Vec) {
129+
static_assert(Index == 0, "");
130+
return x;
131+
}
132+
127133
template <int Index, typename Vec, typename ResultVec, size_t M>
128134
ResultVec extract(multi_vec<Vec, M> b, ResultVec) {
129135
static_assert(Index * ResultVec::N < M * Vec::N);

ynnpack/base/simd/test/BUILD

Lines changed: 8 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -11,27 +11,16 @@ package(default_visibility = ["//ynnpack:__subpackages__"])
1111
ynn_cc_library(
1212
name = "tests",
1313
testonly = True,
14+
hdrs = ["generic.h"],
1415
per_arch_srcs = {
1516
# This test defines different tests depending on which architecture is being compiled.
16-
"x86_sse2": [
17-
"test.cc",
18-
"convert_only_test.cc",
19-
],
20-
"x86_sse41": ["test.cc"],
21-
"x86_avx": ["test.cc"],
22-
"x86_avx2": [
23-
"test.cc",
24-
"convert_only_test.cc",
25-
],
26-
"x86_f16c": ["convert_only_test.cc"],
27-
"x86_avx512bf16": ["convert_only_test.cc"],
28-
"x86_avx512fp16": ["convert_only_test.cc"],
29-
"x86_avx512f": ["test.cc"],
30-
"x86_avx512bw": ["test.cc"],
31-
"arm_neon": [
32-
"convert_only_test.cc",
33-
"test.cc",
34-
],
17+
"x86_sse2": ["x86_sse2.cc"],
18+
"x86_sse41": ["x86_sse41.cc"],
19+
"x86_avx": ["x86_avx.cc"],
20+
"x86_avx2": ["x86_avx2.cc"],
21+
"x86_avx512f": ["x86_avx512f.cc"],
22+
"x86_avx512bw": ["x86_avx512bw.cc"],
23+
"arm_neon": ["arm_neon.cc"],
3524
},
3625
deps = [
3726
"//ynnpack/base", # buildcleaner:keep

ynnpack/base/simd/test/arm_neon.cc

Lines changed: 77 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,77 @@
1+
// Copyright 2025 Google LLC
2+
//
3+
// This source code is licensed under the BSD-style license found in the
4+
// LICENSE file in the root directory of this source tree.
5+
6+
#include "ynnpack/base/simd/arm_neon.h"
7+
8+
#include "ynnpack/base/simd/test/generic.h"
9+
10+
namespace ynn {
11+
namespace simd {
12+
13+
TEST_LOAD_STORE(arm_neon, u8x16, /*arch_flags=*/0);
14+
TEST_LOAD_STORE(arm_neon, s8x16, /*arch_flags=*/0);
15+
TEST_LOAD_STORE(arm_neon, s16x8, /*arch_flags=*/0);
16+
TEST_LOAD_STORE(arm_neon, f16x8, /*arch_flags=*/0);
17+
TEST_LOAD_STORE(arm_neon, bf16x8, /*arch_flags=*/0);
18+
TEST_LOAD_STORE(arm_neon, f32x4, /*arch_flags=*/0);
19+
TEST_LOAD_STORE(arm_neon, s32x4, /*arch_flags=*/0);
20+
21+
TEST_ALIGNED_LOAD_STORE(arm_neon, u8x16, /*arch_flags=*/0);
22+
TEST_ALIGNED_LOAD_STORE(arm_neon, s8x16, /*arch_flags=*/0);
23+
TEST_ALIGNED_LOAD_STORE(arm_neon, s16x8, /*arch_flags=*/0);
24+
TEST_ALIGNED_LOAD_STORE(arm_neon, f16x8, /*arch_flags=*/0);
25+
TEST_ALIGNED_LOAD_STORE(arm_neon, bf16x8, /*arch_flags=*/0);
26+
TEST_ALIGNED_LOAD_STORE(arm_neon, f32x4, /*arch_flags=*/0);
27+
TEST_ALIGNED_LOAD_STORE(arm_neon, s32x4, /*arch_flags=*/0);
28+
29+
TEST_PARTIAL_LOAD_STORE(arm_neon, u8x16, /*arch_flags=*/0);
30+
TEST_PARTIAL_LOAD_STORE(arm_neon, s8x16, /*arch_flags=*/0);
31+
TEST_PARTIAL_LOAD_STORE(arm_neon, s16x8, /*arch_flags=*/0);
32+
TEST_PARTIAL_LOAD_STORE(arm_neon, f16x8, /*arch_flags=*/0);
33+
TEST_PARTIAL_LOAD_STORE(arm_neon, bf16x8, /*arch_flags=*/0);
34+
TEST_PARTIAL_LOAD_STORE(arm_neon, f32x4, /*arch_flags=*/0);
35+
TEST_PARTIAL_LOAD_STORE(arm_neon, s32x4, /*arch_flags=*/0);
36+
37+
TEST_ADD(arm_neon, u8x16, /*arch_flags=*/0);
38+
TEST_ADD(arm_neon, s8x16, /*arch_flags=*/0);
39+
TEST_ADD(arm_neon, f32x4, /*arch_flags=*/0);
40+
TEST_ADD(arm_neon, s32x4, /*arch_flags=*/0);
41+
42+
TEST_SUBTRACT(arm_neon, u8x16, /*arch_flags=*/0);
43+
TEST_SUBTRACT(arm_neon, s8x16, /*arch_flags=*/0);
44+
TEST_SUBTRACT(arm_neon, f32x4, /*arch_flags=*/0);
45+
TEST_SUBTRACT(arm_neon, s32x4, /*arch_flags=*/0);
46+
47+
TEST_MULTIPLY(arm_neon, u8x16, /*arch_flags=*/0);
48+
TEST_MULTIPLY(arm_neon, s8x16, /*arch_flags=*/0);
49+
TEST_MULTIPLY(arm_neon, f32x4, /*arch_flags=*/0);
50+
TEST_MULTIPLY(arm_neon, s32x4, /*arch_flags=*/0);
51+
52+
TEST_MIN(arm_neon, u8x16, /*arch_flags=*/0);
53+
TEST_MIN(arm_neon, s8x16, /*arch_flags=*/0);
54+
TEST_MIN(arm_neon, s16x8, /*arch_flags=*/0);
55+
TEST_MIN(arm_neon, f32x4, /*arch_flags=*/0);
56+
TEST_MIN(arm_neon, s32x4, /*arch_flags=*/0);
57+
58+
TEST_MAX(arm_neon, u8x16, /*arch_flags=*/0);
59+
TEST_MAX(arm_neon, s8x16, /*arch_flags=*/0);
60+
TEST_MAX(arm_neon, s16x8, /*arch_flags=*/0);
61+
TEST_MAX(arm_neon, f32x4, /*arch_flags=*/0);
62+
TEST_MAX(arm_neon, s32x4, /*arch_flags=*/0);
63+
64+
TEST_HORIZONTAL_MIN(arm_neon, u8x16, /*arch_flags=*/0);
65+
TEST_HORIZONTAL_MIN(arm_neon, s8x16, /*arch_flags=*/0);
66+
TEST_HORIZONTAL_MIN(arm_neon, s16x8, /*arch_flags=*/0);
67+
TEST_HORIZONTAL_MIN(arm_neon, f32x4, /*arch_flags=*/0);
68+
TEST_HORIZONTAL_MIN(arm_neon, s32x4, /*arch_flags=*/0);
69+
70+
TEST_HORIZONTAL_MAX(arm_neon, u8x16, /*arch_flags=*/0);
71+
TEST_HORIZONTAL_MAX(arm_neon, s8x16, /*arch_flags=*/0);
72+
TEST_HORIZONTAL_MAX(arm_neon, s16x8, /*arch_flags=*/0);
73+
TEST_HORIZONTAL_MAX(arm_neon, f32x4, /*arch_flags=*/0);
74+
TEST_HORIZONTAL_MAX(arm_neon, s32x4, /*arch_flags=*/0);
75+
76+
} // namespace simd
77+
} // namespace ynn

ynnpack/base/simd/test/convert_only_test.cc

Lines changed: 0 additions & 135 deletions
This file was deleted.

0 commit comments

Comments
 (0)