WARNING: THIS SITE IS A MIRROR OF GITHUB.COM / IT CANNOT LOGIN OR REGISTER ACCOUNTS / THE CONTENTS ARE PROVIDED AS-IS / THIS SITE ASSUMES NO RESPONSIBILITY FOR ANY DISPLAYED CONTENT OR LINKS / IF YOU FOUND SOMETHING MAY NOT GOOD FOR EVERYONE, CONTACT ADMIN AT ilovescratch@foxmail.com
Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions ynnpack/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,19 @@ selects.config_setting_group(
],
)

# Matches builds invoked with `--define ynn_hexagon_clang=true`.
# TODO: This doesn't actually do anything yet; it's just a placeholder for a
# real Hexagon toolchain configuration.
config_setting(
name = "hexagon-clang",
define_values = {"ynn_hexagon_clang": "true"},
)

# Umbrella setting for Hexagon builds: matches when any of the listed settings
# matches (currently only ":hexagon-clang").
selects.config_setting_group(
name = "hexagon",
match_any = [
":hexagon-clang",
],
)

selects.config_setting_group(
name = "apple_clang",
match_any = [
Expand Down Expand Up @@ -252,3 +265,8 @@ define_build_option(
name = "ynn_enable_x86_amxint8",
default_all = [":ynn_enable_x86_amx"],
)

# Build option gating the HVX kernels; referenced as the "hvx" condition
# ("//ynnpack:ynn_enable_hvx") in build_defs.bzl.
# NOTE(review): `default_all = [":hexagon"]` presumably turns the option on by
# default whenever ":hexagon" matches -- confirm against define_build_option.
define_build_option(
name = "ynn_enable_hvx",
default_all = [":hexagon"],
)
3 changes: 3 additions & 0 deletions ynnpack/base/arch.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ uint64_t get_supported_arch_flags() {
if (cpuinfo_has_arm_sme2()) result |= arch_flag::sme2;
#endif // YNN_ARCH_ARM
#endif // YNN_ENABLE_CPUINFO
#ifdef YNN_ARCH_HEXAGON
result |= arch_flag::hvx;
#endif // YNN_ARCH_HEXAGON
return result;
}();
return flags;
Expand Down
3 changes: 3 additions & 0 deletions ynnpack/base/arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ enum {
sme = 1 << 6,
sme2 = 1 << 7,
#endif
#ifdef YNN_ARCH_HEXAGON
hvx = 1 << 0,
#endif
};

} // namespace arch_flag
Expand Down
4 changes: 4 additions & 0 deletions ynnpack/base/build_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@
#define YNN_ARCH_ARM
#endif

// Define YNN_ARCH_HEXAGON when the compiler reports a Hexagon target via its
// predefined `__hexagon__` macro.
#if defined(__hexagon__)
#define YNN_ARCH_HEXAGON
#endif

// We want to use _Float16 if the compiler supports it fully, but it's
// tricky to do this detection; there are compiler versions that define the
// type in broken ways. We're only going to bother using it if the support is
Expand Down
14 changes: 14 additions & 0 deletions ynnpack/base/hexagon/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
load("@rules_cc//cc:cc_library.bzl", "cc_library")

licenses(["notice"])

# Custom gtest/gmock `main()` for tests. `ynn_test_deps()` in
# //ynnpack:build_defs.bzl selects this target instead of gtest_main when
# //ynnpack:hexagon matches.
cc_library(
name = "test_main",
testonly = True,
srcs = [
"test_main.cc",
],
visibility = ["//:__subpackages__"],
deps = ["@com_google_googletest//:gtest"],
# Keep the object file that defines main() in the final link even though no
# test code references a symbol from it.
alwayslink = True,
)
17 changes: 17 additions & 0 deletions ynnpack/base/hexagon/test_main.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Copyright 2025 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <gtest/gtest.h>
#include <gmock/gmock.h>

// Entry point for test binaries that link this library instead of gtest_main.
// Returns the result of RUN_ALL_TESTS().
int main(int argc, char** argv) {
  // InitGoogleMock also initializes GoogleTest, so a separate InitGoogleTest
  // call is unnecessary. It consumes any arguments it understands from argv.
  testing::InitGoogleMock(&argc, argv);

  return RUN_ALL_TESTS();
}
17 changes: 14 additions & 3 deletions ynnpack/build_defs.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,10 @@ _YNN_PARAMS_FOR_ARCH = {
"cond": "//ynnpack:ynn_enable_x86_amxint8",
"copts": ["-mamx-tile", "-mamx-int8"],
},
"hvx": {
"cond": "//ynnpack:ynn_enable_hvx",
"copts": ["-mhvx"],
},
}

def _map_copts_to_msvc(copts):
Expand Down Expand Up @@ -238,23 +242,30 @@ def ynn_kernel_copts(unroll_loops = True):

def ynn_binary_linkopts():
    """Returns the extra linker options for binaries.

    When //ynnpack:hexagon matches, binaries are linked with `-shared` and the
    unused-command-line-argument warning is disabled; every other
    configuration adds no options.
    """
    hexagon_linkopts = [
        "-shared",
        "-Wno-unused-command-line-argument",
    ]
    return select({
        "//ynnpack:hexagon": hexagon_linkopts,
        "//conditions:default": [],
    })

def ynn_binary_malloc():
    """Returns the label of the malloc implementation for binaries.

    Both the //ynnpack:hexagon branch and the default branch currently resolve
    to the same label; the select keeps a per-platform hook in place.
    """
    bazel_malloc = "@bazel_tools//tools/cpp:malloc"
    return select({
        "//ynnpack:hexagon": bazel_malloc,
        "//conditions:default": bazel_malloc,
    })

def ynn_test_deps():
    """Returns the test-framework dependencies for tests.

    When //ynnpack:hexagon matches, tests link plain gtest plus the project's
    own `main()` from //ynnpack/base/hexagon:test_main; every other
    configuration uses the stock gtest_main.
    """
    hexagon_deps = [
        "@com_google_googletest//:gtest",
        "//ynnpack/base/hexagon:test_main",
    ]
    default_deps = ["@com_google_googletest//:gtest_main"]
    return select({
        "//ynnpack:hexagon": hexagon_deps,
        "//conditions:default": default_deps,
    })

def ynn_benchmark_deps():
    """Returns the benchmark-framework dependencies for benchmarks.

    A plain list is returned: there is only one configuration, so the former
    single-branch `select()` (which made the trailing return unreachable) is
    dropped.
    """
    return ["@com_google_benchmark//:benchmark_main"]

def ynn_cc_library(
name,
Expand Down
4 changes: 4 additions & 0 deletions ynnpack/kernels/transpose/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ ynn_cc_library(
"x86_avx2.h",
"x86_avx2.cc",
],
"hvx": [
"hvx.h",
"hvx.cc",
],
},
visibility = ["//ynnpack:__subpackages__"],
deps = [
Expand Down
99 changes: 99 additions & 0 deletions ynnpack/kernels/transpose/hvx.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// Copyright 2025 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include "ynnpack/kernels/transpose/hvx.h"

#include <hexagon_types.h>

#include <array>
#include <cassert>
#include <cstddef>
#include <type_traits>

#include "ynnpack/kernels/transpose/generic.h"
#include "ynnpack/kernels/transpose/interleave.h"
#include "ynnpack/kernels/transpose/transpose.h"

namespace ynn {

void transpose_x32_hvx(size_t m, size_t n, size_t n_bytes_a, size_t stride_a,
const void* a, size_t stride_x, void* x) {
transpose<std::array<HVX_Vector, 32>>(m, n, n_bytes_a, stride_a, a, stride_x,
x,
std::integral_constant<size_t, 32>{});
}
void transpose_x64_hvx(size_t m, size_t n, size_t n_bytes_a, size_t stride_a,
const void* a, size_t stride_x, void* x) {
transpose<std::array<HVX_Vector, 16>>(m, n, n_bytes_a, stride_a, a, stride_x,
x,
std::integral_constant<size_t, 64>{});
}
void transpose_x128_hvx(size_t m, size_t n, size_t n_bytes_a, size_t stride_a,
const void* a, size_t stride_x, void* x) {
transpose<std::array<HVX_Vector, 8>>(m, n, n_bytes_a, stride_a, a, stride_x,
x,
std::integral_constant<size_t, 128>{});
}
void transpose_x256_hvx(size_t m, size_t n, size_t n_bytes_a, size_t stride_a,
const void* a, size_t stride_x, void* x) {
transpose<std::array<HVX_Vector, 4>>(m, n, n_bytes_a, stride_a, a, stride_x,
x,
std::integral_constant<size_t, 256>{});
}
void transpose_x512_hvx(size_t m, size_t n, size_t n_bytes_a, size_t stride_a,
const void* a, size_t stride_x, void* x) {
transpose<std::array<HVX_Vector, 2>>(m, n, n_bytes_a, stride_a, a, stride_x,
x,
std::integral_constant<size_t, 512>{});
}
// HVX transpose entry point registered for size 1024 in transpose.inc.
// Unlike the smaller sizes, no HVX tile type is supplied: the call resolves
// to the `transpose` overload that takes only the size constant.
void transpose_x1024_hvx(size_t m, size_t n, size_t n_bytes_a, size_t stride_a,
                         const void* a, size_t stride_x, void* x) {
  using elem_bits_t = std::integral_constant<size_t, 1024>;
  transpose(m, n, n_bytes_a, stride_a, a, stride_x, x, elem_bits_t{});
}

void interleave2_x8_hvx(size_t factor, size_t m, size_t n, size_t stride_a,
const void* a, void* x) {
assert(factor == 2);
interleave<std::array<HVX_Vector, 2>>(m, n, stride_a, a, x,
std::integral_constant<size_t, 8>{});
}

void interleave2_x16_hvx(size_t factor, size_t m, size_t n, size_t stride_a,
const void* a, void* x) {
assert(factor == 2);
interleave<std::array<HVX_Vector, 2>>(m, n, stride_a, a, x,
std::integral_constant<size_t, 16>{});
}

void interleave2_x32_hvx(size_t factor, size_t m, size_t n, size_t stride_a,
const void* a, void* x) {
assert(factor == 2);
interleave<std::array<HVX_Vector, 2>>(m, n, stride_a, a, x,
std::integral_constant<size_t, 32>{});
}

void interleave4_x8_hvx(size_t factor, size_t m, size_t n, size_t stride_a,
const void* a, void* x) {
assert(factor == 4);
interleave<std::array<HVX_Vector, 4>>(m, n, stride_a, a, x,
std::integral_constant<size_t, 8>{});
}

void interleave4_x16_hvx(size_t factor, size_t m, size_t n, size_t stride_a,
const void* a, void* x) {
assert(factor == 4);
interleave<std::array<HVX_Vector, 4>>(m, n, stride_a, a, x,
std::integral_constant<size_t, 16>{});
}

void interleave4_x32_hvx(size_t factor, size_t m, size_t n, size_t stride_a,
const void* a, void* x) {
assert(factor == 4);
interleave<std::array<HVX_Vector, 4>>(m, n, stride_a, a, x,
std::integral_constant<size_t, 32>{});
}

} // namespace ynn
61 changes: 61 additions & 0 deletions ynnpack/kernels/transpose/hvx.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Copyright 2025 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#ifndef XNNPACK_YNNPACK_KERNELS_TRANSPOSE_HVX_H_
#define XNNPACK_YNNPACK_KERNELS_TRANSPOSE_HVX_H_

#include <hexagon_protos.h>
#include <hexagon_types.h>
#include <hvx_hexagon_protos.h>

#include <array>
#include <cassert>
#include <cstddef>
#include <type_traits>

#include "ynnpack/base/arithmetic.h"

namespace ynn {

// Shuffles the two HVX vectors in `x` with a single `vshuff` and returns the
// low and high halves of the resulting vector pair, in that order.
//
// `elem_size_bits` is the element width in bits; it is converted to bytes and
// passed negated as the shuffle control.
// NOTE(review): a control of -N presumably interleaves N-byte elements --
// confirm against the HVX reference for vshuff.
template <typename ElemSizeBits>
static std::array<HVX_Vector, 2> interleave(ElemSizeBits elem_size_bits,
                                            std::array<HVX_Vector, 2> x) {
  // Convert to a signed byte count *before* negating. Negating the unsigned
  // quotient directly would rely on wraparound followed by an implicit
  // narrowing conversion back to int.
  const int elem_size_bytes = static_cast<int>(elem_size_bits / 8);
  HVX_VectorPair x01 = Q6_W_vshuff_VVR(x[1], x[0], -elem_size_bytes);
  return {Q6_V_lo_W(x01), Q6_V_hi_W(x01)};
}

// Loads `M` HVX vectors from rows that start at `a` and are `stride` bytes
// apart. Only the first `m` rows are read from memory (through unaligned
// HVX_UVector accesses); the remaining `M - m` entries are filled with zeros.
// The unnamed first parameter only selects this overload.
// NOTE(review): the `n_bytes` tag is 16 although a whole HVX vector is read
// per row -- confirm this matches what the caller expects.
template <size_t M>
static std::array<HVX_Vector, M> load(
    std::array<HVX_Vector, M>, const void* a, size_t stride, size_t m,
    std::integral_constant<size_t, 16> /*n_bytes*/) {
  assert(m > 0);
  assert(m <= M);
  std::array<HVX_Vector, M> rows;
  // Row 0 always exists because m > 0.
  rows[0] = *reinterpret_cast<const HVX_UVector*>(a);
  for (size_t row = 1; row < M; ++row) {
    if (row < m) {
      rows[row] =
          *reinterpret_cast<const HVX_UVector*>(offset_bytes(a, row * stride));
    } else {
      // Past the last valid row: pad with zeros instead of touching memory.
      rows[row] = Q6_V_vsplat_R(0);
    }
  }
  return rows;
}

// Stores the first `m` vectors of `x` to rows that start at `a` and are
// `stride` bytes apart, using unaligned HVX_UVector accesses. Entries at index
// `m` and above are not written.
// NOTE(review): the `n_bytes` tag is 16 although a whole HVX vector is written
// per row -- confirm this matches what the caller expects.
template <size_t M>
static void store(std::array<HVX_Vector, M> x, void* a, size_t stride, size_t m,
                  std::integral_constant<size_t, 16> /*n_bytes*/) {
  assert(m > 0);
  assert(m <= M);
  // Row 0 always exists because m > 0.
  *reinterpret_cast<HVX_UVector*>(a) = x[0];
  for (size_t row = 1; row < M; ++row) {
    // Skip rows beyond the valid count; the loop bound stays the constant M.
    if (row >= m) continue;
    *reinterpret_cast<HVX_UVector*>(offset_bytes(a, row * stride)) = x[row];
  }
}

} // namespace ynn

#endif // XNNPACK_YNNPACK_KERNELS_TRANSPOSE_HVX_H_
9 changes: 9 additions & 0 deletions ynnpack/kernels/transpose/interleave.inc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,15 @@ YNN_INTERLEAVE_KERNEL(arch_flag::neon, interleave4_x16_neon, 4, 16)
YNN_INTERLEAVE_KERNEL(arch_flag::neon, interleave4_x32_neon, 4, 32)
#endif // YNN_ARCH_ARM_NEON

// Interleave kernels for Hexagon HVX, registered under arch_flag::hvx.
// The last two macro arguments match the factor and size in each kernel name.
// NOTE(review): this guard tests YNN_ARCH_HVX, while build_config.h defines
// YNN_ARCH_HEXAGON; presumably the build defines YNN_ARCH_HVX when the "hvx"
// sources are enabled -- confirm.
#ifdef YNN_ARCH_HVX
YNN_INTERLEAVE_KERNEL(arch_flag::hvx, interleave2_x8_hvx, 2, 8)
YNN_INTERLEAVE_KERNEL(arch_flag::hvx, interleave2_x16_hvx, 2, 16)
YNN_INTERLEAVE_KERNEL(arch_flag::hvx, interleave2_x32_hvx, 2, 32)
YNN_INTERLEAVE_KERNEL(arch_flag::hvx, interleave4_x8_hvx, 4, 8)
YNN_INTERLEAVE_KERNEL(arch_flag::hvx, interleave4_x16_hvx, 4, 16)
YNN_INTERLEAVE_KERNEL(arch_flag::hvx, interleave4_x32_hvx, 4, 32)
#endif // YNN_ARCH_HVX

YNN_INTERLEAVE_KERNEL(arch_flag::none, interleave_x4, 0, 4)
YNN_INTERLEAVE_KERNEL(arch_flag::none, interleave_x8, 0, 8)
YNN_INTERLEAVE_KERNEL(arch_flag::none, interleave_x16, 0, 16)
Expand Down
9 changes: 9 additions & 0 deletions ynnpack/kernels/transpose/transpose.inc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,15 @@ YNN_TRANSPOSE_KERNEL(arch_flag::neon, transpose_x512_neon, 512)
YNN_TRANSPOSE_KERNEL(arch_flag::neon, transpose_x1024_neon, 1024)
#endif // YNN_ARCH_ARM_NEON

// Transpose kernels for Hexagon HVX, registered under arch_flag::hvx.
// The last macro argument matches the size in each kernel name.
// NOTE(review): this guard tests YNN_ARCH_HVX, while build_config.h defines
// YNN_ARCH_HEXAGON; presumably the build defines YNN_ARCH_HVX when the "hvx"
// sources are enabled -- confirm.
#ifdef YNN_ARCH_HVX
YNN_TRANSPOSE_KERNEL(arch_flag::hvx, transpose_x32_hvx, 32)
YNN_TRANSPOSE_KERNEL(arch_flag::hvx, transpose_x64_hvx, 64)
YNN_TRANSPOSE_KERNEL(arch_flag::hvx, transpose_x128_hvx, 128)
YNN_TRANSPOSE_KERNEL(arch_flag::hvx, transpose_x256_hvx, 256)
YNN_TRANSPOSE_KERNEL(arch_flag::hvx, transpose_x512_hvx, 512)
YNN_TRANSPOSE_KERNEL(arch_flag::hvx, transpose_x1024_hvx, 1024)
#endif // YNN_ARCH_HVX

YNN_TRANSPOSE_KERNEL(arch_flag::none, transpose_x4, 4)
YNN_TRANSPOSE_KERNEL(arch_flag::none, transpose_x8, 8)
YNN_TRANSPOSE_KERNEL(arch_flag::none, transpose_x16, 16)
Expand Down
Loading