From c81e60b1f5d62b74a2ebc269348b36bf91df5015 Mon Sep 17 00:00:00 2001
From: Nikita Kostovsky
Date: Sun, 9 Nov 2025 08:53:54 +0100
Subject: fix memcpy_neon

---
 src/mem_utils.h | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

(limited to 'src/mem_utils.h')

diff --git a/src/mem_utils.h b/src/mem_utils.h
index 8601f78..cb5f179 100644
--- a/src/mem_utils.h
+++ b/src/mem_utils.h
@@ -1,25 +1,42 @@
 #pragma once
 
 #include
+#include
 #include
 
+// using ARRAY_TYPE = uint16_t;
+// using ARRAY_TYPE = uint32_t;
+using ARRAY_TYPE = uint64_t;
+// using ARRAY_TYPE = uint64_t;
+
 template <std::size_t S>
-void memcpy_1by1(std::byte *dst, const std::byte *src)
+void memcpy_1by1(ARRAY_TYPE *dst, const ARRAY_TYPE *src)
 {
     for (std::size_t i{0}; i < S; ++i) {
         dst[i] = src[i];
     }
 }
-
-using ARRAY_TYPE = uint32_t;
 
 template <std::size_t S>
-void memcpy_neon(ARRAY_TYPE *dst, const ARRAY_TYPE *src)
+inline void memcpy_neon(ARRAY_TYPE *dst, const ARRAY_TYPE *src)
 {
-    uint32x4_t tmp;
+    // for (std::size_t i{0}; i < S; i += 4) {
+    //     vst1q_u32(&dst[i], vld1q_u32(src + i));
+    // }
+    // if (uint64_t(src) % 16 != 0) {
+    //     std::cerr << "src misaligned" << std::endl;
+    // }
+    // if (uint64_t(dst) % 16 != 0) {
+    //     std::cerr << "dst misaligned" << std::endl;
+    // }
-
-    for (std::size_t i{0}; i < (S / 4); i += 4) {
-        tmp = vld1q_u32(src + i);
-        vst1q_u32(&dst[i], tmp);
+    for (std::size_t i{0}; i < S; i += 2) {
+        vst1q_u64(&dst[i], vld1q_u64(src + i));
     }
+    // for (std::size_t i{0}; i < S; i += 8) {
+    //     vst1q_u16(&dst[i], vld1q_u16(src + i));
+    // }
+    // for (std::size_t i{0}; i < S; i += 4) {
+    //     vst1q_u64(&dst[i], vld1q_u64(src + i));
+    // }
 }
-- 
cgit v1.2.3-70-g09d2