summary refs log tree commit diff
path: root/src/mem_utils.h
diff options
context:
space:
mode:
author Nikita Kostovsky <nikita@kostovsky.me> 2025-11-09 08:53:54 +0100
committer Nikita Kostovsky <nikita@kostovsky.me> 2025-11-09 08:55:23 +0100
commit c81e60b1f5d62b74a2ebc269348b36bf91df5015 (patch)
tree 8589c151556a8d4755ebfa5520d076ae0b902387 /src/mem_utils.h
parent 69c5e9c07941212ac77368effd1c60db3140d4a3 (diff)
fix memcpy_neon
Diffstat (limited to 'src/mem_utils.h')
-rw-r--r-- src/mem_utils.h 33
1 files changed, 25 insertions, 8 deletions
diff --git a/src/mem_utils.h b/src/mem_utils.h
index 8601f78..cb5f179 100644
--- a/src/mem_utils.h
+++ b/src/mem_utils.h
@@ -1,25 +1,42 @@
#pragma once
#include <cstddef>
+#include <iostream>
#include <arm_neon.h>
+// using ARRAY_TYPE = uint16_t;
+// using ARRAY_TYPE = uint32_t;
+using ARRAY_TYPE = uint64_t;
+// using ARRAY_TYPE = uint64_t;
+
template<std::size_t S>
-void memcpy_1by1(std::byte *dst, const std::byte *src)
+void memcpy_1by1(ARRAY_TYPE *dst, const ARRAY_TYPE *src)
{
for (std::size_t i{0}; i < S; ++i) {
dst[i] = src[i];
}
}
-
-using ARRAY_TYPE = uint32_t;
template<std::size_t S>
-void memcpy_neon(ARRAY_TYPE *dst, const ARRAY_TYPE *src)
+inline void memcpy_neon(ARRAY_TYPE *dst, const ARRAY_TYPE *src)
{
- uint32x4_t tmp;
+ // for (std::size_t i{0}; i < S; i += 4) {
+ // vst1q_u32(&dst[i], vld1q_u32(src + i));
+ // }
+ // if (uint64_t(src) % 16 != 0) {
+ // std::cerr << "src misaligned" << std::endl;
+ // }
+ // if (uint64_t(dst) % 16 != 0) {
+ // std::cerr << "dst misaligned" << std::endl;
+ // }
- for (std::size_t i{0}; i < (S / 4); i += 4) {
- tmp = vld1q_u32(src + i);
- vst1q_u32(&dst[i], tmp);
+ for (std::size_t i{0}; i < S; i += 2) {
+ vst1q_u64(&dst[i], vld1q_u64(src + i));
}
+ // for (std::size_t i{0}; i < S; i += 8) {
+ // vst1q_u16(&dst[i], vld1q_u16(src + i));
+ // }
+ // for (std::size_t i{0}; i < S; i += 4) {
+ // vst1q_u64(&dst[i], vld1q_u64(src + i));
+ // }
}