summaryrefslogtreecommitdiff
path: root/src/mem_utils.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/mem_utils.h')
-rw-r--r--src/mem_utils.h33
1 files changed, 25 insertions, 8 deletions
diff --git a/src/mem_utils.h b/src/mem_utils.h
index 8601f78..cb5f179 100644
--- a/src/mem_utils.h
+++ b/src/mem_utils.h
@@ -1,25 +1,42 @@
#pragma once
#include <cstddef>
+#include <iostream>
#include <arm_neon.h>
+// using ARRAY_TYPE = uint16_t;
+// using ARRAY_TYPE = uint32_t;
+using ARRAY_TYPE = uint64_t;
+// using ARRAY_TYPE = uint64_t;
+
template<std::size_t S>
-void memcpy_1by1(std::byte *dst, const std::byte *src)
+void memcpy_1by1(ARRAY_TYPE *dst, const ARRAY_TYPE *src)
{
for (std::size_t i{0}; i < S; ++i) {
dst[i] = src[i];
}
}
-
-using ARRAY_TYPE = uint32_t;
template<std::size_t S>
-void memcpy_neon(ARRAY_TYPE *dst, const ARRAY_TYPE *src)
+inline void memcpy_neon(ARRAY_TYPE *dst, const ARRAY_TYPE *src)
{
- uint32x4_t tmp;
+ // for (std::size_t i{0}; i < S; i += 4) {
+ // vst1q_u32(&dst[i], vld1q_u32(src + i));
+ // }
+ // if (uint64_t(src) % 16 != 0) {
+ // std::cerr << "src misaligned" << std::endl;
+ // }
+ // if (uint64_t(dst) % 16 != 0) {
+ // std::cerr << "dst misaligned" << std::endl;
+ // }
- for (std::size_t i{0}; i < (S / 4); i += 4) {
- tmp = vld1q_u32(src + i);
- vst1q_u32(&dst[i], tmp);
+ for (std::size_t i{0}; i < S; i += 2) {
+ vst1q_u64(&dst[i], vld1q_u64(src + i));
}
+ // for (std::size_t i{0}; i < S; i += 8) {
+ // vst1q_u16(&dst[i], vld1q_u16(src + i));
+ // }
+ // for (std::size_t i{0}; i < S; i += 4) {
+ // vst1q_u64(&dst[i], vld1q_u64(src + i));
+ // }
}