summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Nikita Kostovsky <nikita@kostovsky.me> 2025-11-09 08:53:54 +0100
committer Nikita Kostovsky <nikita@kostovsky.me> 2025-11-09 08:55:23 +0100
commit c81e60b1f5d62b74a2ebc269348b36bf91df5015 (patch)
tree 8589c151556a8d4755ebfa5520d076ae0b902387 /src
parent 69c5e9c07941212ac77368effd1c60db3140d4a3 (diff)
fix memcpy_neon
Diffstat (limited to 'src')
-rw-r--r-- src/camera/veyeimx287m.cpp 22
-rw-r--r-- src/image.h 6
-rw-r--r-- src/mem_utils.h 33
3 files changed, 45 insertions, 16 deletions
diff --git a/src/camera/veyeimx287m.cpp b/src/camera/veyeimx287m.cpp
index d4c19fe..e89442c 100644
--- a/src/camera/veyeimx287m.cpp
+++ b/src/camera/veyeimx287m.cpp
@@ -81,8 +81,7 @@ std::vector<std::shared_ptr<ICamera> > VeyeIMX287m::search()
if (!cam->init())
return {};
- // if (!cam->setExposureTimeUs(30))
- if (!cam->setExposureTimeUs(250))
+ if (!cam->setExposureTimeUs(30))
return {};
if (!cam->setLaserLevel(1))
@@ -379,12 +378,25 @@ void VeyeIMX287m::calcFrameLoop(std::stop_token stopToken)
t.start();
// std::lock_guard buffer_lock{m_bufferMutexes[bufferIdx]};
// get: 4100-4500
- // memcpy(&image.data, m_videoBuffers[bufferIdx], img_size);
+ memcpy(&image.data, m_videoBuffers[bufferIdx], img_size);
// get: 5000-5100
+ // memcpy_1by1<img_size / sizeof(ARRAY_TYPE)>((ARRAY_TYPE *) &image.data,
+ // (ARRAY_TYPE *) m_videoBuffers[bufferIdx]);
// memcpy_1by1<img_size>((std::byte *) &image.data,
// (std::byte *) m_videoBuffers[bufferIdx]);
- memcpy_neon<img_size / sizeof(ARRAY_TYPE)>((ARRAY_TYPE *) &image.data,
- (ARRAY_TYPE *) m_videoBuffers[bufferIdx]);
+
+ // memcpy_neon<img_size / sizeof(ARRAY_TYPE)>((ARRAY_TYPE *) &image.data,
+ // (ARRAY_TYPE *) m_videoBuffers[bufferIdx]);
+ // std::cerr << "size: " << img_size / sizeof(ARRAY_TYPE) << std::endl;
+ // Image::data_t native;
+ // Image::data_t neon;
+ // memcpy(&native, m_videoBuffers[bufferIdx], img_size);
+ // memcpy_neon<img_size / sizeof(ARRAY_TYPE)>((ARRAY_TYPE *) &neon,
+ // (ARRAY_TYPE *) m_videoBuffers[bufferIdx]);
+
+ // if (memcmp(&native, &neon, img_size) != 0) {
+ // std::cerr << "different: " << img_size / sizeof(ARRAY_TYPE) << std::endl;
+ // }
get_elapsed_ns += t.nsecsElapsed();
}
diff --git a/src/image.h b/src/image.h
index a3c4476..e4cf9eb 100644
--- a/src/image.h
+++ b/src/image.h
@@ -17,8 +17,6 @@ struct Image
Image(Image &other) = delete;
Image(Image &&other) = delete;
Image &operator=(Image &&other) = default;
- int width{0};
- int height{0};
// uint8_t data[img_height][img_width] = {{0}};
using row_t = std::array<uint8_t, img_width>;
@@ -27,7 +25,9 @@ struct Image
using column_t = rotated_row_t;
using rotated_data_t = std::array<column_t, img_width>;
// data_t d;
- data_t data;
+ alignas(128) data_t data;
+ int width{0};
+ int height{0};
// data_t *data;
// uint8_t *data = {nullptr};
// uint8_t &dataAt(size_t row, size_t col);
diff --git a/src/mem_utils.h b/src/mem_utils.h
index 8601f78..cb5f179 100644
--- a/src/mem_utils.h
+++ b/src/mem_utils.h
@@ -1,25 +1,42 @@
#pragma once
#include <cstddef>
+#include <iostream>
#include <arm_neon.h>
+// using ARRAY_TYPE = uint16_t;
+// using ARRAY_TYPE = uint32_t;
+using ARRAY_TYPE = uint64_t;
+// using ARRAY_TYPE = uint64_t;
+
template<std::size_t S>
-void memcpy_1by1(std::byte *dst, const std::byte *src)
+void memcpy_1by1(ARRAY_TYPE *dst, const ARRAY_TYPE *src)
{
for (std::size_t i{0}; i < S; ++i) {
dst[i] = src[i];
}
}
-
-using ARRAY_TYPE = uint32_t;
template<std::size_t S>
-void memcpy_neon(ARRAY_TYPE *dst, const ARRAY_TYPE *src)
+inline void memcpy_neon(ARRAY_TYPE *dst, const ARRAY_TYPE *src)
{
- uint32x4_t tmp;
+ // for (std::size_t i{0}; i < S; i += 4) {
+ // vst1q_u32(&dst[i], vld1q_u32(src + i));
+ // }
+ // if (uint64_t(src) % 16 != 0) {
+ // std::cerr << "src misaligned" << std::endl;
+ // }
+ // if (uint64_t(dst) % 16 != 0) {
+ // std::cerr << "dst misaligned" << std::endl;
+ // }
- for (std::size_t i{0}; i < (S / 4); i += 4) {
- tmp = vld1q_u32(src + i);
- vst1q_u32(&dst[i], tmp);
+ for (std::size_t i{0}; i < S; i += 2) {
+ vst1q_u64(&dst[i], vld1q_u64(src + i));
}
+ // for (std::size_t i{0}; i < S; i += 8) {
+ // vst1q_u16(&dst[i], vld1q_u16(src + i));
+ // }
+ // for (std::size_t i{0}; i < S; i += 4) {
+ // vst1q_u64(&dst[i], vld1q_u64(src + i));
+ // }
}