From c81e60b1f5d62b74a2ebc269348b36bf91df5015 Mon Sep 17 00:00:00 2001 From: Nikita Kostovsky Date: Sun, 9 Nov 2025 08:53:54 +0100 Subject: fix memcpy_neon --- src/camera/veyeimx287m.cpp | 22 +++++++++++++++++----- src/image.h | 6 +++--- src/mem_utils.h | 33 +++++++++++++++++++++++++-------- 3 files changed, 45 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/camera/veyeimx287m.cpp b/src/camera/veyeimx287m.cpp index d4c19fe..e89442c 100644 --- a/src/camera/veyeimx287m.cpp +++ b/src/camera/veyeimx287m.cpp @@ -81,8 +81,7 @@ std::vector > VeyeIMX287m::search() if (!cam->init()) return {}; - // if (!cam->setExposureTimeUs(30)) - if (!cam->setExposureTimeUs(250)) + if (!cam->setExposureTimeUs(30)) return {}; if (!cam->setLaserLevel(1)) @@ -379,12 +378,25 @@ void VeyeIMX287m::calcFrameLoop(std::stop_token stopToken) t.start(); // std::lock_guard buffer_lock{m_bufferMutexes[bufferIdx]}; // get: 4100-4500 - // memcpy(&image.data, m_videoBuffers[bufferIdx], img_size); + memcpy(&image.data, m_videoBuffers[bufferIdx], img_size); // get: 5000-5100 + // memcpy_1by1((ARRAY_TYPE *) &image.data, + // (ARRAY_TYPE *) m_videoBuffers[bufferIdx]); // memcpy_1by1((std::byte *) &image.data, // (std::byte *) m_videoBuffers[bufferIdx]); - memcpy_neon((ARRAY_TYPE *) &image.data, - (ARRAY_TYPE *) m_videoBuffers[bufferIdx]); + + // memcpy_neon((ARRAY_TYPE *) &image.data, + // (ARRAY_TYPE *) m_videoBuffers[bufferIdx]); + // std::cerr << "size: " << img_size / sizeof(ARRAY_TYPE) << std::endl; + // Image::data_t native; + // Image::data_t neon; + // memcpy(&native, m_videoBuffers[bufferIdx], img_size); + // memcpy_neon((ARRAY_TYPE *) &neon, + // (ARRAY_TYPE *) m_videoBuffers[bufferIdx]); + + // if (memcmp(&native, &neon, img_size) != 0) { + // std::cerr << "different: " << img_size / sizeof(ARRAY_TYPE) << std::endl; + // } get_elapsed_ns += t.nsecsElapsed(); } diff --git a/src/image.h b/src/image.h index a3c4476..e4cf9eb 100644 --- a/src/image.h +++ b/src/image.h @@ -17,8 +17,6 @@ struct Image Image(Image &other) = delete; Image(Image &&other) = delete; Image &operator=(Image &&other) = default; - int width{0}; - int height{0}; // uint8_t data[img_height][img_width] = {{0}}; using row_t = std::array; @@ -27,7 +25,9 @@ struct Image using column_t = rotated_row_t; using rotated_data_t = std::array; // data_t d; - data_t data; + alignas(128) data_t data; + int width{0}; + int height{0}; // data_t *data; // uint8_t *data = {nullptr}; // uint8_t &dataAt(size_t row, size_t col); diff --git a/src/mem_utils.h b/src/mem_utils.h index 8601f78..cb5f179 100644 --- a/src/mem_utils.h +++ b/src/mem_utils.h @@ -1,25 +1,42 @@ #pragma once #include +#include #include +// using ARRAY_TYPE = uint16_t; +// using ARRAY_TYPE = uint32_t; +using ARRAY_TYPE = uint64_t; +// using ARRAY_TYPE = uint64_t; + template -void memcpy_1by1(std::byte *dst, const std::byte *src) +void memcpy_1by1(ARRAY_TYPE *dst, const ARRAY_TYPE *src) { for (std::size_t i{0}; i < S; ++i) { dst[i] = src[i]; } } - -using ARRAY_TYPE = uint32_t; template -void memcpy_neon(ARRAY_TYPE *dst, const ARRAY_TYPE *src) +inline void memcpy_neon(ARRAY_TYPE *dst, const ARRAY_TYPE *src) { - uint32x4_t tmp; + // for (std::size_t i{0}; i < S; i += 4) { + // vst1q_u32(&dst[i], vld1q_u32(src + i)); + // } + // if (uint64_t(src) % 16 != 0) { + // std::cerr << "src misaligned" << std::endl; + // } + // if (uint64_t(dst) % 16 != 0) { + // std::cerr << "dst misaligned" << std::endl; + // } - for (std::size_t i{0}; i < (S / 4); i += 4) { - tmp = vld1q_u32(src + i); - vst1q_u32(&dst[i], tmp); + for (std::size_t i{0}; i < S; i += 2) { + vst1q_u64(&dst[i], vld1q_u64(src + i)); } + // for (std::size_t i{0}; i < S; i += 8) { + // vst1q_u16(&dst[i], vld1q_u16(src + i)); + // } + // for (std::size_t i{0}; i < S; i += 4) { + // vst1q_u64(&dst[i], vld1q_u64(src + i)); + // } } -- cgit v1.2.3-70-g09d2