summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- CMakeLists.txt 3
-rw-r--r-- src/camera/veyeimx287m.cpp 22
-rw-r--r-- src/image.h 6
-rw-r--r-- src/mem_utils.h 33
4 files changed, 47 insertions, 17 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f5f0579..6343f7d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -133,7 +133,8 @@ install(TARGETS app${PROJECT_NAME}
if ("${CMAKE_BUILD_TYPE}" MATCHES "Release")
message("it's Release")
- add_compile_options(-Ofast -fno-unroll-loops -Wall)
+ # add_compile_options(-Ofast -fno-unroll-loops -Wall)
+ add_compile_options(-Ofast -Wall)
# add_compile_options(-Wall -Wextra -Wpedantic)
# add_compile_options(-O3 -Wall)
else()
diff --git a/src/camera/veyeimx287m.cpp b/src/camera/veyeimx287m.cpp
index d4c19fe..e89442c 100644
--- a/src/camera/veyeimx287m.cpp
+++ b/src/camera/veyeimx287m.cpp
@@ -81,8 +81,7 @@ std::vector<std::shared_ptr<ICamera> > VeyeIMX287m::search()
if (!cam->init())
return {};
- // if (!cam->setExposureTimeUs(30))
- if (!cam->setExposureTimeUs(250))
+ if (!cam->setExposureTimeUs(30))
return {};
if (!cam->setLaserLevel(1))
@@ -379,12 +378,25 @@ void VeyeIMX287m::calcFrameLoop(std::stop_token stopToken)
t.start();
// std::lock_guard buffer_lock{m_bufferMutexes[bufferIdx]};
// get: 4100-4500
- // memcpy(&image.data, m_videoBuffers[bufferIdx], img_size);
+ memcpy(&image.data, m_videoBuffers[bufferIdx], img_size);
// get: 5000-5100
+ // memcpy_1by1<img_size / sizeof(ARRAY_TYPE)>((ARRAY_TYPE *) &image.data,
+ // (ARRAY_TYPE *) m_videoBuffers[bufferIdx]);
// memcpy_1by1<img_size>((std::byte *) &image.data,
// (std::byte *) m_videoBuffers[bufferIdx]);
- memcpy_neon<img_size / sizeof(ARRAY_TYPE)>((ARRAY_TYPE *) &image.data,
- (ARRAY_TYPE *) m_videoBuffers[bufferIdx]);
+
+ // memcpy_neon<img_size / sizeof(ARRAY_TYPE)>((ARRAY_TYPE *) &image.data,
+ // (ARRAY_TYPE *) m_videoBuffers[bufferIdx]);
+ // std::cerr << "size: " << img_size / sizeof(ARRAY_TYPE) << std::endl;
+ // Image::data_t native;
+ // Image::data_t neon;
+ // memcpy(&native, m_videoBuffers[bufferIdx], img_size);
+ // memcpy_neon<img_size / sizeof(ARRAY_TYPE)>((ARRAY_TYPE *) &neon,
+ // (ARRAY_TYPE *) m_videoBuffers[bufferIdx]);
+
+ // if (memcmp(&native, &neon, img_size) != 0) {
+ // std::cerr << "different: " << img_size / sizeof(ARRAY_TYPE) << std::endl;
+ // }
get_elapsed_ns += t.nsecsElapsed();
}
diff --git a/src/image.h b/src/image.h
index a3c4476..e4cf9eb 100644
--- a/src/image.h
+++ b/src/image.h
@@ -17,8 +17,6 @@ struct Image
Image(Image &other) = delete;
Image(Image &&other) = delete;
Image &operator=(Image &&other) = default;
- int width{0};
- int height{0};
// uint8_t data[img_height][img_width] = {{0}};
using row_t = std::array<uint8_t, img_width>;
@@ -27,7 +25,9 @@ struct Image
using column_t = rotated_row_t;
using rotated_data_t = std::array<column_t, img_width>;
// data_t d;
- data_t data;
+ alignas(128) data_t data;
+ int width{0};
+ int height{0};
// data_t *data;
// uint8_t *data = {nullptr};
// uint8_t &dataAt(size_t row, size_t col);
diff --git a/src/mem_utils.h b/src/mem_utils.h
index 8601f78..cb5f179 100644
--- a/src/mem_utils.h
+++ b/src/mem_utils.h
@@ -1,25 +1,42 @@
#pragma once
#include <cstddef>
+#include <iostream>
#include <arm_neon.h>
+// using ARRAY_TYPE = uint16_t;
+// using ARRAY_TYPE = uint32_t;
+using ARRAY_TYPE = uint64_t;
+// using ARRAY_TYPE = uint64_t;
+
template<std::size_t S>
-void memcpy_1by1(std::byte *dst, const std::byte *src)
+void memcpy_1by1(ARRAY_TYPE *dst, const ARRAY_TYPE *src)
{
for (std::size_t i{0}; i < S; ++i) {
dst[i] = src[i];
}
}
-
-using ARRAY_TYPE = uint32_t;
template<std::size_t S>
-void memcpy_neon(ARRAY_TYPE *dst, const ARRAY_TYPE *src)
+inline void memcpy_neon(ARRAY_TYPE *dst, const ARRAY_TYPE *src)
{
- uint32x4_t tmp;
+ // for (std::size_t i{0}; i < S; i += 4) {
+ // vst1q_u32(&dst[i], vld1q_u32(src + i));
+ // }
+ // if (uint64_t(src) % 16 != 0) {
+ // std::cerr << "src misaligned" << std::endl;
+ // }
+ // if (uint64_t(dst) % 16 != 0) {
+ // std::cerr << "dst misaligned" << std::endl;
+ // }
- for (std::size_t i{0}; i < (S / 4); i += 4) {
- tmp = vld1q_u32(src + i);
- vst1q_u32(&dst[i], tmp);
+ for (std::size_t i{0}; i < S; i += 2) {
+ vst1q_u64(&dst[i], vld1q_u64(src + i));
}
+ // for (std::size_t i{0}; i < S; i += 8) {
+ // vst1q_u16(&dst[i], vld1q_u16(src + i));
+ // }
+ // for (std::size_t i{0}; i < S; i += 4) {
+ // vst1q_u64(&dst[i], vld1q_u64(src + i));
+ // }
}