diff options
| author | Nikita Kostovsky <nikita@kostovsky.me> | 2025-03-01 01:19:51 +0100 |
|---|---|---|
| committer | Nikita Kostovsky <nikita@kostovsky.me> | 2025-03-01 01:19:51 +0100 |
| commit | a9af2d69c0a4d46dc98b6e0adaf13a7f23a24bce (patch) | |
| tree | 5eb50679833feb3a67a39fb4c64a47e8d264c503 /src/image.cpp | |
| parent | fe81095bf011786ee5303549abc8debb22cddcf8 (diff) | |
Got 370 fps with the full pixels calculation (the previous figure was measured while processing only img_width / 4 columns)
Diffstat (limited to 'src/image.cpp')
| -rw-r--r-- | src/image.cpp | 70 |
1 file changed, 52 insertions, 18 deletions
diff --git a/src/image.cpp b/src/image.cpp index e22c9bb..f0c16aa 100644 --- a/src/image.cpp +++ b/src/image.cpp @@ -16,12 +16,14 @@ uint64_t corr_elapsed_ns = 0; uint64_t max_elapsed_ns = 0; uint64_t value_elapsed_ns = 0; uint64_t rot_elapsed_ns = 0; +uint64_t pix_elapsed_ns = 0; uint64_t dropped_count = 0; // float process_column(const uint8_t (&column)[]) // float process_column(const Image::row_t &column) float process_column(const Image::column_t &column) { + // Image::column_t c = column; start_timer(process_column); QElapsedTimer t; t.start(); @@ -39,11 +41,50 @@ float process_column(const Image::column_t &column) int32_t y1 = 0; int32_t y2 = 0; + static_assert((img_height % patternSize) == 0, "img_height % patternSize should be 0"); + std::array<uint16_t, img_height / patternSize> sums; + uint16_t maxLALASum{0}; + size_t maxLALAIdx{0}; + // uint16_t sum{0}; + + for (size_t i = 0; i < img_height; i += patternSize) { + // const auto sum = std::accumulate(column.cbegin() + i, + // column.cbegin() + i + patternSize, + // 0, + // std::plus<uint16_t>()); + uint16_t sum{0}; + + for (size_t j = i; j < i + patternSize; ++j) + sum += column[j]; + // if ((i % patternSize) == 0) + // sum = 0; + // sum += column[i]; + + // if (sum > 0xff) + // std::cout << sum << ' '; + + if (sum > maxLALASum) { + maxLALASum = sum; + maxLALAIdx = i; + } + } + + // maxLALAIdx = patternSize * 32 - 14; // crash + // maxLALAIdx = patternSize * 32 - 14; // no crash + + maxLALAIdx = std::clamp(uint32_t(maxLALAIdx), + uint32_t(patternSize), + uint32_t(img_height - patternSize * 2 - 14)); + + // std::cout << maxLALAIdx << ' '; memset(correlation, 0, img_height * sizeof(correlation[0])); // memset(correlation, 0, patternSize / 2); - integralSum[0] = 0; + // integralSum[0] = 0; + // integralSum[maxLALAIdx] = 0; + memset(integralSum, 0, img_height * sizeof(integralSum[0])); - for (uint32_t i = 1; i < img_height; ++i) { + // for (uint32_t i = 1; i < img_height; ++i) { + for (uint32_t i = maxLALAIdx 
+ 1; i < maxLALAIdx + patternSize * 3; ++i) { integralSum[i] = column[i] + integralSum[i - 1]; } @@ -51,7 +92,8 @@ float process_column(const Image::column_t &column) t.restart(); // pixel * <sum of neighbours> - for (uint32_t i = 0; i < correlationSize; ++i) + // for (uint32_t i = 0; i < correlationSize; ++i) + for (uint32_t i = maxLALAIdx; i < maxLALAIdx + patternSize * 2; ++i) correlation[i + patternSize / 2] = column[i + patternSize / 2] * (integralSum[i + patternOffset] - integralSum[i]); // * (integralSum[i + patternSize] - integralSum[i]); @@ -59,7 +101,8 @@ float process_column(const Image::column_t &column) corr_elapsed_ns += t.nsecsElapsed(); t.restart(); - for (uint32_t i = 3; i < img_height - 2; ++i) { + // for (uint32_t i = 3; i < img_height - 2; ++i) { + for (uint32_t i = maxLALAIdx + 3; i < maxLALAIdx + patternSize * 3 - 2; ++i) { // p - pixel, n - neighbour // P - pixel used in sum, N - neighbour used in sum // [N P N] @@ -109,7 +152,6 @@ float process_column(const Image::column_t &column) void Image::rotate() { - QElapsedTimer t; t.start(); for (size_t j = 0; j < img_width; ++j) { @@ -121,24 +163,16 @@ void Image::rotate() rot_elapsed_ns += t.nsecsElapsed(); } -Pixels Image::pixels() const +Pixels Image::pixels() { - // auto result = std::make_shared<Pixels>(); + t.start(); + Pixels result; result.counters = counters; - start_timer(process_columns); - // std::transform(std::execution::par_unseq, - // rotated_cw.cbegin(), - // rotated_cw.cend(), - // result->pixels.begin(), - // [](const auto &column) -> float { return process_column(column); }); + std::transform(rotated_cw.cbegin(), rotated_cw.cend(), result.pixels.begin(), process_column); - // #pragma omp chunk - // #pragma omp parallel for - for (size_t i = 0; i < img_width / 4; i++) { - result.pixels[i] = process_column(rotated_cw[i]); - } + pix_elapsed_ns += t.nsecsElapsed(); return result; } |
