| #include <openpose_private/tracking/pyramidalLK.hpp> |
| #ifdef WITH_SSE4 |
| #include <emmintrin.h> |
| #include "smmintrin.h" |
| #endif |
|
|
| #ifdef WITH_AVX |
| #include <immintrin.h> |
| #endif |
|
|
| #include <iostream> |
| #include <opencv2/core/core.hpp> |
| #include <opencv2/imgproc/imgproc.hpp> |
| #include <opencv2/video/video.hpp> |
| #include <openpose/utilities/profiler.hpp> |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
|
|
// Status codes returned as `char` by the helper functions in this file.
// 0 means success; any non-zero value identifies the failure.
| #define SUCCESS 0 |
// Intended for patch-size validation failures.
| #define INVALID_PATCH_SIZE 1 |
// The requested patch window would extend past the image borders.
| #define OUT_OF_FRAME 2 |
// The determinant of the 2x2 Lucas-Kanade system is (almost) zero.
| #define ZERO_DENOMINATOR 3 |
// An exception was caught while running the function.
| #define UNDEFINED_ERROR 4 |
|
|
| namespace op |
| { |
| #ifdef WITH_SSE4 |
// Dot product of two float vectors using the SSE4.1 `_mm_dp_ps` instruction.
//
// @param av First operand.
// @param bv Second operand.
// @return Sum of av[i] * bv[i] over the common prefix of both vectors (0 for empty input).
//
// The vectors are read in place with unaligned loads, so no temporary aligned copies
// (and therefore no allocation, no alignment requirement and no size-mismatch overflow)
// are involved.
float sse_dot_product(std::vector<float> &av, std::vector<float> &bv)
{
    // Only the common prefix is multiplied, so a shorter vector is never over-read.
    const size_t count = (av.size() < bv.size() ? av.size() : bv.size());
    const size_t blocks = count / 4;
    const float* a = av.data();
    const float* b = bv.data();

    // Mask 255 (0xFF): multiply all four lanes and broadcast their sum to every lane.
    __m128 acc = _mm_setzero_ps();
    for (size_t block = 0; block < blocks; block++)
    {
        const __m128 va = _mm_loadu_ps(a + 4 * block);
        const __m128 vb = _mm_loadu_ps(b + 4 * block);
        acc = _mm_add_ps(_mm_dp_ps(va, vb, 255), acc);
    }

    // Every lane holds the same running total, lane 0 is enough.
    float total = _mm_cvtss_f32(acc);

    // Scalar tail for the last (count % 4) elements.
    for (size_t i = 4 * blocks; i < count; i++)
        total += (a[i] * b[i]);

    return total;
}
| #endif |
|
|
| |
| #if defined (WITH_AVX) && !defined (_WIN32) |
// Dot product of two float vectors using the AVX `_mm256_dp_ps` instruction.
//
// @param av First operand.
// @param bv Second operand.
// @return Sum of av[i] * bv[i] over the common prefix of both vectors (0 for empty input).
//
// The vectors are read in place with unaligned loads, so no temporary aligned copies
// (and therefore no allocation, no alignment requirement and no size-mismatch overflow)
// are involved.
float avx_dot_product(std::vector<float> &av, std::vector<float> &bv)
{
    // Only the common prefix is multiplied, so a shorter vector is never over-read.
    const size_t count = (av.size() < bv.size() ? av.size() : bv.size());
    const size_t blocks = count / 8;
    const float* a = av.data();
    const float* b = bv.data();

    // _mm256_dp_ps works independently on each 128-bit half: with mask 255 the sum of
    // elements 0-3 is broadcast over lanes 0-3 and the sum of elements 4-7 over lanes 4-7.
    __m256 acc = _mm256_setzero_ps();
    for (size_t block = 0; block < blocks; block++)
    {
        const __m256 va = _mm256_loadu_ps(a + 8 * block);
        const __m256 vb = _mm256_loadu_ps(b + 8 * block);
        acc = _mm256_add_ps(_mm256_dp_ps(va, vb, 255), acc);
    }

    float lanes[8];
    _mm256_storeu_ps(lanes, acc);

    // Scalar tail for the last (count % 8) elements.
    for (size_t i = 8 * blocks; i < count; i++)
        lanes[0] += (a[i] * b[i]);

    // One partial sum per 128-bit half.
    return lanes[0] + lanes[4];
}
| #endif |
|
|
| char computeLK(cv::Point2f& delta, std::vector<float>& ix, |
| std::vector<float>& iy, std::vector<float>& it) |
| { |
| try |
| { |
| |
| #if defined (WITH_AVX) && !defined (_WIN32) |
| const float sumXX = avx_dot_product(ix,ix); |
| const float sumYY = avx_dot_product(iy,iy); |
| const float sumXY = avx_dot_product(ix,iy); |
| const float sumXT = avx_dot_product(ix,it); |
| const float sumYT = avx_dot_product(iy,it); |
| #elif defined (WITH_SSE4) |
| const float sumXX = sse_dot_product(ix,ix); |
| const float sumYY = sse_dot_product(iy,iy); |
| const float sumXY = sse_dot_product(ix,iy); |
| const float sumXT = sse_dot_product(ix,it); |
| const float sumYT = sse_dot_product(iy,it); |
| #else |
| auto sumXX = 0.f; |
| auto sumYY = 0.f; |
| auto sumXT = 0.f; |
| auto sumYT = 0.f; |
| auto sumXY = 0.f; |
| for (auto i = 0u; i < ix.size(); i++) |
| { |
| sumXX += ix[i] * ix[i]; |
| sumYY += iy[i] * iy[i]; |
| sumXY += ix[i] * iy[i]; |
| sumXT += ix[i] * it[i]; |
| sumYT += iy[i] * it[i]; |
| } |
| #endif |
|
|
| |
| const auto den = (sumXX*sumYY) - (sumXY * sumXY); |
|
|
| if (std::abs(den) < 1e-9f) |
| return ZERO_DENOMINATOR; |
|
|
| const auto numU = (-1.f * sumYY * sumXT) + (sumXY * sumYT); |
| const auto numV = (-1.f * sumXX * sumYT) + (sumXT * sumXY); |
|
|
| delta.x = numU / den; |
| delta.y = numV / den; |
|
|
| return SUCCESS; |
| } |
| catch (const std::exception& e) |
| { |
| error(e.what(), __LINE__, __FUNCTION__, __FILE__); |
| return UNDEFINED_ERROR; |
| } |
| } |
|
|
| void getVectors(std::vector<float>& ix, std::vector<float>& iy, std::vector<float>& it, |
| const std::vector<std::vector<float>>& patch, const std::vector<std::vector<float>>& patchIt, |
| const int patchSize) |
| { |
| try |
| { |
| |
| const auto numberElements = patchSize*patchSize; |
| |
| ix.resize(numberElements); |
| iy.resize(numberElements); |
| for (auto i = 1; i <= patchSize; i++) |
| { |
| const auto baseIndex = (i-1)*patchSize; |
| for (auto j = 1; j <= patchSize; j++) |
| { |
| ix[baseIndex+j-1] = (patch[i][j+1] - patch[i][j-1])/2.f; |
| iy[baseIndex+j-1] = (patch[i+1][j] - patch[i-1][j])/2.f; |
| } |
| } |
| |
| it.resize(numberElements); |
| for (auto i = 0; i < patchSize; i++) |
| { |
| const auto baseIndex = i*patchSize; |
| for (auto j = 0; j < patchSize; j++) |
| it[baseIndex+j] = patchIt[i][j]; |
| } |
| } |
| catch (const std::exception& e) |
| { |
| error(e.what(), __LINE__, __FUNCTION__, __FILE__); |
| } |
| } |
|
|
| char extractPatch(std::vector< std::vector<float>>& patch, const int x, const int y, const int patchSize, |
| const cv::Mat& image) |
| { |
| try |
| { |
| int radix = patchSize / 2; |
|
|
| if ( ((x - radix) < 0) || |
| ((x + radix) >= image.cols) || |
| ((y - radix) < 0) || |
| ((y + radix) >= image.rows)) |
| return OUT_OF_FRAME; |
|
|
| for (auto i = -radix; i <= radix; i++) |
| for (auto j = -radix; j <= radix; j++) |
| patch[i+radix][j+radix] = image.at<float>(y+i,x+j); |
|
|
| return SUCCESS; |
| } |
| catch (const std::exception& e) |
| { |
| error(e.what(), __LINE__, __FUNCTION__, __FILE__); |
| return UNDEFINED_ERROR; |
| } |
| } |
|
|
| char extractPatchIt(std::vector<std::vector<float>>& patch, const int xI, const int yI, const int xJ, |
| const int yJ, const cv::Mat& I, const cv::Mat& J, const int patchSize) |
| { |
| try |
| { |
| const int radix = patchSize / 2; |
|
|
| if (((xI - radix) < 0) || |
| ((xI + radix) >= I.cols) || |
| ((yI - radix) < 0) || |
| ((yI + radix) >= I.rows)) |
| return OUT_OF_FRAME; |
|
|
| if (((xJ - radix) < 0) || |
| ((xJ + radix) >= J.cols) || |
| ((yJ - radix) < 0) || |
| ((yJ + radix) >= J.rows)) |
| return OUT_OF_FRAME; |
|
|
| for (auto i = -radix; i <= radix; i++) |
| for (auto j = -radix; j <= radix; j++) |
| patch[i+radix][j+radix] = J.at<float>(yJ+i,xJ+j) - I.at<float>(yI+i,xI+j); |
|
|
| return SUCCESS; |
| } |
| catch (const std::exception& e) |
| { |
| error(e.what(), __LINE__, __FUNCTION__, __FILE__); |
| return UNDEFINED_ERROR; |
| } |
| } |
|
|
| |
| void buildGaussianPyramid(std::vector<cv::Mat>& pyramidImages, const cv::Mat& image, const int levels) |
| { |
| try |
| { |
| pyramidImages.clear(); |
| pyramidImages.emplace_back(image); |
|
|
| for (auto i = 0; i < levels - 1; i++) |
| { |
| cv::Mat pyredImage; |
| cv::pyrDown(pyramidImages.back(), pyredImage); |
| pyramidImages.emplace_back(pyredImage); |
| } |
| } |
| catch (const std::exception& e) |
| { |
| error(e.what(), __LINE__, __FUNCTION__, __FILE__); |
| } |
| } |
|
|
| cv::Point2f pyramidIteration(char& status, const cv::Point2f& pointI, const cv::Point2f& pointJ, const cv::Mat& I, |
| const cv::Mat& J, const int patchSize = 5) |
| { |
| try |
| { |
| cv::Point2f result; |
|
|
| |
| std::vector<std::vector<float>> patch(patchSize + 2, std::vector<float>(patchSize + 2)); |
| std::vector<std::vector<float>> patchIt(patchSize, std::vector<float>(patchSize)); |
|
|
| status = extractPatch(patch, (int)pointI.x,(int)pointI.y, patchSize + 2, I); |
| |
| |
|
|
| status = extractPatchIt(patchIt, int(pointI.x), int(pointI.y), int(pointJ.x), int(pointJ.y), I, J, patchSize); |
|
|
| |
| |
|
|
| |
| std::vector<float> ix, iy, it; |
| getVectors(ix, iy, it, patch, patchIt, patchSize); |
|
|
| |
| cv::Point2f delta; |
| status = computeLK(delta, ix, iy, it); |
|
|
| |
| |
|
|
| result = pointJ + delta; |
|
|
| return result; |
| } |
| catch (const std::exception& e) |
| { |
| error(e.what(), __LINE__, __FUNCTION__, __FILE__); |
| return cv::Point2f{}; |
| } |
| } |
|
|
| void pyramidalLKCpu(std::vector<cv::Point2f>& coordI, std::vector<cv::Point2f>& coordJ, |
| std::vector<cv::Mat>& pyramidImagesPrevious, std::vector<cv::Mat>& pyramidImagesCurrent, |
| std::vector<char>& status, const cv::Mat& imagePrevious, |
| const cv::Mat& imageCurrent, const int levels, const int patchSize) |
| { |
| try |
| { |
| |
| if (coordI.size() == 0) |
| return; |
|
|
| std::vector<cv::Point2f> I; |
| I.assign(coordI.begin(), coordI.end()); |
|
|
| const auto rescaleScale = 1.0/(float)(1<<(levels-1)); |
| for (auto& coordenate : I) |
| coordenate *= rescaleScale; |
|
|
| coordJ.clear(); |
| coordJ.assign(I.begin(), I.end()); |
|
|
| if (pyramidImagesPrevious.empty()) |
| buildGaussianPyramid(pyramidImagesPrevious, imagePrevious, levels); |
| if (pyramidImagesCurrent.empty()) |
| buildGaussianPyramid(pyramidImagesCurrent, imageCurrent, levels); |
|
|
|
|
| |
| for (auto i = 0u; i < coordI.size(); i++) |
| { |
| for (auto l = levels - 1; l >= 0; l--) |
| { |
| char status_point = 0; |
| cv::Point2f result; |
|
|
| result = pyramidIteration(status_point, I[i], coordJ[i],pyramidImagesPrevious[l], |
| pyramidImagesCurrent[l], patchSize); |
| if (status_point) |
| status[i] = status_point; |
|
|
| coordJ[i] = result; |
|
|
| if (l == 0) |
| break; |
|
|
| I[i] *= 2.f; |
| coordJ[i] *= 2.f; |
| } |
| } |
| } |
| catch (const std::exception& e) |
| { |
| error(e.what(), __LINE__, __FUNCTION__, __FILE__); |
| } |
| } |
|
|
| |
| void pyramidalLKOcv(std::vector<cv::Point2f>& coordI, std::vector<cv::Point2f>& coordJ, |
| std::vector<cv::Mat>& pyramidImagesPrevious, std::vector<cv::Mat>& pyramidImagesCurrent, |
| std::vector<char>& status, const cv::Mat& imagePrevious, |
| const cv::Mat& imageCurrent, const int levels, const int patchSize, const bool initFlow) |
| { |
| try |
| { |
| |
| if (coordI.size() != 0) |
| { |
| |
|
|
| std::vector<cv::Point2f> I; |
| I.assign(coordI.begin(), coordI.end()); |
|
|
| if (!initFlow) |
| { |
| coordJ.clear(); |
| coordJ.assign(I.begin(), I.end()); |
| } |
|
|
| const cv::Mat& imagePrevGray = imagePrevious; |
| const cv::Mat& imageCurrGray = imageCurrent; |
|
|
| |
| if (pyramidImagesPrevious.empty()) |
| cv::buildOpticalFlowPyramid(imagePrevGray, pyramidImagesPrevious, cv::Size{patchSize,patchSize}, levels); |
| if (pyramidImagesCurrent.empty()) |
| cv::buildOpticalFlowPyramid(imageCurrGray, pyramidImagesCurrent, cv::Size{patchSize,patchSize}, levels); |
|
|
| |
| std::vector<uchar> st; |
| std::vector<float> err; |
| if (initFlow) |
| cv::calcOpticalFlowPyrLK(pyramidImagesPrevious, pyramidImagesCurrent, coordI, coordJ, st, err, |
| cv::Size{patchSize,patchSize},levels, |
| cv::TermCriteria(cv::TermCriteria::COUNT+cv::TermCriteria::EPS,30,0.01), |
| cv::OPTFLOW_USE_INITIAL_FLOW); |
| else |
| cv::calcOpticalFlowPyrLK(pyramidImagesPrevious, pyramidImagesCurrent, coordI, coordJ, st, err, |
| cv::Size{patchSize,patchSize},levels); |
|
|
| |
| for (size_t i=0; i<status.size(); i++) |
| { |
| const double distance = std::sqrt( |
| std::pow(coordI[i].x-coordJ[i].x,2) + std::pow(coordI[i].y-coordJ[i].y,2)); |
|
|
| |
| if (st[i] != (status[i])) |
| if (distance <= patchSize*2) |
| st[i] = 1; |
|
|
| |
| if (distance > patchSize*2) |
| st[i] = 0; |
| } |
|
|
| |
| if (st.size() != status.size()) |
| error("st.size() != status.size().", __LINE__, __FUNCTION__, __FILE__); |
| for (size_t i=0; i<status.size(); i++) |
| { |
| |
| if (status[i] != 0) |
| { |
| if (st[i] == 0) |
| st[i] = 0; |
| else if (st[i] == 1) |
| st[i] = 1; |
| else |
| error("Wrong CV Type.", __LINE__, __FUNCTION__, __FILE__); |
| status[i] = st[i]; |
| } |
| } |
|
|
| |
| |
|
|
| |
| |
| |
| |
| } |
| } |
| catch (const std::exception& e) |
| { |
| error(e.what(), __LINE__, __FUNCTION__, __FILE__); |
| } |
| } |
| } |
|
|