openpose

Model card Files Files and versions

xet

Community

openpose / src /openpose /tracking /pyramidalLK.cpp

JusJ7

Duplicate from camenduru/openpose

b7f741c 20 days ago

raw

history blame contribute delete

16.3 kB

	#include <openpose_private/tracking/pyramidalLK.hpp>
	#ifdef WITH_SSE4
	#include <emmintrin.h>
	#include "smmintrin.h"
	#endif

	#ifdef WITH_AVX
	#include <immintrin.h>
	#endif

	#include <iostream>
	#include <opencv2/core/core.hpp> // cv::Point2f, cv::Mat
	#include <opencv2/imgproc/imgproc.hpp> // cv::pyrDown
	#include <opencv2/video/video.hpp> // cv::buildOpticalFlowPyramid
	#include <openpose/utilities/profiler.hpp>

	//#define DEBUG
	// #ifdef DEBUG
	// // When debugging is enabled, these form aliases to useful functions
	// #define dbg_printf(...) printf(__VA_ARGS__);
	// #else
	// // When debugging is disabled, no code gets generated for these
	// #define dbg_printf(...)
	// #endif

	#define SUCCESS 0
	#define INVALID_PATCH_SIZE 1
	#define OUT_OF_FRAME 2
	#define ZERO_DENOMINATOR 3
	#define UNDEFINED_ERROR 4

	namespace op
	{
	#ifdef WITH_SSE4
	float sse_dot_product(std::vector<float> &av, std::vector<float> &bv)
	{

	/* Get SIMD-vector pointers to the start of each vector */
	unsigned int niters = av.size() / 4;
	float zeros[] = {0.0, 0.0, 0.0, 0.0};

	float a = (float ) aligned_alloc(16, av.size()*sizeof(float));
	float b = (float ) aligned_alloc(16, av.size()*sizeof(float));
	memcpy(a,&av[0],av.size()*sizeof(float));
	memcpy(b,&bv[0],bv.size()*sizeof(float));

	__m128 ptrA = (__m128) &a[0], ptrB = (__m128) &b[0];
	__m128 res = _mm_load_ps(zeros);

	/* Do SIMD dot product */
	for (unsigned int i = 0; i < niters; i++, ptrA++,ptrB++)
	res = _mm_add_ps(_mm_dp_ps(ptrA, ptrB, 255), res);


	/* Get result back from the SIMD vector */
	float fres[4];
	_mm_store_ps (fres, res);
	int q = 4 * niters;

	for (unsigned int i = 0; i < av.size() % 4; i++)
	fres[0] += (a[i+q]*b[i+q]);

	free(a);
	free(b);

	return fres[0];
	}
	#endif

	// Function aligned_alloc requires C++17 in VS
	#if defined (WITH_AVX) && !defined (_WIN32)
	float avx_dot_product(std::vector<float> &av, std::vector<float> &bv)
	{
	/* Get SIMD-vector pointers to the start of each vector */
	const size_t niters = av.size() / 8;

	float a = (float )aligned_alloc(32, av.size() * sizeof(float));
	float b = (float )aligned_alloc(32, av.size() * sizeof(float));
	memcpy(a, &av[0], av.size() * sizeof(float));
	memcpy(b, &bv[0], bv.size() * sizeof(float));

	__m256 ptrA = (__m256) &a[0], ptrB = (__m256) &b[0];
	__m256 res = _mm256_set1_ps(0.0);

	for (size_t i = 0; i < niters; i++, ptrA++, ptrB++)
	res = _mm256_add_ps(_mm256_dp_ps(ptrA, ptrB, 255), res);

	/* Get result back from the SIMD vector */
	float fres[8];
	_mm256_storeu_ps(fres, res);
	const size_t q = 8 * niters;

	for (size_t i = 0; i < av.size() % 8; i++)
	fres[0] += (a[i + q] * b[i + q]);

	free(a);
	free(b);

	return fres[0] + fres[4];
	}
	#endif

	char computeLK(cv::Point2f& delta, std::vector<float>& ix,
	std::vector<float>& iy, std::vector<float>& it)
	{
	try
	{
	// Calculate sums
	#if defined (WITH_AVX) && !defined (_WIN32)
	const float sumXX = avx_dot_product(ix,ix);
	const float sumYY = avx_dot_product(iy,iy);
	const float sumXY = avx_dot_product(ix,iy);
	const float sumXT = avx_dot_product(ix,it);
	const float sumYT = avx_dot_product(iy,it);
	#elif defined (WITH_SSE4)
	const float sumXX = sse_dot_product(ix,ix);
	const float sumYY = sse_dot_product(iy,iy);
	const float sumXY = sse_dot_product(ix,iy);
	const float sumXT = sse_dot_product(ix,it);
	const float sumYT = sse_dot_product(iy,it);
	#else
	auto sumXX = 0.f;
	auto sumYY = 0.f;
	auto sumXT = 0.f;
	auto sumYT = 0.f;
	auto sumXY = 0.f;
	for (auto i = 0u; i < ix.size(); i++)
	{
	sumXX += ix[i] * ix[i];
	sumYY += iy[i] * iy[i];
	sumXY += ix[i] * iy[i];
	sumXT += ix[i] * it[i];
	sumYT += iy[i] * it[i];
	}
	#endif

	// Get numerator and denominator of u and v
	const auto den = (sumXXsumYY) - (sumXY sumXY);

	if (std::abs(den) < 1e-9f)
	return ZERO_DENOMINATOR;

	const auto numU = (-1.f * sumYY * sumXT) + (sumXY * sumYT);
	const auto numV = (-1.f * sumXX * sumYT) + (sumXT * sumXY);

	delta.x = numU / den;
	delta.y = numV / den;

	return SUCCESS;
	}
	catch (const std::exception& e)
	{
	error(e.what(), __LINE__, __FUNCTION__, __FILE__);
	return UNDEFINED_ERROR;
	}
	}

	void getVectors(std::vector<float>& ix, std::vector<float>& iy, std::vector<float>& it,
	const std::vector<std::vector<float>>& patch, const std::vector<std::vector<float>>& patchIt,
	const int patchSize)
	{
	try
	{
	// Performance: resize faster than emplace_back/push_back
	const auto numberElements = patchSize*patchSize;
	// Get `ix` and `iy`
	ix.resize(numberElements);
	iy.resize(numberElements);
	for (auto i = 1; i <= patchSize; i++)
	{
	const auto baseIndex = (i-1)*patchSize;
	for (auto j = 1; j <= patchSize; j++)
	{
	ix[baseIndex+j-1] = (patch[i][j+1] - patch[i][j-1])/2.f;
	iy[baseIndex+j-1] = (patch[i+1][j] - patch[i-1][j])/2.f;
	}
	}
	// Get `it`
	it.resize(numberElements);
	for (auto i = 0; i < patchSize; i++)
	{
	const auto baseIndex = i*patchSize;
	for (auto j = 0; j < patchSize; j++)
	it[baseIndex+j] = patchIt[i][j];
	}
	}
	catch (const std::exception& e)
	{
	error(e.what(), __LINE__, __FUNCTION__, __FILE__);
	}
	}

	char extractPatch(std::vector< std::vector<float>>& patch, const int x, const int y, const int patchSize,
	const cv::Mat& image)
	{
	try
	{
	int radix = patchSize / 2;

	if ( ((x - radix) < 0) \|\|
	((x + radix) >= image.cols) \|\|
	((y - radix) < 0) \|\|
	((y + radix) >= image.rows))
	return OUT_OF_FRAME;

	for (auto i = -radix; i <= radix; i++)
	for (auto j = -radix; j <= radix; j++)
	patch[i+radix][j+radix] = image.at<float>(y+i,x+j);

	return SUCCESS;
	}
	catch (const std::exception& e)
	{
	error(e.what(), __LINE__, __FUNCTION__, __FILE__);
	return UNDEFINED_ERROR;
	}
	}

	char extractPatchIt(std::vector<std::vector<float>>& patch, const int xI, const int yI, const int xJ,
	const int yJ, const cv::Mat& I, const cv::Mat& J, const int patchSize)
	{
	try
	{
	const int radix = patchSize / 2;

	if (((xI - radix) < 0) \|\|
	((xI + radix) >= I.cols) \|\|
	((yI - radix) < 0) \|\|
	((yI + radix) >= I.rows))
	return OUT_OF_FRAME;

	if (((xJ - radix) < 0) \|\|
	((xJ + radix) >= J.cols) \|\|
	((yJ - radix) < 0) \|\|
	((yJ + radix) >= J.rows))
	return OUT_OF_FRAME;

	for (auto i = -radix; i <= radix; i++)
	for (auto j = -radix; j <= radix; j++)
	patch[i+radix][j+radix] = J.at<float>(yJ+i,xJ+j) - I.at<float>(yI+i,xI+j);

	return SUCCESS;
	}
	catch (const std::exception& e)
	{
	error(e.what(), __LINE__, __FUNCTION__, __FILE__);
	return UNDEFINED_ERROR;
	}
	}

	// Given an OpenCV image, build a gaussian pyramid of size 'levels'
	void buildGaussianPyramid(std::vector<cv::Mat>& pyramidImages, const cv::Mat& image, const int levels)
	{
	try
	{
	pyramidImages.clear();
	pyramidImages.emplace_back(image);

	for (auto i = 0; i < levels - 1; i++)
	{
	cv::Mat pyredImage;
	cv::pyrDown(pyramidImages.back(), pyredImage);
	pyramidImages.emplace_back(pyredImage);
	}
	}
	catch (const std::exception& e)
	{
	error(e.what(), __LINE__, __FUNCTION__, __FILE__);
	}
	}

	cv::Point2f pyramidIteration(char& status, const cv::Point2f& pointI, const cv::Point2f& pointJ, const cv::Mat& I,
	const cv::Mat& J, const int patchSize = 5)
	{
	try
	{
	cv::Point2f result;

	// Extract a patch around the image
	std::vector<std::vector<float>> patch(patchSize + 2, std::vector<float>(patchSize + 2));
	std::vector<std::vector<float>> patchIt(patchSize, std::vector<float>(patchSize));

	status = extractPatch(patch, (int)pointI.x,(int)pointI.y, patchSize + 2, I);
	// if (status)
	// return result;

	status = extractPatchIt(patchIt, int(pointI.x), int(pointI.y), int(pointJ.x), int(pointJ.y), I, J, patchSize);

	// if (status)
	// return result;

	// Get the Ix, Iy and It vectors
	std::vector<float> ix, iy, it;
	getVectors(ix, iy, it, patch, patchIt, patchSize);

	// Calculate optical flow
	cv::Point2f delta;
	status = computeLK(delta, ix, iy, it);

	// if (status)
	// return result;

	result = pointJ + delta;

	return result;
	}
	catch (const std::exception& e)
	{
	error(e.what(), __LINE__, __FUNCTION__, __FILE__);
	return cv::Point2f{};
	}
	}

	void pyramidalLKCpu(std::vector<cv::Point2f>& coordI, std::vector<cv::Point2f>& coordJ,
	std::vector<cv::Mat>& pyramidImagesPrevious, std::vector<cv::Mat>& pyramidImagesCurrent,
	std::vector<char>& status, const cv::Mat& imagePrevious,
	const cv::Mat& imageCurrent, const int levels, const int patchSize)
	{
	try
	{
	// Empty coordinates
	if (coordI.size() == 0)
	return;

	std::vector<cv::Point2f> I;
	I.assign(coordI.begin(), coordI.end());

	const auto rescaleScale = 1.0/(float)(1<<(levels-1));
	for (auto& coordenate : I)
	coordenate *= rescaleScale;

	coordJ.clear();
	coordJ.assign(I.begin(), I.end());

	if (pyramidImagesPrevious.empty())
	buildGaussianPyramid(pyramidImagesPrevious, imagePrevious, levels);
	if (pyramidImagesCurrent.empty())
	buildGaussianPyramid(pyramidImagesCurrent, imageCurrent, levels);


	// Process all pixel requests
	for (auto i = 0u; i < coordI.size(); i++)
	{
	for (auto l = levels - 1; l >= 0; l--)
	{
	char status_point = 0;
	cv::Point2f result;

	result = pyramidIteration(status_point, I[i], coordJ[i],pyramidImagesPrevious[l],
	pyramidImagesCurrent[l], patchSize);
	if (status_point)
	status[i] = status_point;

	coordJ[i] = result;

	if (l == 0)
	break;

	I[i] *= 2.f;
	coordJ[i] *= 2.f;
	}
	}
	}
	catch (const std::exception& e)
	{
	error(e.what(), __LINE__, __FUNCTION__, __FILE__);
	}
	}

	// old, nwekp, pyramidPrev, pyramidCurr, status, imagePrev, imageCurr
	void pyramidalLKOcv(std::vector<cv::Point2f>& coordI, std::vector<cv::Point2f>& coordJ,
	std::vector<cv::Mat>& pyramidImagesPrevious, std::vector<cv::Mat>& pyramidImagesCurrent,
	std::vector<char>& status, const cv::Mat& imagePrevious,
	const cv::Mat& imageCurrent, const int levels, const int patchSize, const bool initFlow)
	{
	try
	{
	// Empty coordinates
	if (coordI.size() != 0)
	{
	// const auto profilerKey = Profiler::timerInit(__LINE__, __FUNCTION__, __FILE__);

	std::vector<cv::Point2f> I;
	I.assign(coordI.begin(), coordI.end());

	if (!initFlow)
	{
	coordJ.clear();
	coordJ.assign(I.begin(), I.end());
	}

	const cv::Mat& imagePrevGray = imagePrevious;
	const cv::Mat& imageCurrGray = imageCurrent;

	// Compute Pyramids
	if (pyramidImagesPrevious.empty())
	cv::buildOpticalFlowPyramid(imagePrevGray, pyramidImagesPrevious, cv::Size{patchSize,patchSize}, levels);
	if (pyramidImagesCurrent.empty())
	cv::buildOpticalFlowPyramid(imageCurrGray, pyramidImagesCurrent, cv::Size{patchSize,patchSize}, levels);

	// Compute Flow
	std::vector<uchar> st;
	std::vector<float> err;
	if (initFlow)
	cv::calcOpticalFlowPyrLK(pyramidImagesPrevious, pyramidImagesCurrent, coordI, coordJ, st, err,
	cv::Size{patchSize,patchSize},levels,
	cv::TermCriteria(cv::TermCriteria::COUNT+cv::TermCriteria::EPS,30,0.01),
	cv::OPTFLOW_USE_INITIAL_FLOW);
	else
	cv::calcOpticalFlowPyrLK(pyramidImagesPrevious, pyramidImagesCurrent, coordI, coordJ, st, err,
	cv::Size{patchSize,patchSize},levels);

	// Check distance
	for (size_t i=0; i<status.size(); i++)
	{
	const double distance = std::sqrt(
	std::pow(coordI[i].x-coordJ[i].x,2) + std::pow(coordI[i].y-coordJ[i].y,2));

	// Check if lk loss track, if distance is close keep it
	if (st[i] != (status[i]))
	if (distance <= patchSize*2)
	st[i] = 1;

	// If distance too far discard it
	if (distance > patchSize*2)
	st[i] = 0;
	}

	// Stupid hack because apparently in this tracker 0 means 1 and 1 is 0 wtf
	if (st.size() != status.size())
	error("st.size() != status.size().", __LINE__, __FUNCTION__, __FILE__);
	for (size_t i=0; i<status.size(); i++)
	{
	// If its 0 to begin with (Because OP lost track?)
	if (status[i] != 0)
	{
	if (st[i] == 0)
	st[i] = 0;
	else if (st[i] == 1)
	st[i] = 1;
	else
	error("Wrong CV Type.", __LINE__, __FUNCTION__, __FILE__);
	status[i] = st[i];
	}
	}

	// Profiler::timerEnd(profilerKey);
	// Profiler::printAveragedTimeMsEveryXIterations(profilerKey, __LINE__, __FUNCTION__, __FILE__, 5);

	// // Debug
	// std::cout << "LK: ";
	// for (int i=0; i<status.size(); i++) std::cout << !(int)status[i];
	// std::cout << std::endl;
	}
	}
	catch (const std::exception& e)
	{
	error(e.what(), __LINE__, __FUNCTION__, __FILE__);
	}
	}
	}