Spaces:
Build error
Build error
| namespace op | |
| { | |
// Computes, per person, the bounding box of its keypoints whose score is above `threshold`,
// plus a per-person scale factor.
//
// Work distribution: thread t (threadIdx.x) handles people t, t + blockDim.x, t + 2*blockDim.x, ...
// so every person is processed even when numberPeople is larger than blockDim.x.
//
// Inputs:
//   keypointsPtr  3 floats per part, read as (x, y, score) at 3 * (person*numberParts + part).
//   targetWidth / targetHeight  initial values for the running minimums.
// Outputs:
//   minPtr[p], maxPtr[p]                            x limits of person p
//   minPtr[numberPeople + p], maxPtr[numberPeople + p]  y limits of person p
//   scalePtr[p]   written ONLY when both maximums are non-zero (see below); otherwise untouched.
//
// A person without any keypoint above threshold keeps min = (targetWidth, targetHeight) and
// max = (0, 0), i.e. an empty box.
// NOTE(review): fastTruncateCuda is defined outside this block; presumably it clamps its first
// argument to [0.33, 1] - confirm.
__inline__ __device__ void getBoundingBoxPerPerson(
    float* maxPtr, float* minPtr, float* scalePtr, const unsigned int targetWidth, const unsigned int targetHeight,
    const float* const keypointsPtr, const int numberPeople, const int numberParts, const float threshold)
{
    // Guard against a negative count being converted into a huge unsigned bound
    const unsigned long peopleCount = (numberPeople > 0 ? (unsigned long)numberPeople : 0ul);
    // Block-stride loop: covers every person regardless of blockDim.x
    for (unsigned long person = threadIdx.x; person < peopleCount; person += blockDim.x)
    {
        float minValueX = (float)targetWidth;
        float minValueY = (float)targetHeight;
        float maxValueX = 0.f;
        float maxValueY = 0.f;
        // Accumulate limits over the keypoints that pass the score threshold
        for (auto part = 0 ; part < numberParts ; part++)
        {
            const unsigned long index = 3u * (person*numberParts + part);
            const float x = keypointsPtr[index];
            const float y = keypointsPtr[index+1];
            const float score = keypointsPtr[index+2];
            if (score > threshold)
            {
                if (x < minValueX)
                    minValueX = x;
                if (x > maxValueX)
                    maxValueX = x;
                if (y < minValueY)
                    minValueY = y;
                if (y > maxValueY)
                    maxValueY = y;
            }
        }
        // Only for boxes whose maximums moved away from 0 on both axes
        if (maxValueX != 0.f && maxValueY != 0.f)
        {
            const auto averageX = maxValueX - minValueX;
            const auto averageY = maxValueY - minValueY;
            // (averageX + averageY) / 2.f / 400.f
            scalePtr[person] = fastTruncateCuda((averageX + averageY) / 400.f, 0.33f, 1.f);
            // Enlarge the box by a fixed margin on every side
            const auto constantToAdd = 50.f;
            maxValueX += constantToAdd;
            maxValueY += constantToAdd;
            minValueX -= constantToAdd;
            minValueY -= constantToAdd;
        }
        // Layout: all x limits first, then all y limits
        const auto xIndex = person;
        const auto yIndex = peopleCount + person;
        minPtr[xIndex] = minValueX;
        minPtr[yIndex] = minValueY;
        maxPtr[xIndex] = maxValueX;
        maxPtr[yIndex] = maxValueY;
    }
}
// Note: renderKeypoints is not working for videos with many people, renderKeypointsOld speed was slightly improved instead
// NOTE(review): the shared-memory load below now strides over the block so that all 2*numberPeople
// entries are filled even when there are fewer threads than entries; whether this fully resolves
// the issue mentioned above has not been verified.
//
// Blends the keypoints (limb ellipses + part circles) of every person into pixel (x, y) of targetPtr.
//
// Expected inputs (as used by the code below):
//   minPtr / maxPtr   2*numberPeople floats each: x limits at [p], y limits at [numberPeople + p].
//   scalePtr          numberPeople floats, per-person scale factor.
//   sharedMins / sharedMaxs / sharedScaleF  scratch buffers filled here from the three arrays above
//                     and read by all threads after __syncthreads().
//   globalIdx         assumed to be the linear index of the thread inside its block - TODO confirm
//                     against the calling kernel.
//   keypointsPtr      3 floats per part: (x, y, score) at 3 * (person*numberParts + part).
//   partPairsPtr      2 part indexes per pair (A, B).
//   targetPtr         3 floats per pixel, read/written in the order b, g, r.
//   googlyEye1/2      part indexes rendered as "googly eyes"; -1 disables them.
// Every thread of the block must call this function because it contains a __syncthreads().
__inline__ __device__ void renderKeypoints(
    float* targetPtr, float* sharedMaxs, float* sharedMins, float* sharedScaleF, const float* const maxPtr,
    const float* const minPtr, const float* const scalePtr, const int globalIdx, const int x, const int y,
    const unsigned int targetWidth, const unsigned int targetHeight, const float* const keypointsPtr,
    const unsigned int* const partPairsPtr, const int numberPeople, const int numberParts,
    const int numberPartPairs, const float* const rgbColorsPtr, const int numberColors, const float radius,
    const float lineWidth, const float* const keypointScalePtr, const int numberScales, const float threshold,
    const float alphaColorToAdd, const bool blendOriginalFrame = true, const int googlyEye1 = -1,
    const int googlyEye2 = -1)
{
    // Load shared memory (block-stride so that no entry is left uninitialized)
    const int threadsPerBlock = (int)(blockDim.x * blockDim.y * blockDim.z);
    for (int slot = globalIdx; slot < 2*numberPeople; slot += threadsPerBlock)
    {
        sharedMins[slot] = minPtr[slot];
        sharedMaxs[slot] = maxPtr[slot];
        if (slot < numberPeople)
            sharedScaleF[slot] = scalePtr[slot];
    }
    __syncthreads();
    // Fill each (x,y) target pixel
    if (x < targetWidth && y < targetHeight)
    {
        const unsigned long baseIndex = 3u*(y * (unsigned long)targetWidth + x);
        float b = targetPtr[baseIndex];
        float g = targetPtr[baseIndex+1];
        float r = targetPtr[baseIndex+2];
        // Start from black instead of the original frame
        if (!blendOriginalFrame)
        {
            b = 0.f;
            g = 0.f;
            r = 0.f;
        }
        const auto lineWidthSquared = lineWidth * lineWidth;
        const auto radiusSquared = radius * radius;
        for (auto person = 0; person < numberPeople; person++)
        {
            // Make sure person x,y in the limits
            // Make sure person is not empty. Assume all joints are below threshold. Then
            // maxs = 0 and mins = width/height. So if statement would be false
            const auto xIndex = person;
            const auto yIndex = numberPeople+person;
            if (x <= sharedMaxs[xIndex] && x >= sharedMins[xIndex]
                && y <= sharedMaxs[yIndex] && y >= sharedMins[yIndex])
            {
                // Squared per-person scale, shared by limbs and circles
                const auto personScaleSquared = sharedScaleF[person] * sharedScaleF[person];
                // Part pair connections
                for (auto partPair = 0; partPair < numberPartPairs; partPair++)
                {
                    const auto partA = partPairsPtr[2*partPair];
                    const auto partB = partPairsPtr[2*partPair+1];
                    const auto indexA = person*numberParts*3 + partA*3;
                    const auto xA = keypointsPtr[indexA];
                    const auto yA = keypointsPtr[indexA + 1];
                    const auto scoreA = keypointsPtr[indexA + 2];
                    const auto indexB = person*numberParts*3 + partB*3;
                    const auto xB = keypointsPtr[indexB];
                    const auto yB = keypointsPtr[indexB + 1];
                    const auto scoreB = keypointsPtr[indexB + 2];
                    if (scoreA > threshold && scoreB > threshold)
                    {
                        // Cube of the per-part scale (single global read instead of three)
                        const auto partScale = keypointScalePtr[partB%numberScales];
                        const auto keypointScale = partScale * partScale * partScale;
                        const auto lineWidthScaled = lineWidthSquared * keypointScale;
                        // Squared semi-axes of the limb ellipse centered at the segment midpoint
                        const auto bSqrt = personScaleSquared * lineWidthScaled;
                        const auto xP = (xA + xB) / 2.f;
                        const auto yP = (yA + yB) / 2.f;
                        const auto aSqrt = (xA - xP) * (xA - xP) + (yA - yP) * (yA - yP);
                        // Rotate the pixel into the frame aligned with the limb
                        const auto angle = atan2f(yB - yA, xB - xA);
                        const auto sine = sinf(angle);
                        const auto cosine = cosf(angle);
                        const auto A = cosine * (x - xP) + sine * (y - yP);
                        const auto B = sine * (x - xP) - cosine * (y - yP);
                        // Ellipse equation: inside when the value lies in [0, 1]
                        // (a NaN produced by a degenerate limb fails both comparisons)
                        const auto judge = A * A / aSqrt + B * B / bSqrt;
                        if (judge >= 0.f && judge <= 1.f)
                            // Before used partPair vs partB
                            addColorWeighted(r, g, b, &rgbColorsPtr[(partB%numberColors)*3], alphaColorToAdd);
                    }
                }
                // Part circles
                for (auto part = 0u; part < numberParts; part++)
                {
                    const auto index = 3 * (person*numberParts + part);
                    const auto localX = keypointsPtr[index];
                    const auto localY = keypointsPtr[index + 1];
                    const auto score = keypointsPtr[index + 2];
                    if (score > threshold)
                    {
                        const auto partScale = keypointScalePtr[part%numberScales];
                        const auto keypointScale = partScale * partScale * partScale;
                        const auto radiusScaled = radiusSquared * keypointScale;
                        const auto dist2 = (x - localX) * (x - localX) + (y - localY) * (y - localY);
                        // Googly eyes
                        if (googlyEye1 == part || googlyEye2 == part)
                        {
                            const auto eyeRatio = 2.5f * sqrtf(radiusScaled);
                            const auto minr2 = personScaleSquared * (eyeRatio - 2) * (eyeRatio - 2);
                            const auto maxr2 = personScaleSquared * eyeRatio * eyeRatio;
                            if (dist2 <= maxr2)
                            {
                                // Outer ring stays black, inner disk is white
                                float colorToAdd [3] = {0.f, 0.f, 0.f};
                                if (dist2 <= minr2)
                                    for (auto& color : colorToAdd)
                                        color = 255.f;
                                // Black everywhere in the innermost region except a small
                                // off-center disk, which keeps the white color
                                if (dist2 <= minr2*0.6f)
                                {
                                    const auto dist3 = (x-4 - localX)
                                        * (x-4 - localX) + (y - localY+4) * (y - localY+4);
                                    if (dist3 > 14.0625f) // 3.75f^2
                                        for (auto& color : colorToAdd)
                                            color = 0.f;
                                }
                                // Fixed blending weight for the eyes (independent of alphaColorToAdd)
                                const auto googlyAlpha = 0.9f;
                                addColorWeighted(r, g, b, colorToAdd, googlyAlpha);
                            }
                        }
                        // Other parts
                        else
                        {
                            const auto maxr2 = personScaleSquared * radiusScaled;
                            if (0.f <= dist2 && dist2 <= maxr2)
                                addColorWeighted(r, g, b, &rgbColorsPtr[(part%numberColors)*3], alphaColorToAdd);
                        }
                    }
                }
            }
        }
        // Write the blended pixel back
        targetPtr[baseIndex] = b;
        targetPtr[baseIndex+1] = g;
        targetPtr[baseIndex+2] = r;
    }
}
// Computes the per-person bounding boxes/scales into the provided scratch buffers and then blends
// the keypoints (limb ellipses + part circles) of every person into pixel (x, y) of targetPtr.
//
// Expected inputs (as used by the code below):
//   sharedMins / sharedMaxs  one float2 (x, y limits) per person, filled here.
//   sharedScaleF             one float per person, written only for people whose box maximums
//                            are non-zero on both axes.
//   globalIdx                assumed to be the linear index of the thread inside its block -
//                            TODO confirm against the calling kernel. Thread t fills people
//                            t, t + threadsPerBlock, ... so every person gets a bounding box even
//                            when numberPeople exceeds the number of threads in the block.
//   keypointsPtr             3 floats per part: (x, y, score) at 3 * (person*numberParts + part).
//   partPairsPtr             2 part indexes per pair (A, B).
//   targetPtr                3 floats per pixel, read/written in the order b, g, r.
//   googlyEye1/2             part indexes rendered as "googly eyes"; -1 disables them.
// Every thread of the block must call this function because it contains a __syncthreads().
// NOTE(review): fastTruncateCuda and addColorWeighted are defined outside this block.
__inline__ __device__ void renderKeypointsOld(
    float* targetPtr, float2* sharedMaxs, float2* sharedMins, float* sharedScaleF, const int globalIdx,
    const int x, const int y, const unsigned int targetWidth, const unsigned int targetHeight, const float* const keypointsPtr,
    const unsigned int* const partPairsPtr, const int numberPeople, const int numberParts,
    const int numberPartPairs, const float* const rgbColorsPtr, const int numberColors, const float radius,
    const float lineWidth, const float* const keypointScalePtr, const int numberScales, const float threshold,
    const float alphaColorToAdd, const bool blendOriginalFrame = true, const int googlyEye1 = -1,
    const int googlyEye2 = -1)
{
    // Fill shared parameters (block-stride so that no person is left without a bounding box)
    const int threadsPerBlock = (int)(blockDim.x * blockDim.y * blockDim.z);
    for (int owner = globalIdx; owner < numberPeople; owner += threadsPerBlock)
    {
        float minValueX = (float)targetWidth;
        float minValueY = (float)targetHeight;
        float maxValueX = 0.f;
        float maxValueY = 0.f;
        // Accumulate limits over the keypoints that pass the score threshold
        for (auto part = 0 ; part < numberParts ; part++)
        {
            const unsigned long index = 3u * (((unsigned long)owner)*numberParts + part);
            const float partX = keypointsPtr[index];
            const float partY = keypointsPtr[index+1];
            const float score = keypointsPtr[index+2];
            if (score > threshold)
            {
                if (partX < minValueX)
                    minValueX = partX;
                if (partX > maxValueX)
                    maxValueX = partX;
                if (partY < minValueY)
                    minValueY = partY;
                if (partY > maxValueY)
                    maxValueY = partY;
            }
        }
        // Only for boxes whose maximums moved away from 0 on both axes
        if (maxValueX != 0.f && maxValueY != 0.f)
        {
            const auto averageX = maxValueX - minValueX;
            const auto averageY = maxValueY - minValueY;
            // (averageX + averageY) / 2.f / 400.f
            sharedScaleF[owner] = fastTruncateCuda((averageX + averageY) / 400.f, 0.33f, 1.f);
            // Enlarge the box by a fixed margin on every side
            const auto constantToAdd = 50.f;
            maxValueX += constantToAdd;
            maxValueY += constantToAdd;
            minValueX -= constantToAdd;
            minValueY -= constantToAdd;
        }
        sharedMins[owner].x = minValueX;
        sharedMins[owner].y = minValueY;
        sharedMaxs[owner].x = maxValueX;
        sharedMaxs[owner].y = maxValueY;
    }
    __syncthreads();
    // Fill each (x,y) target pixel
    if (x < targetWidth && y < targetHeight)
    {
        const unsigned long baseIndex = 3u*(y * (unsigned long)targetWidth + x);
        float b = targetPtr[baseIndex];
        float g = targetPtr[baseIndex+1];
        float r = targetPtr[baseIndex+2];
        // Start from black instead of the original frame
        if (!blendOriginalFrame)
        {
            b = 0.f;
            g = 0.f;
            r = 0.f;
        }
        const auto lineWidthSquared = lineWidth * lineWidth;
        const auto radiusSquared = radius * radius;
        for (auto person = 0; person < numberPeople; person++)
        {
            // Make sure person x,y in the limits
            // Make sure person is not empty. Assume all joints are below threshold. Then
            // maxs = 0 and mins = width/height. So if statement would be false
            if (x <= sharedMaxs[person].x && x >= sharedMins[person].x
                && y <= sharedMaxs[person].y && y >= sharedMins[person].y)
            {
                // Squared per-person scale, shared by limbs and circles
                const auto personScaleSquared = sharedScaleF[person] * sharedScaleF[person];
                // Part pair connections
                for (auto partPair = 0; partPair < numberPartPairs; partPair++)
                {
                    const auto partA = partPairsPtr[2*partPair];
                    const auto partB = partPairsPtr[2*partPair+1];
                    const auto indexA = person*numberParts*3 + partA*3;
                    const auto xA = keypointsPtr[indexA];
                    const auto yA = keypointsPtr[indexA + 1];
                    const auto scoreA = keypointsPtr[indexA + 2];
                    const auto indexB = person*numberParts*3 + partB*3;
                    const auto xB = keypointsPtr[indexB];
                    const auto yB = keypointsPtr[indexB + 1];
                    const auto scoreB = keypointsPtr[indexB + 2];
                    if (scoreA > threshold && scoreB > threshold)
                    {
                        // Cube of the per-part scale (single global read instead of three)
                        const auto partScale = keypointScalePtr[partB%numberScales];
                        const auto keypointScale = partScale * partScale * partScale;
                        const auto lineWidthScaled = lineWidthSquared * keypointScale;
                        // Squared semi-axes of the limb ellipse centered at the segment midpoint
                        const auto bSqrt = personScaleSquared * lineWidthScaled;
                        const auto xP = (xA + xB) / 2.f;
                        const auto yP = (yA + yB) / 2.f;
                        const auto aSqrt = (xA - xP) * (xA - xP) + (yA - yP) * (yA - yP);
                        // Rotate the pixel into the frame aligned with the limb
                        const auto angle = atan2f(yB - yA, xB - xA);
                        const auto sine = sinf(angle);
                        const auto cosine = cosf(angle);
                        const auto A = cosine * (x - xP) + sine * (y - yP);
                        const auto B = sine * (x - xP) - cosine * (y - yP);
                        // Ellipse equation: inside when the value lies in [0, 1]
                        // (a NaN produced by a degenerate limb fails both comparisons)
                        const auto judge = A * A / aSqrt + B * B / bSqrt;
                        if (judge >= 0.f && judge <= 1.f)
                            // Before used partPair vs partB
                            addColorWeighted(r, g, b, &rgbColorsPtr[(partB%numberColors)*3], alphaColorToAdd);
                    }
                }
                // Part circles
                for (auto part = 0u; part < numberParts; part++)
                {
                    const auto index = 3 * (person*numberParts + part);
                    const auto localX = keypointsPtr[index];
                    const auto localY = keypointsPtr[index + 1];
                    const auto score = keypointsPtr[index + 2];
                    if (score > threshold)
                    {
                        const auto partScale = keypointScalePtr[part%numberScales];
                        const auto keypointScale = partScale * partScale * partScale;
                        const auto radiusScaled = radiusSquared * keypointScale;
                        const auto dist2 = (x - localX) * (x - localX) + (y - localY) * (y - localY);
                        // Googly eyes
                        if (googlyEye1 == part || googlyEye2 == part)
                        {
                            const auto eyeRatio = 2.5f * sqrtf(radiusScaled);
                            const auto minr2 = personScaleSquared * (eyeRatio - 2) * (eyeRatio - 2);
                            const auto maxr2 = personScaleSquared * eyeRatio * eyeRatio;
                            if (dist2 <= maxr2)
                            {
                                // Outer ring stays black, inner disk is white
                                float colorToAdd [3] = {0.f, 0.f, 0.f};
                                if (dist2 <= minr2)
                                    for (auto& color : colorToAdd)
                                        color = 255.f;
                                // Black everywhere in the innermost region except a small
                                // off-center disk, which keeps the white color
                                if (dist2 <= minr2*0.6f)
                                {
                                    const auto dist3 = (x-4 - localX)
                                        * (x-4 - localX) + (y - localY+4) * (y - localY+4);
                                    if (dist3 > 14.0625f) // 3.75f^2
                                        for (auto& color : colorToAdd)
                                            color = 0.f;
                                }
                                // Fixed blending weight for the eyes (independent of alphaColorToAdd)
                                const auto googlyAlpha = 0.9f;
                                addColorWeighted(r, g, b, colorToAdd, googlyAlpha);
                            }
                        }
                        // Other parts
                        else
                        {
                            const auto maxr2 = personScaleSquared * radiusScaled;
                            if (0.f <= dist2 && dist2 <= maxr2)
                                addColorWeighted(r, g, b, &rgbColorsPtr[(part%numberColors)*3], alphaColorToAdd);
                        }
                    }
                }
            }
        }
        // Write the blended pixel back
        targetPtr[baseIndex] = b;
        targetPtr[baseIndex+1] = g;
        targetPtr[baseIndex+2] = r;
    }
}
| } | |