// camenduru's picture
// thanks to openpose ❤
// 7fc5a59
#ifndef OPENPOSE_WRAPPER_WRAPPER_STRUCT_POSE_HPP
#define OPENPOSE_WRAPPER_WRAPPER_STRUCT_POSE_HPP
#include <openpose/core/common.hpp>
#include <openpose/core/enumClasses.hpp>
#include <openpose/pose/enumClasses.hpp>
#include <openpose/pose/poseParameters.hpp>
#include <openpose/pose/poseParametersRender.hpp>
#include <openpose/wrapper/enumClasses.hpp>
namespace op
{
/**
* WrapperStructPose: Pose estimation and rendering configuration struct.
* WrapperStructPose allows the user to set up the pose estimation and rendering parameters that will be used by
* the OpenPose WrapperT template and Wrapper class.
* All members are public. They are declared in the same order as the same-named constructor parameters, and the
* "Constructor default" noted on each member is the default argument of that parameter.
*/
struct OP_API WrapperStructPose
{
/**
* Whether (and how) to extract the body keypoints, expressed as a PoseMode value.
* It might be optionally disabled for very few cases (e.g., if only face keypoint detection is desired, for a
* speedup at the cost of reduced accuracy). Otherwise, it must always be enabled.
* Constructor default: PoseMode::Enabled.
*/
PoseMode poseMode;
/**
* CNN (Conv Net) input size.
* The greater it is, the slower the network runs and the more memory it needs, but the accuracy will
* potentially increase.
* Both width and height must be divisible by 16. A value of `-1` is treated specially, see
* netInputSizeDynamicBehavior.
* Constructor default: Point<int>{-1, 368}.
*/
Point<int> netInputSize;
/**
* Zero or negative means that using `-1` in netInputSize will behave as explained in its flag description.
* Otherwise, and to avoid out of memory errors, the `-1` in netInputSize will clip to this value times the
* default 16/9 aspect ratio value (i.e., 656 width for a 368 height). E.g., netInputSizeDynamicBehavior = 10
* and netInputSize = {-1x368} will clip to 6560x368 (10 x 656). Recommended 1 for small GPUs (to avoid out of
* memory errors but maximize speed) and 0 for big GPUs (for maximum accuracy and speed).
* Constructor default: 1.
*/
double netInputSizeDynamicBehavior;
/**
* Output size of the final rendered image.
* It barely affects performance compared to netInputSize.
* The final Datum.poseKeypoints can be scaled with respect to outputSize if `keypointScaleMode` is set to
* ScaleMode::OutputResolution, even if the rendering is disabled.
* Constructor default: Point<int>{-1, -1}.
*/
Point<int> outputSize;
/**
* Final scale of the Array<float> Datum.poseKeypoints and the written pose data.
* The final Datum.poseKeypoints can be scaled with respect to input size (ScaleMode::InputResolution), net
* output size (ScaleMode::NetOutputResolution), output rendering size (ScaleMode::OutputResolution), from 0 to
* 1 (ScaleMode::ZeroToOne(FixedAspect)), and -1 to 1 (ScaleMode::PlusMinusOne(FixedAspect)).
* Constructor default: ScaleMode::InputResolution.
*/
ScaleMode keypointScaleMode;
/**
* Number of GPUs processing in parallel.
* The greater, the faster the algorithm will run, but potentially higher lag will appear (which only matters
* in real-time webcam scenarios).
* Constructor default: -1.
* NOTE(review): the meaning of a negative value is not stated in this header; presumably it selects all the
* available GPUs -- confirm against the implementation.
*/
int gpuNumber;
/**
* First GPU device.
* The GPUs used will be the ones in the range: [gpuNumberStart, gpuNumberStart + gpuNumber].
* NOTE(review): as written, this closed range spans gpuNumber + 1 devices; presumably the upper bound is meant
* to be exclusive (i.e., gpuNumber devices starting at gpuNumberStart) -- confirm against the implementation.
* Constructor default: 0.
*/
int gpuNumberStart;
/**
* Number of scales to process.
* The greater it is, the slower the network runs and the more memory it needs, but the accuracy will
* potentially increase.
* This parameter is related to scaleGap: the final pose estimation will be an average of the predicted
* results for each scale.
* Constructor default: 1.
*/
int scalesNumber;
/**
* Gap between successive scales.
* The pose will be estimated for the scales in the range [1, 1-scaleGap*scalesNumber], with a gap of scaleGap.
* NOTE(review): starting at 1 with scalesNumber scales, the last scale would be 1-scaleGap*(scalesNumber-1);
* the closed upper bound above looks off by one -- confirm against the implementation.
* Constructor default: 0.25f.
*/
float scaleGap;
/**
* Whether to render the output (pose locations, body, background or PAF heat maps) with CPU or GPU.
* Select `None` for no rendering, `Cpu` or `Gpu` for CPU and GPU rendering respectively.
* Constructor default: RenderMode::Auto.
* NOTE(review): the behavior of RenderMode::Auto is not described in this header; see the RenderMode enum.
*/
RenderMode renderMode;
/**
* Pose model; it affects the number of body parts to render.
* Select PoseModel::BODY_25 for 25 body-part COCO + foot model; PoseModel::COCO_18 for 18 body-part COCO;
* PoseModel::MPI_15 for 15 body-part MPI; PoseModel::MPI_15_4 for faster version of MPI; etc.
* Constructor default: PoseModel::BODY_25.
*/
PoseModel poseModel;
/**
* Whether to blend the final results on top of the original image, or just render them on a flat background.
* Constructor default: true.
*/
bool blendOriginalFrame;
/**
* Rendering blending alpha value of the pose point locations with respect to the background image.
* Value in the range [0, 1]. 0 will only render the background, 1 will fully render the pose.
* Constructor default: POSE_DEFAULT_ALPHA_KEYPOINT.
*/
float alphaKeypoint;
/**
* Rendering blending alpha value of the heat maps (body part, background or PAF) with respect to the
* background image.
* Value in the range [0, 1]. 0 will only render the background, 1 will only render the heat map.
* Constructor default: POSE_DEFAULT_ALPHA_HEAT_MAP.
*/
float alphaHeatMap;
/**
* Element to initially render.
* Set 0 for pose, [1, #body parts] for each body part following the order on POSE_BODY_PART_MAPPING on
* `include/pose/poseParameters.hpp`, #body parts+1 for background, #body parts+2 for all body parts
* overlapped, #body parts+3 for all PAFs, and [#body parts+4, #body parts+4+#pair pairs] for each PAF
* following the order on POSE_BODY_PART_PAIRS.
* Constructor default: 0 (i.e., the pose).
*/
int defaultPartToRender;
/**
* Folder where the pose Caffe models are located.
* Constructor default: "models/".
*/
String modelFolder;
/**
* Whether and which heat maps to save on the Array<float> Datum.heatmaps.
* Use HeatMapType::Parts for body parts, HeatMapType::Background for the background, and HeatMapType::PAFs for
* the Part Affinity Fields.
* Constructor default: empty vector.
*/
std::vector<HeatMapType> heatMapTypes;
/**
* Scale of the Datum.heatmaps.
* Select ScaleMode::ZeroToOne(FixedAspect) for range [0,1], ScaleMode::PlusMinusOne(FixedAspect) for [-1,1]
* and ScaleMode::UnsignedChar for [0, 255].
* If heatMapTypes.empty(), then this parameter has no effect.
* Constructor default: ScaleMode::UnsignedChar.
*/
ScaleMode heatMapScaleMode;
/**
* Whether to add the body part candidates.
* Candidates refer to all the detected body parts, before being assembled into people.
* Constructor default: false.
*/
bool addPartCandidates;
/**
* Rendering threshold. Only estimated keypoints whose score confidences are higher than this value will be
* rendered. Note: Rendered refers only to visual display in the OpenPose basic GUI, not in the saved results.
* Generally, a high threshold (> 0.5) will only render very clear body parts; while small thresholds
* (~0.1) will also output guessed and occluded keypoints, but also more false positives (i.e., wrong
* detections).
* Constructor default: 0.05f.
*/
float renderThreshold;
/**
* Maximum number of people to be detected.
* This parameter will limit the maximum number of people detected, by keeping the people with the
* `numberPeopleMax` top scores.
* Useful if you know the exact number of people in the scene, so it can remove false positives (if all the
* people have been detected).
* However, it might also introduce false negatives by removing very small or highly occluded people.
* Constructor default: -1.
*/
int numberPeopleMax;
/**
* Whether to maximize the number of positives.
* It reduces the thresholds to accept a person candidate. It highly increases both false and true positives.
* I.e., it maximizes average recall but could harm average precision.
* Constructor default: false.
*/
bool maximizePositives;
/**
* Maximum processing frame rate.
* By default (-1), OpenPose will process frames as fast as possible.
* Example usage: If OpenPose is displaying images too quickly, this can reduce the speed so the user can
* better analyze each frame from the GUI.
*/
double fpsMax;
/**
* Final path where the pose Caffe ProtoTxt file is located.
* The combination modelFolder + protoTxtPath represents the whole path to the prototxt file.
* If empty (the constructor default), it will use the default OpenPose ProtoTxt file.
*/
String protoTxtPath;
/**
* Final path where the pose Caffe CaffeModel is located.
* The combination modelFolder + caffeModelPath represents the whole path to the caffemodel file.
* If empty (the constructor default), it will use the default OpenPose CaffeModel file.
*/
String caffeModelPath;
/**
* The image upsampling scale. 8 is the stride of the network, so the ideal value to maximize the
* speed/accuracy trade-off.
* Constructor default: 0.f.
* NOTE(review): the default is 0 rather than the "ideal" 8 mentioned above; presumably 0 lets OpenPose pick the
* value itself -- confirm against the implementation.
*/
float upsamplingRatio;
/**
* Whether to internally enable Google Logging.
* This option is only applicable if Caffe is used.
* Only disable it if the user is already calling google::InitGoogleLogging() in their own code.
* If the user disables Google Logging and does not call it themselves, then Caffe will start to pop up
* all the verbose messages.
* Constructor default: true.
*/
bool enableGoogleLogging;
/**
* Constructor of the struct.
* It takes one parameter per struct member, with the same name and in the same declaration order. Every
* parameter has a default argument holding the recommended value, so the struct can also be constructed
* without arguments.
* Since all the elements of the struct are public, they can also be manually filled after construction.
* Note: the constructor is not marked `explicit`, so a single PoseMode value is implicitly convertible to
* WrapperStructPose.
*/
WrapperStructPose(
const PoseMode poseMode = PoseMode::Enabled, const Point<int>& netInputSize = Point<int>{-1, 368},
const double netInputSizeDynamicBehavior = 1.,
const Point<int>& outputSize = Point<int>{-1, -1},
const ScaleMode keypointScaleMode = ScaleMode::InputResolution, const int gpuNumber = -1,
const int gpuNumberStart = 0, const int scalesNumber = 1, const float scaleGap = 0.25f,
const RenderMode renderMode = RenderMode::Auto, const PoseModel poseModel = PoseModel::BODY_25,
const bool blendOriginalFrame = true, const float alphaKeypoint = POSE_DEFAULT_ALPHA_KEYPOINT,
const float alphaHeatMap = POSE_DEFAULT_ALPHA_HEAT_MAP, const int defaultPartToRender = 0,
const String& modelFolder = "models/", const std::vector<HeatMapType>& heatMapTypes = {},
const ScaleMode heatMapScaleMode = ScaleMode::UnsignedChar, const bool addPartCandidates = false,
const float renderThreshold = 0.05f, const int numberPeopleMax = -1, const bool maximizePositives = false,
const double fpsMax = -1., const String& protoTxtPath = "", const String& caffeModelPath = "",
const float upsamplingRatio = 0.f, const bool enableGoogleLogging = true);
};
}
#endif // OPENPOSE_WRAPPER_WRAPPER_STRUCT_POSE_HPP