#ifndef OPENPOSE_WRAPPER_WRAPPER_STRUCT_POSE_HPP
#define OPENPOSE_WRAPPER_WRAPPER_STRUCT_POSE_HPP

#include <openpose/core/common.hpp>
#include <openpose/core/enumClasses.hpp>
#include <openpose/pose/enumClasses.hpp>
#include <openpose/pose/poseParameters.hpp>
#include <openpose/pose/poseParametersRender.hpp>
#include <openpose/wrapper/enumClasses.hpp>

namespace op
{
    /**
     * WrapperStructPose: Pose estimation and rendering configuration struct.
     * WrapperStructPose allows the user to set up the pose estimation and rendering parameters that will be used for
     * the OpenPose WrapperT template and Wrapper class.
     */
    struct OP_API WrapperStructPose
    {
        /**
         * Whether to extract body keypoints.
         * It might optionally be disabled in a very few cases (e.g., if only face keypoint detection is desired,
         * trading some accuracy for speed). Otherwise, it must always be enabled.
         */
        PoseMode poseMode;

        /**
         * CNN (Conv Net) input size.
         * The greater it is, the slower the processing and the more memory required, but accuracy will potentially
         * increase.
         * Both width and height must be divisible by 16.
         */
        Point<int> netInputSize;

        /**
         * Zero or negative means that using `-1` in netInputSize will behave as explained in its flag description.
         * Otherwise, and to avoid out of memory errors, the `-1` in netInputSize will clip to this value times the
         * default 16/9 aspect ratio value (i.e., 656 width for a 368 height). E.g., netInputSizeDynamicBehavior = 10
         * and netInputSize = {-1x368} will clip to 6560x368 (10 x 656). Recommended 1 for small GPUs (to avoid out of
         * memory errors but maximize speed) and 0 for big GPUs (for maximum accuracy and speed).
         */
        double netInputSizeDynamicBehavior;
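        // Worked example (illustrative, assumed values): with netInputSize = {-1, 368} and
        // netInputSizeDynamicBehavior = 1., the dynamic `-1` width would be clipped to at most 1 x 656 = 656,
        // so a very wide input image could not exhaust the GPU memory.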

        /**
         * Output size of the final rendered image.
         * It barely affects performance compared to netInputSize.
         * The final Datum.poseKeypoints can be scaled with respect to outputSize if `keypointScaleMode` is set to
         * ScaleMode::OutputResolution, even if the rendering is disabled.
         */
        Point<int> outputSize;

        /**
         * Final scale of the Array<float> Datum.poseKeypoints and the written pose data.
         * The final Datum.poseKeypoints can be scaled with respect to input size (ScaleMode::InputResolution), net
         * output size (ScaleMode::NetOutputResolution), output rendering size (ScaleMode::OutputResolution), from 0 to
         * 1 (ScaleMode::ZeroToOne(FixedAspect)), and -1 to 1 (ScaleMode::PlusMinusOne(FixedAspect)).
         */
        ScaleMode keypointScaleMode;

        /**
         * Number of GPUs processing in parallel.
         * The greater it is, the faster the algorithm will run, but potentially with higher lag (which only matters
         * in real-time webcam scenarios).
         */
        int gpuNumber;

        /**
         * First GPU device.
         * I.e., the GPUs used will be the ones in the range [gpuNumberStart, gpuNumberStart + gpuNumber).
         */
        int gpuNumberStart;
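        // E.g. (illustrative): gpuNumberStart = 1 and gpuNumber = 2 would run the processing on GPUs 1 and 2.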

        /**
         * Number of scales to process.
         * The greater it is, the slower the processing and the more memory required, but accuracy will potentially
         * increase.
         * This parameter is related to scaleGap, such that the final pose estimation will be an average of the
         * results predicted at each scale.
         */
        int scalesNumber;

        /**
         * Gap between successive scales.
         * The pose will be estimated for the scales in the range [1, 1 - scaleGap*(scalesNumber-1)], with a gap of
         * scaleGap between consecutive scales.
         */
        float scaleGap;
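        // Worked example (illustrative): scalesNumber = 4 and scaleGap = 0.25f would process the scales 1.0, 0.75,
        // 0.5 and 0.25, and average the resulting keypoint predictions.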

        /**
         * Whether to render the output (pose locations, body, background or PAF heat maps) with CPU or GPU.
         * Select `None` for no rendering, `Cpu` or `Gpu` for CPU and GPU rendering, respectively.
         */
        RenderMode renderMode;

        /**
         * Pose model. It affects the number of body parts to render.
         * Select PoseModel::BODY_25 for the 25 body-part COCO + foot model; PoseModel::COCO_18 for the 18 body-part
         * COCO model; PoseModel::MPI_15 for the 15 body-part MPI model; PoseModel::MPI_15_4 for a faster version of
         * MPI; etc.
         */
        PoseModel poseModel;

        /**
         * Whether to blend the final results on top of the original image, or just render them on a flat background.
         */
        bool blendOriginalFrame;

        /**
         * Rendering blending alpha value of the pose point locations with respect to the background image.
         * Value in the range [0, 1]. 0 will only render the background, 1 will fully render the pose.
         */
        float alphaKeypoint;

        /**
         * Rendering blending alpha value of the heat maps (body part, background or PAF) with respect to the
         * background image.
         * Value in the range [0, 1]. 0 will only render the background, 1 will only render the heat map.
         */
        float alphaHeatMap;

        /**
         * Element to initially render.
         * Set 0 for pose, [1, #body parts] for each body part following the order in POSE_BODY_PART_MAPPING in
         * `include/pose/poseParameters.hpp`, #body parts+1 for background, #body parts+2 for all body parts
         * overlapped, #body parts+3 for all PAFs, and [#body parts+4, #body parts+4+#PAF pairs] for each PAF
         * following the order in POSE_BODY_PART_PAIRS.
         */
        int defaultPartToRender;
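        // Worked example (illustrative, assuming PoseModel::BODY_25 with 25 body parts): 0 renders the full pose,
        // 1-25 each individual body part, 26 the background, 27 all body parts overlapped, 28 all PAFs, and 29
        // onwards each individual PAF.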

        /**
         * Folder where the pose Caffe models are located.
         */
        String modelFolder;

        /**
         * Whether and which heat maps to save on the Array<float> Datum.heatmaps.
         * Use HeatMapType::Parts for body parts, HeatMapType::Background for the background, and HeatMapType::PAFs for
         * the Part Affinity Fields.
         */
        std::vector<HeatMapType> heatMapTypes;

        /**
         * Scale of the Datum.heatmaps.
         * Select ScaleMode::ZeroToOne(FixedAspect) for range [0,1], ScaleMode::PlusMinusOne(FixedAspect) for [-1,1]
         * and ScaleMode::UnsignedChar for [0, 255].
         * If heatMapTypes.empty(), then this parameter has no effect.
         */
        ScaleMode heatMapScaleMode;
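        // E.g. (illustrative): heatMapTypes = {op::HeatMapType::Parts, op::HeatMapType::PAFs} with
        // heatMapScaleMode = op::ScaleMode::UnsignedChar would save the body part and PAF heat maps scaled to the
        // range [0, 255].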

        /**
         * Whether to add the body part candidates.
         * Candidates refer to all the detected body parts, before being assembled into people.
         */
        bool addPartCandidates;

        /**
         * Rendering threshold. Only estimated keypoints whose score confidences are higher than this value will be
         * rendered. Note: Rendered refers only to visual display in the OpenPose basic GUI, not in the saved results.
         * Generally, a high threshold (> 0.5) will only render very clear body parts, while a small threshold (~0.1)
         * will also output guessed and occluded keypoints, at the cost of more false positives (i.e., wrong
         * detections).
         */
        float renderThreshold;

        /**
         * Maximum number of people to be detected.
         * This parameter will limit the maximum number of people detected, by keeping the people with the
         * `numberPeopleMax` top scores.
         * Useful if you know the exact number of people in the scene, so it can remove false positives (as long as
         * all the actual people have been detected).
         * However, it might also include false negatives by removing very small or highly occluded people.
         */
        int numberPeopleMax;

        /**
         * Whether to maximize the number of positives.
         * It reduces the thresholds to accept a person candidate. It highly increases both false and true positives.
         * I.e., it maximizes average recall but could harm average precision.
         */
        bool maximizePositives;

        /**
         * Maximum processing frame rate.
         * By default (-1), OpenPose will process frames as fast as possible.
         * Example usage: if OpenPose is displaying images too quickly, this can reduce the speed so the user can
         * better analyze each frame from the GUI.
         */
        double fpsMax;

        /**
         * Final path where the pose Caffe ProtoTxt file is located.
         * The combination modelFolder + protoTxtPath represents the whole path to the prototxt file.
         * If empty, it will use the default OpenPose ProtoTxt file.
         */
        String protoTxtPath;

        /**
         * Final path where the pose Caffe CaffeModel is located.
         * The combination modelFolder + caffeModelPath represents the whole path to the caffemodel file.
         * If empty, it will use the default OpenPose CaffeModel file.
         */
        String caffeModelPath;

        /**
         * The image upsampling scale. 8 is the stride of the network, so it is the ideal value to maximize the
         * speed/accuracy trade-off.
         */
        float upsamplingRatio;

        /**
         * Whether to internally enable Google Logging.
         * This option is only applicable if Caffe is used.
         * Only disable it if the user is already calling google::InitGoogleLogging() in their own code.
         * If the user disables Google Logging but does not call it themselves, Caffe will start to pop up all its
         * verbose messages.
         */
        bool enableGoogleLogging;

        /**
         * Constructor of the struct.
         * It sets the default values that we recommend for each element of the struct.
         * Since all the elements of the struct are public, they can also be filled manually afterwards (see the
         * usage sketch after this struct).
         */
        WrapperStructPose(
            const PoseMode poseMode = PoseMode::Enabled, const Point<int>& netInputSize = Point<int>{-1, 368},
            const double netInputSizeDynamicBehavior = 1.,
            const Point<int>& outputSize = Point<int>{-1, -1},
            const ScaleMode keypointScaleMode = ScaleMode::InputResolution, const int gpuNumber = -1,
            const int gpuNumberStart = 0, const int scalesNumber = 1, const float scaleGap = 0.25f,
            const RenderMode renderMode = RenderMode::Auto, const PoseModel poseModel = PoseModel::BODY_25,
            const bool blendOriginalFrame = true, const float alphaKeypoint = POSE_DEFAULT_ALPHA_KEYPOINT,
            const float alphaHeatMap = POSE_DEFAULT_ALPHA_HEAT_MAP, const int defaultPartToRender = 0,
            const String& modelFolder = "models/", const std::vector<HeatMapType>& heatMapTypes = {},
            const ScaleMode heatMapScaleMode = ScaleMode::UnsignedChar, const bool addPartCandidates = false,
            const float renderThreshold = 0.05f, const int numberPeopleMax = -1, const bool maximizePositives = false,
            const double fpsMax = -1., const String& protoTxtPath = "", const String& caffeModelPath = "",
            const float upsamplingRatio = 0.f, const bool enableGoogleLogging = true);
    };
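
    // Minimal usage sketch (illustrative only; it assumes the op::Wrapper class from
    // <openpose/wrapper/wrapper.hpp>, which is not included by this header):
    //
    //     op::WrapperStructPose wrapperStructPose{};           // recommended default values
    //     wrapperStructPose.netInputSize = op::Point<int>{656, 368};
    //     wrapperStructPose.renderThreshold = 0.1f;            // also render guessed/occluded keypoints
    //
    //     op::Wrapper opWrapper{};
    //     opWrapper.configure(wrapperStructPose);              // apply this pose configuration
    //     opWrapper.exec();                                    // run OpenPose with it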
}

#endif // OPENPOSE_WRAPPER_WRAPPER_STRUCT_POSE_HPP