| TRAIN: | |
| USE_FP16: true | |
| FP16_TYPE: bfloat16 | |
| MODEL: | |
| IMAGE_SIZE: | |
| - 512 | |
| - 512 | |
| IMAGE_MEAN: | |
| - 0.485 | |
| - 0.456 | |
| - 0.406 | |
| IMAGE_STD: | |
| - 0.229 | |
| - 0.224 | |
| - 0.225 | |
| BACKBONE: | |
| TYPE: dinov3_vith16plus | |
| PRETRAINED_WEIGHTS: '' | |
| FROZEN_STAGES: -1 | |
| DROP_PATH_RATE: 0.1 | |
| DECODER: | |
| TYPE: sam | |
| DIM: 1024 | |
| DEPTH: 6 | |
| HEADS: 8 | |
| MLP_DIM: 1024 | |
| DIM_HEAD: 64 | |
| LAYER_SCALE_INIT: 0.0 | |
| DROP_RATE: 0.0 | |
| ATTN_DROP_RATE: 0.0 | |
| DROP_PATH_RATE: 0.0 | |
| FFN_TYPE: origin | |
| ENABLE_TWOWAY: false | |
| REPEAT_PE: true | |
| FROZEN: false | |
| CONDITION_TYPE: cliff | |
| USE_INTRIN_CENTER: true | |
| DO_INTERM_PREDS: true | |
| DO_INTERM_SUP: true | |
| DO_KEYPOINT_TOKENS: true | |
| DO_HAND_DETECT_TOKENS: true | |
| KEYPOINT_TOKEN_UPDATE: v2 | |
| KEYPOINT_TOKEN_UPDATE_COORD_EMB_USE_MLP: true | |
| DO_KEYPOINT3D_TOKENS: true | |
| PROMPT_ENCODER: | |
| ENABLE: true | |
| MAX_NUM_CLICKS: 2 | |
| PROMPT_KEYPOINTS: mhr70 | |
| FROZEN: false | |
| KEYPOINT_SAMPLER: | |
| TYPE: v1 | |
| WORST_RATIO: 0.8 | |
| KEYBODY_RATIO: 0.8 | |
| NEGATIVE_RATIO: 0.1 | |
| DUMMY_RATIO: 0.1 | |
| DISTANCE_THRESH: 0.0001 | |
| MASK_EMBED_TYPE: v2 | |
| MASK_PROMPT: v1 | |
| PERSON_HEAD: | |
| POSE_TYPE: mhr | |
| CAMERA_ENABLE: true | |
| CAMERA_TYPE: perspective | |
| ZERO_POSE_INIT: true | |
| ZERO_POSE_INIT_BODY_FACTOR: 1 | |
| MHR_HEAD: | |
| MLP_DEPTH: 2 | |
| MLP_CHANNEL_DIV_FACTOR: 1 | |
| CAMERA_HEAD: | |
| MLP_DEPTH: 2 | |
| MLP_CHANNEL_DIV_FACTOR: 1 | |
| DEFAULT_SCALE_FACTOR_HAND: 10 | |
| ENABLE_BODY: true | |
| ENABLE_HAND: true | |