---
# Model/training configuration.
#
# NOTE(review): this file was reconstructed from a copy in which all newlines
# and indentation had been collapsed onto a single line (which is not valid
# YAML). Key order and every scalar value are preserved exactly; the nesting
# was inferred from key order and from repeated key names (TYPE, FROZEN,
# DROP_PATH_RATE, MLP_DEPTH, ...) that can only coexist in separate mappings.
# Places where the parent mapping could not be determined with certainty are
# marked below -- confirm them against the code that reads this config.

TRAIN:
  USE_FP16: true
  FP16_TYPE: bfloat16

MODEL:
  # Two-element size; presumably (height, width) in pixels -- TODO confirm.
  IMAGE_SIZE: [512, 512]
  # Three-element lists, presumably per-channel normalisation statistics
  # (the values match the widely used ImageNet RGB mean/std).
  IMAGE_MEAN: [0.485, 0.456, 0.406]
  IMAGE_STD: [0.229, 0.224, 0.225]

  BACKBONE:
    TYPE: dinov3_vith16plus
    # Empty string kept as-is (explicitly quoted so it is not read as null).
    PRETRAINED_WEIGHTS: ''
    FROZEN_STAGES: -1
    DROP_PATH_RATE: 0.1

  DECODER:
    TYPE: sam
    DIM: 1024
    DEPTH: 6
    HEADS: 8
    MLP_DIM: 1024
    DIM_HEAD: 64
    LAYER_SCALE_INIT: 0.0
    DROP_RATE: 0.0
    ATTN_DROP_RATE: 0.0
    DROP_PATH_RATE: 0.0
    FFN_TYPE: origin
    ENABLE_TWOWAY: false
    REPEAT_PE: true
    FROZEN: false
    CONDITION_TYPE: cliff
    USE_INTRIN_CENTER: true
    DO_INTERM_PREDS: true
    DO_INTERM_SUP: true
    DO_KEYPOINT_TOKENS: true
    DO_HAND_DETECT_TOKENS: true
    KEYPOINT_TOKEN_UPDATE: v2
    KEYPOINT_TOKEN_UPDATE_COORD_EMB_USE_MLP: true
    DO_KEYPOINT3D_TOKENS: true

  PROMPT_ENCODER:
    ENABLE: true
    MAX_NUM_CLICKS: 2
    PROMPT_KEYPOINTS: mhr70
    FROZEN: false
    KEYPOINT_SAMPLER:
      TYPE: v1
      WORST_RATIO: 0.8
      KEYBODY_RATIO: 0.8
      NEGATIVE_RATIO: 0.1
      DUMMY_RATIO: 0.1
      DISTANCE_THRESH: 0.0001
    # NOTE(review): assumed to belong to PROMPT_ENCODER rather than to
    # KEYPOINT_SAMPLER; the flattened original cannot distinguish -- confirm.
    MASK_EMBED_TYPE: v2
    MASK_PROMPT: v1

  PERSON_HEAD:
    POSE_TYPE: mhr
    CAMERA_ENABLE: true
    CAMERA_TYPE: perspective
    ZERO_POSE_INIT: true
    ZERO_POSE_INIT_BODY_FACTOR: 1

  MHR_HEAD:
    MLP_DEPTH: 2
    MLP_CHANNEL_DIV_FACTOR: 1

  CAMERA_HEAD:
    MLP_DEPTH: 2
    MLP_CHANNEL_DIV_FACTOR: 1
    # NOTE(review): assumed to be a CAMERA_HEAD option because it directly
    # follows that section's keys -- confirm.
    DEFAULT_SCALE_FACTOR_HAND: 10

  # NOTE(review): assumed to be MODEL-level switches; they could instead be
  # part of CAMERA_HEAD or top-level -- confirm against the config reader.
  ENABLE_BODY: true
  ENABLE_HAND: true