# Source: GEM-X / model_config.yaml
# Last commit by jeffffffli: "Upload model_config.yaml with huggingface_hub"
# Commit 7bc8261 (verified)
# NOTE(review): this file arrived with all indentation stripped, which left
# TYPE, DROP_PATH_RATE, FROZEN, MLP_DEPTH and MLP_CHANNEL_DIV_FACTOR repeated
# inside one flat mapping (duplicate keys; most parsers keep only the last).
# The nesting below is reconstructed from key order and from those repeats.
# Every scalar value is unchanged. Confirm the layout against the code that
# loads this config before relying on it.

TRAIN:
  USE_FP16: true
  FP16_TYPE: bfloat16

MODEL:
  # Two-element size; axis order (H, W vs W, H) is not stated here.
  IMAGE_SIZE:
    - 512
    - 512
  # Three-element mean/std; presumably per-channel normalization constants
  # (the values match the widely used ImageNet statistics) - TODO confirm.
  IMAGE_MEAN:
    - 0.485
    - 0.456
    - 0.406
  IMAGE_STD:
    - 0.229
    - 0.224
    - 0.225

  BACKBONE:
    TYPE: dinov3_vith16plus
    # Explicit empty string, not null.
    PRETRAINED_WEIGHTS: ''
    FROZEN_STAGES: -1
    DROP_PATH_RATE: 0.1

  DECODER:
    TYPE: sam
    DIM: 1024
    DEPTH: 6
    HEADS: 8
    MLP_DIM: 1024
    DIM_HEAD: 64
    LAYER_SCALE_INIT: 0.0
    DROP_RATE: 0.0
    ATTN_DROP_RATE: 0.0
    DROP_PATH_RATE: 0.0
    FFN_TYPE: origin
    ENABLE_TWOWAY: false
    REPEAT_PE: true
    FROZEN: false
    CONDITION_TYPE: cliff
    USE_INTRIN_CENTER: true
    DO_INTERM_PREDS: true
    DO_INTERM_SUP: true
    DO_KEYPOINT_TOKENS: true
    DO_HAND_DETECT_TOKENS: true
    KEYPOINT_TOKEN_UPDATE: v2
    KEYPOINT_TOKEN_UPDATE_COORD_EMB_USE_MLP: true
    DO_KEYPOINT3D_TOKENS: true

  PROMPT_ENCODER:
    ENABLE: true
    MAX_NUM_CLICKS: 2
    PROMPT_KEYPOINTS: mhr70
    FROZEN: false
    # NOTE(review): nested here because it carries its own TYPE key and sits
    # between PROMPT_ENCODER's FROZEN and MASK_* keys - confirm.
    KEYPOINT_SAMPLER:
      TYPE: v1
      WORST_RATIO: 0.8
      KEYBODY_RATIO: 0.8
      NEGATIVE_RATIO: 0.1
      DUMMY_RATIO: 0.1
      DISTANCE_THRESH: 0.0001
    MASK_EMBED_TYPE: v2
    MASK_PROMPT: v1

  PERSON_HEAD:
    POSE_TYPE: mhr
    CAMERA_ENABLE: true
    CAMERA_TYPE: perspective
    ZERO_POSE_INIT: true
    ZERO_POSE_INIT_BODY_FACTOR: 1

  # MHR_HEAD and CAMERA_HEAD each need their own MLP_DEPTH and
  # MLP_CHANNEL_DIV_FACTOR, so they are kept as separate sibling mappings.
  MHR_HEAD:
    MLP_DEPTH: 2
    MLP_CHANNEL_DIV_FACTOR: 1

  CAMERA_HEAD:
    MLP_DEPTH: 2
    MLP_CHANNEL_DIV_FACTOR: 1
    DEFAULT_SCALE_FACTOR_HAND: 10

  # NOTE(review): the original nesting of the two flags below cannot be
  # recovered from the flattened text. They are placed directly under MODEL
  # as the most likely parent; they may instead belong under CAMERA_HEAD or
  # at the top level - verify against the loader.
  ENABLE_BODY: true
  ENABLE_HAND: true