| { | |
| "accelerator": null, | |
| "action_dim": 128, | |
| "expert_configs": [ | |
| { | |
| "checkpoint_path": "/data/user/wsong890/user68/project/RoboTwin/policy/RDT_repa/checkpoints/RDT_theia_half21_stack_bowls_two-clean-5032-0.05/checkpoint-15000", | |
| "config": { | |
| "ema": { | |
| "inv_gamma": 1, | |
| "max_value": 0.9999, | |
| "min_value": 0.0, | |
| "power": 0.95, | |
| "update_after_step": 1000 | |
| }, | |
| "img_adaptor": "mlp2x_gelu", | |
| "img_token_dim": 1152, | |
| "lang_adaptor": "mlp2x_gelu", | |
| "lang_token_dim": 4096, | |
| "noise_scheduler": { | |
| "beta_schedule": "squaredcos_cap_v2", | |
| "clip_sample": false, | |
| "num_inference_timesteps": 5, | |
| "num_train_timesteps": 1000, | |
| "prediction_type": "sample", | |
| "type": "ddpm" | |
| }, | |
| "rdt": { | |
| "cond_pos_embed_type": "multimodal", | |
| "depth": 28, | |
| "hidden_size": 2048, | |
| "num_heads": 32 | |
| }, | |
| "state_adaptor": "mlp3x_gelu", | |
| "state_token_dim": 128 | |
| }, | |
| "enc_type": "dinov2-vit-b", | |
| "learnable_tokens": 256, | |
| "name": "dinov2_expert" | |
| }, | |
| { | |
| "checkpoint_path": "/data/user/wsong890/user68/project/RoboTwin/policy/RDT_repa/checkpoints/RDT_theia_half21_stack_bowls_two-clean-5032-0.05/checkpoint-15000", | |
| "config": { | |
| "ema": { | |
| "inv_gamma": 1, | |
| "max_value": 0.9999, | |
| "min_value": 0.0, | |
| "power": 0.95, | |
| "update_after_step": 1000 | |
| }, | |
| "img_adaptor": "mlp2x_gelu", | |
| "img_token_dim": 1152, | |
| "lang_adaptor": "mlp2x_gelu", | |
| "lang_token_dim": 4096, | |
| "noise_scheduler": { | |
| "beta_schedule": "squaredcos_cap_v2", | |
| "clip_sample": false, | |
| "num_inference_timesteps": 5, | |
| "num_train_timesteps": 1000, | |
| "prediction_type": "sample", | |
| "type": "ddpm" | |
| }, | |
| "rdt": { | |
| "cond_pos_embed_type": "multimodal", | |
| "depth": 28, | |
| "hidden_size": 2048, | |
| "num_heads": 32 | |
| }, | |
| "state_adaptor": "mlp3x_gelu", | |
| "state_token_dim": 128 | |
| }, | |
| "enc_type": "clip-vit-h", | |
| "learnable_tokens": 256, | |
| "name": "clip_expert" | |
| }, | |
| { | |
| "checkpoint_path": "/data/user/wsong890/user68/project/RoboTwin/policy/RDT_repa/checkpoints/RDT_theia_half21_stack_bowls_two-clean-5032-0.05/checkpoint-15000", | |
| "config": { | |
| "ema": { | |
| "inv_gamma": 1, | |
| "max_value": 0.9999, | |
| "min_value": 0.0, | |
| "power": 0.95, | |
| "update_after_step": 1000 | |
| }, | |
| "img_adaptor": "mlp2x_gelu", | |
| "img_token_dim": 1152, | |
| "lang_adaptor": "mlp2x_gelu", | |
| "lang_token_dim": 4096, | |
| "noise_scheduler": { | |
| "beta_schedule": "squaredcos_cap_v2", | |
| "clip_sample": false, | |
| "num_inference_timesteps": 5, | |
| "num_train_timesteps": 1000, | |
| "prediction_type": "sample", | |
| "type": "ddpm" | |
| }, | |
| "rdt": { | |
| "cond_pos_embed_type": "multimodal", | |
| "depth": 28, | |
| "hidden_size": 2048, | |
| "num_heads": 32 | |
| }, | |
| "state_adaptor": "mlp3x_gelu", | |
| "state_token_dim": 128 | |
| }, | |
| "enc_type": "vit-huge-patch", | |
| "learnable_tokens": 256, | |
| "name": "vit_expert" | |
| } | |
| ], | |
| "gate_hidden_dim": 256, | |
| "img_cond_len": 4374, | |
| "img_pos_embed_config": [ | |
| [ | |
| "image", | |
| [ | |
| 2, | |
| 3, | |
| -729 | |
| ] | |
| ] | |
| ], | |
| "img_token_dim": 1152, | |
| "lang_pos_embed_config": [ | |
| [ | |
| "lang", | |
| -1024 | |
| ] | |
| ], | |
| "lang_token_dim": 4096, | |
| "lora_config": { | |
| "alpha": 32, | |
| "dropout": 0.1, | |
| "r": 16, | |
| "target_modules": [ | |
| "qkv", | |
| "q", | |
| "kv", | |
| "proj", | |
| "fc1", | |
| "fc2" | |
| ] | |
| }, | |
| "max_lang_cond_len": 1024, | |
| "pred_horizon": 32, | |
| "resolution": 256, | |
| "state_token_dim": 128, | |
| "use_lora": true | |
| } |