{ "accelerator": null, "action_dim": 128, "expert_configs": [ { "checkpoint_path": "/data/user/wsong890/user68/project/RoboTwin/policy/RDT_repa/checkpoints/RDT_theia_half21_stack_bowls_two-clean-5032-0.05/checkpoint-15000", "config": { "ema": { "inv_gamma": 1, "max_value": 0.9999, "min_value": 0.0, "power": 0.95, "update_after_step": 1000 }, "img_adaptor": "mlp2x_gelu", "img_token_dim": 1152, "lang_adaptor": "mlp2x_gelu", "lang_token_dim": 4096, "noise_scheduler": { "beta_schedule": "squaredcos_cap_v2", "clip_sample": false, "num_inference_timesteps": 5, "num_train_timesteps": 1000, "prediction_type": "sample", "type": "ddpm" }, "rdt": { "cond_pos_embed_type": "multimodal", "depth": 28, "hidden_size": 2048, "num_heads": 32 }, "state_adaptor": "mlp3x_gelu", "state_token_dim": 128 }, "enc_type": "dinov2-vit-b", "learnable_tokens": 256, "name": "dinov2_expert" }, { "checkpoint_path": "/data/user/wsong890/user68/project/RoboTwin/policy/RDT_repa/checkpoints/RDT_theia_half21_stack_bowls_two-clean-5032-0.05/checkpoint-15000", "config": { "ema": { "inv_gamma": 1, "max_value": 0.9999, "min_value": 0.0, "power": 0.95, "update_after_step": 1000 }, "img_adaptor": "mlp2x_gelu", "img_token_dim": 1152, "lang_adaptor": "mlp2x_gelu", "lang_token_dim": 4096, "noise_scheduler": { "beta_schedule": "squaredcos_cap_v2", "clip_sample": false, "num_inference_timesteps": 5, "num_train_timesteps": 1000, "prediction_type": "sample", "type": "ddpm" }, "rdt": { "cond_pos_embed_type": "multimodal", "depth": 28, "hidden_size": 2048, "num_heads": 32 }, "state_adaptor": "mlp3x_gelu", "state_token_dim": 128 }, "enc_type": "clip-vit-h", "learnable_tokens": 256, "name": "clip_expert" }, { "checkpoint_path": "/data/user/wsong890/user68/project/RoboTwin/policy/RDT_repa/checkpoints/RDT_theia_half21_stack_bowls_two-clean-5032-0.05/checkpoint-15000", "config": { "ema": { "inv_gamma": 1, "max_value": 0.9999, "min_value": 0.0, "power": 0.95, "update_after_step": 1000 }, "img_adaptor": "mlp2x_gelu", "img_token_dim": 1152, "lang_adaptor": "mlp2x_gelu", "lang_token_dim": 4096, "noise_scheduler": { "beta_schedule": "squaredcos_cap_v2", "clip_sample": false, "num_inference_timesteps": 5, "num_train_timesteps": 1000, "prediction_type": "sample", "type": "ddpm" }, "rdt": { "cond_pos_embed_type": "multimodal", "depth": 28, "hidden_size": 2048, "num_heads": 32 }, "state_adaptor": "mlp3x_gelu", "state_token_dim": 128 }, "enc_type": "vit-huge-patch", "learnable_tokens": 256, "name": "vit_expert" } ], "gate_hidden_dim": 256, "img_cond_len": 4374, "img_pos_embed_config": [ [ "image", [ 2, 3, -729 ] ] ], "img_token_dim": 1152, "lang_pos_embed_config": [ [ "lang", -1024 ] ], "lang_token_dim": 4096, "lora_config": { "alpha": 32, "dropout": 0.1, "r": 16, "target_modules": [ "qkv", "q", "kv", "proj", "fc1", "fc2" ] }, "max_lang_cond_len": 1024, "pred_horizon": 32, "resolution": 256, "state_token_dim": 128, "use_lora": true }