{
"accelerator": null,
"action_dim": 128,
"expert_configs": [
{
"checkpoint_path": "/data/user/wsong890/user68/project/RoboTwin/policy/RDT_repa/checkpoints/RDT_theia_half21_stack_bowls_two-clean-5032-0.05/checkpoint-15000",
"config": {
"ema": {
"inv_gamma": 1,
"max_value": 0.9999,
"min_value": 0.0,
"power": 0.95,
"update_after_step": 1000
},
"img_adaptor": "mlp2x_gelu",
"img_token_dim": 1152,
"lang_adaptor": "mlp2x_gelu",
"lang_token_dim": 4096,
"noise_scheduler": {
"beta_schedule": "squaredcos_cap_v2",
"clip_sample": false,
"num_inference_timesteps": 5,
"num_train_timesteps": 1000,
"prediction_type": "sample",
"type": "ddpm"
},
"rdt": {
"cond_pos_embed_type": "multimodal",
"depth": 28,
"hidden_size": 2048,
"num_heads": 32
},
"state_adaptor": "mlp3x_gelu",
"state_token_dim": 128
},
"enc_type": "dinov2-vit-b",
"learnable_tokens": 256,
"name": "dinov2_expert"
},
{
"checkpoint_path": "/data/user/wsong890/user68/project/RoboTwin/policy/RDT_repa/checkpoints/RDT_theia_half21_stack_bowls_two-clean-5032-0.05/checkpoint-15000",
"config": {
"ema": {
"inv_gamma": 1,
"max_value": 0.9999,
"min_value": 0.0,
"power": 0.95,
"update_after_step": 1000
},
"img_adaptor": "mlp2x_gelu",
"img_token_dim": 1152,
"lang_adaptor": "mlp2x_gelu",
"lang_token_dim": 4096,
"noise_scheduler": {
"beta_schedule": "squaredcos_cap_v2",
"clip_sample": false,
"num_inference_timesteps": 5,
"num_train_timesteps": 1000,
"prediction_type": "sample",
"type": "ddpm"
},
"rdt": {
"cond_pos_embed_type": "multimodal",
"depth": 28,
"hidden_size": 2048,
"num_heads": 32
},
"state_adaptor": "mlp3x_gelu",
"state_token_dim": 128
},
"enc_type": "clip-vit-h",
"learnable_tokens": 256,
"name": "clip_expert"
},
{
"checkpoint_path": "/data/user/wsong890/user68/project/RoboTwin/policy/RDT_repa/checkpoints/RDT_theia_half21_stack_bowls_two-clean-5032-0.05/checkpoint-15000",
"config": {
"ema": {
"inv_gamma": 1,
"max_value": 0.9999,
"min_value": 0.0,
"power": 0.95,
"update_after_step": 1000
},
"img_adaptor": "mlp2x_gelu",
"img_token_dim": 1152,
"lang_adaptor": "mlp2x_gelu",
"lang_token_dim": 4096,
"noise_scheduler": {
"beta_schedule": "squaredcos_cap_v2",
"clip_sample": false,
"num_inference_timesteps": 5,
"num_train_timesteps": 1000,
"prediction_type": "sample",
"type": "ddpm"
},
"rdt": {
"cond_pos_embed_type": "multimodal",
"depth": 28,
"hidden_size": 2048,
"num_heads": 32
},
"state_adaptor": "mlp3x_gelu",
"state_token_dim": 128
},
"enc_type": "vit-huge-patch",
"learnable_tokens": 256,
"name": "vit_expert"
}
],
"gate_hidden_dim": 256,
"img_cond_len": 4374,
"img_pos_embed_config": [
[
"image",
[
2,
3,
-729
]
]
],
"img_token_dim": 1152,
"lang_pos_embed_config": [
[
"lang",
-1024
]
],
"lang_token_dim": 4096,
"lora_config": {
"alpha": 32,
"dropout": 0.1,
"r": 16,
"target_modules": [
"qkv",
"q",
"kv",
"proj",
"fc1",
"fc2"
]
},
"max_lang_cond_len": 1024,
"pred_horizon": 32,
"resolution": 256,
"state_token_dim": 128,
"use_lora": true
}