cosmos-policy
jwgu's picture
Add config.json for download tracking (#2)
68e3f92
{
  "model_type": "cosmos-policy",
  "architecture": "diffusion-transformer",
  "base_model": "nvidia/Cosmos-Predict2-2B-Video2World",
  "num_parameters": "2B",
  "input_spec": {
    "text": {
      "type": "string",
      "description": "Natural language task description"
    },
    "images": {
      "format": "RGB",
      "resolution": [224, 224],
      "views": ["top_down", "left_wrist", "right_wrist"]
    },
    "proprioception": {
      "dim": 14,
      "components": ["left_arm_joints", "right_arm_joints"],
      "joints_per_arm": 7
    }
  },
  "output_spec": {
    "actions": {
      "dim": 14,
      "horizon": 50,
      "components": ["left_arm_6dof", "left_gripper", "right_arm_6dof", "right_gripper"],
      "control_frequency_hz": 25
    },
    "future_proprioception": {
      "dim": 14
    },
    "future_images": {
      "resolution": [224, 224],
      "views": 3
    },
    "value": {
      "dim": 1
    }
  },
  "diffusion_config": {
    "denoising_steps": 10,
    "sigma_min": 4.0,
    "sigma_max": 80.0,
    "generation_mode": "parallel"
  },
  "training": {
    "dataset": "ALOHA-Cosmos-Policy",
    "gradient_steps": 50000,
    "batch_size": 200,
    "hardware": "8x H100",
    "action_chunk_size": 50,
    "num_demonstrations": 185
  },
  "benchmark_results": {
    "put_x_on_plate": 1.0,
    "fold_shirt": 0.995,
    "put_candies_in_bowl": 0.896,
    "put_candy_in_ziploc_bag": 0.854,
    "average": 0.936
  },
  "inference": {
    "precision": "bf16",
    "vram_gb": 6.0
  },
  "robot_platform": "ALOHA 2 (ViperX 300 S dual arms)"
}