jwgu's picture
Add config.json for download tracking (#2)
160f84b
{
"model_type": "cosmos-policy-planning",
"architecture": "diffusion-transformer",
"base_model": "nvidia/Cosmos-Policy-ALOHA-Predict2-2B",
"num_parameters": "2B",
"input_spec": {
"text": {
"type": "string",
"description": "Natural language task description"
},
"images": {
"format": "RGB",
"resolution": [224, 224],
"views": ["top_down", "left_wrist", "right_wrist"]
},
"proprioception": {
"dim": 14,
"components": ["left_arm_joints", "right_arm_joints"],
"joints_per_arm": 7
},
"actions": {
"dim": 14,
"horizon": 50,
"description": "Candidate action sequence to evaluate"
}
},
"output_spec": {
"future_proprioception": {
"dim": 14
},
"future_images": {
"resolution": [224, 224],
"views": 3
},
"value": {
"dim": 1,
"description": "Expected cumulative reward for action sequence"
}
},
"diffusion_config": {
"denoising_steps": 10,
"sigma_min": 4.0,
"sigma_max": 80.0
},
"planning_config": {
"ensemble_world_model_queries": 3,
"ensemble_value_queries": 5,
"total_predictions_per_action": 15,
"best_of_n_search": 8
},
"training": {
"dataset": "ALOHA policy rollouts",
"num_episodes": 648,
"hardware": "8x H100",
"batch_split": {
"policy": 0.1,
"world_model": 0.45,
"value_function": 0.45
}
},
"benchmark_results": {
"put_candies_in_bowl": 0.60,
"put_candy_in_ziploc_bag": 0.84,
"average": 0.72,
"improvement_over_base": 0.125
},
"inference": {
"precision": "bf16",
"latency_seconds": 4.9,
"recommended_gpus": 8
},
"robot_platform": "ALOHA 2 (ViperX 300 S dual arms)",
"control_frequency_hz": 25
}