{ "model_type": "cosmos-policy-planning", "architecture": "diffusion-transformer", "base_model": "nvidia/Cosmos-Policy-ALOHA-Predict2-2B", "num_parameters": "2B", "input_spec": { "text": { "type": "string", "description": "Natural language task description" }, "images": { "format": "RGB", "resolution": [224, 224], "views": ["top_down", "left_wrist", "right_wrist"] }, "proprioception": { "dim": 14, "components": ["left_arm_joints", "right_arm_joints"], "joints_per_arm": 7 }, "actions": { "dim": 14, "horizon": 50, "description": "Candidate action sequence to evaluate" } }, "output_spec": { "future_proprioception": { "dim": 14 }, "future_images": { "resolution": [224, 224], "views": 3 }, "value": { "dim": 1, "description": "Expected cumulative reward for action sequence" } }, "diffusion_config": { "denoising_steps": 10, "sigma_min": 4.0, "sigma_max": 80.0 }, "planning_config": { "ensemble_world_model_queries": 3, "ensemble_value_queries": 5, "total_predictions_per_action": 15, "best_of_n_search": 8 }, "training": { "dataset": "ALOHA policy rollouts", "num_episodes": 648, "hardware": "8x H100", "batch_split": { "policy": 0.1, "world_model": 0.45, "value_function": 0.45 } }, "benchmark_results": { "put_candies_in_bowl": 0.60, "put_candy_in_ziploc_bag": 0.84, "average": 0.72, "improvement_over_base": 0.125 }, "inference": { "precision": "bf16", "latency_seconds": 4.9, "recommended_gpus": 8 }, "robot_platform": "ALOHA 2 (ViperX 300 S dual arms)", "control_frequency_hz": 25 }