{ "model_type": "cosmos-policy", "architecture": "diffusion-transformer", "base_model": "nvidia/Cosmos-Predict2-2B-Video2World", "num_parameters": "2B", "input_spec": { "text": { "type": "string", "description": "Natural language task description" }, "images": { "format": "RGB", "resolution": [224, 224], "views": ["top_down", "left_wrist", "right_wrist"] }, "proprioception": { "dim": 14, "components": ["left_arm_joints", "right_arm_joints"], "joints_per_arm": 7 } }, "output_spec": { "actions": { "dim": 14, "horizon": 50, "components": ["left_arm_6dof", "left_gripper", "right_arm_6dof", "right_gripper"], "control_frequency_hz": 25 }, "future_proprioception": { "dim": 14 }, "future_images": { "resolution": [224, 224], "views": 3 }, "value": { "dim": 1 } }, "diffusion_config": { "denoising_steps": 10, "sigma_min": 4.0, "sigma_max": 80.0, "generation_mode": "parallel" }, "training": { "dataset": "ALOHA-Cosmos-Policy", "gradient_steps": 50000, "batch_size": 200, "hardware": "8x H100", "action_chunk_size": 50, "num_demonstrations": 185 }, "benchmark_results": { "put_x_on_plate": 1.0, "fold_shirt": 0.995, "put_candies_in_bowl": 0.896, "put_candy_in_ziploc_bag": 0.854, "average": 0.936 }, "inference": { "precision": "bf16", "vram_gb": 6.0 }, "robot_platform": "ALOHA 2 (ViperX 300 S dual arms)" }