{
"model_type": "cosmos-policy",
"architecture": "diffusion-transformer",
"base_model": "nvidia/Cosmos-Predict2-2B-Video2World",
"num_parameters": "2B",
"input_spec": {
"text": {
"type": "string",
"description": "Natural language task description"
},
"images": {
"format": "RGB",
"resolution": [224, 224],
"views": ["agentview", "eye_in_hand"]
},
"proprioception": {
"dim": 9,
"components": ["gripper_joints", "end_effector_position", "quaternion"]
}
},
"output_spec": {
"actions": {
"dim": 7,
"horizon": 16,
"components": ["end_effector_6dof", "gripper"]
},
"future_proprioception": {
"dim": 9
},
"future_images": {
"resolution": [224, 224]
},
"value": {
"dim": 1
}
},
"diffusion_config": {
"denoising_steps": 5,
"sigma_min": 4.0,
"sigma_max": 80.0,
"generation_mode": "parallel"
},
"training": {
"dataset": "LIBERO-Cosmos-Policy",
"gradient_steps": 40000,
"batch_size": 1920,
"hardware": "64x H100",
"action_chunk_size": 16
},
"benchmark_results": {
"libero_spatial": 0.981,
"libero_object": 1.0,
"libero_goal": 0.982,
"libero_long": 0.976,
"average": 0.985
},
"inference": {
"precision": "bf16",
"vram_gb": 6.8
}
}