File size: 1,354 Bytes

cb689ec

{
  "model_type": "cosmos-policy",
  "architecture": "diffusion-transformer",
  "base_model": "nvidia/Cosmos-Predict2-2B-Video2World",
  "num_parameters": "2B",

  "input_spec": {
    "text": {
      "type": "string",
      "description": "Natural language task description"
    },
    "images": {
      "format": "RGB",
      "resolution": [224, 224],
      "views": ["agentview", "eye_in_hand"]
    },
    "proprioception": {
      "dim": 9,
      "components": ["gripper_joints", "end_effector_position", "quaternion"]
    }
  },

  "output_spec": {
    "actions": {
      "dim": 7,
      "horizon": 16,
      "components": ["end_effector_6dof", "gripper"]
    },
    "future_proprioception": {
      "dim": 9
    },
    "future_images": {
      "resolution": [224, 224]
    },
    "value": {
      "dim": 1
    }
  },

  "diffusion_config": {
    "denoising_steps": 5,
    "sigma_min": 4.0,
    "sigma_max": 80.0,
    "generation_mode": "parallel"
  },

  "training": {
    "dataset": "LIBERO-Cosmos-Policy",
    "gradient_steps": 40000,
    "batch_size": 1920,
    "hardware": "64x H100",
    "action_chunk_size": 16
  },

  "benchmark_results": {
    "libero_spatial": 0.981,
    "libero_object": 1.0,
    "libero_goal": 0.982,
    "libero_long": 0.976,
    "average": 0.985
  },

  "inference": {
    "precision": "bf16",
    "vram_gb": 6.8
  }
}