{
  "normalization_mapping": {
    "VISUAL": "IDENTITY",
    "STATE": "MIN_MAX",
    "ACTION": "MIN_MAX"
  },
  "input_features": {
    "observation.images.cam_high": {
      "type": "VISUAL",
      "shape": [
        3,
        240,
        320
      ]
    },
    "observation.images.cam_left_wrist": {
      "type": "VISUAL",
      "shape": [
        3,
        240,
        320
      ]
    },
    "observation.images.cam_right_wrist": {
      "type": "VISUAL",
      "shape": [
        3,
        240,
        320
      ]
    },
    "observation.state": {
      "type": "STATE",
      "shape": [
        14
      ]
    }
  },
  "output_features": {
    "action": {
      "type": "ACTION",
      "shape": [
        14
      ]
    }
  },
  "device": "cuda",
  "backbone": "Qwen/Qwen3-VL-4B-Instruct",
  "chunk_size": 60,
  "n_action_steps": 60,
  "max_state_dim": 32,
  "max_action_dim": 32,
  "proj_width": 2560,
  "num_steps": 10,
  "attention_implementation": "eager",
  "dit_hidden_size": 1536,
  "dit_num_heads": 32,
  "dit_num_layers": 16,
  "dit_interleave_self_attention": true,
  "dit_cross_attention_dim": 2560,
  "num_noise_per_sample": 1,
  "type": "spirit_vla"
}