{
    "type": "lewam",
    "n_obs_steps": 1,
    "input_features": {
        "observation.images.image1": {
            "type": "VISUAL",
            "shape": [
                3,
                480,
                640
            ]
        },
        "observation.images.image2": {
            "type": "VISUAL",
            "shape": [
                3,
                480,
                640
            ]
        },
        "observation.state": {
            "type": "STATE",
            "shape": [
                6
            ]
        }
    },
    "output_features": {
        "action": {
            "type": "ACTION",
            "shape": [
                6
            ]
        }
    },
    "device": "cuda",
    "use_amp": false,
    "use_peft": false,
    "push_to_hub": true,
    "repo_id": null,
    "private": null,
    "tags": null,
    "license": null,
    "pretrained_path": null,
    "model_dim": 512,
    "depth": 12,
    "num_heads": 8,
    "mlp_ratio": 4.0,
    "vlm_model_id": "HuggingFaceTB/SmolVLM2-256M-Video-Instruct",
    "vlm_num_layers": 4,
    "norm_strategy": "q2_q98",
    "num_ode_steps": 10,
    "smooth_actions": true,
    "crop_size": 224,
    "fps": 5,
    "action_fps": 30,
    "num_context_frames": 32,
    "num_future_frames": 8,
    "n_action_steps": 48,
    "action_weight": 1.0,
    "lang_drop_rate": 0.1,
    "normalization_mapping": {
        "VISUAL": "IDENTITY",
        "STATE": "IDENTITY",
        "ACTION": "IDENTITY"
    }
}