{ "type": "pi0_fast", "n_obs_steps": 1, "input_features": { "observation.images.base_0_rgb": { "type": "VISUAL", "shape": [ 3, 224, 224 ] }, "observation.images.left_wrist_0_rgb": { "type": "VISUAL", "shape": [ 3, 224, 224 ] }, "observation.images.right_wrist_0_rgb": { "type": "VISUAL", "shape": [ 3, 224, 224 ] }, "observation.state": { "type": "STATE", "shape": [ 32 ] } }, "output_features": { "action": { "type": "ACTION", "shape": [ 16 ] } }, "device": "cuda", "use_amp": false, "push_to_hub": true, "repo_id": "jadechoghari/pi0fast-folding", "private": null, "tags": null, "license": null, "pretrained_path": "/fsx/jade_choghari/models/pi0fast-base", "paligemma_variant": "gemma_2b", "action_expert_variant": "gemma_300m", "dtype": "bfloat16", "chunk_size": 10, "n_action_steps": 10, "max_state_dim": 32, "max_action_dim": 32, "max_action_tokens": 256, "rtc_config": null, "image_resolution": [ 224, 224 ], "empty_cameras": 0, "tokenizer_max_length": 200, "text_tokenizer_name": "google/paligemma-3b-pt-224", "action_tokenizer_name": "jadechoghari/fast-folding-tokenizer", "temperature": 0.0, "max_decoding_steps": 256, "fast_skip_tokens": 128, "validate_action_token_prefix": true, "use_kv_cache": true, "normalization_mapping": { "VISUAL": "IDENTITY", "STATE": "MEAN_STD", "ACTION": "MEAN_STD" }, "gradient_checkpointing": true, "compile_model": false, "compile_mode": "max-autotune", "optimizer_lr": 2.5e-05, "optimizer_betas": [ 0.9, 0.95 ], "optimizer_eps": 1e-08, "optimizer_weight_decay": 0.01, "optimizer_grad_clip_norm": 1.0, "scheduler_warmup_steps": 4000, "scheduler_decay_steps": 100000, "scheduler_decay_lr": 1e-05 }