{ "type": "pi05_rl", "n_obs_steps": 1, "input_features": { "observation.images.side": { "type": "VISUAL", "shape": [ 3, 224, 224 ] }, "observation.images.top": { "type": "VISUAL", "shape": [ 3, 224, 224 ] }, "observation.state": { "type": "STATE", "shape": [ 6 ] } }, "output_features": { "action": { "type": "ACTION", "shape": [ 6 ] } }, "device": "cuda", "use_amp": false, "use_peft": false, "push_to_hub": true, "repo_id": "cijerezg/multi-task-toys-merged-v2", "private": null, "tags": null, "license": null, "pretrained_path": null, "paligemma_variant": "gemma_2b", "action_expert_variant": "gemma_300m", "dtype": "bfloat16", "chunk_size": 50, "n_action_steps": 50, "max_state_dim": 6, "max_action_dim": 32, "num_inference_steps": 5, "time_sampling_beta_alpha": 1.5, "time_sampling_beta_beta": 1.0, "time_sampling_scale": 0.999, "time_sampling_offset": 0.001, "min_period": 0.004, "max_period": 4.0, "rtc_config": { "enabled": true, "prefix_attention_schedule": "LINEAR", "max_guidance_weight": 10.0, "execution_horizon": 10, "debug": false, "debug_maxlen": 100 }, "image_resolution": [ 224, 224 ], "empty_cameras": 0, "use_dataset_stats": false, "normalization_mapping": { "VISUAL": "IDENTITY", "STATE": "MIN_MAX", "ENV": "MIN_MAX", "ACTION": "QUANTILES" }, "action_tokenizer_name": "physical-intelligence/fast", "text_tokenizer_name": "google/paligemma-3b-pt-224", "max_action_tokens": 256, "fast_skip_tokens": 128, "max_decoding_steps": 200, "temperature": 0.0, "subtask_regeneration_interval": 1.5, "gradient_checkpointing": true, "compile_model": false, "compile_mode": "max-autotune", "freeze_vision_encoder": false, "train_expert_only": false, "knowledge_insulation": true, "action_encoding": "anchor", "loss_weight_flow": 1.0, "loss_weight_action_ce": 1.0, "loss_weight_subtask_ce": 1.0, "optimizer_lr": 2.5e-05, "optimizer_betas": [ 0.9, 0.95 ], "optimizer_eps": 1e-08, "optimizer_weight_decay": 0.1, "optimizer_grad_clip_norm": 1.0, "scheduler_warmup_steps": 1000, "scheduler_decay_steps": 30000, "scheduler_decay_lr": 2.5e-06, "tokenizer_max_length": 64, "task": "Pick up the orange cube and place it on the black X marker", "action_dim": 6, "drop_n_last_frames": 2, "critic_target_update_weight": 0.005, "num_critics": 1, "discount": 0.97, "reward_normalization_constant": 5.0, "terminal_failure_reward": -16.0, "online_steps": 20000, "online_buffer_capacity": 5000, "offline_buffer_capacity": 50000, "async_prefetch": false, "online_step_before_learning": 10, "policy_update_freq": 1, "grad_clip_norm": 2.0, "gradient_accumulation_steps": 16, "critic_lr": 5e-05, "actor_lr": 5e-05, "utd_ratio": 1, "actor_device": "cuda:0", "learner_device": "cuda:0", "use_separate_critic": true, "critic_llm_depth": 6, "critic_network_kwargs": { "hidden_dims": [ 256, 256 ], "activate_final": true }, "trainable_params": { "vision_encoder_from_layer": { "vision_tower": 5, "multi_modal_projector": true }, "language_from_layer": 0, "critic_language_from_layer": 5 }, "offline_steps": 10000, "inference_advantage": 1.0, "advantage_scaling": 0.2, "pi05_checkpoint": "outputs/pi05_base", "action_encoding_stats_path": "outputs/stats_jack/action_stats_anchor_jack_cube.pt", "dataset_stats": null, "storage_device": "cpu", "shared_encoder": false, "num_discrete_actions": null, "vision_encoder_name": null, "actor_learner_config": { "learner_host": "192.168.50.1", "learner_port": 50051, "policy_parameters_push_frequency": 180, "queue_get_timeout": 2 }, "concurrency": { "actor": "threads", "learner": "threads" } }