{ "dataset": { "repo_id": "cijerezg/yellow-car-offline-training-v2", "root": "outputs/jack_cube", "episodes": null, "max_episodes": null, "image_transforms": { "enable": false, "max_num_transforms": 3, "random_order": false, "tfs": { "brightness": { "weight": 1.0, "type": "ColorJitter", "kwargs": { "brightness": [ 0.8, 1.2 ] } }, "contrast": { "weight": 1.0, "type": "ColorJitter", "kwargs": { "contrast": [ 0.8, 1.2 ] } }, "saturation": { "weight": 1.0, "type": "ColorJitter", "kwargs": { "saturation": [ 0.5, 1.5 ] } }, "hue": { "weight": 1.0, "type": "ColorJitter", "kwargs": { "hue": [ -0.05, 0.05 ] } }, "sharpness": { "weight": 1.0, "type": "SharpnessJitter", "kwargs": { "sharpness": [ 0.5, 1.5 ] } }, "affine": { "weight": 1.0, "type": "RandomAffine", "kwargs": { "degrees": [ -5.0, 5.0 ], "translate": [ 0.05, 0.05 ] } } } }, "revision": null, "use_imagenet_stats": false, "video_backend": "pyav", "return_uint8": false, "streaming": false, "additional_offline_dataset_paths": [] }, "env": { "type": "gym_manipulator", "task": "Pick up the red truck and put it in the bowl", "fps": 30, "features": { "observation.images.side": { "type": "VISUAL", "shape": [ 3, 128, 128 ] }, "observation.images.top": { "type": "VISUAL", "shape": [ 3, 128, 128 ] }, "observation.state": { "type": "STATE", "shape": [ 6 ] }, "action": { "type": "ACTION", "shape": [ 6 ] } }, "features_map": { "observation.images.side": "observation.images.side", "observation.images.top": "observation.images.top", "observation.state": "observation.state", "action": "action" }, "max_parallel_tasks": 1, "disable_env_checker": true, "robot": { "type": "so100_follower", "port": "/dev/ttyACM0", "disable_torque_on_disconnect": true, "max_relative_target": null, "cameras": { "side": { "type": "opencv", "fps": 30, "width": 640, "height": 480, "index_or_path": 0, "color_mode": "rgb", "rotation": 0, "warmup_s": 1, "fourcc": null, "backend": 0 }, "top": { "type": "opencv", "fps": 30, "width": 640, "height": 480, "index_or_path": 2, "color_mode": "rgb", "rotation": 0, "warmup_s": 1, "fourcc": null, "backend": 0 } }, "use_degrees": true, "id": "follower_arm_v2", "calibration_dir": null }, "teleop": { "type": "so100_leader", "port": "/dev/ttyACM1", "use_degrees": true, "id": "leader_arm_v2", "calibration_dir": null }, "processor": { "control_mode": "leader", "observation": { "add_joint_velocity_to_observation": false, "add_current_to_observation": false, "add_ee_pose_to_observation": false, "display_cameras": false }, "image_preprocessing": { "crop_params_dict": null, "resize_size": [ 224, 224 ] }, "gripper": { "use_gripper": true, "gripper_penalty": 0.0 }, "reset": { "fixed_reset_joint_positions": [ 0.54, -90.69, 99.55, 73.7, -50.23, 42.71 ], "reset_time_s": 10.0, "control_time_s": 200.0, "terminate_on_success": true }, "inverse_kinematics": null, "reward_classifier": { "pretrained_path": null, "success_threshold": 0.5, "success_reward": 1.0 }, "max_gripper_pos": 30.0 }, "name": "real_robot" }, "policy": { "type": "pi05_rl", "n_obs_steps": 1, "input_features": { "observation.images.side": { "type": "VISUAL", "shape": [ 3, 224, 224 ] }, "observation.images.top": { "type": "VISUAL", "shape": [ 3, 224, 224 ] }, "observation.state": { "type": "STATE", "shape": [ 6 ] } }, "output_features": { "action": { "type": "ACTION", "shape": [ 6 ] } }, "device": "cuda", "use_amp": false, "use_peft": false, "push_to_hub": true, "repo_id": "cijerezg/multi-task-toys-merged-v2", "private": null, "tags": null, "license": null, "pretrained_path": null, "paligemma_variant": "gemma_2b", "action_expert_variant": "gemma_300m", "dtype": "bfloat16", "chunk_size": 50, "n_action_steps": 50, "max_state_dim": 6, "max_action_dim": 32, "num_inference_steps": 5, "time_sampling_beta_alpha": 1.5, "time_sampling_beta_beta": 1.0, "time_sampling_scale": 0.999, "time_sampling_offset": 0.001, "min_period": 0.004, "max_period": 4.0, "rtc_config": { "enabled": true, "prefix_attention_schedule": "LINEAR", "max_guidance_weight": 10.0, "execution_horizon": 10, "debug": false, "debug_maxlen": 100 }, "image_resolution": [ 224, 224 ], "empty_cameras": 0, "use_dataset_stats": false, "normalization_mapping": { "VISUAL": "IDENTITY", "STATE": "MIN_MAX", "ENV": "MIN_MAX", "ACTION": "QUANTILES" }, "action_tokenizer_name": "physical-intelligence/fast", "text_tokenizer_name": "google/paligemma-3b-pt-224", "max_action_tokens": 256, "fast_skip_tokens": 128, "max_decoding_steps": 200, "temperature": 0.0, "subtask_regeneration_interval": 1.5, "gradient_checkpointing": true, "compile_model": false, "compile_mode": "max-autotune", "freeze_vision_encoder": false, "train_expert_only": false, "knowledge_insulation": true, "action_encoding": "anchor", "loss_weight_flow": 1.0, "loss_weight_action_ce": 1.0, "loss_weight_subtask_ce": 1.0, "optimizer_lr": 2.5e-05, "optimizer_betas": [ 0.9, 0.95 ], "optimizer_eps": 1e-08, "optimizer_weight_decay": 0.1, "optimizer_grad_clip_norm": 1.0, "scheduler_warmup_steps": 1000, "scheduler_decay_steps": 30000, "scheduler_decay_lr": 2.5e-06, "tokenizer_max_length": 64, "task": "Pick up the orange cube and place it on the black X marker", "action_dim": 6, "drop_n_last_frames": 2, "critic_target_update_weight": 0.005, "num_critics": 1, "discount": 0.97, "reward_normalization_constant": 5.0, "terminal_failure_reward": -16.0, "online_steps": 20000, "online_buffer_capacity": 5000, "offline_buffer_capacity": 50000, "async_prefetch": false, "online_step_before_learning": 10, "policy_update_freq": 1, "grad_clip_norm": 2.0, "gradient_accumulation_steps": 16, "critic_lr": 5e-05, "actor_lr": 5e-05, "utd_ratio": 1, "actor_device": "cuda:0", "learner_device": "cuda:0", "use_separate_critic": true, "critic_llm_depth": 6, "critic_network_kwargs": { "hidden_dims": [ 256, 256 ], "activate_final": true }, "trainable_params": { "vision_encoder_from_layer": { "vision_tower": 5, "multi_modal_projector": true }, "language_from_layer": 0, "critic_language_from_layer": 5 }, "offline_steps": 10000, "inference_advantage": 1.0, "advantage_scaling": 0.2, "pi05_checkpoint": "outputs/pi05_base", "action_encoding_stats_path": "outputs/stats_jack/action_stats_anchor_jack_cube.pt", "dataset_stats": null, "storage_device": "cpu", "shared_encoder": false, "num_discrete_actions": null, "vision_encoder_name": null, "actor_learner_config": { "learner_host": "192.168.50.1", "learner_port": 50051, "policy_parameters_push_frequency": 180, "queue_get_timeout": 2 }, "concurrency": { "actor": "threads", "learner": "threads" } }, "reward_model": null, "output_dir": "outputs/jack_pi05_full_offline_training_val_anchor_action_v1", "job_name": "default", "resume": false, "seed": 42, "cudnn_deterministic": false, "num_workers": 4, "batch_size": 8, "prefetch_factor": 4, "persistent_workers": true, "steps": 100000, "eval_freq": 20000, "log_freq": 20, "tolerance_s": 0.0001, "save_checkpoint": true, "save_freq": 100, "use_policy_training_preset": true, "optimizer": { "type": "multi_adam", "lr": 0.001, "weight_decay": 0.1, "grad_clip_norm": 10.0, "optimizer_groups": { "actor": { "lr": 5e-05 }, "critic": { "lr": 5e-05 } } }, "scheduler": null, "eval": { "n_episodes": 50, "batch_size": 22, "use_async_envs": true }, "wandb": { "enable": true, "disable_artifact": true, "project": "so101_real_offline-v1", "entity": null, "notes": null, "run_id": "bfi8fh35", "mode": null, "offline_project": "so101_real_offline-v1", "add_tags": true }, "peft": null, "sample_weighting": null, "rename_map": {}, "checkpoint_path": null, "offline_output_dir": "outputs/jack_pi05_full_offline_training_val_anchor_action_v1", "offline_save_freq": 400, "buffer_cache_dir": null, "use_rerun": true, "video_logging_cameras": [ "top", "wrist" ], "episode_logging_freq": 4, "episode_save_freq": 10, "probe_parameters": { "enable_actions": true, "enable_representations": true, "enable_attention": true, "enable_offline_inference": true, "enable_spatial_memorization": true, "enable_action_drift_jacobian": true, "enable_spatial_memorization_jacobian": true, "output_dir": "outputs/probe", "mode": "all", "max_episodes": 1, "n_frames_per_episode": 128, "offline_inference_n_frames": 5, "random_seed": 42, "timestep": 0.5, "ref_max_episodes": 2, "ref_n_frames_per_episode": 256, "action_pca_dims": 50, "repr_pca_dims": 100, "umap_n_neighbors": 15, "umap_min_dist": 0.1, "umap_seed": 42, "sites": "prefix,suffix", "ep_3d_a": 0, "ep_3d_b": 1, "subtask_injection": false, "validation_batch_size": 32, "attn_eval_episodes": null, "attn_eval_subsample": 2, "spatial_layers": "0,9,17", "spatial_n_frames": 32 }, "val_dataset_path": "outputs/annotated_dataset_validation", "val_split": 0.0, "val_freq": 400, "val_on_start": false, "skip_critic": true }