# Training configuration for a LIBERO run: two 224x224 camera views and a
# model built by fastwam.runtime.create_fastwam. The `_target_` entries look
# like Hydra instantiate targets (dotted import paths).
#
# NOTE(review): this file arrived collapsed onto three physical lines, which
# is not parseable YAML. The nesting below was reconstructed from key order;
# every key, value and key order is unchanged. Where the parent of a key
# could not be determined from the text alone there is a NOTE(review) —
# confirm those against the code that consumes this config.

# NOTE(review): the run directory name contains "1e-4" but learning_rate
# below is 6.0e-05 — confirm which one is intended.
output_dir: ./runs/libero_uncond_2cam224_1e-4/2026-04-22_12x12_h100x2_trainableonly_resume_from2000_nw8_pin_on

# Data-loader settings.
batch_size: 32
num_workers: 8
pin_memory: true

# Optimiser schedule and run length.
lr_scheduler_type: cosine
learning_rate: 6.0e-05
num_epochs: 30
max_steps: null

# Logging, saving and evaluation cadence.
# NOTE(review): save_every is 0 — presumably this disables periodic saves
# and leaves saving to the `checkpoint` section below; confirm.
log_every: 10
save_every: 0
eval_every: 1000
eval_num_inference_steps: 10
eval_enable_video: false
eval_save_video: false
eval_enable_action_metrics: true

# Cleanup performed before a checkpoint is written.
pre_save_cleanup: true
pre_save_cleanup_sleep_seconds: 5.0
pre_save_cleanup_malloc_trim: true

gradient_accumulation_steps: 1
mixed_precision: bf16
seed: 42
max_grad_norm: 1.0
weight_decay: 0.01

# Checkpoint inputs.
# NOTE(review): `resume` is null while `resume_training_state` points at
# latest_training.pt inside this run's own output_dir — confirm how the
# trainer prioritises `resume`, `init_checkpoint` and `resume_training_state`.
resume: null
init_checkpoint: ./checkpoints/fastwam_release/libero_uncond_2cam224.pt
resume_training_state: ./runs/libero_uncond_2cam224_1e-4/2026-04-22_12x12_h100x2_trainableonly_resume_from2000_nw8_pin_on/checkpoints/latest_training.pt

checkpoint:
  policy: auto
  lightweight_resume_backend: trainable_only
  trainable_only_include_optimizer_state: false
  save_latest: true
  save_best_action_l1: true
  save_best_action_l2: true

wandb:
  enabled: false
  workspace: null
  project: fast-wam
  # NOTE(review): this name says "resume_from65000" / "20260425" while
  # output_dir says "resume_from2000" / "2026-04-22". It has no effect while
  # `enabled` is false, but fix it before turning logging on.
  name: libero_12x12_trainableonly_resume_from65000_20260425
  group: null
  mode: online

data:
  train:
    _target_: fastwam.datasets.lerobot.robot_video_dataset.RobotVideoDataset
    dataset_dirs:
      - ./data/libero_mujoco3.3.2/libero_spatial_no_noops_lerobot
      - ./data/libero_mujoco3.3.2/libero_object_no_noops_lerobot
      - ./data/libero_mujoco3.3.2/libero_goal_no_noops_lerobot
      - ./data/libero_mujoco3.3.2/libero_10_no_noops_lerobot
    # This shape_meta is repeated verbatim under `processor` below; keep the
    # two copies in sync when editing either one.
    shape_meta:
      images:
        - key: image
          raw_shape: [3, 512, 512]
          shape: [3, 224, 224]
        - key: wrist_image
          raw_shape: [3, 512, 512]
          shape: [3, 224, 224]
      action:
        - key: default
          raw_shape: 7
          shape: 7
      state:
        - key: default
          raw_shape: 8
          shape: 8
    num_frames: 33
    global_sample_stride: 1
    action_video_freq_ratio: 4
    # Presumably height x width of the two 224x224 views placed side by side
    # (see concat_multi_camera: horizontal) — TODO confirm axis order.
    video_size: [224, 448]
    camera_key: null
    val_set_proportion: 0.0
    is_training_set: true
    skip_padding_as_possible: false
    concat_multi_camera: horizontal
    processor:
      _target_: fastwam.datasets.lerobot.processors.fastwam_processor.FastWAMProcessor
      shape_meta:
        images:
          - key: image
            raw_shape: [3, 512, 512]
            shape: [3, 224, 224]
          - key: wrist_image
            raw_shape: [3, 512, 512]
            shape: [3, 224, 224]
        action:
          - key: default
            raw_shape: 7
            shape: 7
        state:
          - key: default
            raw_shape: 8
            shape: 8
      num_obs_steps: 33
      num_output_cameras: 2
      action_output_dim: 7
      proprio_output_dim: 8
      # One flag per action dimension (7 entries, matching action shape 7);
      # only the last entry is false.
      delta_action_dim_mask:
        default: [true, true, true, true, true, true, false]
      action_state_transforms: null
      use_stepwise_action_norm: false
      norm_default_mode: min/max
      norm_exception_mode: null
      action_state_merger:
        _target_: fastwam.datasets.lerobot.transforms.action_state_merger.ConcatLeftAlign
      # NOTE(review): train_transforms / val_transforms are assumed to be
      # processor arguments; they could instead belong to `data.train`.
      train_transforms:
        - _target_: fastwam.datasets.lerobot.transforms.image.ToTensor
        - _target_: torchvision.transforms.Resize
          size: [224, 224]
      val_transforms:
        - _target_: fastwam.datasets.lerobot.transforms.image.ToTensor
        - _target_: torchvision.transforms.Resize
          size: [224, 224]
    # NOTE(review): the next two keys are assumed to be dataset arguments
    # (siblings of `processor`); confirm they are not processor arguments.
    text_embedding_cache_dir: ./data/text_embeds_cache/libero
    # Same value as model.tokenizer_max_len (128).
    context_len: 128

model:
  _target_: fastwam.runtime.create_fastwam
  model_id: Wan-AI/Wan2.2-TI2V-5B
  tokenizer_model_id: Wan-AI/Wan2.1-T2V-1.3B
  tokenizer_max_len: 128
  load_text_encoder: false
  # Same value as the processor's proprio_output_dim (8).
  proprio_dim: 8
  redirect_common_files: true
  mot_checkpoint_mixed_attn: false
  action_dit_pretrained_path: checkpoints/ActionDiT_linear_interp_Wan22_alphascale_1024hdim.pt
  skip_dit_load_from_pretrain: false
  video_dit_config:
    has_image_input: false
    patch_size: [1, 2, 2]
    in_dim: 48
    hidden_dim: 3072
    ffn_dim: 14336
    freq_dim: 256
    text_dim: 4096
    out_dim: 48
    num_heads: 24
    attn_head_dim: 128
    num_layers: 30
    eps: 1.0e-06
    # The spelling "seperated" is the key the consumer reads; do not correct
    # it here without changing the consumer as well.
    seperated_timestep: true
    require_clip_embedding: false
    require_vae_embedding: false
    fuse_vae_embedding_in_latents: true
    use_gradient_checkpointing: false
    # NOTE(review): the four keys below are assumed to belong to
    # video_dit_config; they could instead be direct children of `model`.
    video_attention_mask_mode: first_frame_causal
    action_conditioned: false
    action_dim: 7
    action_group_causal_mask_mode: group_diagonal
  action_dit_config:
    action_dim: 7
    # NOTE(review): num_heads * attn_head_dim = 3072, which equals
    # video_dit_config.hidden_dim rather than this hidden_dim (1024) —
    # presumably intentional so both branches share attention width; confirm.
    hidden_dim: 1024
    ffn_dim: 4096
    num_heads: 24
    attn_head_dim: 128
    num_layers: 30
    text_dim: 4096
    freq_dim: 256
    eps: 1.0e-06
    use_gradient_checkpointing: false
  # NOTE(review): the schedulers and `loss` are assumed to be arguments of
  # create_fastwam (children of `model`); confirm they are not top-level.
  video_scheduler:
    train_shift: 5.0
    infer_shift: 5.0
    num_train_timesteps: 1000
  action_scheduler:
    train_shift: 5.0
    infer_shift: 5.0
    num_train_timesteps: 1000
  loss:
    lambda_video: 1.0
    lambda_action: 1.0

# NOTE(review): `pfd` is assumed to be a top-level section and the three
# lambda_* weights are assumed to be its direct children; either could be
# nested differently (e.g. `pfd` under `model`) — confirm against the
# trainer.
pfd:
  enabled: true
  stage: s1
  training_mode: action512_partial
  adapter:
    type: mlp
    hidden_dim: 512
    depth: 3
    freq_dim: 256
  partial_unfreeze:
    action_last_layers: 12
    video_last_layers: 12
  lambda_gt: 1.0
  lambda_res: 0.5
  lambda_teacher: 0.1