checkpoint: broadcast_via_filesystem: 'False' dcp_allow_mismatched_size: 'False' dcp_async_mode_enabled: 'False' jit: device: cuda dtype: bfloat16 enabled: 'False' input_shape: null strict: 'True' keys_not_to_resume: [] load_ema_to_reg: 'False' load_path: '' load_training_state: 'False' only_load_scheduler_state: 'False' save_iter: '100' strict_resume: 'True' type: _target_: callbacks: null verbose: 'True' data_config: null dataloader_train: _target_: batch_sampler: null batch_size: '2' collate_fn: null dataset: _target_: data_fps: '30.0' dataset_dir: /home/ubuntu/pdt-mimic/data/push-mimic exclude_with_substring: null include_only_with_substrings: null is_multi_img: 'False' is_val: 'False' num_frames: '61' obs_history: '5' val_ratio: '0.0' video_size: - '480' - '640' drop_last: 'True' generator: null in_order: 'False' multiprocessing_context: null num_workers: '12' persistent_workers: 'True' pin_memory: 'True' pin_memory_device: '' prefetch_factor: '8' sampler: _target_: dataset: _target_: data_fps: '30.0' dataset_dir: /home/ubuntu/pdt-mimic/data/push-mimic exclude_with_substring: null include_only_with_substrings: null is_multi_img: 'False' is_val: 'False' num_frames: '61' obs_history: '5' val_ratio: '0.0' video_size: - '480' - '640' shuffle: null timeout: '0' worker_init_fn: null dataloader_val: _target_: batch_sampler: null batch_size: '1' collate_fn: null dataset: _target_: data_fps: '30.0' dataset_dir: /home/ubuntu/pdt-mimic/data/push-mimic exclude_with_substring: null include_only_with_substrings: null is_multi_img: 'False' is_val: 'True' num_frames: '61' obs_history: '5' val_ratio: '0.0' video_size: - '480' - '640' drop_last: 'False' generator: null in_order: 'False' multiprocessing_context: null num_workers: '0' persistent_workers: 'False' pin_memory: 'False' pin_memory_device: '' prefetch_factor: null sampler: _target_: dataset: _target_: data_fps: '30.0' dataset_dir: /home/ubuntu/pdt-mimic/data/push-mimic exclude_with_substring: null include_only_with_substrings: null is_multi_img: 'False' is_val: 'True' num_frames: '61' obs_history: '5' val_ratio: '0.0' video_size: - '480' - '640' shuffle: null timeout: '0' worker_init_fn: null defaults: - _self_ - data_config: null - video_dataset_train: null - video_dataset_val: null - dataloader_train: null - dataloader_val: null - world2action_pipe: null - optimizer: fusedadamw - scheduler: constant - model: null - callbacks: - basic - net: null - ema: null - checkpoint: null - ckpt_type: null - experiment: null job: group: video2world name: v2w_push_lora_rank32_lr1.778e-04_bsz32 project: posttraining model: _recursive_: 'False' _target_: config: adjust_video_noise: true debug_without_randomness: false fsdp_shard_size: 0 high_sigma_ratio: 0.05 init_lora_weights: true input_image_key: images input_video_key: video lora_alpha: 32 lora_rank: 32 lora_target_modules: q_proj,k_proj,v_proj,output_proj,x_embedder.proj.1,linear_1,linear_2,mlp.layer1,mlp.layer2 loss_reduce: mean loss_scale: 100.0 model_manager_config: _target_: cosmos_predict2.models.video2world_model.Predict2ModelManagerConfig dit_path: /home/ubuntu/pdt-mimic/mimic-video/model/checkpoints/video_backbone//v2w_pretrained_cosmos.pt text_encoder_path: '' pipe_config: adjust_video_noise: true conditioner: _target_: fps: _target_: dropout_rate: '0.0' dtype: null input_key: fps output_key: fps padding_mask: _target_: dropout_rate: '0.0' dtype: null input_key: padding_mask output_key: padding_mask text: _target_: dropout_rate: '0.0' input_key: - obs/language_embedding use_video_condition: _target_: dropout_rate: '0.0' input_key: fps output_key: use_video_condition conditioning_strategy: frame_replace ema: _target_: cosmos_predict2.configs.defaults.ema.EMAConfig enabled: 'False' iteration_shift: '0' rate: '0.1' guardrail_config: checkpoint_dir: /home/ubuntu/pdt-mimic/mimic-video/model/checkpoints enabled: false offload_model_to_cpu: true input_image_key: images input_video_key: video max_num_conditional_frames: 2 min_num_conditional_frames: 1 net: _target_: adaln_lora_dim: '256' atten_backend: minimal_a2a concat_padding_mask: 'True' extra_per_block_abs_pos_emb: 'False' in_channels: '16' max_frames: '128' max_img_h: '240' max_img_w: '240' model_channels: '2048' num_blocks: '28' num_heads: '16' out_channels: '16' patch_spatial: '2' patch_temporal: '1' pos_emb_cls: rope3d pos_emb_interpolation: crop pos_emb_learnable: 'True' rope_enable_fps_modulation: 'False' rope_h_extrapolation_ratio: '3.0' rope_t_extrapolation_ratio: '1.0' rope_w_extrapolation_ratio: '3.0' sac_config: _target_: cosmos_predict2.models.text2image_dit.SACConfig every_n_blocks: '1' mode: predict2_2b_720 use_adaln_lora: 'True' precision: bfloat16 rectified_flow_loss_weight_uniform: true rectified_flow_t_scaling_factor: 1.0 resize_online: false resolution: '480' sigma_conditional: 0.0001 sigma_data: 1.0 state_ch: 16 state_t: 16 text_encoder: cls: !!python/object/apply:imaginaire.constants.TextEncoderClass - t5 t5: ckpt_path: /home/ubuntu/pdt-mimic/mimic-video/model/checkpoints/text_encoder/t5-11b embed_dim: 1024 num_tokens: 512 timestamps: is_forward: false nfe: 35 order: 7.0 t_max: 80.0 t_min: 0.002 tokenizer: _target_: chunk_duration: '81' load_mean_std: 'False' name: tokenizer temporal_window: '16' vae_pth: /home/ubuntu/pdt-mimic/mimic-video/model/checkpoints/video_backbone//tokenizer/tokenizer.pth precision: bfloat16 train_architecture: lora model_parallel: _cpu_offloading_context: null async_tensor_model_parallel_allreduce: false autocast_dtype: torch.float32 barrier_with_L1_time: true batch_p2p_comm: true batch_p2p_sync: true bf16: false context_parallel_size: 1 cpu_offloading: false cpu_offloading_activations: false cpu_offloading_num_layers: 0 cpu_offloading_weights: false cross_entropy_fusion_impl: native cross_entropy_loss_fusion: false deallocate_pipeline_outputs: false defer_embedding_wgrad_compute: false deterministic_mode: false enable_autocast: false expert_model_parallel_size: 1 expert_tensor_parallel_size: 1 finalize_model_grads_func: null fp16: false grad_scale_func: null grad_sync_func: null gradient_accumulation_fusion: false hierarchical_context_parallel_sizes: null microbatch_group_size_per_vp_stage: 1 moe_extended_tp: false no_sync_func: null num_microbatches_with_partial_activation_checkpoints: null overlap_p2p_comm: false overlap_p2p_comm_warmup_flush: false param_sync_func: null params_dtype: torch.float32 perform_initialization: true pipeline_dtype: null pipeline_model_parallel_comm_backend: null pipeline_model_parallel_size: 1 pipeline_model_parallel_split_rank: null sequence_parallel: false tensor_model_parallel_size: 1 timers: null tp_comm_atomic_ag: false tp_comm_atomic_rs: false tp_comm_bootstrap_backend: nccl tp_comm_bulk_dgrad: true tp_comm_bulk_wgrad: true tp_comm_overlap: false tp_comm_overlap_ag: true tp_comm_overlap_disable_fc1: false tp_comm_overlap_disable_qkv: false tp_comm_overlap_rs: true tp_comm_overlap_rs_dgrad: false tp_comm_split_ag: true tp_comm_split_rs: true use_cpu_initialization: false use_ring_exchange_p2p: false use_te_rng_tracker: false variable_seq_lengths: false virtual_pipeline_model_parallel_size: null wgrad_deferral_limit: 0 optimizer: _target_: betas: - '0.9' - '0.99' capturable: 'True' eps: 1e-08 lr: '4.445e-05' master_weights: 'True' model: null optim_type: fusedadam weight_decay: '0.1' scheduler: _target_: trainer: callbacks: device_monitor: _target_: every_n: '1000' log_memory_detail: 'True' step_size: '1' ema: _target_: config: null trainer: null grad_clip: _target_: clip_norm: '10.0' force_finite: 'True' log_wandb: 'False' iter_speed: _target_: every_n: '1000' hit_thres: '5' low_prec: _target_: config: null trainer: null update_iter: '1' manual_gc: _target_: every_n: '5' warm_up: '5' progress_bar: _target_: config: null trainer: null video_eval: _target_: fuse_lora: 'True' cudnn: benchmark: 'True' deterministic: 'False' ddp: broadcast_buffers: 'True' find_unused_parameters: 'False' static_graph: 'True' distributed_parallelism: ddp grad_accum_iter: '4' grad_scaler_args: enabled: 'False' logging_iter: '1000' max_iter: '500' max_val_iter: null memory_format: torch.preserve_format profiling: enable_memory_snapshot: 'False' enable_profiling: 'False' first_n_rank: '4' profile_freq: '1' profile_memory: 'True' record_shape: 'True' with_modules: 'True' with_stack: 'True' run_validation: 'False' seed: '0' timeout_period: '999999999' type: validation_iter: '999999999' video_dataset_train: _target_: data_fps: '30.0' dataset_dir: /home/ubuntu/pdt-mimic/data/push-mimic exclude_with_substring: null include_only_with_substrings: null is_multi_img: 'False' is_val: 'False' num_frames: '61' obs_history: '5' val_ratio: '0.0' video_size: - '480' - '640' video_dataset_val: _target_: data_fps: '30.0' dataset_dir: /home/ubuntu/pdt-mimic/data/push-mimic exclude_with_substring: null include_only_with_substrings: null is_multi_img: 'False' is_val: 'True' num_frames: '61' obs_history: '5' val_ratio: '0.0' video_size: - '480' - '640' world2action_pipe: null