| algorithm: | |
| _target_: gengaze.algorithms.GRPO | |
| group_size: 1 | |
| std_normalize: false | |
| discount_factor: 1 | |
| has_loss_on_eos_tokens: false | |
| optimize_task_loss_prediction: false | |
| dataset: | |
| _target_: gengaze.datasets.video_folder.VideoFolder | |
| root: /home/baifengs/baifengs/data/gengaze/100DoH_res448_250K,/home/baifengs/baifengs/data/gengaze/Ego4D_res448_250K,/home/baifengs/baifengs/data/gengaze/InternVid_res448_250K,/home/baifengs/baifengs/data/gengaze/scanning_SAM_res448_50K,/home/baifengs/baifengs/data/gengaze/scanning_idl_res448_50K | |
| clip_len: 16 | |
| frame_sample_rate: 1 | |
| gt_gazing_pos_paths: | |
| train: null | |
| val: null | |
| random_sample_frame: false | |
| model: | |
| _target_: gengaze.models.video_random_gaze.VideoRandomGaze | |
| gazing_ratio_config: | |
| sample_strategy: exponential | |
| fixed: | |
| gazing_ratio: 0.5 | |
| uniform: | |
| gazing_ratio_min: 0 | |
| gazing_ratio_max: 1 | |
| exponential: | |
| gazing_ratio_min: 0.02 | |
| gazing_ratio_max: 0.15 | |
| lambda: 10 | |
| gazing_ratio_each_frame_config: | |
| sample_strategy: dirichlet | |
| dirichlet: | |
| alpha: 10,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3 | |
| gazing_prob_each_scale_config: | |
| sample_strategy: dirichlet | |
| dirichlet: | |
| alpha: 0.5 | |
| scales: 32+64+112+224 | |
| num_vision_tokens_each_frame: 265 | |
| frame_sampling_rate: 1 | |
| task: | |
| _target_: gengaze.tasks.video_mae_reconstruction.VideoMAEReconstruction | |
| recon_model: facebook/vit-mae-large | |
| recon_sample_rate: 0.125 | |
| attn_mode: flash_attention_2 | |
| recon_model_config: | |
| scale_embed: true | |
| max_num_frames: 256 | |
| time_embed: true | |
| causal: true | |
| loss_type: l1+dinov2_reg+siglip2 | |
| loss_weights: 1+0.3+0.3 | |
| l1_loss_config: null | |
| dinov2_reg_loss_config: | |
| model: facebook/dinov2-with-registers-base | |
| siglip2_loss_config: | |
| model: google/siglip2-base-patch16-224 | |
| scales: 32+64+112+224 | |
| trainer: | |
| _target_: gengaze.trainer.Trainer | |
| batch_size: 512 | |
| per_gpu_max_batch_size: 8 | |
| lr: 0.0002 | |
| min_lr: 1.0e-05 | |
| lr_schedule: linear_w_warmup | |
| optimizer: adam | |
| train_gaze: false | |
| train_task: true | |
| train_w_ntp: false | |
| val_nsteps: 2000 | |
| n_epochs: 50 | |
| logdir: exps/ | |
| exp_name_prefix: '' | |
| exp_name: '250819_1751' | |
| exp_name_suffix: '' | |
| resume: auto | |
| gaze_weights: null | |
| task_weights: null | |
| seed: 666 | |
| val_only: false | |
| temp_schedule_args: | |
| mode: exp | |
| exp: | |
| temp_start: 10000.0 | |
| temp_end: 1.0 | |
| neg_cosine: | |
| temp_min: 1.0 | |
| temp_max: 10000.0 | |
| num_period: 1 | |
| val_args: | |
| sample_gaze_for_reconstruction_oracle: 0 | |