# VideoMAE_AutoGaze / config.yaml
# Hugging Face page header (not part of the config): uploader avatar "bfshi",
# commit message "Upload folder using huggingface_hub", commit 81bdb87 (verified).
# Algorithm section. `_target_` is the dotted path of the class to build
# (presumably resolved Hydra-style by the consumer — confirm); the remaining
# keys are nested under `algorithm` so they no longer collide with other
# sections' keys at the top level.
algorithm:
  _target_: gengaze.algorithms.GRPO
  group_size: 1
  std_normalize: false
  discount_factor: 1
  has_loss_on_eos_tokens: false
  optimize_task_loss_prediction: false
# Dataset section: a VideoFolder dataset reading 16-frame clips.
dataset:
  _target_: gengaze.datasets.video_folder.VideoFolder
  # Comma-separated list of five dataset directories. The paths are absolute
  # and machine-specific; adjust them before running elsewhere.
  root: /home/baifengs/baifengs/data/gengaze/100DoH_res448_250K,/home/baifengs/baifengs/data/gengaze/Ego4D_res448_250K,/home/baifengs/baifengs/data/gengaze/InternVid_res448_250K,/home/baifengs/baifengs/data/gengaze/scanning_SAM_res448_50K,/home/baifengs/baifengs/data/gengaze/scanning_idl_res448_50K
  clip_len: 16
  # NOTE(review): the model section spells its key `frame_sampling_rate`;
  # the two names differ, so check which one each consumer reads.
  frame_sample_rate: 1
  # No ground-truth gazing-position files are configured for either split.
  gt_gazing_pos_paths:
    train: null
    val: null
  random_sample_frame: false
# Model section: VideoRandomGaze with three sampling sub-configs. Each
# sub-config names a `sample_strategy` and carries one parameter block per
# available strategy (presumably only the block matching `sample_strategy`
# is read — confirm against the model code).
model:
  _target_: gengaze.models.video_random_gaze.VideoRandomGaze
  gazing_ratio_config:
    sample_strategy: exponential
    fixed:
      gazing_ratio: 0.5
    uniform:
      gazing_ratio_min: 0
      gazing_ratio_max: 1
    exponential:
      gazing_ratio_min: 0.02
      gazing_ratio_max: 0.15
      lambda: 10
  gazing_ratio_each_frame_config:
    sample_strategy: dirichlet
    dirichlet:
      # Comma-separated string with 16 entries, matching dataset.clip_len: 16
      # (presumably one concentration value per frame — confirm). Quoted so
      # it is unambiguously a string.
      alpha: '10,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3'
  gazing_prob_each_scale_config:
    sample_strategy: dirichlet
    dirichlet:
      alpha: 0.5
  # NOTE(review): the three keys below are placed at model level by
  # inference (the task section carries an identical `scales` value);
  # confirm against the model's constructor signature.
  # '+'-separated string of scales; quoted so it stays a string.
  scales: '32+64+112+224'
  # 265 = 4 + 16 + 49 + 196, which equals the sum of (scale / 16)^2 over the
  # four scales above — presumably 16-pixel patches; confirm.
  num_vision_tokens_each_frame: 265
  frame_sampling_rate: 1
# Task section: VideoMAE-style reconstruction task built on a pretrained
# ViT-MAE checkpoint.
task:
  _target_: gengaze.tasks.video_mae_reconstruction.VideoMAEReconstruction
  recon_model: facebook/vit-mae-large
  recon_sample_rate: 0.125
  attn_mode: flash_attention_2
  recon_model_config:
    scale_embed: true
    max_num_frames: 256
    time_embed: true
    causal: true
    # NOTE(review): the loss keys below are nested under recon_model_config
    # by inference; the flattened source does not show whether they belong
    # here or directly under `task`. Confirm against the consuming code.
    # `loss_type` and `loss_weights` are '+'-separated strings with three
    # entries each (presumably paired positionally — confirm).
    loss_type: l1+dinov2_reg+siglip2
    loss_weights: '1+0.3+0.3'
    l1_loss_config: null
    dinov2_reg_loss_config:
      model: facebook/dinov2-with-registers-base
    siglip2_loss_config:
      model: google/siglip2-base-patch16-224
  # Same value as model.scales. NOTE(review): placed at task level by
  # inference; confirm it is not a recon_model_config key.
  scales: '32+64+112+224'
# Trainer section: optimisation, logging and schedule settings.
trainer:
  _target_: gengaze.trainer.Trainer
  batch_size: 512
  per_gpu_max_batch_size: 8
  lr: 0.0002
  min_lr: 1.0e-05
  lr_schedule: linear_w_warmup
  optimizer: adam
  # Only the task model is flagged for training in this config.
  train_gaze: false
  train_task: true
  train_w_ntp: false
  val_nsteps: 2000
  n_epochs: 50
  logdir: exps/
  exp_name_prefix: ''
  # Quoted so the digits-and-underscore name stays a string rather than
  # being read as an integer by YAML 1.1 parsers.
  exp_name: '250819_1751'
  exp_name_suffix: ''
  resume: auto
  gaze_weights: null
  task_weights: null
  seed: 666
  val_only: false
  # NOTE(review): temp_schedule_args and val_args are nested under `trainer`
  # by inference (neither has its own `_target_`); confirm against Trainer.
  # `mode` names one of the parameter blocks below (presumably only that
  # block is read — confirm).
  temp_schedule_args:
    mode: exp
    exp:
      temp_start: 10000.0
      temp_end: 1.0
    neg_cosine:
      temp_min: 1.0
      temp_max: 10000.0
      num_period: 1
  val_args:
    sample_gaze_for_reconstruction_oracle: 0