bfshi committed on
Commit
81bdb87
·
verified ·
1 Parent(s): cfe86bf

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. config.yaml +91 -0
  2. videomae.pt +3 -0
config.yaml ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ algorithm:
2
+ _target_: gengaze.algorithms.GRPO
3
+ group_size: 1
4
+ std_normalize: false
5
+ discount_factor: 1
6
+ has_loss_on_eos_tokens: false
7
+ optimize_task_loss_prediction: false
8
+ dataset:
9
+ _target_: gengaze.datasets.video_folder.VideoFolder
10
+ root: /home/baifengs/baifengs/data/gengaze/100DoH_res448_250K,/home/baifengs/baifengs/data/gengaze/Ego4D_res448_250K,/home/baifengs/baifengs/data/gengaze/InternVid_res448_250K,/home/baifengs/baifengs/data/gengaze/scanning_SAM_res448_50K,/home/baifengs/baifengs/data/gengaze/scanning_idl_res448_50K
11
+ clip_len: 16
12
+ frame_sample_rate: 1
13
+ gt_gazing_pos_paths:
14
+ train: null
15
+ val: null
16
+ random_sample_frame: false
17
+ model:
18
+ _target_: gengaze.models.video_random_gaze.VideoRandomGaze
19
+ gazing_ratio_config:
20
+ sample_strategy: exponential
21
+ fixed:
22
+ gazing_ratio: 0.5
23
+ uniform:
24
+ gazing_ratio_min: 0
25
+ gazing_ratio_max: 1
26
+ exponential:
27
+ gazing_ratio_min: 0.02
28
+ gazing_ratio_max: 0.15
29
+ lambda: 10
30
+ gazing_ratio_each_frame_config:
31
+ sample_strategy: dirichlet
32
+ dirichlet:
33
+ alpha: 10,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3
34
+ gazing_prob_each_scale_config:
35
+ sample_strategy: dirichlet
36
+ dirichlet:
37
+ alpha: 0.5
38
+ scales: 32+64+112+224
39
+ num_vision_tokens_each_frame: 265
40
+ frame_sampling_rate: 1
41
+ task:
42
+ _target_: gengaze.tasks.video_mae_reconstruction.VideoMAEReconstruction
43
+ recon_model: facebook/vit-mae-large
44
+ recon_sample_rate: 0.125
45
+ attn_mode: flash_attention_2
46
+ recon_model_config:
47
+ scale_embed: true
48
+ max_num_frames: 256
49
+ time_embed: true
50
+ causal: true
51
+ loss_type: l1+dinov2_reg+siglip2
52
+ loss_weights: 1+0.3+0.3
53
+ l1_loss_config: null
54
+ dinov2_reg_loss_config:
55
+ model: facebook/dinov2-with-registers-base
56
+ siglip2_loss_config:
57
+ model: google/siglip2-base-patch16-224
58
+ scales: 32+64+112+224
59
+ trainer:
60
+ _target_: gengaze.trainer.Trainer
61
+ batch_size: 512
62
+ per_gpu_max_batch_size: 8
63
+ lr: 0.0002
64
+ min_lr: 1.0e-05
65
+ lr_schedule: linear_w_warmup
66
+ optimizer: adam
67
+ train_gaze: false
68
+ train_task: true
69
+ train_w_ntp: false
70
+ val_nsteps: 2000
71
+ n_epochs: 50
72
+ logdir: exps/
73
+ exp_name_prefix: ''
74
+ exp_name: '250819_1751'
75
+ exp_name_suffix: ''
76
+ resume: auto
77
+ gaze_weights: null
78
+ task_weights: null
79
+ seed: 666
80
+ val_only: false
81
+ temp_schedule_args:
82
+ mode: exp
83
+ exp:
84
+ temp_start: 10000.0
85
+ temp_end: 1.0
86
+ neg_cosine:
87
+ temp_min: 1.0
88
+ temp_max: 10000.0
89
+ num_period: 1
90
+ val_args:
91
+ sample_gaze_for_reconstruction_oracle: 0
videomae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef08ef3468e0409460ad5505fd8208cd1cccb124691448aaf06ded24345f7e3e
3
+ size 2038058888