andyjzhao committed on
Commit
d9ebe71
·
verified ·
1 Parent(s): fee41b8

Create hydra_cfg.yaml

Browse files
Files changed (1) hide show
  1. hydra_cfg.yaml +243 -0
hydra_cfg.yaml ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+ seed: 0
3
+ mode: Stage1
4
+ alias: GeneZip-7R_12.8K-100B
5
+ data_alias: ${.data}_${max_len}
6
+ model_alias: ${oc.select:model.name,UnknownModel}
7
+ uid: ofagcfej
8
+ project_root: ${hydra:runtime.cwd}
9
+ version: NA
10
+ device: cuda
11
+ use_routing_floor: true
12
+ cluster: mila
13
+ dirs:
14
+ data_cache: ${project_root}/data_cache/
15
+ data_storage: ${project_root}/data/
16
+ temp: ${project_root}/temp/working_dir/${uid}/
17
+ output: ${project_root}/output/${data_alias}/${alias}/
18
+ hydra: ${project_root}/temp/hydra/
19
+ wandb_cache: ${oc.env:WANDB_CACHE_DIR,${project_root}/temp/wandb_cache/}
20
+ use_wandb: true
21
+ wandb:
22
+ name: GeneZip-7R_12.8K-100B
23
+ project: ${oc.select:env.vars.wandb_proj,DNAFM}
24
+ entity: ${oc.select:env.vars.wandb_entity,${oc.env:WANDB_ENTITY,null}}
25
+ tags:
26
+ - ${mode}
27
+ dir: ${dirs.wandb_cache}
28
+ mode: online
29
+ id: ofagcfej
30
+ step_metric: null
31
+ url: https://wandb.ai/liuxxiiiii/DNAFM/runs/ofagcfej
32
+ logging:
33
+ level: info
34
+ log_wandb_metric_to_stdout: true
35
+ max_data_samples: null
36
+ data: gencode_human_12.8k
37
+ dataset: ${_dataset_cfg_lookup[${data}]}
38
+ max_len: 12800
39
+ source: ${dataset.type}
40
+ max_length: ${max_len}
41
+ valid_test_downsample: null
42
+ tokenizer: fast
43
+ RC_augmentation: false
44
+ reference_loss: null
45
+ hf_token: ${oc.env:HUGGINGFACE_HUB_TOKEN,null}
46
+ _dataset_cfg_lookup:
47
+ hg38_cds_4m:
48
+ type: refseq
49
+ path: data/hg38_cds_dataset_4m_filtered
50
+ hf_path: null
51
+ hg38_12k:
52
+ type: refseq
53
+ path: data/hg38_cds_anchored_len12800_mincds150_1000000samples
54
+ hf_path: jzshared/hg38_12800
55
+ hg38_128k:
56
+ type: refseq
57
+ path: data/hg38_cds_anchored_128000
58
+ hf_path: jzshared/hg38_cds_anchored_128000
59
+ gencode128k_debug:
60
+ type: refseq
61
+ path: data/gencode128k_debug
62
+ hf_path: jzshared/gencode128k_debug
63
+ gencode_human_12.8k:
64
+ type: refseq
65
+ path: data/gencode_human_12.8k
66
+ hf_path: jzshared/gencode_human_12.8k
67
+ gencode_human_128k:
68
+ type: refseq
69
+ path: data/gencode_human_128k
70
+ hf_path: jzshared/gencode_human_128k
71
+ gencode128k_basic:
72
+ type: refseq
73
+ path: data/gencode128k_basic
74
+ hf_path: jzshared/gencode128k_basic
75
+ hf_repo: jzshared/GeneZip-7R_12.8K-100B
76
+ hf_repo_owner: jzshared
77
+ upload_to_hf: true
78
+ private: false
79
+ name: hnet_base
80
+ training_alias: ${mode}_glm_s1_${region_info}_bp${bp_per_token}_aw${warmup_steps}_amax${alpha_max}_smax${strictness_max}_lr${lr}_e${epochs}_ms${max_train_steps}_maxlen${max_len}
81
+ lr: 4.068919838510263e-08
82
+ grad_acc_steps: 1
83
+ eval_steps: 500
84
+ save_steps: 1000
85
+ epochs: 200
86
+ max_train_steps: 30600
87
+ max_eval_samples: 1000
88
+ batch_size: 32
89
+ eval_batch_size: ${batch_size}
90
+ training:
91
+ learning_rate: ${lr}
92
+ do_train: true
93
+ group_by_length: false
94
+ remove_unused_columns: false
95
+ label_names:
96
+ - input_ids
97
+ disable_tqdm: false
98
+ num_train_epochs: ${epochs}
99
+ max_train_steps: ${max_train_steps}
100
+ eval_strategy: steps
101
+ eval_steps: ${eval_steps}
102
+ save_strategy: steps
103
+ save_steps: ${save_steps}
104
+ logging_steps: 10
105
+ report_to: null
106
+ per_device_train_batch_size: ${batch_size}
107
+ per_device_eval_batch_size: ${eval_batch_size}
108
+ gradient_accumulation_steps: ${grad_acc_steps}
109
+ dataloader_num_workers: 1
110
+ dataloader_drop_last: true
111
+ gradient_checkpointing: false
112
+ max_grad_norm: 2.0
113
+ weight_decay: 0.1
114
+ use_lr_multiplier: true
115
+ lr_scheduler_type: linear
116
+ warmup_steps: 500
117
+ adam_beta1: 0.9
118
+ adam_beta2: 0.95
119
+ bf16: true
120
+ overrides: {}
121
+ output_dir: ${dirs.output}
122
+ region_info: promoter1_cds2_utr4_exon4_intron16_nig8_dig32
123
+ bp_per_token: 128
124
+ warmup_steps: 0
125
+ alpha_max: 0.03
126
+ alpha_exp: 1.0
127
+ strictness_max: 1.0
128
+ strictness_exp: 1.0
129
+ arch: hnet
130
+ config_path: null
131
+ model:
132
+ name: hnet_mamba_64m_2dc
133
+ arch: hnet
134
+ r_hi: 0.3
135
+ r_low: 0.0
136
+ min_routing_tokens: 8
137
+ r_warm_up_start: 200
138
+ r_warm_up_end: 750
139
+ model_cfg:
140
+ arch_layout:
141
+ - m2
142
+ - - m2
143
+ - - m15
144
+ - m2
145
+ - m2
146
+ n_gpt: 1.0
147
+ d_model:
148
+ - 512
149
+ - 512
150
+ - 768
151
+ d_intermediate:
152
+ - 0
153
+ - 0
154
+ - 2048
155
+ vocab_size: 12
156
+ ssm_cfg:
157
+ chunk_size: 256
158
+ head_dim: 64
159
+ d_conv: 4
160
+ d_state: 64
161
+ expand: 2
162
+ attn_cfg:
163
+ num_heads:
164
+ - 8
165
+ - 8
166
+ - 12
167
+ rotary_emb_dim:
168
+ - 16
169
+ - 16
170
+ - 24
171
+ window_size:
172
+ - 511
173
+ - 511
174
+ - -1
175
+ tie_embeddings: true
176
+ r_hi: ${r_hi}
177
+ r_low: ${r_low}
178
+ min_routing_tokens: ${min_routing_tokens}
179
+ r_warm_up_start: ${r_warm_up_start}
180
+ r_warm_up_end: ${r_warm_up_end}
181
+ rank: 0
182
+ local_rank: 0
183
+ world_size: 8
184
+ is_distributed: true
185
+ device_type: GPU
186
+ cmd: python src/scripts/train_genezip_v1.py exp=glm/stage1 data=gencode_human_12.8k
187
+ model=hnet/mamba_64m_2dc max_len=12800 batch_size=32 grad_acc_steps=1 max_train_steps=30600
188
+ eval_steps=500 bp_per_token=128 region_info=promoter1_cds2_utr4_exon4_intron16_nig8_dig32
189
+ alias=GeneZip-7R_12.8K-100B use_wandb=true upload_to_hf=true hf_repo=jzshared/GeneZip-7R_12.8K-100B
190
+ master_port: '40617'
191
+ summary:
192
+ model_size: 69.8M
193
+ max_gpu_mem: 55.857
194
+ model_size: 69.8M
195
+ max_gpu_mem: 55.857
196
+ loss: 1.0907
197
+ grad_norm: 0.03689846768975258
198
+ loss_ce: 1.0496642589569092
199
+ router_selected_tokens_s0: 460.53125
200
+ router_trigger_rate_s0: 0.0
201
+ router_avg_deficit_s0: 0.0
202
+ router_selected_tokens_s1: 17.875
203
+ router_trigger_rate_s1: 0.03125
204
+ router_avg_deficit_s1: 1.0
205
+ loss_region: 0.060342203825712204
206
+ comp_strictness: 1.0
207
+ comp_rl_weight: 0.03
208
+ loss_total: 1.1100064516067505
209
+ tokens_trained: 100.260990464
210
+ epoch: 8.680851063829786
211
+ step: 30600
212
+ eval_runtime: 0.5367
213
+ eval_ppl: 2.8175418770540306
214
+ eval_bp_per_token_promoter: 261.42059336823735
215
+ eval_bp_per_token_cds: 332.3425925925926
216
+ eval_bp_per_token_utr: 752.6538461538462
217
+ eval_bp_per_token_exon: 690.0
218
+ eval_bp_per_token_intron: 1128.3223388305846
219
+ eval_bp_per_token_nig: 1050.814049586777
220
+ eval_ppl_promoter: 3.124982450778065
221
+ eval_ppl_cds: 3.7295588949788616
222
+ eval_ppl_utr: 3.440777494381466
223
+ eval_ppl_exon: 3.106529824327725
224
+ eval_ppl_intron: 2.9497514633424493
225
+ eval_ppl_nig: 2.570548504715622
226
+ eval_F: 0.0013347396932051983
227
+ eval_G: 0.004359940369588607
228
+ eval_avg_bp_per_token: 749.209756097561
229
+ eval_F_promoter: 0.003825253347931159
230
+ eval_G_promoter: 0.009358177417820473
231
+ eval_F_cds: 0.0030089432479870728
232
+ eval_G_cds: 0.007192112232886635
233
+ eval_F_utr: 0.0013286320200316829
234
+ eval_G_utr: 0.004329032845316572
235
+ eval_F_exon: 0.0014492753623188406
236
+ eval_G_exon: 0.004401843674516908
237
+ eval_F_intron: 0.0008862715605156054
238
+ eval_G_intron: 0.0035734517902154027
239
+ eval_F_nig: 0.0009516431574104296
240
+ eval_G_nig: 0.003343231843572673
241
+ train_runtime: 10669.8105
242
+ total_flos: 0.0
243
+ train_loss: 1.2369615290523355