andyjzhao committed on
Commit
e349d05
·
verified ·
1 Parent(s): 436934f

Upload hydra_cfg.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. hydra_cfg.yaml +332 -0
hydra_cfg.yaml ADDED
@@ -0,0 +1,332 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ RC_augmentation: false
2
+ _dataset_cfg_lookup:
3
+ dlb_cmp_gm12878:
4
+ eval_split: validation
5
+ hf_path: jzshared/dlb_cmp_gm12878
6
+ label_key: label_ut
7
+ mask_key: mask_ut
8
+ num_workers: 0
9
+ path: data/dlb_cmp_gm12878
10
+ pin_memory: true
11
+ reference_id: hg38
12
+ sequence_format: string
13
+ sequence_key: sequence
14
+ shuffle: true
15
+ test_split: test
16
+ train_split: train
17
+ type: cmp_seq
18
+ dlb_cmp_h1hesc:
19
+ eval_split: validation
20
+ hf_path: jzshared/dlb_cmp_h1hesc
21
+ label_key: label_ut
22
+ mask_key: mask_ut
23
+ num_workers: 0
24
+ path: data/dlb_cmp_h1hesc
25
+ pin_memory: true
26
+ reference_id: hg38
27
+ sequence_format: string
28
+ sequence_key: sequence
29
+ shuffle: true
30
+ test_split: test
31
+ train_split: train
32
+ type: cmp_seq
33
+ dlb_cmp_hct116:
34
+ eval_split: validation
35
+ hf_path: jzshared/dlb_cmp_hct116
36
+ label_key: label_ut
37
+ mask_key: mask_ut
38
+ num_workers: 0
39
+ path: data/dlb_cmp_hct116
40
+ pin_memory: true
41
+ reference_id: hg38
42
+ sequence_format: string
43
+ sequence_key: sequence
44
+ shuffle: true
45
+ test_split: test
46
+ train_split: train
47
+ type: cmp_seq
48
+ dlb_cmp_hff:
49
+ eval_split: validation
50
+ hf_path: jzshared/dlb_cmp_hff
51
+ label_key: label_ut
52
+ mask_key: mask_ut
53
+ num_workers: 0
54
+ path: data/dlb_cmp_hff
55
+ pin_memory: true
56
+ reference_id: hg38
57
+ sequence_format: string
58
+ sequence_key: sequence
59
+ shuffle: true
60
+ test_split: test
61
+ train_split: train
62
+ type: cmp_seq
63
+ dlb_cmp_imr90:
64
+ eval_split: validation
65
+ hf_path: jzshared/dlb_cmp_imr90
66
+ label_key: label_ut
67
+ mask_key: mask_ut
68
+ num_workers: 0
69
+ path: data/dlb_cmp_imr90
70
+ pin_memory: true
71
+ reference_id: hg38
72
+ sequence_format: string
73
+ sequence_key: sequence
74
+ shuffle: true
75
+ test_split: test
76
+ train_split: train
77
+ type: cmp_seq
78
+ gencode128k_basic:
79
+ hf_path: jzshared/gencode128k_basic
80
+ path: data/gencode128k_basic
81
+ type: refseq
82
+ gencode128k_debug:
83
+ hf_path: jzshared/gencode128k_debug
84
+ path: data/gencode128k_debug
85
+ type: refseq
86
+ gencode_human_12.8k:
87
+ hf_path: jzshared/gencode_human_12.8k
88
+ path: data/gencode_human_12.8k
89
+ type: refseq
90
+ gencode_human_128k:
91
+ hf_path: jzshared/gencode_human_128k
92
+ path: data/gencode_human_128k
93
+ type: refseq
94
+ hg38_128k:
95
+ hf_path: jzshared/hg38_cds_anchored_128000
96
+ path: data/hg38_cds_anchored_128000
97
+ type: refseq
98
+ hg38_12k:
99
+ hf_path: jzshared/hg38_12800
100
+ path: data/hg38_cds_anchored_len12800_mincds150_1000000samples
101
+ type: refseq
102
+ hg38_cds_4m:
103
+ hf_path: null
104
+ path: data/hg38_cds_dataset_4m_filtered
105
+ type: refseq
106
+ orca32m_cmp_seq:
107
+ eval_split: validation
108
+ hf_path: jzshared/orca32m_cmp
109
+ label_key: label_ut
110
+ mask_key: mask_ut
111
+ num_workers: 0
112
+ path: data/orca32m_cmp_seq
113
+ pin_memory: true
114
+ reference_id: hg38
115
+ sequence_format: string
116
+ sequence_key: sequence
117
+ shuffle: true
118
+ test_split: test
119
+ train_split: train
120
+ type: cmp_seq
121
+ _unimportant_cfg:
122
+ fields:
123
+ - gpus
124
+ - debug
125
+ - wandb
126
+ - env
127
+ - uid
128
+ - local_rank
129
+ - is_distributed
130
+ - master_port
131
+ - device_type
132
+ - cluster
133
+ - world_size
134
+ - train_dataset
135
+ - eval_datasets
136
+ - user_cfg
137
+ - rank
138
+ - device
139
+ - hf_access_token
140
+ - hf_private
141
+ - hf_repo
142
+ - hf_user
143
+ - hf_token
144
+ - save_every
145
+ - eval_steps
146
+ - save_steps
147
+ - upload_to_hf
148
+ - logging
149
+ - log_every
150
+ - use_wandb
151
+ - project_root
152
+ - version
153
+ postfix:
154
+ - _path
155
+ - _file
156
+ - _dir
157
+ - _alias
158
+ - _prefix
159
+ prefix:
160
+ - _
161
+ alias: HNet_Ori-BPT3
162
+ alpha_exp: 1.0
163
+ alpha_max: 0.03
164
+ arch: hnet
165
+ batch_size: 8
166
+ bp_per_token: 3
167
+ cluster: mila
168
+ cmd: python src/scripts/train_genezip_v1.py exp=glm/stage1 data=gencode_human_12.8k
169
+ model=hnet/mamba_64m max_len=12800 batch_size=8 grad_acc_steps=4 max_train_steps=30600
170
+ eval_steps=500 num_valid_samples=3000 upload_to_hf=true wandb.project=DNAFM_v2 alpha_max=0.03
171
+ use_routing_floor=false strictness_max=0 region_info=promoter1_cds1_utr1_exon1_intron1_nig1_dig1
172
+ alias=HNet_Ori-BPT3 bp_per_token=3 use_wandb=true hf_repo=jzshared/HNet_Ori-BPT3
173
+ config_path: null
174
+ data: gencode_human_12.8k
175
+ data_alias: ${.data}_${max_len}
176
+ dataset: ${_dataset_cfg_lookup[${data}]}
177
+ device: cuda
178
+ device_type: GPU
179
+ dirs:
180
+ data_cache: ${project_root}/data_cache/
181
+ data_storage: ${project_root}/data/
182
+ hydra: ${project_root}/temp/hydra/
183
+ output: ${project_root}/output/${data_alias}/${alias}/
184
+ temp: ${project_root}/temp/working_dir/${uid}/
185
+ wandb_cache: ${oc.env:WANDB_CACHE_DIR,${project_root}/temp/wandb_cache/}
186
+ epochs: 200
187
+ eval_batch_size: ${batch_size}
188
+ eval_steps: 500
189
+ grad_acc_steps: 4
190
+ hf_private: false
191
+ hf_repo: jzshared/HNet_Ori-BPT3
192
+ hf_user: jzshared
193
+ is_distributed: true
194
+ local_rank: 0
195
+ log_every: 10
196
+ logging:
197
+ level: info
198
+ log_wandb_metric_to_stdout: true
199
+ lr: 0.001
200
+ master_port: '45521'
201
+ max_data_samples: null
202
+ max_eval_samples: ${num_valid_samples}
203
+ max_grad_norm: 2.0
204
+ max_len: 12800
205
+ max_length: ${max_len}
206
+ max_routing_tokens: 0
207
+ max_train_steps: 30600
208
+ min_routing_tokens: 8
209
+ mixed_precision: bf16
210
+ mode: Formal
211
+ model:
212
+ arch: hnet
213
+ name: hnet_mamba_64m
214
+ model_alias: ${oc.select:model.name,UnknownModel}
215
+ model_cfg:
216
+ arch_layout:
217
+ - m4
218
+ - - m15
219
+ - m4
220
+ attn_cfg:
221
+ num_heads:
222
+ - 8
223
+ - 12
224
+ rotary_emb_dim:
225
+ - 16
226
+ - 24
227
+ window_size:
228
+ - 511
229
+ - -1
230
+ d_intermediate:
231
+ - 0
232
+ - 2048
233
+ d_model:
234
+ - 512
235
+ - 768
236
+ max_routing_tokens: ${max_routing_tokens}
237
+ min_routing_tokens: ${min_routing_tokens}
238
+ n_gpt: 1.0
239
+ r_hi: ${r_hi}
240
+ r_low: ${r_low}
241
+ r_warm_up_end: ${r_warm_up_end}
242
+ r_warm_up_start: ${r_warm_up_start}
243
+ ssm_cfg:
244
+ chunk_size: 256
245
+ d_conv: 4
246
+ d_state: 64
247
+ expand: 2
248
+ head_dim: 64
249
+ tie_embeddings: true
250
+ vocab_size: 12
251
+ name: hnet_base
252
+ num_test_samples: 0
253
+ num_train_samples: 0
254
+ num_valid_samples: 3000
255
+ pretrained_ckpt: null
256
+ project_root: ${hydra:runtime.cwd}
257
+ r_hi: 0.3
258
+ r_low: 0.0
259
+ r_warm_up_end: 750
260
+ r_warm_up_start: 200
261
+ rank: 0
262
+ reference_loss: null
263
+ region_info: promoter1_cds1_utr1_exon1_intron1_nig1_dig1
264
+ region_label_map:
265
+ cds: 1
266
+ dig: 6
267
+ exon: 3
268
+ intron: 4
269
+ nig: 5
270
+ promoter: 0
271
+ utr: 2
272
+ save_steps: 3000
273
+ seed: 0
274
+ source: ${dataset.type}
275
+ strictness_exp: 1.0
276
+ strictness_max: 0
277
+ tokenizer: fast
278
+ train_steps: 9999999
279
+ training:
280
+ adam_beta1: 0.9
281
+ adam_beta2: 0.95
282
+ bf16: true
283
+ dataloader_drop_last: true
284
+ dataloader_num_workers: 1
285
+ disable_tqdm: false
286
+ do_train: true
287
+ eval_steps: ${eval_steps}
288
+ eval_strategy: steps
289
+ gradient_accumulation_steps: ${grad_acc_steps}
290
+ gradient_checkpointing: false
291
+ group_by_length: false
292
+ hnet_initializer_range: 0.02
293
+ hnet_lr_multiplier: null
294
+ label_names:
295
+ - input_ids
296
+ learning_rate: ${lr}
297
+ logging_steps: ${log_every}
298
+ lr_scheduler_type: linear
299
+ max_grad_norm: ${max_grad_norm}
300
+ max_train_steps: ${max_train_steps}
301
+ num_train_epochs: ${epochs}
302
+ output_dir: ${dirs.output}
303
+ overrides: {}
304
+ per_device_eval_batch_size: ${eval_batch_size}
305
+ per_device_train_batch_size: ${batch_size}
306
+ remove_unused_columns: false
307
+ report_to: null
308
+ save_steps: ${save_steps}
309
+ save_strategy: steps
310
+ use_lr_multiplier: true
311
+ warmup_steps: 500
312
+ weight_decay: 0.1
313
+ training_alias: ${mode}_glm_s1_${region_info}_bp${bp_per_token}_aw${warmup_steps}_amax${alpha_max}_smax${strictness_max}_lr${lr}_e${epochs}_ms${max_train_steps}_maxlen${max_len}
314
+ uid: y8ucb1be
315
+ upload_to_hf: true
316
+ use_routing_ceiling: false
317
+ use_routing_floor: false
318
+ use_wandb: true
319
+ valid_test_downsample: null
320
+ version: NA
321
+ wandb:
322
+ dir: ${dirs.wandb_cache}
323
+ entity: ${oc.select:env.vars.wandb_entity,${oc.env:WANDB_ENTITY,null}}
324
+ id: y8ucb1be
325
+ mode: online
326
+ name: HNet_Ori-BPT3
327
+ project: DNAFM_v2
328
+ step_metric: null
329
+ tags: []
330
+ url: https://wandb.ai/jzshared/DNAFM_v2/runs/y8ucb1be
331
+ warmup_steps: 0
332
+ world_size: 8