andyjzhao committed · verified
Commit 12bf15f · Parent: e92bf6a

Upload hydra_cfg.yaml with huggingface_hub
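For reference, a minimal sketch of the kind of call that produces a commit like this, using the standard huggingface_hub `upload_file` API (the repo id matches `hf_repo` in the config below; the script itself is an assumption, not the author's actual code):

```python
from huggingface_hub import HfApi

# Illustrative only: the standard way to push a single file to the Hub.
# repo_id comes from hf_repo in this config; everything else is assumed.
api = HfApi()
api.upload_file(
    path_or_fileobj="hydra_cfg.yaml",      # local config file
    path_in_repo="hydra_cfg.yaml",         # destination path inside the repo
    repo_id="jzshared/Mamba2_12.8K-100B",
    commit_message="Upload hydra_cfg.yaml with huggingface_hub",
)
```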

Files changed (1):
  hydra_cfg.yaml (+32, -10)
hydra_cfg.yaml CHANGED
@@ -28,6 +28,21 @@ _dataset_cfg_lookup:
     hf_path: null
     path: data/hg38_cds_dataset_4m_filtered
     type: refseq
+  orca32m_cmp_seq:
+    eval_split: validation
+    hf_path: jzshared/orca32m_cmp
+    label_key: label_ut
+    mask_key: mask_ut
+    num_workers: 0
+    path: data/orca32m_cmp_seq
+    pin_memory: true
+    reference_id: hg38
+    sequence_format: string
+    sequence_key: sequence
+    shuffle: true
+    test_split: test
+    train_split: train
+    type: cmp_seq
 alias: Mamba2_12.8K-100B
 alpha_exp: 1.0
 alpha_max: 0.03
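The new orca32m_cmp_seq entry wires in a Hugging Face dataset with explicit split names, column keys, and DataLoader flags. A minimal sketch of a consumer that honors these fields, assuming a standard datasets + torch setup (the repo's actual data pipeline may differ):

```python
from datasets import load_dataset
from torch.utils.data import DataLoader

# Field values come from the config entry above; the loading code is assumed.
ds = load_dataset("jzshared/orca32m_cmp", split="train")  # hf_path + train_split

loader = DataLoader(
    ds,
    batch_size=8,      # batch_size from the launch cmd below
    shuffle=True,      # shuffle: true
    num_workers=0,     # num_workers: 0 (load in the main process)
    pin_memory=True,   # pin_memory: true
)

batch = next(iter(loader))
sequences = batch["sequence"]   # sequence_key (raw strings; sequence_format: string)
labels = batch["label_ut"]      # label_key
masks = batch["mask_ut"]        # mask_key
```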
@@ -37,8 +52,8 @@ bp_per_token: 128
 cluster: mila
 cmd: python src/scripts/train_genezip_v1.py exp=glm/stage1 data=gencode_human_12.8k
   model=hnet/mamba2 max_len=12800 batch_size=8 grad_acc_steps=4 max_train_steps=30600
-  eval_steps=100 use_routing_floor=false strictness_max=0 alias=Mamba2_12.8K-100B
-  use_wandb=true upload_to_hf=true hf_repo=jzshared/Mamba2_12.8K-100B
+  eval_steps=100 upload_to_hf=true wandb.project=DNAFM_v2 use_routing_floor=false
+  strictness_max=0 alias=Mamba2_12.8K-100B use_wandb=true hf_repo=jzshared/Mamba2_12.8K-100B
 config_path: null
 data: gencode_human_12.8k
 data_alias: ${.data}_$11,268
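The only substantive change to the launch command is the added wandb.project=DNAFM_v2 override; the remaining flags are just reordered. Hydra dotted overrides write through to nested config keys; a minimal OmegaConf sketch of the effect (illustrative, not the training script):

```python
from omegaconf import OmegaConf

# Nested default, as in the wandb block further down in this config.
cfg = OmegaConf.create({"wandb": {"project": "DNAFM"}})

# Equivalent of passing `wandb.project=DNAFM_v2` on the command line.
OmegaConf.update(cfg, "wandb.project", "DNAFM_v2")
assert cfg.wandb.project == "DNAFM_v2"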
@@ -60,17 +75,20 @@ hf_repo: jzshared/Mamba2_12.8K-100B
 hf_user: jzshared
 is_distributed: true
 local_rank: 0
+log_every: 10
 logging:
   level: info
   log_wandb_metric_to_stdout: true
 lr: 0.001
-master_port: '37739'
+master_port: '43837'
 max_data_samples: null
-max_eval_samples: 1000
+max_eval_samples: ${num_valid_samples}
+max_grad_norm: 2.0
 max_len: 12800
 max_length: $11,268
 max_train_steps: 30600
 min_routing_tokens: 8
+mixed_precision: bf16
 mode: Stage1
 model:
   arch: hnet
@@ -105,6 +123,9 @@ model_cfg:
   tie_embeddings: true
   vocab_size: 12
 name: hnet_base
+num_test_samples: 0
+num_train_samples: 0
+num_valid_samples: 0
 private: false
 project_root: ${hydra:runtime.cwd}
 r_hi: 0.3
@@ -120,6 +141,7 @@ source: ${dataset.type}
 strictness_exp: 1.0
 strictness_max: 0
 tokenizer: fast
+train_steps: 9999999
 training:
   adam_beta1: 0.9
   adam_beta2: 0.95
@@ -136,9 +158,9 @@ training:
   label_names:
   - input_ids
   learning_rate: ${lr}
-  logging_steps: 10
+  logging_steps: ${log_every}
   lr_scheduler_type: linear
-  max_grad_norm: 2.0
+  max_grad_norm: ${max_grad_norm}
   max_train_steps: ${max_train_steps}
   num_train_epochs: ${epochs}
   output_dir: ${dirs.output}
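This hunk replaces two hard-coded training values with interpolations of the new top-level keys added earlier in the diff (log_every: 10, max_grad_norm: 2.0), so each value is defined once and overridable in one place. A minimal OmegaConf sketch of how such interpolations resolve (values from this config):

```python
from omegaconf import OmegaConf

cfg = OmegaConf.create(
    """
    log_every: 10
    max_grad_norm: 2.0
    training:
      logging_steps: ${log_every}
      max_grad_norm: ${max_grad_norm}
    """
)
assert cfg.training.logging_steps == 10  # follows the top-level key
cfg.log_every = 50                       # one override updates both views
assert cfg.training.logging_steps == 50  # interpolations resolve lazily
```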
@@ -153,7 +175,7 @@ training:
   warmup_steps: 500
   weight_decay: 0.1
 training_alias: ${mode}_glm_s1_${region_info}_bp${bp_per_token}_aw${warmup_steps}_amax${alpha_max}_smax${strictness_max}_lr${lr}_e${epochs}_ms${max_train_steps}_maxlen$11,268
-uid: tkh89gtk
+uid: 8vsgvsbs
 upload_to_hf: true
 use_routing_floor: false
 use_wandb: true
@@ -162,13 +184,13 @@ version: NA
 wandb:
   dir: ${dirs.wandb_cache}
   entity: ${oc.select:env.vars.wandb_entity,${oc.env:WANDB_ENTITY,null}}
-  id: tkh89gtk
+  id: 8vsgvsbs
   mode: online
   name: Mamba2_12.8K-100B
-  project: ${oc.select:env.vars.wandb_proj,DNAFM}
+  project: DNAFM_v2
   step_metric: null
   tags:
   - ${mode}
-  url: https://wandb.ai/jzshared/DNAFM/runs/tkh89gtk
+  url: https://wandb.ai/jzshared/DNAFM_v2/runs/8vsgvsbs
 warmup_steps: 0
 world_size: 8
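For context on the resolvers kept in the wandb block: oc.select falls back through a config path to a default, and oc.env reads an environment variable with an optional default. A minimal sketch of the oc.env behavior (variable name from this config; the surrounding setup is assumed):

```python
import os
from omegaconf import OmegaConf

cfg = OmegaConf.create({"entity": "${oc.env:WANDB_ENTITY,null}"})

os.environ["WANDB_ENTITY"] = "jzshared"
assert cfg.entity == "jzshared"  # resolved from the environment on access

del os.environ["WANDB_ENTITY"]
assert cfg.entity is None        # falls back to the null default
```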
 