File size: 4,622 Bytes
aff9928
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fcaef65
aff9928
 
 
 
 
fcaef65
aff9928
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fcaef65
aff9928
 
 
 
 
 
 
 
fcaef65
aff9928
 
 
 
 
 
fcaef65
aff9928
fcaef65
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# RC presumably = reverse-complement augmentation (DNA context) — TODO confirm.
RC_augmentation: false
# Internal dataset registry (leading underscore marks it as a lookup table);
# the top-level `dataset` key selects the entry named by ${data}.
_dataset_cfg_lookup:
  gencode128k_basic:
    hf_path: jzshared/gencode128k_basic
    path: data/gencode128k_basic
    type: refseq
  gencode128k_debug:
    hf_path: jzshared/gencode128k_debug
    path: data/gencode128k_debug
    type: refseq
  gencode_human_12.8k:
    hf_path: jzshared/gencode_human_12.8k
    path: data/gencode_human_12.8k
    type: refseq
  gencode_human_128k:
    hf_path: jzshared/gencode_human_128k
    path: data/gencode_human_128k
    type: refseq
  hg38_128k:
    hf_path: jzshared/hg38_cds_anchored_128000
    path: data/hg38_cds_anchored_128000
    type: refseq
  hg38_12k:
    hf_path: jzshared/hg38_12800
    path: data/hg38_cds_anchored_len12800_mincds150_1000000samples
    type: refseq
  hg38_cds_4m:
    # No HuggingFace mirror for this dataset; local path only.
    hf_path: null
    path: data/hg38_cds_dataset_4m_filtered
    type: refseq
# Short run name; reused in output paths and as the W&B run name below.
alias: CKPT_DEBUG
alpha_exp: 1.0
alpha_max: 0.03
arch: hnet
batch_size: 32
# Base pairs per token (nucleotide grouping factor).
bp_per_token: 3
cluster: mila
# Exact CLI invocation that produced this resolved config (reproducibility).
cmd: python src/scripts/train_genezip_v1.py exp=glm/stage1 data=gencode_human_12.8k
  model=hnet/mamba_64m_2dc max_len=12800 batch_size=32 grad_acc_steps=1 max_train_steps=20
  eval_steps=10 save_steps=10 alpha_max=0.03 use_routing_floor=false strictness_max=0
  region_info=promoter1_cds1_utr1_exon1_intron1_nig1_dig1 alias=CKPT_DEBUG bp_per_token=3
  use_wandb=true upload_to_hf=true hf_repo=jzshared/ckpt_debug
config_path: null
# Dataset key; must match an entry in _dataset_cfg_lookup.
data: gencode_human_12.8k
# NOTE(review): original value was garbled ("${.data}_$11,492"); reconstructed
# as an interpolation of max_length — confirm against run logs.
data_alias: ${.data}_${max_length}
dataset: ${_dataset_cfg_lookup[${data}]}
device: cuda
device_type: GPU
dirs:
  data_cache: ${project_root}/data_cache/
  data_storage: ${project_root}/data/
  hydra: ${project_root}/temp/hydra/
  output: ${project_root}/output/${data_alias}/${alias}/
  temp: ${project_root}/temp/working_dir/${uid}/
  # WANDB_CACHE_DIR env var wins; otherwise a project-local cache dir.
  wandb_cache: ${oc.env:WANDB_CACHE_DIR,${project_root}/temp/wandb_cache/}
epochs: 200
eval_batch_size: ${batch_size}
eval_steps: 10
grad_acc_steps: 1
hf_repo: jzshared/ckpt_debug
hf_repo_owner: jzshared
is_distributed: true
local_rank: 0
logging:
  level: info
  log_wandb_metric_to_stdout: true
lr: 0.001
# Quoted so the port stays a string rather than an int.
master_port: '46235'
max_data_samples: null
max_eval_samples: 1000
max_len: 12800
# NOTE(review): original value was garbled ("$11,492"); reconstructed as the
# integer 11492 — confirm (cf. max_len: 12800 above).
max_length: 11492
max_train_steps: 20
min_routing_tokens: 8
mode: Stage1
# Short model descriptor; the full architecture spec is in model_cfg below.
model:
  arch: hnet
  name: hnet_mamba_64m_2dc
# Falls back to UnknownModel when model.name is absent.
model_alias: ${oc.select:model.name,UnknownModel}
model_cfg:
  # Hierarchical stage layout; nesting depth encodes the hierarchy levels
  # (presumably "m<N>" = N Mamba blocks — confirm against the model code).
  arch_layout:
  - m2
  - - m2
    - - m15
    - m2
  - m2
  # Per-stage attention settings (one list entry per hierarchy level).
  attn_cfg:
    num_heads:
    - 8
    - 8
    - 12
    rotary_emb_dim:
    - 16
    - 16
    - 24
    # -1 presumably means no windowing (full attention) at the last stage.
    window_size:
    - 511
    - 511
    - -1
  d_intermediate:
  - 0
  - 0
  - 2048
  d_model:
  - 512
  - 512
  - 768
  min_routing_tokens: ${min_routing_tokens}
  n_gpt: 1.0
  # Routing-ratio schedule, mirrored from the top-level keys of the same names.
  r_hi: ${r_hi}
  r_low: ${r_low}
  r_warm_up_end: ${r_warm_up_end}
  r_warm_up_start: ${r_warm_up_start}
  ssm_cfg:
    chunk_size: 256
    d_conv: 4
    d_state: 64
    expand: 2
    head_dim: 64
  tie_embeddings: true
  # Small vocabulary, consistent with nucleotide-level tokenization.
  vocab_size: 12
name: hnet_base
# Whether uploaded artifacts (HF repo) are private.
private: false
project_root: ${hydra:runtime.cwd}
# Routing-ratio schedule endpoints; mirrored into model_cfg via interpolation.
r_hi: 0.3
r_low: 0.0
r_warm_up_end: 750
r_warm_up_start: 200
rank: 0
reference_loss: null
region_info: promoter1_cds1_utr1_exon1_intron1_nig1_dig1
save_steps: 10
seed: 0
# Data source type resolved from the selected dataset entry ("refseq").
source: ${dataset.type}
strictness_exp: 1.0
strictness_max: 0
tokenizer: fast
# Trainer-style arguments (names match HuggingFace Trainer conventions);
# most values mirror top-level keys via ${...} interpolation.
training:
  adam_beta1: 0.9
  adam_beta2: 0.95
  bf16: true
  dataloader_drop_last: true
  dataloader_num_workers: 1
  disable_tqdm: false
  do_train: true
  eval_steps: ${eval_steps}
  eval_strategy: steps
  gradient_accumulation_steps: ${grad_acc_steps}
  gradient_checkpointing: false
  group_by_length: false
  label_names:
  - input_ids
  learning_rate: ${lr}
  logging_steps: 10
  lr_scheduler_type: linear
  max_grad_norm: 2.0
  max_train_steps: ${max_train_steps}
  num_train_epochs: ${epochs}
  output_dir: ${dirs.output}
  overrides: {}
  per_device_eval_batch_size: ${eval_batch_size}
  per_device_train_batch_size: ${batch_size}
  remove_unused_columns: false
  # null disables the trainer's own reporting; W&B is configured separately.
  report_to: null
  save_steps: ${save_steps}
  save_strategy: steps
  use_lr_multiplier: true
  # NOTE: differs from the top-level warmup_steps (0) — two distinct knobs.
  warmup_steps: 500
  weight_decay: 0.1
# Human-readable run identifier composed from the key hyperparameters.
# NOTE(review): the trailing "maxlen" segment was garbled ("maxlen$11,492") in
# the original; reconstructed as ${max_length} to match the interpolation style
# of the other segments — confirm against run logs.
training_alias: ${mode}_glm_s1_${region_info}_bp${bp_per_token}_aw${warmup_steps}_amax${alpha_max}_smax${strictness_max}_lr${lr}_e${epochs}_ms${max_train_steps}_maxlen${max_length}
# Unique run id; also reused as the W&B run id below and in dirs.temp.
uid: 98p9y5w8
upload_to_hf: true
use_routing_floor: false
use_wandb: true
valid_test_downsample: null
# Plain string "NA" (not null — YAML null forms are ~/null only).
version: NA
wandb:
  dir: ${dirs.wandb_cache}
  # Entity resolved from env config, then WANDB_ENTITY env var, else null.
  entity: ${oc.select:env.vars.wandb_entity,${oc.env:WANDB_ENTITY,null}}
  id: 98p9y5w8
  mode: online
  name: CKPT_DEBUG
  project: ${oc.select:env.vars.wandb_proj,DNAFM}
  step_metric: null
  tags:
  - ${mode}
  url: https://wandb.ai/jzshared/DNAFM/runs/98p9y5w8
# NOTE: 0 here while training.warmup_steps is 500 — they are separate knobs;
# this one feeds the "aw" segment of training_alias.
warmup_steps: 0
world_size: 4