Upload hydra_cfg.yaml with huggingface_hub
Browse files- hydra_cfg.yaml +32 -10
hydra_cfg.yaml
CHANGED
|
@@ -28,6 +28,21 @@ _dataset_cfg_lookup:
|
|
| 28 |
hf_path: null
|
| 29 |
path: data/hg38_cds_dataset_4m_filtered
|
| 30 |
type: refseq
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
alias: Mamba2_12.8K-100B
|
| 32 |
alpha_exp: 1.0
|
| 33 |
alpha_max: 0.03
|
|
@@ -37,8 +52,8 @@ bp_per_token: 128
|
|
| 37 |
cluster: mila
|
| 38 |
cmd: python src/scripts/train_genezip_v1.py exp=glm/stage1 data=gencode_human_12.8k
|
| 39 |
model=hnet/mamba2 max_len=12800 batch_size=8 grad_acc_steps=4 max_train_steps=30600
|
| 40 |
-
eval_steps=100
|
| 41 |
-
|
| 42 |
config_path: null
|
| 43 |
data: gencode_human_12.8k
|
| 44 |
data_alias: ${.data}_${max_len}
|
|
@@ -60,17 +75,20 @@ hf_repo: jzshared/Mamba2_12.8K-100B
|
|
| 60 |
hf_user: jzshared
|
| 61 |
is_distributed: true
|
| 62 |
local_rank: 0
|
|
|
|
| 63 |
logging:
|
| 64 |
level: info
|
| 65 |
log_wandb_metric_to_stdout: true
|
| 66 |
lr: 0.001
|
| 67 |
-
master_port: '
|
| 68 |
max_data_samples: null
|
| 69 |
-
max_eval_samples:
|
|
|
|
| 70 |
max_len: 12800
|
| 71 |
max_length: ${max_len}
|
| 72 |
max_train_steps: 30600
|
| 73 |
min_routing_tokens: 8
|
|
|
|
| 74 |
mode: Stage1
|
| 75 |
model:
|
| 76 |
arch: hnet
|
|
@@ -105,6 +123,9 @@ model_cfg:
|
|
| 105 |
tie_embeddings: true
|
| 106 |
vocab_size: 12
|
| 107 |
name: hnet_base
|
|
|
|
|
|
|
|
|
|
| 108 |
private: false
|
| 109 |
project_root: ${hydra:runtime.cwd}
|
| 110 |
r_hi: 0.3
|
|
@@ -120,6 +141,7 @@ source: ${dataset.type}
|
|
| 120 |
strictness_exp: 1.0
|
| 121 |
strictness_max: 0
|
| 122 |
tokenizer: fast
|
|
|
|
| 123 |
training:
|
| 124 |
adam_beta1: 0.9
|
| 125 |
adam_beta2: 0.95
|
|
@@ -136,9 +158,9 @@ training:
|
|
| 136 |
label_names:
|
| 137 |
- input_ids
|
| 138 |
learning_rate: ${lr}
|
| 139 |
-
logging_steps:
|
| 140 |
lr_scheduler_type: linear
|
| 141 |
-
max_grad_norm:
|
| 142 |
max_train_steps: ${max_train_steps}
|
| 143 |
num_train_epochs: ${epochs}
|
| 144 |
output_dir: ${dirs.output}
|
|
@@ -153,7 +175,7 @@ training:
|
|
| 153 |
warmup_steps: 500
|
| 154 |
weight_decay: 0.1
|
| 155 |
training_alias: ${mode}_glm_s1_${region_info}_bp${bp_per_token}_aw${warmup_steps}_amax${alpha_max}_smax${strictness_max}_lr${lr}_e${epochs}_ms${max_train_steps}_maxlen${max_len}
|
| 156 |
-
uid:
|
| 157 |
upload_to_hf: true
|
| 158 |
use_routing_floor: false
|
| 159 |
use_wandb: true
|
|
@@ -162,13 +184,13 @@ version: NA
|
|
| 162 |
wandb:
|
| 163 |
dir: ${dirs.wandb_cache}
|
| 164 |
entity: ${oc.select:env.vars.wandb_entity,${oc.env:WANDB_ENTITY,null}}
|
| 165 |
-
id:
|
| 166 |
mode: online
|
| 167 |
name: Mamba2_12.8K-100B
|
| 168 |
-
project:
|
| 169 |
step_metric: null
|
| 170 |
tags:
|
| 171 |
- ${mode}
|
| 172 |
-
url: https://wandb.ai/jzshared/
|
| 173 |
warmup_steps: 0
|
| 174 |
world_size: 8
|
|
|
|
| 28 |
hf_path: null
|
| 29 |
path: data/hg38_cds_dataset_4m_filtered
|
| 30 |
type: refseq
|
| 31 |
+
orca32m_cmp_seq:
|
| 32 |
+
eval_split: validation
|
| 33 |
+
hf_path: jzshared/orca32m_cmp
|
| 34 |
+
label_key: label_ut
|
| 35 |
+
mask_key: mask_ut
|
| 36 |
+
num_workers: 0
|
| 37 |
+
path: data/orca32m_cmp_seq
|
| 38 |
+
pin_memory: true
|
| 39 |
+
reference_id: hg38
|
| 40 |
+
sequence_format: string
|
| 41 |
+
sequence_key: sequence
|
| 42 |
+
shuffle: true
|
| 43 |
+
test_split: test
|
| 44 |
+
train_split: train
|
| 45 |
+
type: cmp_seq
|
| 46 |
alias: Mamba2_12.8K-100B
|
| 47 |
alpha_exp: 1.0
|
| 48 |
alpha_max: 0.03
|
|
|
|
| 52 |
cluster: mila
|
| 53 |
cmd: python src/scripts/train_genezip_v1.py exp=glm/stage1 data=gencode_human_12.8k
|
| 54 |
model=hnet/mamba2 max_len=12800 batch_size=8 grad_acc_steps=4 max_train_steps=30600
|
| 55 |
+
eval_steps=100 upload_to_hf=true wandb.project=DNAFM_v2 use_routing_floor=false
|
| 56 |
+
strictness_max=0 alias=Mamba2_12.8K-100B use_wandb=true hf_repo=jzshared/Mamba2_12.8K-100B
|
| 57 |
config_path: null
|
| 58 |
data: gencode_human_12.8k
|
| 59 |
data_alias: ${.data}_${max_len}
|
|
|
|
| 75 |
hf_user: jzshared
|
| 76 |
is_distributed: true
|
| 77 |
local_rank: 0
|
| 78 |
+
log_every: 10
|
| 79 |
logging:
|
| 80 |
level: info
|
| 81 |
log_wandb_metric_to_stdout: true
|
| 82 |
lr: 0.001
|
| 83 |
+
master_port: '43837'
|
| 84 |
max_data_samples: null
|
| 85 |
+
max_eval_samples: ${num_valid_samples}
|
| 86 |
+
max_grad_norm: 2.0
|
| 87 |
max_len: 12800
|
| 88 |
max_length: ${max_len}
|
| 89 |
max_train_steps: 30600
|
| 90 |
min_routing_tokens: 8
|
| 91 |
+
mixed_precision: bf16
|
| 92 |
mode: Stage1
|
| 93 |
model:
|
| 94 |
arch: hnet
|
|
|
|
| 123 |
tie_embeddings: true
|
| 124 |
vocab_size: 12
|
| 125 |
name: hnet_base
|
| 126 |
+
num_test_samples: 0
|
| 127 |
+
num_train_samples: 0
|
| 128 |
+
num_valid_samples: 0
|
| 129 |
private: false
|
| 130 |
project_root: ${hydra:runtime.cwd}
|
| 131 |
r_hi: 0.3
|
|
|
|
| 141 |
strictness_exp: 1.0
|
| 142 |
strictness_max: 0
|
| 143 |
tokenizer: fast
|
| 144 |
+
train_steps: 9999999
|
| 145 |
training:
|
| 146 |
adam_beta1: 0.9
|
| 147 |
adam_beta2: 0.95
|
|
|
|
| 158 |
label_names:
|
| 159 |
- input_ids
|
| 160 |
learning_rate: ${lr}
|
| 161 |
+
logging_steps: ${log_every}
|
| 162 |
lr_scheduler_type: linear
|
| 163 |
+
max_grad_norm: ${max_grad_norm}
|
| 164 |
max_train_steps: ${max_train_steps}
|
| 165 |
num_train_epochs: ${epochs}
|
| 166 |
output_dir: ${dirs.output}
|
|
|
|
| 175 |
warmup_steps: 500
|
| 176 |
weight_decay: 0.1
|
| 177 |
training_alias: ${mode}_glm_s1_${region_info}_bp${bp_per_token}_aw${warmup_steps}_amax${alpha_max}_smax${strictness_max}_lr${lr}_e${epochs}_ms${max_train_steps}_maxlen${max_len}
|
| 178 |
+
uid: 8vsgvsbs
|
| 179 |
upload_to_hf: true
|
| 180 |
use_routing_floor: false
|
| 181 |
use_wandb: true
|
|
|
|
| 184 |
wandb:
|
| 185 |
dir: ${dirs.wandb_cache}
|
| 186 |
entity: ${oc.select:env.vars.wandb_entity,${oc.env:WANDB_ENTITY,null}}
|
| 187 |
+
id: 8vsgvsbs
|
| 188 |
mode: online
|
| 189 |
name: Mamba2_12.8K-100B
|
| 190 |
+
project: DNAFM_v2
|
| 191 |
step_metric: null
|
| 192 |
tags:
|
| 193 |
- ${mode}
|
| 194 |
+
url: https://wandb.ai/jzshared/DNAFM_v2/runs/8vsgvsbs
|
| 195 |
warmup_steps: 0
|
| 196 |
world_size: 8
|