---
# Global RNG seed for reproducibility (also interpolated into wandb.id below).
seed: 42
# Project root; interpolated into the data and checkpointing paths below.
base_dir: /scratch/sgoel/MeMDLM_v2
# Pretrained and fine-tuned language-model checkpoints.
lm:
  # Hugging Face hub IDs for the pretrained backbones.
  pretrained_esm: facebook/esm2_t33_650M_UR50D
  pretrained_evoflow: fredzzp/EvoFlow-650M-context-3070
  pretrained_dplm: airkingbd/dplm_650m
  # Fine-tuned checkpoint run names; the suffix encodes the hyperparameters used
  # (steps, layers, lr, weight decay, polynomial power, betas, batch size, grad clip).
  ft_evoflow: ft_eflow-3070-650M_steps=50k_layers=3_lr=0.00004_wd=.01_polynom_pwr=1_betas=.9-.98_bsz=8_gclip=1.0
  ft_dplm: ft_dplm-650M_steps=5k_layers=3_lr=0.00004_wd=.01_polynom_pwr=1_betas=.9-.98_bsz=32_gclip=1.0
# Classifier/guidance model architecture.
model:
  d_model: 1280  # hidden size — presumably matches the 650M ESM-2 embedding dim; confirm against backbone
  num_heads: 2
  dropout: 0.5
  num_layers: 4
  label_pad_value: -100  # label value for padded positions — presumably the loss ignore_index; confirm in trainer
# Optimizer and LR-schedule settings (AdamW + polynomial decay).
optim:
  type: adamw
  # Exponents are written with an explicit decimal point ("3.0e-5", not "3e-5")
  # so strict YAML 1.1 loaders such as PyYAML parse a float rather than a string.
  lr: 3.0e-5
  lr_end: 1.0e-5  # final LR of the polynomial decay
  weight_decay: 0.01
  beta1: 0.9
  beta2: 0.98
  power: 1  # polynomial decay power (1 = linear)
# Trainer loop settings (Lightning-style key names — confirm against trainer setup).
training:
  mode: test  # train / test
  n_layers: 4  # NOTE(review): appears to duplicate model.num_layers — confirm which one is read
  max_steps: 3000
  warmup_steps: 150
  log_every_n_steps: 10
  num_sanity_val_steps: 2
  val_check_interval: 250
  enable_progress_bar: true
  grad_clip_val: 1.0
  devices: [0]  # list of GPU IDs from 0-7
# Guided-sampling hyperparameters.
guidance:
  n_steps: 128
  alpha: 3
  gamma: 0.3
  # Decimal point keeps this a float under strict YAML 1.1 loaders (bare "1e-4" is a string there).
  saliency_eps: 1.0e-4
  saliency_t: 2.0  # saliency temperature
  sampling_t: 0.7  # sampling temperature
  boltzmann_t: 0.3  # Boltzmann-prior temperature
  top_p: 0.2
  steps: 128  # NOTE(review): appears to duplicate n_steps above — confirm which key the sampler reads
  prior: lm_probs  # lm_probs / boltzmann
# Classifier dataset splits; ${base_dir} is resolved by the config loader
# (OmegaConf-style interpolation — confirm loader).
data:
  batch_size: 32
  max_seq_len: 1024
  train: ${base_dir}/data/classifier/train.csv
  test: ${base_dir}/data/classifier/test.csv
  val: ${base_dir}/data/classifier/val.csv
# Weights & Biases run settings.
wandb:
  project: memdlm_guidance
  group: programmablebio
  name: new_data_cleaned_steps3k_lr3e-5_bsz32_heads2_drpt0.5_layers4
  # ${.name} is a relative interpolation to this section's own "name" key;
  # ${seed} resolves against the top-level seed.
  id: ${.name}_${seed}
# Checkpoint save/resume locations, derived from base_dir and the wandb run name.
checkpointing:
  save_every_n_steps: 250
  save_dir: ${base_dir}/checkpoints/${wandb.name}
  resume_ckpt_path: ${checkpointing.save_dir}/last.ckpt
  best_ckpt_path: ${checkpointing.save_dir}/best_model.ckpt