---
seed: 42
base_dir: /scratch/pranamlab/sgoel/MeMDLM_v2

# Pretrained checkpoints and diffusion schedule for the language model.
lm:
  pretrained_esm: facebook/esm2_t33_650M_UR50D
  pretrained_evoflow: fredzzp/EvoFlow-650M-context-3070
  pretrained_dplm: airkingbd/dplm_650m
  pretrained_progen: hugohrban/progen2-base
  num_diffusion_timesteps: 500
  weight_type: linear  # constant / linear

# Optimizer / LR-schedule hyperparameters.
optim:
  type: adamw
  scheduler: polynomial
  lr: 0.00004
  # NOTE: exponent-only literals like `1e-5` are parsed as STRINGS by
  # YAML 1.1 loaders (PyYAML, used by OmegaConf) because the float
  # resolver requires a decimal point in the mantissa. Written as
  # `1.0e-5` so they load as floats.
  lr_end: 1.0e-5
  warmup_init_lr: 1.0e-7
  weight_decay: 0.01
  beta1: 0.9
  beta2: 0.98
  power: 1  # polynomial-decay exponent

# Trainer loop settings.
training:
  mode: train  # train / test / resume_from_checkpoint
  n_layers: 3
  max_steps: 5000
  warmup_steps: 25
  log_every_n_steps: 10
  num_sanity_val_steps: 2
  val_check_interval: 250
  enable_progress_bar: true
  grad_clip_val: 1.0
  devices: [0, 1, 2]  # list of GPU IDs

sampling:
  n_steps: 128

# Dataset paths (CSV splits) and loader settings.
data:
  batch_size: 8
  max_seq_len: 1024
  train: ${base_dir}/data/new/train.csv
  test: ${base_dir}/data/new/test.csv
  val: ${base_dir}/data/new/val.csv

# Experiment tracking; run name encodes the hyperparameters above.
wandb:
  project: memdlm
  group: programmablebio
  name: ft_eflow-3070-650M_steps=5k_layers=3_lr=0.00004_wd=.01_polynom_pwr=1_betas=.9-.98_bsz=8_gclip=1.0_ml=1024
  # name: ft_progen-base-764M_steps=50k_layers=2_lr=0.00004_wd=.1_cosine-to-frac_betas=.9-.999_bsz=8_gclip=0.8
  # name: ft_dplm-650M_steps=5k_layers=3_lr=0.00004_wd=.01_polynom_pwr=1_betas=.9-.98_bsz=32_gclip=1.0
  # name: ft_esm-650M_steps=3k_layers=3_lr=0.00004_wd=.01_polynom_pwr=1_betas=.9-.98_bsz=32_gclip=1.0
  id: ${.name}_${seed}  # `.name` is an OmegaConf relative interpolation within `wandb`

checkpointing:
  save_every_n_steps: 250
  save_dir: ${base_dir}/checkpoints/${wandb.name}
  resume_ckpt_path: ${checkpointing.save_dir}/last.ckpt
  best_ckpt_path: ${checkpointing.save_dir}/best_model.ckpt