seed: 42
base_dir: /scratch/sgoel/MeMDLM_v2
lm:
  pretrained_esm: facebook/esm2_t33_650M_UR50D
  pretrained_evoflow: fredzzp/EvoFlow-650M-context-3070
  pretrained_dplm: airkingbd/dplm_650m
  ft_evoflow: ft_eflow-3070-650M_steps=50k_layers=3_lr=0.00004_wd=.01_polynom_pwr=1_betas=.9-.98_bsz=8_gclip=1.0
  ft_dplm: ft_dplm-650M_steps=5k_layers=3_lr=0.00004_wd=.01_polynom_pwr=1_betas=.9-.98_bsz=32_gclip=1.0
model:
  d_model: 1280
  num_heads: 2
  dropout: 0.5
  num_layers: 4
  label_pad_value: -100
optim:
  type: adamw
  lr: 3e-5
  lr_end: 1e-5
  weight_decay: 0.01
  beta1: 0.9
  beta2: 0.98
  power: 1
training:
  mode: test
  n_layers: 4
  max_steps: 3000
  warmup_steps: 150
  log_every_n_steps: 10
  num_sanity_val_steps: 2
  val_check_interval: 250
  enable_progress_bar: true
  grad_clip_val: 1.0
  devices: [0]
guidance:
  n_steps: 128
  alpha: 3
  gamma: 0.3
  saliency_eps: 1e-4
  saliency_t: 2.0
  sampling_t: 0.7
  boltzmann_t: 0.3
  top_p: 0.2
  steps: 128
  prior: lm_probs
data:
  batch_size: 32
  max_seq_len: 1024
  train: ${base_dir}/data/classifier/train.csv
  test: ${base_dir}/data/classifier/test.csv
  val: ${base_dir}/data/classifier/val.csv
wandb:
  project: memdlm_guidance
  group: programmablebio
  name: new_data_cleaned_steps3k_lr3e-5_bsz32_heads2_drpt0.5_layers4
  id: ${.name}_${seed}
checkpointing:
  save_every_n_steps: 250
  save_dir: ${base_dir}/checkpoints/${wandb.name}
  resume_ckpt_path: ${checkpointing.save_dir}/last.ckpt
  best_ckpt_path: ${checkpointing.save_dir}/best_model.ckpt