---
# Experiment configuration (Hydra/OmegaConf).
# Values written as ${...} are OmegaConf interpolations resolved at load time.
seed: 42
base_dir: /scratch/sgoel/MeMDLM_v2

# Pretrained / fine-tuned language-model identifiers.
lm:
  pretrained_esm: facebook/esm2_t33_650M_UR50D
  pretrained_evoflow: fredzzp/EvoFlow-650M-context-3070
  pretrained_dplm: airkingbd/dplm_650m
  ft_evoflow: ft_eflow-3070-650M_steps=50k_layers=3_lr=0.00004_wd=.01_polynom_pwr=1_betas=.9-.98_bsz=8_gclip=1.0
  ft_dplm: ft_dplm-650M_steps=5k_layers=3_lr=0.00004_wd=.01_polynom_pwr=1_betas=.9-.98_bsz=32_gclip=1.0

# Classifier/model architecture hyperparameters.
model:
  d_model: 1280
  num_heads: 2
  dropout: 0.5
  num_layers: 4
  label_pad_value: -100

# Optimizer settings (AdamW with polynomial decay — see `power`).
optim:
  type: adamw
  lr: 3e-5
  lr_end: 1e-5
  weight_decay: 0.01
  beta1: 0.9
  beta2: 0.98
  power: 1

# Trainer loop settings.
training:
  mode: test  # train / test
  n_layers: 4
  max_steps: 3000
  warmup_steps: 150
  log_every_n_steps: 10
  num_sanity_val_steps: 2
  val_check_interval: 250
  enable_progress_bar: true
  grad_clip_val: 1.0
  devices: [0]  # list of GPU IDs from 0-7

# Guided-sampling hyperparameters.
guidance:
  n_steps: 128
  alpha: 3
  gamma: 0.3
  saliency_eps: 1e-4
  saliency_t: 2.0
  sampling_t: 0.7
  boltzmann_t: 0.3
  top_p: 0.2
  steps: 128
  prior: lm_probs  # lm_probs / boltzmann

# Dataset locations and loading parameters.
data:
  batch_size: 32
  max_seq_len: 1024
  train: ${base_dir}/data/classifier/train.csv
  test: ${base_dir}/data/classifier/test.csv
  val: ${base_dir}/data/classifier/val.csv

# Weights & Biases run metadata.
wandb:
  project: memdlm_guidance
  group: programmablebio
  name: new_data_cleaned_steps3k_lr3e-5_bsz32_heads2_drpt0.5_layers4
  id: ${.name}_${seed}  # relative interpolation: sibling `name` + top-level `seed`

# Checkpoint save/resume paths, derived from the wandb run name.
checkpointing:
  save_every_n_steps: 250
  save_dir: ${base_dir}/checkpoints/${wandb.name}
  resume_ckpt_path: ${checkpointing.save_dir}/last.ckpt
  best_ckpt_path: ${checkpointing.save_dir}/best_model.ckpt