experiment_name: esm2-3B_final
seed: 42
pytorch:
  matmul_precision: medium
  enable_tf32: true
model:
  n_layer: 12
  n_head: 12
  n_embd: 900
  block_size: 2048
  bias: true
  dropout: 0.1
  protein_embedding_dim: 2560
  embed_model_path: facebook/esm2_t36_3B_UR50D
  protein_layer_index: 30
  use_gated_attention: true
data:
  preprocessed_path: ${hydra:runtime.cwd}/preprocessed_data/final_experiment
  batch_size: 10
  num_workers: 4
  persistent_workers: true
  prefetch_factor: 8
  pin_memory: false
training:
  learning_rate: 0.0001
  warmup_fraction: 0.1
  min_lr_ratio: 0.1
  weight_decay: 0.01
  gradient_clip_val: 1.0
  log_generations: false
  accumulate_grad_batches: 4
  max_epochs: 100
  check_val_every_n_epoch: 1
  limit_val_batches: 0.5
  num_sanity_val_steps: 0
  log_every_n_steps: 100
  devices: 4
  accelerator: gpu
  precision: bf16-mixed
  strategy: ddp_find_unused_parameters_true
  enable_progress_bar: true
resume:
  checkpoint_path: outputs/esm2-3B_final/2025-12-10_00-39-55/checkpoints/last.ckpt
checkpoints:
  - monitor: val_f1
    mode: max
    save_top_k: 2
    save_last: false
    filename: best-{epoch:02d}-{val_f1:.3f}
    dirpath: checkpoints/best
    every_n_epochs: 1
  - monitor: null
    save_top_k: -1
    save_last: false
    filename: epoch-{epoch:02d}
    dirpath: checkpoints/periodic
    every_n_epochs: 10
    save_on_train_epoch_end: true
  - monitor: null
    save_top_k: 0
    save_last: true
    filename: last
    dirpath: checkpoints
wandb:
  enabled: true
  offline: false
  project: GO-GPT
  group: ${experiment_name}
  name: run_${now:%Y-%m-%d_%H-%M-%S}
  log_model: false
  save_dir: wandb