experiment_name: esm2-3B_final
seed: 42

pytorch:
  matmul_precision: medium   # passed to torch.set_float32_matmul_precision
  enable_tf32: true

model:
  n_layer: 12
  n_head: 12
  n_embd: 900                # head dim = 900 / 12 = 75
  block_size: 2048
  bias: true
  dropout: 0.1
  protein_embedding_dim: 2560                   # hidden size of ESM2-3B
  embed_model_path: facebook/esm2_t36_3B_UR50D
  protein_layer_index: 30                       # ESM2 layer to read embeddings from
  use_gated_attention: true

data:
  preprocessed_path: ${hydra:runtime.cwd}/preprocessed_data/final_experiment
  batch_size: 10             # per device
  num_workers: 4
  persistent_workers: true
  prefetch_factor: 8
  pin_memory: false

training:
  learning_rate: 0.0001
  warmup_fraction: 0.1       # fraction of total steps spent on LR warmup
  min_lr_ratio: 0.1          # minimum LR as a fraction of the peak LR
  weight_decay: 0.01
  gradient_clip_val: 1.0
  log_generations: false
  accumulate_grad_batches: 4 # effective global batch = 10 * 4 * 4 devices = 160
  max_epochs: 100
  check_val_every_n_epoch: 1
  limit_val_batches: 0.5     # validate on half of the validation set
  num_sanity_val_steps: 0
  log_every_n_steps: 100
  devices: 4
  accelerator: gpu
  precision: bf16-mixed
  strategy: ddp_find_unused_parameters_true
  enable_progress_bar: true

resume:
  checkpoint_path: outputs/esm2-3B_final/2025-12-10_00-39-55/checkpoints/last.ckpt

checkpoints:
  # Keep the two best checkpoints by validation F1.
  - monitor: val_f1
    mode: max
    save_top_k: 2
    save_last: false
    filename: best-{epoch:02d}-{val_f1:.3f}
    dirpath: checkpoints/best
    every_n_epochs: 1
  # Periodic snapshot every 10 epochs; save_top_k: -1 keeps them all.
  - monitor: null
    save_top_k: -1
    save_last: false
    filename: epoch-{epoch:02d}
    dirpath: checkpoints/periodic
    every_n_epochs: 10
    save_on_train_epoch_end: true
  # Rolling last.ckpt only (save_top_k: 0 disables top-k saving).
  - monitor: null
    save_top_k: 0
    save_last: true
    filename: last
    dirpath: checkpoints

wandb:
  enabled: true
  offline: false
  project: GO-GPT
  group: ${experiment_name}
  name: run_${now:%Y-%m-%d_%H-%M-%S}
  log_model: false
  save_dir: wandb
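
# ---------------------------------------------------------------------------
# Usage note (comment only, not parsed as config): a minimal sketch of how a
# config like this might be consumed with Hydra + PyTorch Lightning.
# `GOGPTModule` and `GODataModule` are hypothetical stand-ins for the
# project's LightningModule and LightningDataModule, and `config_name` is
# assumed to match this file's name; everything else uses standard
# Hydra / Lightning / PyTorch APIs.
#
#   import torch
#   import hydra
#   from omegaconf import DictConfig
#   from lightning.pytorch import Trainer, seed_everything
#   from lightning.pytorch.callbacks import ModelCheckpoint
#   from lightning.pytorch.loggers import WandbLogger
#
#   @hydra.main(config_path=".", config_name="esm2-3B_final", version_base=None)
#   def main(cfg: DictConfig) -> None:
#       seed_everything(cfg.seed)
#       torch.set_float32_matmul_precision(cfg.pytorch.matmul_precision)
#       torch.backends.cuda.matmul.allow_tf32 = cfg.pytorch.enable_tf32
#
#       # One ModelCheckpoint callback per entry in the `checkpoints` list.
#       callbacks = [ModelCheckpoint(**ckpt) for ckpt in cfg.checkpoints]
#       logger = WandbLogger(project=cfg.wandb.project, group=cfg.wandb.group,
#                            name=cfg.wandb.name, offline=cfg.wandb.offline,
#                            save_dir=cfg.wandb.save_dir,
#                            log_model=cfg.wandb.log_model) \
#           if cfg.wandb.enabled else False
#
#       trainer = Trainer(
#           devices=cfg.training.devices,
#           accelerator=cfg.training.accelerator,
#           precision=cfg.training.precision,
#           strategy=cfg.training.strategy,
#           max_epochs=cfg.training.max_epochs,
#           accumulate_grad_batches=cfg.training.accumulate_grad_batches,
#           gradient_clip_val=cfg.training.gradient_clip_val,
#           check_val_every_n_epoch=cfg.training.check_val_every_n_epoch,
#           limit_val_batches=cfg.training.limit_val_batches,
#           num_sanity_val_steps=cfg.training.num_sanity_val_steps,
#           log_every_n_steps=cfg.training.log_every_n_steps,
#           enable_progress_bar=cfg.training.enable_progress_bar,
#           callbacks=callbacks,
#           logger=logger,
#       )
#       model = GOGPTModule(cfg.model)        # hypothetical LightningModule
#       datamodule = GODataModule(cfg.data)   # hypothetical LightningDataModule
#       trainer.fit(model, datamodule=datamodule,
#                   ckpt_path=cfg.resume.checkpoint_path)
#
#   if __name__ == "__main__":
#       main()
# ---------------------------------------------------------------------------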