si264/era-directed-evolution

+nn:  # network, dataset and data-loading settings
+  model: GeometricTransformer
+  dataset: GeometricTransformerDataset
+  dtype: float32
+  device: cuda
+  data_in_memory: false
+  load_model: null  # no checkpoint path given; presumably fresh weights - confirm
+  batch_size: 128
+  num_workers: 4
+  collate_fn: geometric_transformer_collate_fn
+  model_args:
+    dim_model: 1536  # 1536 / 24 mha heads = 64, assuming an even per-head split - confirm
+    unified_transformer_args:
+      n_layers: 48
+      geom_layer_indices:  # only the first layer (index 0) is listed
+      - 0
+      mha_layer_indices:  # every index 0-47, i.e. all n_layers layers
+      - 0
+      - 1
+      - 2
+      - 3
+      - 4
+      - 5
+      - 6
+      - 7
+      - 8
+      - 9
+      - 10
+      - 11
+      - 12
+      - 13
+      - 14
+      - 15
+      - 16
+      - 17
+      - 18
+      - 19
+      - 20
+      - 21
+      - 22
+      - 23
+      - 24
+      - 25
+      - 26
+      - 27
+      - 28
+      - 29
+      - 30
+      - 31
+      - 32
+      - 33
+      - 34
+      - 35
+      - 36
+      - 37
+      - 38
+      - 39
+      - 40
+      - 41
+      - 42
+      - 43
+      - 44
+      - 45
+      - 46
+      - 47
+      bias: false
+      mha_args:
+        num_heads: 24
+        bias: false
+        qk_layernorm: true
+      gha_args:
+        num_heads: 256
+        num_vector_messages: 1
+        mask_and_zero_frameless: true
+        bias: false
+      scaling_factor: 1.1547005383792515  # numerically 2/sqrt(3); presumably sqrt(n_layers / 36) - confirm
+      ffn_type: swiglu
+      norm_type: layer_norm
+      expansion_ratio: 2.66666666667  # numerically 8/3
+    struc_token_info:  # special ids 4096-4099 sit directly above max_non_special_token
+      mask: 4096
+      eos: 4097
+      bos: 4098
+      pad: 4099
+      total: 5001  # NOTE(review): highest id listed here is 4099 (4100 ids); confirm 5001 is intended
+      max_non_special_token: 4095
+    residue_token_info:  # ids span 0-32, matching total 33; mask (32) is the last id
+      mask: 32
+      eos: 2
+      bos: 0
+      pad: 1
+      total: 33
+      max_non_special_token: null
+    sasa_token_info:  # all ids 0 and total null; presumably a placeholder for an unused track - confirm
+      mask: 0
+      eos: 0
+      bos: 0
+      pad: 0
+      total: null
+      max_non_special_token: null
+    sec_struct_token_info:  # same all-zero / null values as sasa_token_info
+      mask: 0
+      eos: 0
+      bos: 0
+      pad: 0
+      total: null
+      max_non_special_token: null
+    res_annot_token_info:  # same all-zero / null values as sasa_token_info
+      mask: 0
+      eos: 0
+      bos: 0
+      pad: 0
+      total: null
+      max_non_special_token: null
+  dataset_split_args:  # fractions sum to 1.0; the test split is empty
+    train: 0.8
+    val: 0.2
+    test: 0.0
+train:  # training-loop, optimizer, scheduler, logger and seeding settings
+  lightning_model: TransformerModel
+  resume_training_path: null
+  lightning_model_args:
+    eval_type: sft  # NOTE(review): trainer_args.eval_type below is "era"; confirm both are intended
+    beta: null
+    gamma: null
+    sampling_temperature: null
+    optimizer: Adam
+    optimizer_args:
+      lr: 0.0004
+      betas:
+      - 0.9
+      - 0.95
+      weight_decay: 0.01  # if this resolves to torch.optim.Adam this is an L2 penalty, not decoupled decay - confirm
+    lr_scheduler: LinearWarmupCosineAnnealingLR
+    lr_scheduler_args:
+      warmup_epochs: 250000  # 10% of max_epochs; presumably counted in steps given interval: step - confirm
+      max_epochs: 2500000
+      eta_min: 4.0e-05  # 0.1 x optimizer_args.lr
+    interval: step
+    monitor: val/CELoss
+    sync_dist: true
+    on_step: true
+  trainer_args:
+    eval_type: era  # NOTE(review): differs from lightning_model_args.eval_type (sft) - confirm
+    accelerator: cuda
+    devices: 1
+    strategy: auto
+    log_every_n_steps: 500
+    max_epochs: 10000  # distinct from lr_scheduler_args.max_epochs (2500000); presumably different units - confirm
+    enable_progress_bar: false
+    gradient_clip_val: 1.0
+  logger:
+    loggertype: TensorBoard
+  seed_args:  # key names match Lightning's seed_everything(seed, workers); presumably forwarded there - confirm
+    seed: 42
+    workers: true
+global_args:  # run-wide settings: dataset location and a list of config keys
+  dataset_filename: /scratch/group_scratch/era/directed_evolution/datasets/gb1/gb1_tokenized.h5  # absolute, cluster-specific path
+  keys_to_test:  # dotted paths to keys defined in this file; what is tested about them is not shown here
+  - nn.model
+  - nn.model_args