# NOTE(review): this file was recovered from a pipe-wrapped (markdown-table)
# extraction that destroyed the original YAML indentation. All keys and values
# are preserved verbatim; the nesting below was reconstructed from key
# semantics (Lightning-style nn/train/global_args config) — verify against the
# config loader before relying on the exact structure.
nn:
  model: GeometricTransformer
  dataset: GeometricTransformerDataset
  dtype: float32
  device: cuda
  data_in_memory: false
  load_model: null
  batch_size: 128
  num_workers: 4
  collate_fn: geometric_transformer_collate_fn
  model_args:
    dim_model: 1536
    unified_transformer_args:
      n_layers: 48
      geom_layer_indices:
        - 0
      # NOTE(review): index 0 appears in both geom_layer_indices and
      # mha_layer_indices — confirm whether layer 0 is meant to carry both
      # attention types.
      mha_layer_indices:
        - 0
        - 1
        - 2
        - 3
        - 4
        - 5
        - 6
        - 7
        - 8
        - 9
        - 10
        - 11
        - 12
        - 13
        - 14
        - 15
        - 16
        - 17
        - 18
        - 19
        - 20
        - 21
        - 22
        - 23
        - 24
        - 25
        - 26
        - 27
        - 28
        - 29
        - 30
        - 31
        - 32
        - 33
        - 34
        - 35
        - 36
        - 37
        - 38
        - 39
        - 40
        - 41
        - 42
        - 43
        - 44
        - 45
        - 46
        - 47
      bias: false
      mha_args:
        num_heads: 24
        bias: false
        qk_layernorm: true
      gha_args:
        num_heads: 256
        num_vector_messages: 1
        mask_and_zero_frameless: true
        bias: false
        scaling_factor: 1.1547005383792515
      ffn_type: swiglu
      norm_type: layer_norm
      expansion_ratio: 2.66666666667
    struc_token_info:
      mask: 4096
      eos: 4097
      bos: 4098
      pad: 4099
      # NOTE(review): total (5001) exceeds the highest token id listed above
      # (pad: 4099) by more than one — confirm the intended vocabulary size.
      total: 5001
      max_non_special_token: 4095
    residue_token_info:
      mask: 32
      eos: 2
      bos: 0
      pad: 1
      total: 33
      max_non_special_token: null
    sasa_token_info:
      mask: 0
      eos: 0
      bos: 0
      pad: 0
      total: null
      max_non_special_token: null
    sec_struct_token_info:
      mask: 0
      eos: 0
      bos: 0
      pad: 0
      total: null
      max_non_special_token: null
    res_annot_token_info:
      mask: 0
      eos: 0
      bos: 0
      pad: 0
      total: null
      max_non_special_token: null
  dataset_split_args:
    train: 0.8
    val: 0.2
    test: 0.0
train:
  lightning_model: TransformerModel
  resume_training_path: null
  lightning_model_args:
    # NOTE(review): eval_type is 'sft' here but 'era' under trainer_args —
    # confirm which (or both) is intended.
    eval_type: sft
    beta: null
    gamma: null
    sampling_temperature: null
    optimizer: Adam
    optimizer_args:
      lr: 0.0004
      betas:
        - 0.9
        - 0.95
      weight_decay: 0.01
    lr_scheduler: LinearWarmupCosineAnnealingLR
    lr_scheduler_args:
      warmup_epochs: 250000
      max_epochs: 2500000
      eta_min: 4.0e-05
    interval: step
    monitor: val/CELoss
    sync_dist: true
    on_step: true
  trainer_args:
    eval_type: era
    accelerator: cuda
    devices: 1
    strategy: auto
    log_every_n_steps: 500
    max_epochs: 10000
    enable_progress_bar: false
    gradient_clip_val: 1.0
  logger:
    loggertype: TensorBoard
  seed_args:
    seed: 42
    workers: true
global_args:
  dataset_filename: /scratch/group_scratch/era/directed_evolution/datasets/gb1/gb1_tokenized.h5
  keys_to_test:
    - nn.model
    - nn.model_args