# Training configuration for the GeometricTransformer run (GB1 dataset).
---
# Neural-network and data-pipeline settings.
nn:
  model: GeometricTransformer
  dataset: GeometricTransformerDataset
  dtype: float32
  device: cuda
  data_in_memory: false  # stream batches from disk instead of preloading
  load_model: null       # no checkpoint to warm-start from
  batch_size: 128
  num_workers: 4
  collate_fn: geometric_transformer_collate_fn
  model_args:
    dim_model: 1536
    unified_transformer_args:
      n_layers: 48
      # Geometric attention only in the first layer; standard MHA in every layer.
      geom_layer_indices: [0]
      mha_layer_indices: [
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
        12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
        24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
        36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47
      ]
      bias: false
      mha_args:
        num_heads: 24
        bias: false
        qk_layernorm: true
      gha_args:
        num_heads: 256
        num_vector_messages: 1
        mask_and_zero_frameless: true
        bias: false
      scaling_factor: 1.1547005383792515  # = 2 / sqrt(3)
      ffn_type: swiglu
      norm_type: layer_norm
      expansion_ratio: 2.66666666667      # ~8/3 — common FFN ratio for SwiGLU
    # Special-token ids per track; null means the field is unset for that track.
    struc_token_info:
      mask: 4096
      eos: 4097
      bos: 4098
      pad: 4099
      total: 5001
      max_non_special_token: 4095
    residue_token_info:
      mask: 32
      eos: 2
      bos: 0
      pad: 1
      total: 33
      max_non_special_token: null
    sasa_token_info:
      mask: 0
      eos: 0
      bos: 0
      pad: 0
      total: null
      max_non_special_token: null
    sec_struct_token_info:
      mask: 0
      eos: 0
      bos: 0
      pad: 0
      total: null
      max_non_special_token: null
    res_annot_token_info:
      mask: 0
      eos: 0
      bos: 0
      pad: 0
      total: null
      max_non_special_token: null
  dataset_split_args:
    # Train/val fractions; no held-out test split in this run.
    train: 0.8
    val: 0.2
    test: 0.0
# Trainer (PyTorch-Lightning-style) settings.
train:
  lightning_model: TransformerModel
  resume_training_path: null  # start fresh; no mid-run checkpoint to resume
  lightning_model_args:
    eval_type: sft
    # Left unset for this run (all null).
    beta: null
    gamma: null
    sampling_temperature: null
    optimizer: Adam
    optimizer_args:
      lr: 0.0004
      betas: [0.9, 0.95]
      weight_decay: 0.01
    lr_scheduler: LinearWarmupCosineAnnealingLR
    lr_scheduler_args:
      # NOTE(review): interval below is "step", so these "epoch" counts are
      # presumably measured in optimizer steps — confirm against the scheduler.
      warmup_epochs: 250000
      max_epochs: 2500000
      eta_min: 4.0e-05  # = lr / 10
    interval: step
    monitor: val/CELoss
    sync_dist: true
    on_step: true
  trainer_args:
    eval_type: era
    accelerator: cuda
    devices: 1
    strategy: auto
    log_every_n_steps: 500
    max_epochs: 10000
    enable_progress_bar: false
    gradient_clip_val: 1.0
  logger:
    loggertype: TensorBoard
  seed_args:
    seed: 42
    workers: true  # presumably forwarded to seed_everything(workers=...) — verify
# Settings shared across all stages of the run.
global_args:
  dataset_filename: /scratch/group_scratch/era/directed_evolution/datasets/gb1/gb1_tokenized.h5
  # Dotted config paths to compare/validate (e.g. against a reference config).
  keys_to_test: [nn.model, nn.model_args]