si264 committed on
Commit
6cba9d9
·
verified ·
1 Parent(s): 8e2ca80

Upload 2 files

Browse files
Files changed (2) hide show
  1. base_model.pt +3 -0
  2. config.yaml +161 -0
base_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74b58709966eba09683b17b66c5c24542921eb4a1c93a64cf9ae98ee2e869c8f
3
+ size 5607163039
config.yaml ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ nn:
2
+ model: GeometricTransformer
3
+ dataset: GeometricTransformerDataset
4
+ dtype: float32
5
+ device: cuda
6
+ data_in_memory: false
7
+ load_model: null
8
+ batch_size: 128
9
+ num_workers: 4
10
+ collate_fn: geometric_transformer_collate_fn
11
+ model_args:
12
+ dim_model: 1536
13
+ unified_transformer_args:
14
+ n_layers: 48
15
+ geom_layer_indices:
16
+ - 0
17
+ mha_layer_indices:
18
+ - 0
19
+ - 1
20
+ - 2
21
+ - 3
22
+ - 4
23
+ - 5
24
+ - 6
25
+ - 7
26
+ - 8
27
+ - 9
28
+ - 10
29
+ - 11
30
+ - 12
31
+ - 13
32
+ - 14
33
+ - 15
34
+ - 16
35
+ - 17
36
+ - 18
37
+ - 19
38
+ - 20
39
+ - 21
40
+ - 22
41
+ - 23
42
+ - 24
43
+ - 25
44
+ - 26
45
+ - 27
46
+ - 28
47
+ - 29
48
+ - 30
49
+ - 31
50
+ - 32
51
+ - 33
52
+ - 34
53
+ - 35
54
+ - 36
55
+ - 37
56
+ - 38
57
+ - 39
58
+ - 40
59
+ - 41
60
+ - 42
61
+ - 43
62
+ - 44
63
+ - 45
64
+ - 46
65
+ - 47
66
+ bias: false
67
+ mha_args:
68
+ num_heads: 24
69
+ bias: false
70
+ qk_layernorm: true
71
+ gha_args:
72
+ num_heads: 256
73
+ num_vector_messages: 1
74
+ mask_and_zero_frameless: true
75
+ bias: false
76
+ scaling_factor: 1.1547005383792515
77
+ ffn_type: swiglu
78
+ norm_type: layer_norm
79
+ expansion_ratio: 2.66666666667
80
+ struc_token_info:
81
+ mask: 4096
82
+ eos: 4097
83
+ bos: 4098
84
+ pad: 4099
85
+ total: 5001
86
+ max_non_special_token: 4095
87
+ residue_token_info:
88
+ mask: 32
89
+ eos: 2
90
+ bos: 0
91
+ pad: 1
92
+ total: 33
93
+ max_non_special_token: null
94
+ sasa_token_info:
95
+ mask: 0
96
+ eos: 0
97
+ bos: 0
98
+ pad: 0
99
+ total: null
100
+ max_non_special_token: null
101
+ sec_struct_token_info:
102
+ mask: 0
103
+ eos: 0
104
+ bos: 0
105
+ pad: 0
106
+ total: null
107
+ max_non_special_token: null
108
+ res_annot_token_info:
109
+ mask: 0
110
+ eos: 0
111
+ bos: 0
112
+ pad: 0
113
+ total: null
114
+ max_non_special_token: null
115
+ dataset_split_args:
116
+ train: 0.8
117
+ val: 0.2
118
+ test: 0.0
119
+ train:
120
+ lightning_model: TransformerModel
121
+ resume_training_path: null
122
+ lightning_model_args:
123
+ eval_type: sft
124
+ beta: null
125
+ gamma: null
126
+ sampling_temperature: null
127
+ optimizer: Adam
128
+ optimizer_args:
129
+ lr: 0.0004
130
+ betas:
131
+ - 0.9
132
+ - 0.95
133
+ weight_decay: 0.01
134
+ lr_scheduler: LinearWarmupCosineAnnealingLR
135
+ lr_scheduler_args:
136
+ warmup_epochs: 250000
137
+ max_epochs: 2500000
138
+ eta_min: 4.0e-05
139
+ interval: step
140
+ monitor: val/CELoss
141
+ sync_dist: true
142
+ on_step: true
143
+ trainer_args:
144
+ eval_type: era
145
+ accelerator: cuda
146
+ devices: 1
147
+ strategy: auto
148
+ log_every_n_steps: 500
149
+ max_epochs: 10000
150
+ enable_progress_bar: false
151
+ gradient_clip_val: 1.0
152
+ logger:
153
+ loggertype: TensorBoard
154
+ seed_args:
155
+ seed: 42
156
+ workers: true
157
+ global_args:
158
+ dataset_filename: /scratch/group_scratch/era/directed_evolution/datasets/gb1/gb1_tokenized.h5
159
+ keys_to_test:
160
+ - nn.model
161
+ - nn.model_args