si264 committed on
Commit
281597c
·
verified ·
1 Parent(s): 6cba9d9

Upload 2 files

Browse files
megascale_pretrain_config.yaml ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ nn:
2
+ model: GeometricTransformer
3
+ dataset: GeometricTransformerERADatasetPretrain
4
+ dtype: float32
5
+ device: cuda
6
+ data_in_memory: false
7
+ load_model: /data2/scratch/group_scratch/era/protein_era/models/esm3clm/esm3_clm.pt
8
+ batch_size: 64
9
+ num_workers: 4
10
+ collate_fn: geometric_transformer_era_pretrain_collate_fn
11
+ model_args:
12
+ dim_model: 1536
13
+ unified_transformer_args:
14
+ n_layers: 48
15
+ geom_layer_indices:
16
+ - 0
17
+ mha_layer_indices:
18
+ - 0
19
+ - 1
20
+ - 2
21
+ - 3
22
+ - 4
23
+ - 5
24
+ - 6
25
+ - 7
26
+ - 8
27
+ - 9
28
+ - 10
29
+ - 11
30
+ - 12
31
+ - 13
32
+ - 14
33
+ - 15
34
+ - 16
35
+ - 17
36
+ - 18
37
+ - 19
38
+ - 20
39
+ - 21
40
+ - 22
41
+ - 23
42
+ - 24
43
+ - 25
44
+ - 26
45
+ - 27
46
+ - 28
47
+ - 29
48
+ - 30
49
+ - 31
50
+ - 32
51
+ - 33
52
+ - 34
53
+ - 35
54
+ - 36
55
+ - 37
56
+ - 38
57
+ - 39
58
+ - 40
59
+ - 41
60
+ - 42
61
+ - 43
62
+ - 44
63
+ - 45
64
+ - 46
65
+ - 47
66
+ bias: false
67
+ mha_args:
68
+ num_heads: 24
69
+ bias: false
70
+ qk_layernorm: true
71
+ gha_args:
72
+ num_heads: 256
73
+ num_vector_messages: 1
74
+ mask_and_zero_frameless: true
75
+ bias: false
76
+ scaling_factor: 1.1547005383792515
77
+ ffn_type: swiglu
78
+ norm_type: layer_norm
79
+ expansion_ratio: 2.66666666667
80
+ ida_layer_indices: []
81
+ struc_token_info:
82
+ mask: 4096
83
+ eos: 4097
84
+ bos: 4098
85
+ pad: 4099
86
+ total: 5001
87
+ max_non_special_token: 4095
88
+ residue_token_info:
89
+ mask: 32
90
+ eos: 2
91
+ bos: 0
92
+ pad: 1
93
+ total: 33
94
+ max_non_special_token: null
95
+ sasa_token_info:
96
+ mask: 0
97
+ eos: 0
98
+ bos: 0
99
+ pad: 0
100
+ total: null
101
+ max_non_special_token: null
102
+ sec_struct_token_info:
103
+ mask: 0
104
+ eos: 0
105
+ bos: 0
106
+ pad: 0
107
+ total: null
108
+ max_non_special_token: null
109
+ res_annot_token_info:
110
+ mask: 0
111
+ eos: 0
112
+ bos: 0
113
+ pad: 0
114
+ total: null
115
+ max_non_special_token: null
116
+ dataset_split_args:
117
+ train: 1.0
118
+ val: 0.0
119
+ test: 0.0
120
+ train:
121
+ lightning_model: BidirectionalModel
122
+ resume_training_path: lightning_logs/version_7673/checkpoints/step_step=100000.ckpt
123
+ lightning_model_args:
124
+ eval_type: era
125
+ beta: -10.0
126
+ gamma: 0
127
+ sampling_temperature: 1.0
128
+ optimizer: AdamW
129
+ optimizer_args:
130
+ lr: 1.0e-06
131
+ betas:
132
+ - 0.9
133
+ - 0.99
134
+ weight_decay: 0.01
135
+ lr_scheduler: null
136
+ interval: step
137
+ monitor: train/ERALoss
138
+ sync_dist: true
139
+ on_step: true
140
+ lr_scheduler_args: null
141
+ trainer_args:
142
+ accelerator: cuda
143
+ devices: 4
144
+ precision: 16-mixed
145
+ log_every_n_steps: 50
146
+ max_epochs: 1
147
+ enable_progress_bar: false
148
+ gradient_clip_val: 1.0
149
+ strategy: DDPStrategy
150
+ strategy_args:
151
+ find_unused_parameters: true
152
+ every_epoch_checkpoint_args:
153
+ filename: step_{step:02d}
154
+ every_n_epochs: null
155
+ every_n_train_steps: 50000
156
+ save_top_k: -1
157
+ best_checkpoint_args:
158
+ filename: best_model
159
+ monitor: train/ERALoss
160
+ mode: min
161
+ save_top_k: 1
162
+ logger:
163
+ loggertype: TensorBoard
164
+ logger_args:
165
+ version: null
166
+ seed_args:
167
+ seed: 42
168
+ workers: true
169
+ global_args:
170
+ dataset_filename: alignment_dataset.h5
171
+ keys_to_test:
172
+ - nn.model
173
+ - nn.model_args
megascale_pretrained_model.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac49bba2f7f95a524bf0a7b3bd79bc42f93e553de588fabc6a67a8ae03f9e663
3
+ size 16632521682