judewells committed on
Commit
91516e8
·
verified ·
1 Parent(s): 38a1960

Upload folder using huggingface_hub

Browse files
combined_protein_to_smiles/config.yaml ADDED
@@ -0,0 +1,346 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ task_name: CombinedHiQBAggPropPoc2Mol
2
+ tags:
3
+ - combined_data
4
+ train: true
5
+ test: true
6
+ optimized_metric: val/loss
7
+ ckpt_path: logs/CombinedHiQBindCkptFrmPrevCombined/runs/2025-05-06_20-51-46/checkpoints/last.ckpt
8
+ seed: 42
9
+ data:
10
+ train_dataset:
11
+ poc2mol_output_dataset:
12
+ poc2mol_model:
13
+ _target_: src.models.poc2mol.Poc2Mol
14
+ config:
15
+ _target_: src.models.poc2mol.ResUnetConfig
16
+ in_channels: 4
17
+ out_channels: 9
18
+ final_sigmoid: false
19
+ f_maps: 64
20
+ layer_order: gcr
21
+ num_groups: 8
22
+ num_levels: 5
23
+ conv_padding: 1
24
+ conv_upscale: 2
25
+ upsample: default
26
+ dropout_prob: 0.1
27
+ basic_module: ${oc.select:src.models.pytorch3dunet_lib.unet3d.buildingblocks.ResNetBlockSE,
28
+ src.models.pytorch3dunet_lib.unet3d.buildingblocks.ResNetBlockSE}
29
+ loss:
30
+ name: BCEDiceLoss
31
+ weight: null
32
+ normalization: sigmoid
33
+ alpha: 1.0
34
+ beta: 1.0
35
+ matmul_precision: high
36
+ lr: 0.0001
37
+ scheduler:
38
+ type: cosine_with_min_lr
39
+ num_warmup_steps: 50
40
+ min_lr_rate: 0.1
41
+ interval: step
42
+ frequency: 1
43
+ img_save_dir: ${paths.img_save_dir}
44
+ _target_: src.data.vox2smiles.datasets.Poc2MolOutputDataset
45
+ ckpt_path: logs/poc2mol/runs/2025-04-21_18-13-26/checkpoints/epoch_173.ckpt
46
+ complex_dataset:
47
+ _target_: src.data.poc2mol.datasets.ParquetDataset
48
+ data_path: ../hiqbind/parquet/train
49
+ translation: 6.0
50
+ rotate: true
51
+ config:
52
+ _target_: src.data.common.voxelization.config.Poc2MolDataConfig
53
+ batch_size: 1
54
+ has_protein: true
55
+ protein_channel_indices:
56
+ - 0
57
+ - 1
58
+ - 2
59
+ - 3
60
+ protein_channels:
61
+ 0:
62
+ - C
63
+ 1:
64
+ - O
65
+ 2:
66
+ - 'N'
67
+ 3:
68
+ - S
69
+ protein_channel_names:
70
+ - carbon
71
+ - oxygen
72
+ - nitrogen
73
+ - sulphur
74
+ ligand_channel_indices:
75
+ - 4
76
+ - 5
77
+ - 6
78
+ - 7
79
+ - 8
80
+ - 9
81
+ - 10
82
+ - 11
83
+ - 12
84
+ ligand_channels:
85
+ 0:
86
+ - C
87
+ 1:
88
+ - O
89
+ 2:
90
+ - 'N'
91
+ 3:
92
+ - S
93
+ 4:
94
+ - Cl
95
+ 5:
96
+ - F
97
+ 6:
98
+ - I
99
+ 7:
100
+ - Br
101
+ 8:
102
+ - C
103
+ - H
104
+ - O
105
+ - 'N'
106
+ - S
107
+ - Cl
108
+ - F
109
+ - I
110
+ - Br
111
+ ligand_channel_names:
112
+ - carbon
113
+ - oxygen
114
+ - nitrogen
115
+ - sulphur
116
+ - chlorine
117
+ - fluorine
118
+ - iodine
119
+ - bromine
120
+ - other
121
+ vox_size: 0.75
122
+ box_dims:
123
+ - 24.0
124
+ - 24.0
125
+ - 24.0
126
+ random_rotation: true
127
+ random_translation: 6.0
128
+ _target_: src.data.vox2smiles.datasets.CombinedDataset
129
+ ratio: 0.75
130
+ max_poc2mol_loss: 0.78
131
+ vox2smiles_dataset:
132
+ _target_: src.data.vox2smiles.datasets.ParquetVox2SmilesDataset
133
+ data_path: ../zinc20_parquet
134
+ random_rotation: true
135
+ random_translation: 6.0
136
+ config: ${data.config}
137
+ secondary_val_dataset:
138
+ poc2mol_model:
139
+ _target_: src.models.poc2mol.Poc2Mol
140
+ config:
141
+ _target_: src.models.poc2mol.ResUnetConfig
142
+ in_channels: 4
143
+ out_channels: 9
144
+ final_sigmoid: false
145
+ f_maps: 64
146
+ layer_order: gcr
147
+ num_groups: 8
148
+ num_levels: 5
149
+ conv_padding: 1
150
+ conv_upscale: 2
151
+ upsample: default
152
+ dropout_prob: 0.1
153
+ basic_module: ${oc.select:src.models.pytorch3dunet_lib.unet3d.buildingblocks.ResNetBlockSE,
154
+ src.models.pytorch3dunet_lib.unet3d.buildingblocks.ResNetBlockSE}
155
+ loss:
156
+ name: BCEDiceLoss
157
+ weight: null
158
+ normalization: sigmoid
159
+ alpha: 1.0
160
+ beta: 1.0
161
+ matmul_precision: high
162
+ lr: 0.0001
163
+ scheduler:
164
+ type: cosine_with_min_lr
165
+ num_warmup_steps: 50
166
+ min_lr_rate: 0.1
167
+ interval: step
168
+ frequency: 1
169
+ img_save_dir: ${paths.img_save_dir}
170
+ _target_: src.data.vox2smiles.datasets.Poc2MolOutputDataset
171
+ ckpt_path: logs/poc2mol/runs/2025-04-21_18-13-26/checkpoints/epoch_173.ckpt
172
+ complex_dataset:
173
+ _target_: src.data.poc2mol.datasets.ParquetDataset
174
+ data_path: ../hiqbind/parquet/val
175
+ translation: 6.0
176
+ rotate: true
177
+ config:
178
+ _target_: src.data.common.voxelization.config.Poc2MolDataConfig
179
+ batch_size: 1
180
+ has_protein: true
181
+ protein_channel_indices:
182
+ - 0
183
+ - 1
184
+ - 2
185
+ - 3
186
+ protein_channels:
187
+ 0:
188
+ - C
189
+ 1:
190
+ - O
191
+ 2:
192
+ - 'N'
193
+ 3:
194
+ - S
195
+ protein_channel_names:
196
+ - carbon
197
+ - oxygen
198
+ - nitrogen
199
+ - sulphur
200
+ ligand_channel_indices:
201
+ - 4
202
+ - 5
203
+ - 6
204
+ - 7
205
+ - 8
206
+ - 9
207
+ - 10
208
+ - 11
209
+ - 12
210
+ ligand_channels:
211
+ 0:
212
+ - C
213
+ 1:
214
+ - O
215
+ 2:
216
+ - 'N'
217
+ 3:
218
+ - S
219
+ 4:
220
+ - Cl
221
+ 5:
222
+ - F
223
+ 6:
224
+ - I
225
+ 7:
226
+ - Br
227
+ 8:
228
+ - C
229
+ - H
230
+ - O
231
+ - 'N'
232
+ - S
233
+ - Cl
234
+ - F
235
+ - I
236
+ - Br
237
+ ligand_channel_names:
238
+ - carbon
239
+ - oxygen
240
+ - nitrogen
241
+ - sulphur
242
+ - chlorine
243
+ - fluorine
244
+ - iodine
245
+ - bromine
246
+ - other
247
+ vox_size: 0.75
248
+ box_dims:
249
+ - 24.0
250
+ - 24.0
251
+ - 24.0
252
+ random_rotation: true
253
+ random_translation: 6.0
254
+ _target_: src.data.vox2smiles.data_module.Vox2SmilesDataModule
255
+ data_path: ../geom/rdkit_folder/drugs
256
+ num_workers: 0
257
+ config:
258
+ _target_: src.data.common.voxelization.config.Vox2SmilesDataConfig
259
+ batch_size: 2
260
+ max_smiles_len: 200
261
+ has_protein: false
262
+ include_hydrogens: false
263
+ model:
264
+ _target_: src.models.vox2smiles.VoxToSmilesModel
265
+ config:
266
+ hidden_size: 768
267
+ num_hidden_layers: 8
268
+ num_attention_heads: 8
269
+ intermediate_size: 3072
270
+ hidden_act: gelu
271
+ hidden_dropout_prob: 0.0
272
+ attention_probs_dropout_prob: 0.0
273
+ initializer_range: 0.02
274
+ layer_norm_eps: 1.0e-12
275
+ image_size:
276
+ - 32
277
+ - 32
278
+ - 32
279
+ patch_size: 4
280
+ num_channels: 9
281
+ qkv_bias: true
282
+ encoder_stride: 2
283
+ lr: 0.0001
284
+ torch_dtype: bfloat16
285
+ scheduler:
286
+ type: warmup_stable_decay
287
+ num_warmup_steps: 2000
288
+ num_stable_steps: 400000
289
+ num_decay_steps: 400000
290
+ min_lr_ratio: 0.3
291
+ interval: step
292
+ override_optimizer_on_load: true
293
+ callbacks:
294
+ model_checkpoint:
295
+ _target_: lightning.pytorch.callbacks.ModelCheckpoint
296
+ dirpath: ${paths.output_dir}/checkpoints
297
+ filename: epoch_{epoch:03d}
298
+ monitor: val/loss
299
+ verbose: false
300
+ save_last: true
301
+ save_top_k: 1
302
+ mode: min
303
+ auto_insert_metric_name: false
304
+ save_weights_only: false
305
+ every_n_train_steps: null
306
+ train_time_interval: null
307
+ every_n_epochs: null
308
+ save_on_train_epoch_end: null
309
+ model_summary:
310
+ max_depth: 3
311
+ logger:
312
+ wandb:
313
+ _target_: lightning.pytorch.loggers.wandb.WandbLogger
314
+ save_dir: ${paths.output_dir}
315
+ offline: false
316
+ id: null
317
+ anonymous: null
318
+ project: voxelSmiles
319
+ log_model: false
320
+ prefix: ''
321
+ group: vox2smilesZinc
322
+ tags:
323
+ - zinc
324
+ - hiqbind
325
+ job_type: train
326
+ entity: cath
327
+ trainer:
328
+ _target_: lightning.pytorch.trainer.Trainer
329
+ default_root_dir: ${paths.output_dir}
330
+ max_epochs: 50000
331
+ accelerator: gpu
332
+ devices: 1
333
+ precision: bf16-mixed
334
+ check_val_every_n_epoch: 1
335
+ val_check_interval: 6000
336
+ deterministic: false
337
+ log_every_n_steps: 50
338
+ accumulate_grad_batches: 2
339
+ gradient_clip_val: 1.0
340
+ paths:
341
+ root_dir: ${oc.env:PROJECT_ROOT}
342
+ data_dir: ${paths.root_dir}/../data/
343
+ log_dir: ${paths.root_dir}/logs/
344
+ output_dir: ${hydra:runtime.output_dir}
345
+ work_dir: ${hydra:runtime.cwd}
346
+ img_save_dir: ${paths.output_dir}/images
combined_protein_to_smiles/epoch_000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f70b224e5e6341db53d31335918cddc550e7a1f7bcf07873a1fc0054efe80794
3
+ size 1032531935
poc_vox_to_mol_vox/config.yaml ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ task_name: poc2mol
2
+ tags:
3
+ - dev
4
+ train: true
5
+ test: true
6
+ optimized_metric: val/loss
7
+ ckpt_path: null
8
+ seed: 42
9
+ data:
10
+ _target_: src.data.poc2mol.data_module.ComplexDataModule
11
+ pdb_dir: null
12
+ val_pdb_dir: null
13
+ num_workers: 3
14
+ train_dataset:
15
+ _target_: src.data.poc2mol.datasets.ParquetDataset
16
+ data_path: ../hiqbind/parquet/train
17
+ config: ${data.config}
18
+ val_dataset:
19
+ _target_: src.data.poc2mol.datasets.ParquetDataset
20
+ data_path: ../hiqbind/parquet/val
21
+ config: ${data.config}
22
+ test_dataset:
23
+ _target_: src.data.poc2mol.datasets.ParquetDataset
24
+ data_path: ../hiqbind/parquet/test
25
+ config: ${data.config}
26
+ config:
27
+ _target_: src.data.common.voxelization.config.Poc2MolDataConfig
28
+ remove_hydrogens: true
29
+ batch_size: 2
30
+ target_samples_per_batch: 128
31
+ ligand_channel_names:
32
+ - carbon
33
+ - oxygen
34
+ - nitrogen
35
+ - sulfur
36
+ - chlorine
37
+ - fluorine
38
+ - iodine
39
+ - bromine
40
+ - other
41
+ protein_channel_names:
42
+ - carbon
43
+ - oxygen
44
+ - nitrogen
45
+ - sulfur
46
+ fnames: null
47
+ protein_channel_indices:
48
+ - 0
49
+ - 1
50
+ - 2
51
+ - 3
52
+ ligand_channel_indices:
53
+ - 4
54
+ - 5
55
+ - 6
56
+ - 7
57
+ - 8
58
+ - 9
59
+ - 10
60
+ - 11
61
+ - 12
62
+ vox_size: 0.75
63
+ box_dims:
64
+ - 24.0
65
+ - 24.0
66
+ - 24.0
67
+ random_rotation: true
68
+ random_translation: 6.0
69
+ has_protein: true
70
+ protein_channels:
71
+ 0:
72
+ - C
73
+ 1:
74
+ - O
75
+ 2:
76
+ - 'N'
77
+ 3:
78
+ - S
79
+ ligand_channels:
80
+ 0:
81
+ - C
82
+ 1:
83
+ - O
84
+ 2:
85
+ - 'N'
86
+ 3:
87
+ - S
88
+ 4:
89
+ - Cl
90
+ 5:
91
+ - F
92
+ 6:
93
+ - I
94
+ 7:
95
+ - Br
96
+ 8:
97
+ - C
98
+ - H
99
+ - O
100
+ - 'N'
101
+ - S
102
+ - Cl
103
+ - F
104
+ - I
105
+ - Br
106
+ max_atom_dist: 32.0
107
+ dtype: ${oc.select:torch.bfloat16,torch.bfloat16}
108
+ model:
109
+ _target_: src.models.poc2mol.Poc2Mol
110
+ config:
111
+ _target_: src.models.poc2mol.ResUnetConfig
112
+ in_channels: 4
113
+ out_channels: 9
114
+ final_sigmoid: false
115
+ f_maps: 64
116
+ layer_order: gcr
117
+ num_groups: 8
118
+ num_levels: 5
119
+ conv_padding: 1
120
+ conv_upscale: 2
121
+ upsample: default
122
+ dropout_prob: 0.1
123
+ basic_module: ${oc.select:src.models.pytorch3dunet_lib.unet3d.buildingblocks.ResNetBlockSE,
124
+ src.models.pytorch3dunet_lib.unet3d.buildingblocks.ResNetBlockSE}
125
+ loss:
126
+ name: BCEDiceLoss
127
+ weight: null
128
+ normalization: sigmoid
129
+ alpha: 1.0
130
+ beta: 1.0
131
+ matmul_precision: high
132
+ lr: 0.0001
133
+ scheduler_name: constant_with_warmup
134
+ num_warmup_steps: 180
135
+ img_save_dir: ${paths.img_save_dir}
136
+ override_optimizer_on_load: false
137
+ callbacks:
138
+ model_checkpoint:
139
+ _target_: lightning.pytorch.callbacks.ModelCheckpoint
140
+ dirpath: ${paths.output_dir}/checkpoints
141
+ filename: epoch_{epoch:03d}
142
+ monitor: val/loss
143
+ verbose: false
144
+ save_last: true
145
+ save_top_k: 1
146
+ mode: min
147
+ auto_insert_metric_name: false
148
+ save_weights_only: false
149
+ every_n_train_steps: null
150
+ train_time_interval: null
151
+ every_n_epochs: null
152
+ save_on_train_epoch_end: null
153
+ model_summary:
154
+ max_depth: 3
155
+ logger:
156
+ wandb:
157
+ _target_: lightning.pytorch.loggers.wandb.WandbLogger
158
+ save_dir: ${paths.output_dir}
159
+ offline: false
160
+ id: null
161
+ anonymous: null
162
+ project: poc2mol
163
+ log_model: false
164
+ prefix: ''
165
+ group: poc2mol
166
+ tags:
167
+ - dev
168
+ job_type: train
169
+ entity: cath
170
+ trainer:
171
+ _target_: lightning.pytorch.trainer.Trainer
172
+ default_root_dir: ${paths.output_dir}
173
+ max_epochs: 50000
174
+ accelerator: gpu
175
+ devices: 1
176
+ precision: bf16-mixed
177
+ check_val_every_n_epoch: 2
178
+ val_check_interval: null
179
+ deterministic: false
180
+ log_every_n_steps: 50
181
+ accumulate_grad_batches: 32
182
+ gradient_clip_val: 1.0
183
+ num_sanity_val_steps: 0
184
+ paths:
185
+ root_dir: ${oc.env:PROJECT_ROOT}
186
+ data_dir: ${paths.root_dir}/../data/
187
+ log_dir: ${paths.root_dir}/logs/
188
+ output_dir: ${hydra:runtime.output_dir}
189
+ work_dir: ${hydra:runtime.cwd}
190
+ img_save_dir: ${paths.output_dir}/images
poc_vox_to_mol_vox/epoch_173.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1efa85a57a326cea0ab586cc0e1ad71150102049342c4ba15efb7061e0e7f49c
3
+ size 1406981136