File size: 14,021 Bytes
1ed78cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
# Optimizer hyperparameters grouped under `adam`.
adam:
  beta1: 0.9
  beta2: 0.95
  # Same value as the top-level `lr` and `af3_lr_scheduler.lr` entries.
  lr: 0.0018
  # NOTE(review): presumably selects plain Adam rather than AdamW — confirm
  # against the code that reads this flag.
  use_adamw: false
  weight_decay: 1.0e-08
# Learning-rate schedule settings. Presumably used because the top-level
# `lr_scheduler` entry is `af3` — confirm against the scheduler factory.
# All four values repeat top-level entries of the same name
# (`decay_every_n_steps`, `lr`, `warmup_steps`) or `adam.lr`.
af3_lr_scheduler:
  decay_every_n_steps: 50000
  decay_factor: 0.95
  lr: 0.0018
  warmup_steps: 10
# Atom-level permutation settings, with separate `test` and `train`
# sub-sections. The `test`/`train` layout is the same as in
# `chain_permutation`.
# NOTE(review): the meaning of each flag is defined by the consuming code,
# not by this file — confirm there before changing values.
atom_permutation:
  global_align_wo_symmetric_atom: false
  permute_by_pocket: true
  test:
    diffusion_sample: true
  train:
    diffusion_sample: false
    mini_rollout: true
# Absolute, machine-specific path (presumably the output root of this run —
# confirm); adjust when running on another host.
base_dir: /home/toolkit/ewpo/outputs/sampling
# Same value as every `blocks_per_ckpt` entry under `model.*`.
blocks_per_ckpt: 1
# Channel widths. The same numbers recur under `model.*`, with one exception:
# `model.diffusion_module.c_token` is 768 while `c_token` here is 384.
c_atom: 128
c_atompair: 16
c_s: 384
c_s_inputs: 449
c_token: 384
c_z: 128
# Chain-level permutation settings. `configs` holds the matching options;
# `test` and `train` use the same layout as in `atom_permutation`.
# NOTE(review): the meaning of each flag is defined by the consuming code —
# confirm there before changing values.
chain_permutation:
  configs:
    accept_it_as_it_is: false
    enumerate_all_anchor_pairs: false
    find_gt_anchor_first: false
    selection_metric: aligned_rmsd
    use_center_rmsd: false
  permute_by_pocket: true
  test:
    diffusion_sample: true
  train:
    diffusion_sample: false
    mini_rollout: true
# NOTE(review): -1 presumably disables periodic checkpoint saving — confirm
# against the trainer.
checkpoint_interval: -1
data:
  # Dataset entry `atlas`: indexed by `indices.csv`, crop size 256 with
  # method weights 0.5 / 0.5 / 0.0. It shares its mmCIF, bioassembly and
  # embedding directories with the other `atlas_*` entries.
  # Not listed in `train_sets` or `test_sets` in this file.
  atlas:
    base_info:
      bioassembly_dict_dir: /home/toolkit/ewpo/data_atlas/mmcif_bioassembly
      indices_fpath: /home/toolkit/ewpo/data_atlas/indices.csv
      # NOTE(review): -1 presumably means "no token limit" — confirm.
      max_n_token: -1
      mmcif_dir: /home/toolkit/ewpo/data_atlas/mmcif
      pdb_list: ''
      random_sample_if_failed: true
      use_reference_chains_only: true
    cropping_configs:
      crop_size: 256
      # Three weights, presumably one per cropping method in a fixed order
      # defined by the data pipeline — confirm the order there.
      method_weights:
      - 0.5
      - 0.5
      - 0.0
    lig_atom_rename: false
    limits: -1
    precomputed_emb_dir: /home/toolkit/ewpo/dumps
    sample_weight: 0.5
    sampler_configs:
      sampler_type: uniform
    shuffle_mols: false
    shuffle_sym_ids: false
  # Dataset entry `atlas_repr`: indexed by `indices_repr.csv`, with
  # `crop_size: -1` and all method weight on the third entry.
  # Not listed in `train_sets` or `test_sets` in this file.
  atlas_repr:
    base_info:
      bioassembly_dict_dir: /home/toolkit/ewpo/data_atlas/mmcif_bioassembly
      indices_fpath: /home/toolkit/ewpo/data_atlas/indices_repr.csv
      max_n_token: -1
      mmcif_dir: /home/toolkit/ewpo/data_atlas/mmcif
      pdb_list: ''
      random_sample_if_failed: true
      use_reference_chains_only: true
    cropping_configs:
      # NOTE(review): -1 presumably disables cropping — confirm.
      crop_size: -1
      method_weights:
      - 0.0
      - 0.0
      - 1.0
    lig_atom_rename: false
    limits: -1
    precomputed_emb_dir: /home/toolkit/ewpo/dumps
    sample_weight: 0.5
    sampler_configs:
      sampler_type: uniform
    shuffle_mols: false
    shuffle_sym_ids: false
  # Dataset entry `atlas_test`: indexed by `indices_test.csv`, with
  # `crop_size: -1` and all method weight on the third entry.
  # Not listed in `test_sets` in this file (that list names
  # `atlas_test_repr` instead).
  atlas_test:
    base_info:
      bioassembly_dict_dir: /home/toolkit/ewpo/data_atlas/mmcif_bioassembly
      indices_fpath: /home/toolkit/ewpo/data_atlas/indices_test.csv
      max_n_token: -1
      mmcif_dir: /home/toolkit/ewpo/data_atlas/mmcif
      pdb_list: ''
      random_sample_if_failed: true
      use_reference_chains_only: true
    cropping_configs:
      crop_size: -1
      method_weights:
      - 0.0
      - 0.0
      - 1.0
    lig_atom_rename: false
    limits: -1
    precomputed_emb_dir: /home/toolkit/ewpo/dumps
    sample_weight: 0.5
    sampler_configs:
      sampler_type: uniform
    shuffle_mols: false
    shuffle_sym_ids: false
  # Dataset entry `atlas_test_repr`: indexed by `indices_test_repr.csv`.
  # This is the only entry named in `test_sets` in this file.
  atlas_test_repr:
    base_info:
      bioassembly_dict_dir: /home/toolkit/ewpo/data_atlas/mmcif_bioassembly
      indices_fpath: /home/toolkit/ewpo/data_atlas/indices_test_repr.csv
      max_n_token: -1
      mmcif_dir: /home/toolkit/ewpo/data_atlas/mmcif
      pdb_list: ''
      random_sample_if_failed: true
      use_reference_chains_only: true
    cropping_configs:
      # NOTE(review): -1 presumably disables cropping — confirm.
      crop_size: -1
      method_weights:
      - 0.0
      - 0.0
      - 1.0
    lig_atom_rename: false
    limits: -1
    precomputed_emb_dir: /home/toolkit/ewpo/dumps
    sample_weight: 0.5
    sampler_configs:
      sampler_type: uniform
    shuffle_mols: false
    shuffle_sym_ids: false
  # Dataset entry `atlas_train`: indexed by `indices_train.csv`.
  # This is the only entry named in `train_sets` in this file.
  atlas_train:
    base_info:
      bioassembly_dict_dir: /home/toolkit/ewpo/data_atlas/mmcif_bioassembly
      indices_fpath: /home/toolkit/ewpo/data_atlas/indices_train.csv
      max_n_token: -1
      mmcif_dir: /home/toolkit/ewpo/data_atlas/mmcif
      pdb_list: ''
      random_sample_if_failed: true
      use_reference_chains_only: true
    cropping_configs:
      # Same value as the top-level `train_crop_size` entry.
      crop_size: 256
      method_weights:
      - 0.5
      - 0.5
      - 0.0
    lig_atom_rename: false
    limits: -1
    precomputed_emb_dir: /home/toolkit/ewpo/dumps
    sample_weight: 0.5
    sampler_configs:
      sampler_type: uniform
    shuffle_mols: false
    shuffle_sym_ids: false
  # Dataset entry `atlas_train_dpo`: uses the same index file as
  # `atlas_train` but adds `annotation_csv`, `preference_mode` and
  # `retrieval_k`, and caps `max_n_token` at 384 instead of cropping.
  # Not listed in `train_sets` in this file; the top-level `dpo_training`
  # flag is also false.
  atlas_train_dpo:
    base_info:
      annotation_csv: /home/toolkit/ewpo/data_atlas/minimization_results_all.csv
      bioassembly_dict_dir: /home/toolkit/ewpo/data_atlas/mmcif_bioassembly
      indices_fpath: /home/toolkit/ewpo/data_atlas/indices_train.csv
      max_n_token: 384
      mmcif_dir: /home/toolkit/ewpo/data_atlas/mmcif
      pdb_list: ''
      preference_mode: true
      random_sample_if_failed: true
      retrieval_k: 2
      use_reference_chains_only: true
    cropping_configs:
      crop_size: -1
      # NOTE(review): unlike the other entries with `crop_size: -1`, the
      # weights here are 0.5 / 0.5 / 0.0 — confirm whether they are used
      # when cropping is off.
      method_weights:
      - 0.5
      - 0.5
      - 0.0
    lig_atom_rename: false
    limits: -1
    precomputed_emb_dir: /home/toolkit/ewpo/dumps
    sample_weight: 0.5
    sampler_configs:
      sampler_type: uniform
    shuffle_mols: false
    shuffle_sym_ids: false
  # Dataset entry `atlas_val`: indexed by `indices_val.csv`, with
  # `crop_size: -1` and all method weight on the third entry.
  # Not listed in `train_sets` or `test_sets` in this file.
  atlas_val:
    base_info:
      bioassembly_dict_dir: /home/toolkit/ewpo/data_atlas/mmcif_bioassembly
      indices_fpath: /home/toolkit/ewpo/data_atlas/indices_val.csv
      max_n_token: -1
      mmcif_dir: /home/toolkit/ewpo/data_atlas/mmcif
      pdb_list: ''
      random_sample_if_failed: true
      use_reference_chains_only: true
    cropping_configs:
      crop_size: -1
      method_weights:
      - 0.0
      - 0.0
      - 1.0
    lig_atom_rename: false
    limits: -1
    precomputed_emb_dir: /home/toolkit/ewpo/dumps
    sample_weight: 0.5
    sampler_configs:
      sampler_type: uniform
    shuffle_mols: false
    shuffle_sym_ids: false
  # Absolute paths to the cached components file (`v20240608`) and its
  # pickled RDKit companion; both live in the same `ccd_cache` directory.
  ccd_components_file: /home/toolkit/ewpo/ewpo/release_data/ccd_cache/components.v20240608.cif
  ccd_components_rdkit_mol_file: /home/toolkit/ewpo/ewpo/release_data/ccd_cache/components.v20240608.cif.rdkit_mol.pkl
  # NOTE(review): presumably the number of samples that counts as one
  # epoch — confirm against the data loader.
  epoch_size: 10000
  # MSA feature settings. Protein MSA is enabled; RNA MSA is disabled and
  # its two paths under `rna` are empty strings.
  msa:
    enable: true
    enable_rna_msa: false
    # Per-split size bounds: `max_size` is 16384 for both splits, while
    # `min_size` is 2048 for test and 1 for train.
    max_size:
      test: 16384
      train: 16384
    merge_method: dense_max
    min_size:
      test: 2048
      train: 1
    prot:
      indexing_method: sequence
      non_pairing_db: mmseqs_other
      pairing_db: uniref100
      pdb_mmseqs_dir: /home/toolkit/ewpo/release_data/mmcif_msa
      seq_to_pdb_idx_path: /home/toolkit/ewpo/release_data/seq_to_pdb_index.json
    rna:
      indexing_method: sequence
      rna_msa_dir: ''
      seq_to_pdb_idx_path: ''
    sample_cutoff:
      test: 2048
      train: 2048
    strategy: random
  # NOTE(review): presumably the data-loader worker count — confirm, and
  # size it to the CPU cores available on the target host.
  num_dl_workers: 16
  # Dataset entry `posebusters_0925`, read from `release_data`. Unlike the
  # `atlas_*` entries it has pocket-finding flags and no `limits`,
  # `sample_weight` or `precomputed_emb_dir` keys.
  # Not listed in `train_sets` or `test_sets` in this file.
  posebusters_0925:
    base_info:
      bioassembly_dict_dir: /home/toolkit/ewpo/release_data/posebusters_bioassembly
      find_all_pockets: false
      find_pocket: true
      indices_fpath: /home/toolkit/ewpo/release_data/indices/posebusters_indices_mainchain_interface.csv
      max_n_token: -1
      mmcif_dir: /home/toolkit/ewpo/release_data/posebusters_mmcif
      pdb_list: ''
    cropping_configs:
      crop_size: -1
      method_weights:
      - 0.0
      - 0.0
      - 1.0
    lig_atom_rename: false
    sampler_configs:
      sampler_type: uniform
    shuffle_mols: false
    shuffle_sym_ids: false
  # Dataset entry `recentPDB_1536_sample384_0925`, read from `release_data`.
  # Not listed in `train_sets` or `test_sets` in this file.
  recentPDB_1536_sample384_0925:
    base_info:
      bioassembly_dict_dir: /home/toolkit/ewpo/release_data/recentPDB_bioassembly
      find_eval_chain_interface: true
      group_by_pdb_id: true
      # NOTE(review): this index file name says `maxtoken1536` while
      # `pdb_list` below says `maxtoken1024` — confirm the pairing is
      # intentional.
      indices_fpath: /home/toolkit/ewpo/release_data/indices/recentPDB_low_homology_maxtoken1536.csv
      max_n_token: -1
      mmcif_dir: /home/toolkit/ewpo/release_data/mmcif
      pdb_list: /home/toolkit/ewpo/release_data/indices/recentPDB_low_homology_maxtoken1024_sample384_pdb_id.txt
      sort_by_n_token: false
    cropping_configs:
      crop_size: -1
      method_weights:
      - 0.0
      - 0.0
      - 1.0
    lig_atom_rename: false
    sampler_configs:
      sampler_type: uniform
    shuffle_mols: false
    shuffle_sym_ids: false
  # Template features are switched off here; `model.template_embedder`
  # also has `n_blocks: 0`.
  template:
    enable: false
  test_ref_pos_augment: true
  # Names must match dataset entries defined under `data`; only
  # `atlas_test_repr` is selected.
  test_sets:
  - atlas_test_repr
  train_ref_pos_augment: true
  # One weight for the single entry in `train_sets`.
  # NOTE(review): presumably the two lists must stay the same length —
  # confirm before adding a training set.
  train_sampler:
    sampler_type: weighted
    train_sample_weights:
    - 1.0
  # Names must match dataset entries defined under `data`; only
  # `atlas_train` is selected.
  train_sets:
  - atlas_train
  # Dataset entry `weightedPDB_before2109_wopb_nometalc_0925`, read from
  # `release_data`. It is the only entry with an `exclusion` section, a
  # weighted per-entry sampler and `use_reference_chains_only: false`.
  # Not listed in `train_sets` or `test_sets` in this file.
  weightedPDB_before2109_wopb_nometalc_0925:
    base_info:
      bioassembly_dict_dir: /home/toolkit/ewpo/release_data/mmcif_bioassembly
      # NOTE(review): presumably filters out rows whose molecule types are
      # both `ions` — confirm against the dataset code.
      exclusion:
        mol_1_type:
        - ions
        mol_2_type:
        - ions
      indices_fpath: /home/toolkit/ewpo/release_data/indices/weightedPDB_indices_before_2021-09-30_wo_posebusters_resolution_below_9.csv.gz
      max_n_token: -1
      mmcif_dir: /home/toolkit/ewpo/release_data/mmcif
      pdb_list: ''
      random_sample_if_failed: true
      use_reference_chains_only: false
    cropping_configs:
      crop_size: 256
      # All three method weights are non-zero for this entry.
      method_weights:
      - 0.2
      - 0.4
      - 0.4
    lig_atom_rename: false
    limits: -1
    sample_weight: 0.5
    sampler_configs:
      # Per-molecule-type (`alpha_dict`) and per-record-type (`beta_dict`)
      # factors for the weighted sampler.
      # NOTE(review): the exact weighting formula lives in the sampler
      # code — confirm there.
      alpha_dict:
        ligand: 1
        nuc: 3
        prot: 3
      beta_dict:
        chain: 0.5
        interface: 1
      force_recompute_weight: true
      sampler_type: weighted
    shuffle_mols: false
    shuffle_sym_ids: false
# Same value as `af3_lr_scheduler.decay_every_n_steps`.
decay_every_n_steps: 50000
deterministic: false
diffusion_batch_size: 48
diffusion_chunk_size: 4
# DPO training is off; `loss.diffusion.dpo_enabled` is false as well.
dpo_training: false
dtype: bf16
dump_embeddings: false
ema_decay: 0.999
# A single empty-string keyword.
# NOTE(review): how an empty keyword is matched against parameter names is
# defined by the EMA code — confirm there.
ema_mutable_param_keywords:
- ''
eval_ema_only: false
eval_first: false
eval_interval: 400
eval_only: false
find_unused_parameters: false
grad_clip_norm: 10
# Inference-time chunking settings.
# NOTE(review): presumably these trade memory for speed — confirm the
# effect of each chunk size in the consuming code.
infer_setting:
  chunk_size: 64
  lddt_metrics_chunk_size: 1
  lddt_metrics_sparse_enable: true
  # Same value as the top-level `diffusion_chunk_size` entry.
  sample_diffusion_chunk_size: 4
# Noise-schedule parameters used at inference.
inference_noise_scheduler:
  rho: 7
  s_max: 160.0
  s_min: 0.0004
  # Same value as the top-level `sigma_data`,
  # `model.diffusion_module.sigma_data` and `train_noise_sampler.sigma_data`.
  sigma_data: 16.0
iters_to_accumulate: 1
# Both checkpoint paths below point at the same file (`999.pt` of a DPO
# training run). The paths are absolute and machine-specific.
load_checkpoint_path: /home/toolkit/ewpo/outputs/dpo/dpo_train_12999_ewpo_20250128_013045/checkpoints/999.pt
load_ema_checkpoint_path: /home/toolkit/ewpo/outputs/dpo/dpo_train_12999_ewpo_20250128_013045/checkpoints/999.pt
# NOTE(review): presumably restores model weights only, making the
# `skip_load_*` flags further down irrelevant — confirm against the loader.
load_params_only: true
load_strict: true
log_interval: 50
# Loss configuration: per-term settings, chunking switches, and the
# `weight` section holding the per-term coefficients.
loss:
  diffusion:
    # The preference-optimisation switches (`dpo_enabled`, `ewpo_enabled`,
    # `ewpo_with_ref`) are all false here, so `beta_dpo` / `beta_ewpo` are
    # presumably unused in this run — confirm against the loss code.
    beta_dpo: 1
    beta_ewpo: 0.4
    bond:
      eps: 1.0e-06
    dpo_enabled: false
    ewpo_enabled: false
    ewpo_with_ref: false
    linear_ew: false
    mse:
      eps: 1.0e-06
      weight_dna: 5.0
      weight_ligand: 10.0
      # One third, written out at full float precision by the dumper.
      weight_mse: 0.3333333333333333
      weight_rna: 5.0
    norm_by_len: false
    smooth_lddt:
      eps: 1.0e-06
  diffusion_bond_chunk_size: 1
  diffusion_chunk_size_outer: -1
  diffusion_lddt_chunk_size: 1
  diffusion_lddt_loss_dense: true
  diffusion_sparse_loss_enable: true
  # Binning for the distogram term; `no_bins` matches
  # `model.distogram_head.no_bins` and the top-level `no_bins`.
  distogram:
    eps: 1.0e-06
    max_bin: 21.6875
    min_bin: 2.3125
    no_bins: 64
  # `pae` and `pde` use identical binning (0 to 32 in 64 bins).
  pae:
    eps: 1.0e-06
    max_bin: 32
    min_bin: 0
    no_bins: 64
  pde:
    eps: 1.0e-06
    max_bin: 32
    min_bin: 0
    no_bins: 64
  plddt:
    eps: 1.0e-06
    max_bin: 1.0
    min_bin: 0
    no_bins: 50
    normalize: true
  resolution:
    max: 4.0
    min: 0.1
  resolved:
    eps: 1.0e-06
  # Per-term coefficients. `alpha_bond` and `alpha_pae` are 0.0.
  # NOTE(review): presumably a zero coefficient removes that term from the
  # total — confirm.
  weight:
    alpha_bond: 0.0
    alpha_confidence: 0.0001
    alpha_diffusion: 4.0
    alpha_distogram: 0.03
    alpha_except_pae: 1.0
    alpha_pae: 0.0
    smooth_lddt: 1.0
loss_metrics_sparse_enable: true
# Same value as `adam.lr` and `af3_lr_scheduler.lr`.
lr: 0.0018
# Scheduler name; the matching settings section in this file is
# `af3_lr_scheduler`.
lr_scheduler: af3
# Same value as `model.confidence_head.max_atoms_per_token`.
max_atoms_per_token: 24
max_steps: 100000
# Evaluation-metric settings: ranker key lists for chain, complex and
# interface levels, plus clash thresholds and the lDDT epsilon.
# NOTE(review): the key names must match score names produced by the model
# code — confirm there before renaming.
metrics:
  chain_ranker_keys:
  - chain_ptm
  - chain_plddt
  clash:
    af3_clash_threshold: 1.1
    vdw_clash_threshold: 0.75
  complex_ranker_keys:
  - plddt
  - gpde
  - ranking_score
  interface_ranker_keys:
  - chain_pair_iptm
  - chain_pair_iptm_global
  - chain_pair_plddt
  lddt:
    eps: 1.0e-06
# NOTE(review): presumably a floor on the learning rate, expressed as a
# fraction of `lr` — confirm which scheduler reads it.
min_lr_ratio: 0.1
# Model architecture. Most channel widths and `blocks_per_ckpt` repeat the
# top-level entries of the same name.
model:
  N_cycle: 4
  N_model_seed: 1
  confidence_head:
    blocks_per_ckpt: 1
    c_s: 384
    c_s_inputs: 449
    c_z: 128
    distance_bin_end: 52.0
    distance_bin_start: 3.25
    distance_bin_step: 1.25
    max_atoms_per_token: 24
    n_blocks: 4
    pairformer_dropout: 0.0
    stop_gradient: true
  diffusion_module:
    # The atom encoder and atom decoder use the same size (3 blocks, 4 heads).
    atom_decoder:
      n_blocks: 3
      n_heads: 4
    atom_encoder:
      n_blocks: 3
      n_heads: 4
    blocks_per_ckpt: 1
    c_atom: 128
    c_atompair: 16
    c_s: 384
    c_s_inputs: 449
    # NOTE(review): 768 here, while the top-level `c_token` and
    # `input_embedder.c_token` are 384 — confirm the difference is intended.
    c_token: 768
    c_z: 128
    # Weight-initialisation switches; only `zero_init_adaln` and
    # `zero_init_dit_output` are on.
    initialization:
      glorot_init_self_attention: false
      he_normal_init_atom_encoder_output: false
      he_normal_init_atom_encoder_small_mlp: false
      zero_init_adaln: true
      zero_init_atom_decoder_linear: false
      zero_init_atom_encoder_residual_linear: false
      zero_init_condition_transition: false
      zero_init_dit_output: true
      zero_init_residual_condition_transition: false
    # Same value as the top-level `sigma_data` entry.
    sigma_data: 16.0
    transformer:
      n_blocks: 24
      n_heads: 16
    use_fine_grained_checkpoint: true
  distogram_head:
    c_z: 128
    # Same value as `loss.distogram.no_bins` and the top-level `no_bins`.
    no_bins: 64
  input_embedder:
    c_atom: 128
    c_atompair: 16
    c_token: 384
  msa_module:
    blocks_per_ckpt: 1
    c_m: 64
    c_s_inputs: 449
    c_z: 128
    msa_dropout: 0.15
    n_blocks: 4
    pair_dropout: 0.25
  pairformer:
    blocks_per_ckpt: 1
    c_s: 384
    c_z: 128
    dropout: 0.25
    # Same value as the top-level `n_blocks` entry.
    n_blocks: 48
    n_heads: 16
  relative_position_encoding:
    c_z: 128
    r_max: 32
    s_max: 2
  # Zero blocks here; `data.template.enable` is also false.
  template_embedder:
    blocks_per_ckpt: 1
    c: 64
    c_z: 128
    dropout: 0.25
    n_blocks: 0
# Same value as `model.pairformer.n_blocks`.
n_blocks: 48
need_atom_confidence: false
# Same value as `model.distogram_head.no_bins`.
no_bins: 64
# NOTE(review): presumably runs prediction/sampling without training, in
# line with `base_dir` ending in `outputs/sampling` — confirm.
predict_only: true
project: protenix
# Quoted so it stays a string: some YAML 1.1 loaders read an unquoted
# 13045_250 as the integer 13045250.
run_name: '13045_250'
# Diffusion sampling settings, with a separate sample count and step count
# for the mini-rollout case.
sample_diffusion:
  # NOTE(review): 250 matches the `_250` suffix of `run_name` — presumably
  # by convention; confirm and keep them in sync if so.
  N_sample: 250
  N_sample_mini_rollout: 1
  N_step: 20
  N_step_mini_rollout: 20
  gamma0: 0.8
  gamma_min: 1.0
  noise_scale_lambda: 1.75
  step_scale_eta: 1.25
seed: 42
# Same value as the `sigma_data` entries under `inference_noise_scheduler`,
# `model.diffusion_module` and `train_noise_sampler`.
sigma_data: 16.0
# Per-component switches, all true in this file.
# NOTE(review): presumably each `true` excludes that component from
# automatic mixed precision (`dtype` is bf16) — confirm against the trainer.
skip_amp:
  confidence_head: true
  loss: true
  sample_diffusion: true
  sample_diffusion_training: true
skip_confidence_and_permutation: false
# Checkpoint-restore switches, all false.
# NOTE(review): may have no effect while `load_params_only` is true —
# confirm against the loader.
skip_load_optimizer: false
skip_load_scheduler: false
skip_load_step: false
sorted_by_ranking_score: false
# Test-side defaults; the values match the corresponding keys inside the
# `data` entries with `crop_size: -1`.
test_lig_atom_rename: false
test_max_n_token: -1
test_shuffle_mols: false
test_shuffle_sym_ids: false
train_confidence_only: false
# Same value as `data.atlas_train.cropping_configs.crop_size`.
train_crop_size: 256
train_lig_atom_rename: false
# Noise-level sampling parameters for training.
# NOTE(review): `p_mean` / `p_std` presumably parameterise a log-normal
# distribution over noise levels — confirm against the sampler code.
train_noise_sampler:
  p_mean: -1.2
  p_std: 1.5
  # Same value as the top-level `sigma_data` entry.
  sigma_data: 16.0
train_shuffle_mols: false
train_shuffle_sym_ids: false
# Optional kernel/attention backends; all are disabled in this file.
use_deepspeed_evo_attention: false
use_flash: false
use_lma: false
use_memory_efficient_kernel: false
# Weights & Biases logging is off and `wandb_id` is an empty string.
use_wandb: false
use_xformer: false
wandb_id: ''
# Same value as `af3_lr_scheduler.warmup_steps`.
warmup_steps: 10