# Source: ckpt/config.yaml (Hugging Face Hub, uploaded by chenyn66)
# Commit 1ed78cb (verified): "Upload folder using huggingface_hub"
# Adam optimizer hyperparameters.
adam:
  beta1: 0.9
  beta2: 0.95
  # Same value as the top-level `lr` entry.
  lr: 0.0018
  use_adamw: false
  # Keep the `1.0e-08` spelling: YAML 1.1 loaders need the dot to read a float.
  weight_decay: 1.0e-08
# Settings for the `af3` learning-rate scheduler (selected by the top-level
# `lr_scheduler` key). The values repeat the top-level `decay_every_n_steps`,
# `lr` and `warmup_steps` entries.
af3_lr_scheduler:
  decay_every_n_steps: 50000
  decay_factor: 0.95
  lr: 0.0018
  warmup_steps: 10
# Atom-level permutation options, with separate `test` and `train` switches.
atom_permutation:
  global_align_wo_symmetric_atom: false
  permute_by_pocket: true
  test:
    diffusion_sample: true
  train:
    diffusion_sample: false
    mini_rollout: true
# NOTE(review): absolute, host-specific path (presumably the output root for
# this run); adjust it when running on another machine.
base_dir: /home/toolkit/ewpo/outputs/sampling
blocks_per_ckpt: 1
# Top-level channel widths. The per-module entries under `model` repeat these
# values, except `diffusion_module`, which lists c_token as 768.
c_atom: 128
c_atompair: 16
c_s: 384
c_s_inputs: 449
c_token: 384
c_z: 128
# Chain-level permutation options. `configs` holds the alignment settings;
# `test` and `train` carry the same switches as `atom_permutation`.
chain_permutation:
  configs:
    accept_it_as_it_is: false
    enumerate_all_anchor_pairs: false
    find_gt_anchor_first: false
    selection_metric: aligned_rmsd
    use_center_rmsd: false
  permute_by_pocket: true
  test:
    diffusion_sample: true
  train:
    diffusion_sample: false
    mini_rollout: true
# NOTE(review): -1 presumably disables periodic checkpointing; confirm against
# the trainer before relying on it.
checkpoint_interval: -1
# Dataset definitions and data-loading options.
# NOTE(review): every path below is absolute under /home/toolkit/ewpo, so this
# file only resolves on a host with that directory layout.
# NOTE(review): `crop_size: -1` / `max_n_token: -1` / `limits: -1` presumably
# mean "no limit"; confirm against the data loader.
data:
  # ATLAS entries from indices.csv, cropped to 256 with the third cropping
  # method weighted 0.0.
  atlas:
    base_info:
      bioassembly_dict_dir: /home/toolkit/ewpo/data_atlas/mmcif_bioassembly
      indices_fpath: /home/toolkit/ewpo/data_atlas/indices.csv
      max_n_token: -1
      mmcif_dir: /home/toolkit/ewpo/data_atlas/mmcif
      pdb_list: ''
      random_sample_if_failed: true
      use_reference_chains_only: true
    cropping_configs:
      crop_size: 256
      method_weights:
      - 0.5
      - 0.5
      - 0.0
    lig_atom_rename: false
    limits: -1
    precomputed_emb_dir: /home/toolkit/ewpo/dumps
    sample_weight: 0.5
    sampler_configs:
      sampler_type: uniform
    shuffle_mols: false
    shuffle_sym_ids: false
  # Entries from indices_repr.csv; all cropping weight on the third method.
  atlas_repr:
    base_info:
      bioassembly_dict_dir: /home/toolkit/ewpo/data_atlas/mmcif_bioassembly
      indices_fpath: /home/toolkit/ewpo/data_atlas/indices_repr.csv
      max_n_token: -1
      mmcif_dir: /home/toolkit/ewpo/data_atlas/mmcif
      pdb_list: ''
      random_sample_if_failed: true
      use_reference_chains_only: true
    cropping_configs:
      crop_size: -1
      method_weights:
      - 0.0
      - 0.0
      - 1.0
    lig_atom_rename: false
    limits: -1
    precomputed_emb_dir: /home/toolkit/ewpo/dumps
    sample_weight: 0.5
    sampler_configs:
      sampler_type: uniform
    shuffle_mols: false
    shuffle_sym_ids: false
  # Entries from indices_test.csv.
  atlas_test:
    base_info:
      bioassembly_dict_dir: /home/toolkit/ewpo/data_atlas/mmcif_bioassembly
      indices_fpath: /home/toolkit/ewpo/data_atlas/indices_test.csv
      max_n_token: -1
      mmcif_dir: /home/toolkit/ewpo/data_atlas/mmcif
      pdb_list: ''
      random_sample_if_failed: true
      use_reference_chains_only: true
    cropping_configs:
      crop_size: -1
      method_weights:
      - 0.0
      - 0.0
      - 1.0
    lig_atom_rename: false
    limits: -1
    precomputed_emb_dir: /home/toolkit/ewpo/dumps
    sample_weight: 0.5
    sampler_configs:
      sampler_type: uniform
    shuffle_mols: false
    shuffle_sym_ids: false
  # Entries from indices_test_repr.csv; this is the set named in `test_sets`.
  atlas_test_repr:
    base_info:
      bioassembly_dict_dir: /home/toolkit/ewpo/data_atlas/mmcif_bioassembly
      indices_fpath: /home/toolkit/ewpo/data_atlas/indices_test_repr.csv
      max_n_token: -1
      mmcif_dir: /home/toolkit/ewpo/data_atlas/mmcif
      pdb_list: ''
      random_sample_if_failed: true
      use_reference_chains_only: true
    cropping_configs:
      crop_size: -1
      method_weights:
      - 0.0
      - 0.0
      - 1.0
    lig_atom_rename: false
    limits: -1
    precomputed_emb_dir: /home/toolkit/ewpo/dumps
    sample_weight: 0.5
    sampler_configs:
      sampler_type: uniform
    shuffle_mols: false
    shuffle_sym_ids: false
  # Entries from indices_train.csv, cropped to 256; this is the set named in
  # `train_sets`.
  atlas_train:
    base_info:
      bioassembly_dict_dir: /home/toolkit/ewpo/data_atlas/mmcif_bioassembly
      indices_fpath: /home/toolkit/ewpo/data_atlas/indices_train.csv
      max_n_token: -1
      mmcif_dir: /home/toolkit/ewpo/data_atlas/mmcif
      pdb_list: ''
      random_sample_if_failed: true
      use_reference_chains_only: true
    cropping_configs:
      crop_size: 256
      method_weights:
      - 0.5
      - 0.5
      - 0.0
    lig_atom_rename: false
    limits: -1
    precomputed_emb_dir: /home/toolkit/ewpo/dumps
    sample_weight: 0.5
    sampler_configs:
      sampler_type: uniform
    shuffle_mols: false
    shuffle_sym_ids: false
  # Preference-mode variant of the training index: adds an annotation CSV,
  # `preference_mode`, `retrieval_k`, and a 384-token cap instead of a crop.
  atlas_train_dpo:
    base_info:
      annotation_csv: /home/toolkit/ewpo/data_atlas/minimization_results_all.csv
      bioassembly_dict_dir: /home/toolkit/ewpo/data_atlas/mmcif_bioassembly
      indices_fpath: /home/toolkit/ewpo/data_atlas/indices_train.csv
      max_n_token: 384
      mmcif_dir: /home/toolkit/ewpo/data_atlas/mmcif
      pdb_list: ''
      preference_mode: true
      random_sample_if_failed: true
      retrieval_k: 2
      use_reference_chains_only: true
    cropping_configs:
      crop_size: -1
      method_weights:
      - 0.5
      - 0.5
      - 0.0
    lig_atom_rename: false
    limits: -1
    precomputed_emb_dir: /home/toolkit/ewpo/dumps
    sample_weight: 0.5
    sampler_configs:
      sampler_type: uniform
    shuffle_mols: false
    shuffle_sym_ids: false
  # Entries from indices_val.csv.
  atlas_val:
    base_info:
      bioassembly_dict_dir: /home/toolkit/ewpo/data_atlas/mmcif_bioassembly
      indices_fpath: /home/toolkit/ewpo/data_atlas/indices_val.csv
      max_n_token: -1
      mmcif_dir: /home/toolkit/ewpo/data_atlas/mmcif
      pdb_list: ''
      random_sample_if_failed: true
      use_reference_chains_only: true
    cropping_configs:
      crop_size: -1
      method_weights:
      - 0.0
      - 0.0
      - 1.0
    lig_atom_rename: false
    limits: -1
    precomputed_emb_dir: /home/toolkit/ewpo/dumps
    sample_weight: 0.5
    sampler_configs:
      sampler_type: uniform
    shuffle_mols: false
    shuffle_sym_ids: false
  # Chemical component dictionary (version tag 20240608 in the file names) and
  # its pickled RDKit companion.
  ccd_components_file: /home/toolkit/ewpo/ewpo/release_data/ccd_cache/components.v20240608.cif
  ccd_components_rdkit_mol_file: /home/toolkit/ewpo/ewpo/release_data/ccd_cache/components.v20240608.cif.rdkit_mol.pkl
  epoch_size: 10000
  # MSA options; the RNA branch is switched off and its paths are empty.
  msa:
    enable: true
    enable_rna_msa: false
    max_size:
      test: 16384
      train: 16384
    merge_method: dense_max
    min_size:
      test: 2048
      train: 1
    prot:
      indexing_method: sequence
      non_pairing_db: mmseqs_other
      pairing_db: uniref100
      pdb_mmseqs_dir: /home/toolkit/ewpo/release_data/mmcif_msa
      seq_to_pdb_idx_path: /home/toolkit/ewpo/release_data/seq_to_pdb_index.json
    rna:
      indexing_method: sequence
      rna_msa_dir: ''
      seq_to_pdb_idx_path: ''
    sample_cutoff:
      test: 2048
      train: 2048
    strategy: random
  num_dl_workers: 16
  # Defined here but not named in `train_sets` or `test_sets`.
  posebusters_0925:
    base_info:
      bioassembly_dict_dir: /home/toolkit/ewpo/release_data/posebusters_bioassembly
      find_all_pockets: false
      find_pocket: true
      indices_fpath: /home/toolkit/ewpo/release_data/indices/posebusters_indices_mainchain_interface.csv
      max_n_token: -1
      mmcif_dir: /home/toolkit/ewpo/release_data/posebusters_mmcif
      pdb_list: ''
    cropping_configs:
      crop_size: -1
      method_weights:
      - 0.0
      - 0.0
      - 1.0
    lig_atom_rename: false
    sampler_configs:
      sampler_type: uniform
    shuffle_mols: false
    shuffle_sym_ids: false
  # Defined here but not named in `train_sets` or `test_sets`.
  # NOTE(review): `indices_fpath` says maxtoken1536 while `pdb_list` says
  # maxtoken1024_sample384; confirm the pairing is intended.
  recentPDB_1536_sample384_0925:
    base_info:
      bioassembly_dict_dir: /home/toolkit/ewpo/release_data/recentPDB_bioassembly
      find_eval_chain_interface: true
      group_by_pdb_id: true
      indices_fpath: /home/toolkit/ewpo/release_data/indices/recentPDB_low_homology_maxtoken1536.csv
      max_n_token: -1
      mmcif_dir: /home/toolkit/ewpo/release_data/mmcif
      pdb_list: /home/toolkit/ewpo/release_data/indices/recentPDB_low_homology_maxtoken1024_sample384_pdb_id.txt
      sort_by_n_token: false
    cropping_configs:
      crop_size: -1
      method_weights:
      - 0.0
      - 0.0
      - 1.0
    lig_atom_rename: false
    sampler_configs:
      sampler_type: uniform
    shuffle_mols: false
    shuffle_sym_ids: false
  template:
    enable: false
  test_ref_pos_augment: true
  # Dataset names below refer to the sibling keys of this `data` mapping.
  test_sets:
  - atlas_test_repr
  train_ref_pos_augment: true
  train_sampler:
    sampler_type: weighted
    # One weight, matching the single entry in `train_sets`.
    train_sample_weights:
    - 1.0
  train_sets:
  - atlas_train
  # Defined here but not named in `train_sets` or `test_sets`. It is the only
  # dataset with a weighted sampler and an `exclusion` filter.
  weightedPDB_before2109_wopb_nometalc_0925:
    base_info:
      bioassembly_dict_dir: /home/toolkit/ewpo/release_data/mmcif_bioassembly
      exclusion:
        mol_1_type:
        - ions
        mol_2_type:
        - ions
      indices_fpath: /home/toolkit/ewpo/release_data/indices/weightedPDB_indices_before_2021-09-30_wo_posebusters_resolution_below_9.csv.gz
      max_n_token: -1
      mmcif_dir: /home/toolkit/ewpo/release_data/mmcif
      pdb_list: ''
      random_sample_if_failed: true
      use_reference_chains_only: false
    cropping_configs:
      crop_size: 256
      method_weights:
      - 0.2
      - 0.4
      - 0.4
    lig_atom_rename: false
    limits: -1
    sample_weight: 0.5
    sampler_configs:
      alpha_dict:
        ligand: 1
        nuc: 3
        prot: 3
      beta_dict:
        chain: 0.5
        interface: 1
      force_recompute_weight: true
      sampler_type: weighted
    shuffle_mols: false
    shuffle_sym_ids: false
# Same value as the `decay_every_n_steps` entry listed under `af3_lr_scheduler`.
decay_every_n_steps: 50000
deterministic: false
# Same values as the top-level model widths elsewhere in this file use; the
# chunk size (4) also matches `sample_diffusion_chunk_size` under
# `infer_setting`.
diffusion_batch_size: 48
diffusion_chunk_size: 4
# Preference training is off here; the `dpo_enabled` / `ewpo_enabled` loss
# switches in this file are also false.
dpo_training: false
dtype: bf16
dump_embeddings: false
ema_decay: 0.999
# A one-element list holding the empty string (not an empty list).
# NOTE(review): how the consumer matches an empty keyword is not visible here.
ema_mutable_param_keywords:
- ''
eval_ema_only: false
eval_first: false
eval_interval: 400
eval_only: false
find_unused_parameters: false
grad_clip_norm: 10
# Chunk sizes and sparse-metric switch grouped under `infer_setting`.
# `sample_diffusion_chunk_size` matches the top-level `diffusion_chunk_size`.
infer_setting:
  chunk_size: 64
  lddt_metrics_chunk_size: 1
  lddt_metrics_sparse_enable: true
  sample_diffusion_chunk_size: 4
# Noise schedule parameters for inference. `sigma_data` repeats the top-level
# `sigma_data` value (16.0).
inference_noise_scheduler:
  rho: 7
  s_max: 160.0
  s_min: 0.0004
  sigma_data: 16.0
iters_to_accumulate: 1
# Both checkpoint paths point at the same file (999.pt from a `dpo_train` run).
# NOTE(review): absolute, host-specific paths; confirm that loading the EMA
# weights from the same file as the regular weights is intended.
load_checkpoint_path: /home/toolkit/ewpo/outputs/dpo/dpo_train_12999_ewpo_20250128_013045/checkpoints/999.pt
load_ema_checkpoint_path: /home/toolkit/ewpo/outputs/dpo/dpo_train_12999_ewpo_20250128_013045/checkpoints/999.pt
load_params_only: true
load_strict: true
log_interval: 50
# Loss configuration: per-term settings, binning for the confidence heads, and
# the weights that combine the terms.
loss:
  diffusion:
    # Preference-loss coefficients; both switches below are false in this file.
    beta_dpo: 1
    beta_ewpo: 0.4
    bond:
      eps: 1.0e-06
    dpo_enabled: false
    ewpo_enabled: false
    ewpo_with_ref: false
    linear_ew: false
    mse:
      eps: 1.0e-06
      weight_dna: 5.0
      weight_ligand: 10.0
      # 1/3 written out as a float.
      weight_mse: 0.3333333333333333
      weight_rna: 5.0
    norm_by_len: false
    smooth_lddt:
      eps: 1.0e-06
  diffusion_bond_chunk_size: 1
  diffusion_chunk_size_outer: -1
  diffusion_lddt_chunk_size: 1
  diffusion_lddt_loss_dense: true
  diffusion_sparse_loss_enable: true
  # `no_bins` matches the top-level `no_bins` and `model.distogram_head`.
  distogram:
    eps: 1.0e-06
    max_bin: 21.6875
    min_bin: 2.3125
    no_bins: 64
  pae:
    eps: 1.0e-06
    max_bin: 32
    min_bin: 0
    no_bins: 64
  pde:
    eps: 1.0e-06
    max_bin: 32
    min_bin: 0
    no_bins: 64
  plddt:
    eps: 1.0e-06
    max_bin: 1.0
    min_bin: 0
    no_bins: 50
    normalize: true
  resolution:
    max: 4.0
    min: 0.1
  resolved:
    eps: 1.0e-06
  # Term weights; `alpha_bond` and `alpha_pae` are zero in this file.
  weight:
    alpha_bond: 0.0
    alpha_confidence: 0.0001
    alpha_diffusion: 4.0
    alpha_distogram: 0.03
    alpha_except_pae: 1.0
    alpha_pae: 0.0
    smooth_lddt: 1.0
loss_metrics_sparse_enable: true
# Same learning rate as the `lr` entries listed under `adam` and
# `af3_lr_scheduler`.
lr: 0.0018
# Names the scheduler; this file carries a matching `af3_lr_scheduler` section.
lr_scheduler: af3
# Same value as `max_atoms_per_token` listed under the confidence head.
max_atoms_per_token: 24
max_steps: 100000
# Evaluation metric settings: ranking keys per level, clash thresholds, and
# the lDDT epsilon.
metrics:
  # List order is kept as found (not alphabetical).
  chain_ranker_keys:
  - chain_ptm
  - chain_plddt
  clash:
    af3_clash_threshold: 1.1
    vdw_clash_threshold: 0.75
  complex_ranker_keys:
  - plddt
  - gpde
  - ranking_score
  interface_ranker_keys:
  - chain_pair_iptm
  - chain_pair_iptm_global
  - chain_pair_plddt
  lddt:
    eps: 1.0e-06
# NOTE(review): presumably a floor on the scheduled learning rate, as a
# fraction of `lr`; confirm which scheduler reads it.
min_lr_ratio: 0.1
# Per-module model hyperparameters. Channel widths repeat the top-level
# c_atom / c_atompair / c_s / c_s_inputs / c_z values, except where noted.
model:
  N_cycle: 4
  N_model_seed: 1
  confidence_head:
    blocks_per_ckpt: 1
    c_s: 384
    c_s_inputs: 449
    c_z: 128
    distance_bin_end: 52.0
    distance_bin_start: 3.25
    distance_bin_step: 1.25
    max_atoms_per_token: 24
    n_blocks: 4
    pairformer_dropout: 0.0
    stop_gradient: true
  diffusion_module:
    atom_decoder:
      n_blocks: 3
      n_heads: 4
    atom_encoder:
      n_blocks: 3
      n_heads: 4
    blocks_per_ckpt: 1
    c_atom: 128
    c_atompair: 16
    c_s: 384
    c_s_inputs: 449
    # Differs from the top-level `c_token` (384).
    c_token: 768
    c_z: 128
    initialization:
      glorot_init_self_attention: false
      he_normal_init_atom_encoder_output: false
      he_normal_init_atom_encoder_small_mlp: false
      zero_init_adaln: true
      zero_init_atom_decoder_linear: false
      zero_init_atom_encoder_residual_linear: false
      zero_init_condition_transition: false
      zero_init_dit_output: true
      zero_init_residual_condition_transition: false
    sigma_data: 16.0
    transformer:
      n_blocks: 24
      n_heads: 16
    use_fine_grained_checkpoint: true
  distogram_head:
    c_z: 128
    no_bins: 64
  input_embedder:
    c_atom: 128
    c_atompair: 16
    c_token: 384
  msa_module:
    blocks_per_ckpt: 1
    c_m: 64
    c_s_inputs: 449
    c_z: 128
    msa_dropout: 0.15
    n_blocks: 4
    pair_dropout: 0.25
  pairformer:
    blocks_per_ckpt: 1
    c_s: 384
    c_z: 128
    dropout: 0.25
    # Same value as the top-level `n_blocks`.
    n_blocks: 48
    n_heads: 16
  relative_position_encoding:
    c_z: 128
    r_max: 32
    s_max: 2
  # Zero blocks here, and `data.template.enable` is false in this file.
  template_embedder:
    blocks_per_ckpt: 1
    c: 64
    c_z: 128
    dropout: 0.25
    n_blocks: 0
# Top-level block count; the pairformer entry in this file lists the same 48.
n_blocks: 48
need_atom_confidence: false
# Same bin count as the distogram entries elsewhere in this file.
no_bins: 64
predict_only: true
project: protenix
# Quoted on purpose: unquoted, a YAML 1.1 loader reads 13045_250 as an integer
# (underscores are digit separators there) instead of a string.
run_name: '13045_250'
# Diffusion sampling settings: sample and step counts for the full run and for
# the mini-rollout, plus the sampler's scale parameters.
sample_diffusion:
  N_sample: 250
  N_sample_mini_rollout: 1
  N_step: 20
  N_step_mini_rollout: 20
  gamma0: 0.8
  gamma_min: 1.0
  noise_scale_lambda: 1.75
  step_scale_eta: 1.25
seed: 42
# Repeated with the same value (16.0) in the noise-scheduler, noise-sampler and
# diffusion-module entries of this file.
sigma_data: 16.0
# Per-component `skip_amp` switches; all four are enabled.
# NOTE(review): presumably these components run outside mixed precision while
# `dtype` is bf16; confirm against the trainer.
skip_amp:
  confidence_head: true
  loss: true
  sample_diffusion: true
  sample_diffusion_training: true
skip_confidence_and_permutation: false
# Checkpoint-restore switches. NOTE(review): `load_params_only` is true in this
# file, so whether these three are consulted at all depends on the loader.
skip_load_optimizer: false
skip_load_scheduler: false
skip_load_step: false
sorted_by_ranking_score: false
# Global test/train data switches; the per-dataset `lig_atom_rename`,
# `max_n_token`, `shuffle_*` and `crop_size` entries in this file carry
# matching values for most datasets.
test_lig_atom_rename: false
test_max_n_token: -1
test_shuffle_mols: false
test_shuffle_sym_ids: false
train_confidence_only: false
train_crop_size: 256
train_lig_atom_rename: false
# Training-time noise sampler parameters. `sigma_data` repeats the top-level
# `sigma_data` value (16.0).
train_noise_sampler:
  p_mean: -1.2
  p_std: 1.5
  sigma_data: 16.0
train_shuffle_mols: false
train_shuffle_sym_ids: false
# Optional kernel / attention back-end switches; all are off in this file.
use_deepspeed_evo_attention: false
use_flash: false
use_lma: false
use_memory_efficient_kernel: false
# Weights & Biases logging is off and the run id is the empty string.
use_wandb: false
use_xformer: false
wandb_id: ''
# Same value as the `warmup_steps` entry listed under `af3_lr_scheduler`.
warmup_steps: 10