File size: 2,703 Bytes
d9df210
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# Experiment setup
# Run identification and bookkeeping. How each key is consumed is decided by
# code outside this file.
job_key: ''
run_name: 'filip_large'
run_details: ""
project_name: ''
wandb_entity_name: 'mass-spec-ml'
# NOTE(review): the name suggests True turns Weights & Biases logging off - confirm.
no_wandb: True
seed: 0
debug: False
# No value is set (the key parses as null); the quoted path after '#' is a
# commented-out example, not an active setting.
checkpoint_pth: #'../pretrained_models/msgym_formSpec.ckpt'

# Training setup
# NOTE(review): these key names match PyTorch Lightning Trainer arguments and are
# presumably forwarded to it - confirm against the training script.
max_epochs: 2000
accelerator: 'gpu'
# NOTE(review): under Lightning semantics a list selects device *indices*
# (GPU index 1 here), not a device count - confirm this is intended.
devices: [1]
log_every_n_steps: 250
# NOTE(review): under Lightning semantics a float of 1.0 means one validation
# pass per training epoch - confirm.
val_check_interval: 1.0

# Data paths
# Every non-empty path is relative ('../...'); the directory it is resolved
# against is not defined in this file.
candidates_pth: ../data/sample/candidates_mass.json
dataset_pth: ../data/MassSpecGym/data/sample_data.tsv
subformula_dir_pth: ../data/MassSpecGym/data/subformulae_default
# Left without a value (parses as null, not as an empty string).
split_pth: 
# Despite "_dir_" in the key names, the next two values are single-file paths
# (.pickle / .pkl).
# NOTE(review): "r5_1024" in the file name presumably has to agree with
# fp_radius / fp_size elsewhere in this config - confirm.
fp_dir_pth: '../data/MassSpecGym/data/morganfp_r5_1024.pickle'
cons_spec_dir_pth: "../data/MassSpecGym/data/sample_consensus_formSpec.pkl"
# Explicit empty strings (unlike split_pth, which is null).
NL_spec_dir_pth: ""
partial_checkpoint: ""

# General hyperparameters
batch_size: 64
lr: 5.0e-05 
weight_decay: 0
# NOTE(review): presumably the temperature of the contrastive loss - confirm.
contr_temp: 0.05
# NOTE(review): the unit (epochs vs. validation checks) is not visible here - confirm.
early_stopping_patience: 300
loss_strategy: 'static'
# NOTE(review): if this is the data-loader worker count, 50 may exceed the
# CPU cores available on smaller machines - confirm.
num_workers: 50


############################## Data transforms ##############################
# - Spectra
# Selected spectrum representation; the two numbered groups below hold the
# settings of the "Binner" and "SpecFormula" transforms respectively.
# NOTE(review): presumably only the group matching spectra_view is used - confirm.
spectra_view: SpecFormula
# 1. Binner
max_mz: 1000
bin_width: 1
mask_peak_ratio: 0.00

# 2. SpecFormula
# Element symbols listed for the SpecFormula transform (14 entries).
# NOTE(review): the order may define feature positions downstream - do not
# reorder without checking.
element_list: ['H', 'C',  'O', 'N', 'P', 'S', 'Cl', 'F', 'Br', 'I', 'B', 'As', 'Si', 'Se']
add_intensities: True
mask_precursor: False

# - Molecule
# Selected molecule representation and its atom/bond feature sets.
molecule_view: "MolGraph"
atom_feature: 'full'
bond_feature: 'full'


############################## Views ##############################
# Each group below has an on/off flag plus a weight ("*_wt") and an
# "*_update" mapping. In this file every flag is False and every "*_update"
# mapping is empty ({}).
# NOTE(review): the "*_update" mappings presumably describe weight schedules;
# their expected schema is not visible here - confirm.

# Contrastive objective
use_contr: False
contr_wt: 1
contr_wt_update: {} 

# Consensus spectra
use_cons_spec: False
cons_spec_wt: 3
cons_spec_wt_update: {} 
cons_loss_type: 'l2' # listed alternatives: cosine, l2

# Fingerprint prediction / usage
pred_fp: False
use_fp: False
fp_loss_type: 'cosine' # listed alternatives: cosine, bce
fp_wt: 3
fp_wt_update: {} 
fp_size: 1024
fp_radius: 5
fp_dropout: 0.4

# Candidates
# NOTE(review): "aug" presumably stands for augmentation with candidate
# molecules - confirm.
aug_cands: False
aug_cands_wt: 0.1
aug_cands_update: {} 
aug_cands_size: 3

# Neutral loss
use_NL: False


############################## Task and model ##############################
task: 'retrieval'
spec_enc: Transformer_Formula
mol_enc: "GNN"
model:  MultiviewContrastive
# Pairs of encoder names for the contrastive views.
# NOTE(review): two of the three active pairs reference 'NL_spec_enc' although
# use_NL is False and NL_spec_dir_pth is empty in this file - confirm this is intended.
# Commented-out alternative kept from the original:
# [['spec_enc', 'mol_enc'], ['mol_enc', 'cons_spec_enc'], ['cons_spec_enc', 'spec_enc'], ['fp_enc', 'mol_enc'], ['fp_enc', 'spec_enc'], ['fp_enc', 'cons_spec_enc']]
contr_views: [['spec_enc', 'mol_enc'], ['spec_enc', 'NL_spec_enc'], ['mol_enc', 'NL_spec_enc']]
log_only_loss_at_stages: []
df_test_path: ""

# - Spectra encoder
final_embedding_dim: 512
fc_dropout: 0.4

# - Spectra token encoder
hidden_dims: [64, 128]
peak_dropout: 0.2

# - Formula-based spectra encoders
formula_dropout: 0.2
formula_dims: [64, 128, 256]
cross_attn_heads: 2
use_cls: False

# -- GAT params
# NOTE(review): presumably one head count per attention layer (three entries) - confirm.
attn_heads: [12,12,12]

# - Molecule encoder (GNN)
# gnn_channels has three entries, matching num_gnn_layers: 3.
# NOTE(review): presumably these two must stay in sync - confirm.
gnn_channels: [64,128,256] 
gnn_type: "gcn"
num_gnn_layers: 3
gnn_hidden_dim: 512
gnn_dropout: 0.3