# Experiment configuration for run 'filip_large': 'retrieval' task using the
# MultiviewContrastive model with a Transformer_Formula spectrum encoder and
# a GNN molecule encoder. Every setting is a top-level key, one per line.

############################## Experiment setup ##############################
job_key: ''
run_name: 'filip_large'
run_details: ''
project_name: ''
wandb_entity_name: 'mass-spec-ml'
no_wandb: true  # presumably turns Weights & Biases logging off - confirm
seed: 0
debug: false
# No checkpoint configured. Commented-out example value kept for reference:
#   '../pretrained_models/msgym_formSpec.ckpt'
checkpoint_pth: null

############################## Training setup ##############################
max_epochs: 2000
accelerator: 'gpu'
# NOTE(review): if this is forwarded to a PyTorch Lightning Trainer, a list
# selects device *indices* (here index 1), not a device count - confirm.
devices: [1]
log_every_n_steps: 250
# Deliberately a float. NOTE(review): Lightning-style trainers interpret float
# and int values of this option differently - do not rewrite as `1`.
val_check_interval: 1.0

############################## Data paths ##############################
candidates_pth: ../data/sample/candidates_mass.json
dataset_pth: ../data/MassSpecGym/data/sample_data.tsv
subformula_dir_pth: ../data/MassSpecGym/data/subformulae_default
split_pth: null  # left unset in this config
fp_dir_pth: '../data/MassSpecGym/data/morganfp_r5_1024.pickle'
cons_spec_dir_pth: '../data/MassSpecGym/data/sample_consensus_formSpec.pkl'
NL_spec_dir_pth: ''
partial_checkpoint: ''

############################## General hyperparameters ##############################
batch_size: 64
lr: 5.0e-05
weight_decay: 0
contr_temp: 0.05
early_stopping_patience: 300
loss_strategy: 'static'
num_workers: 50

############################## Data transforms ##############################
# - Spectra
spectra_view: SpecFormula
# 1. Binner
max_mz: 1000
bin_width: 1
mask_peak_ratio: 0.0
# 2. SpecFormula
element_list: ['H', 'C', 'O', 'N', 'P', 'S', 'Cl', 'F', 'Br', 'I', 'B', 'As', 'Si', 'Se']
add_intensities: true
mask_precursor: false

# - Molecule
molecule_view: 'MolGraph'
atom_feature: 'full'
bond_feature: 'full'

############################## Views ##############################
# contrastive
use_contr: false
contr_wt: 1
contr_wt_update: {}

# consensus spectra
use_cons_spec: false
cons_spec_wt: 3
cons_spec_wt_update: {}
cons_loss_type: 'l2'  # options: cosine, l2

# fp prediction/usage
pred_fp: false
use_fp: false
fp_loss_type: 'cosine'  # options: cosine, bce
fp_wt: 3
fp_wt_update: {}
fp_size: 1024
fp_radius: 5
fp_dropout: 0.4

# candidates
aug_cands: false
aug_cands_wt: 0.1
aug_cands_update: {}
aug_cands_size: 3

# neutral loss
use_NL: false

############################## Task and model ##############################
task: 'retrieval'
spec_enc: Transformer_Formula
mol_enc: 'GNN'
model: MultiviewContrastive
# Pairs of encoder views listed for the contrastive objective.
# NOTE(review): two pairs name 'NL_spec_enc' although use_NL is false and
# NL_spec_dir_pth is empty - verify this combination is intended.
# Commented-out alternative pairing kept for reference:
#   [['spec_enc', 'mol_enc'], ['mol_enc', 'cons_spec_enc'],
#    ['cons_spec_enc', 'spec_enc'], ['fp_enc', 'mol_enc'],
#    ['fp_enc', 'spec_enc'], ['fp_enc', 'cons_spec_enc']]
contr_views:
  - ['spec_enc', 'mol_enc']
  - ['spec_enc', 'NL_spec_enc']
  - ['mol_enc', 'NL_spec_enc']
log_only_loss_at_stages: []
df_test_path: ''

# - Spectra encoder
final_embedding_dim: 512
fc_dropout: 0.4

# - Spectra Token encoder
hidden_dims: [64, 128]
peak_dropout: 0.2

# - Formula-based spec encoders
formula_dropout: 0.2
formula_dims: [64, 128, 256]
cross_attn_heads: 2
use_cls: false

# -- GAT params
attn_heads: [12, 12, 12]

# - Molecule encoder (GNN)
gnn_channels: [64, 128, 256]
gnn_type: 'gcn'
num_gnn_layers: 3
gnn_hidden_dim: 512
gnn_dropout: 0.3