---
# Configuration for the 'binnedSpec_experiment' run
# (task: retrieval, model: MultiviewContrastive).
# Flat mapping: every option is a top-level key, grouped by comment headers.

# Experiment setup
job_key: ''
run_name: 'binnedSpec_experiment'
run_details: ''
project_name: ''
wandb_entity_name: 'mass-spec-ml'
no_wandb: true
seed: 0
debug: false
checkpoint_pth: ''

# Training setup
max_epochs: 1000
accelerator: 'gpu'
devices: [1]
log_every_n_steps: 250
val_check_interval: 1.0

# Data paths
candidates_pth: '../data/sample/candidates_mass.json'
dataset_pth: '../data/sample/data.tsv'
subformula_dir_pth: ''
# Explicit null (unset). NOTE(review): sibling paths use '' for "unset" --
# confirm which of the two the loader expects here.
split_pth: null
fp_dir_pth: '../data/sample/morganfp_r5_1024.pickle'
cons_spec_dir_pth: '../data/sample/consensus_binnedSpec.pkl'
NL_spec_dir_pth: ''
partial_checkpoint: ''

# General hyperparameters
batch_size: 64
lr: 5.0e-4
weight_decay: 0
contr_temp: 0.05
early_stopping_patience: 300
loss_strategy: 'static'  # options: static, linear, manual
num_workers: 50

############################## Data transforms ##############################
# - Spectra
spectra_view: 'SpecBinnerLog'
max_mz: 1000
bin_width: 1
mask_peak_ratio: 0.00

# - SpecFormula
# Keep the element symbols quoted: some YAML 1.1 loaders read a plain N as a
# boolean.
element_list:
  ['H', 'C', 'O', 'N', 'P', 'S', 'Cl', 'F', 'Br', 'I', 'B', 'As', 'Si', 'Se']
add_intensities: true
mask_precursor: false

# - Molecule
molecule_view: 'MolGraph'
atom_feature: 'full'
bond_feature: 'full'

############################## Views ##############################
# contrastive
use_contr: true
contr_wt: 1
contr_wt_update: {}

# consensus spectra
use_cons_spec: false
cons_spec_wt: 3
cons_spec_wt_update: {}
cons_loss_type: 'l2'  # options: cosine, l2

# fp prediction/usage
pred_fp: false
use_fp: false
fp_loss_type: 'cosine'  # options: cosine, bce
fp_wt: 3
fp_wt_update: {}
fp_size: 1024
fp_radius: 5
fp_dropout: 0.4

# candidates
aug_cands: false
aug_cands_wt: 0.1
aug_cands_update: {}
aug_cands_size: 3

# neutral loss
use_NL: false

############################## Task and model ##############################
task: 'retrieval'
spec_enc: 'MLP_BIN'
mol_enc: 'GNN'
model: 'MultiviewContrastive'
# Each entry is a pair of encoder keys (here spec_enc and mol_enc).
contr_views: [['spec_enc', 'mol_enc']]
log_only_loss_at_stages: []
df_test_path: ''

# - Spectra encoder
final_embedding_dim: 512
fc_dropout: 0.4

# - Spectra Token encoder
hidden_dims: [64, 128]
peak_dropout: 0.2

# - Formula-based spec encoders
formula_dropout: 0.2
formula_dims: [64, 128, 256]
cross_attn_heads: 2
use_cls: true

# -- GAT params
attn_heads: [12, 12, 12]

# - Molecule encoder (GNN)
gnn_channels: [64, 128, 256]
gnn_type: 'gcn'
num_gnn_layers: 3
gnn_hidden_dim: 512
gnn_dropout: 0.3