| { | |
| "data_cfg": { | |
| "dataset_path": "../dataset/data/processed/peptides_bbb_with_augmentation.parquet", | |
| "fold_col": "fold_id", | |
| "id_col": "peptide_id", | |
| "label_col": "bbb_label", | |
| "random_state": 42, | |
| "sequence_col": "sequence", | |
| "struct_manifest_path": "../dataset/data/processed/peptides_struct_manifest.parquet", | |
| "tabular_exclude": [ | |
| "peptide_id", | |
| "sequence", | |
| "bbb_label", | |
| "split", | |
| "source_split", | |
| "source_db", | |
| "label_tier", | |
| "is_gold", | |
| "cluster_id", | |
| "external_test", | |
| "source_id", | |
| "assay_method", | |
| "reference", | |
| "organism", | |
| "fold_id", | |
| "is_augmented", | |
| "parent_peptide_id", | |
| "sample_weight" | |
| ], | |
| "test_size": 0.2, | |
| "three_d_columns": [] | |
| }, | |
| "exp_cfg": { | |
| "esm": { | |
| "cache_dir": "artifacts/cache/esm2" | |
| }, | |
| "features": { | |
| "use_3d": false, | |
| "use_esm": true, | |
| "use_gnn": false, | |
| "use_tabular": true | |
| }, | |
| "mixup": { | |
| "alpha": 0.2, | |
| "enabled": true, | |
| "prob": 0.5 | |
| }, | |
| "model": { | |
| "dropout": 0.2, | |
| "esm_dim": 128, | |
| "hidden_dim": 256 | |
| }, | |
| "model_type": "esm_tab_mlp", | |
| "name": "exp06_esm_tab_mlp_aug" | |
| }, | |
| "tab_cols": [ | |
| "length", | |
| "mw", | |
| "ext_coef_reduced", | |
| "ext_coef_oxidized", | |
| "hydrophobic_ratio_pct", | |
| "pi", | |
| "net_charge_ph7", | |
| "total_charge", | |
| "mean_hydrophobicity", | |
| "hydrophobicity_ph7", | |
| "hydrophilic_ratio", | |
| "aliphatic_index", | |
| "boman_index", | |
| "aromaticity", | |
| "instability_index", | |
| "gravy", | |
| "charge_density", | |
| "aa_basic_pct", | |
| "aa_acidic_pct", | |
| "aa_aromatic_pct", | |
| "aa_hydrophobic_pct", | |
| "aa_polar_pct", | |
| "hydrophobic_moment", | |
| "mw_pyteomics", | |
| "mw_delta_abs" | |
| ], | |
| "train_cfg": { | |
| "calibration": { | |
| "enabled": true, | |
| "method": "isotonic" | |
| }, | |
| "maximize_metric": true, | |
| "output": { | |
| "keep_top_k": 1, | |
| "root": "artifacts", | |
| "save_periodic_every": 5 | |
| }, | |
| "primary_metric": "pr_auc", | |
| "run_name": "default", | |
| "secondary_metric": "mcc", | |
| "seed": 42, | |
| "tracking": { | |
| "mlflow": true, | |
| "mlflow_experiment": "bbb_classifier", | |
| "tensorboard": true | |
| }, | |
| "training": { | |
| "batch_size": 128, | |
| "epochs": 50, | |
| "eval_every": 1, | |
| "grad_clip": 1.0, | |
| "log_every": 5, | |
| "lr": 0.001, | |
| "num_workers": 4, | |
| "patience": 8, | |
| "weight_decay": 0.0001 | |
| } | |
| } | |
| } |