{ "data_cfg": { "dataset_path": "../dataset/data/processed/peptides_bbb_with_augmentation.parquet", "fold_col": "fold_id", "id_col": "peptide_id", "label_col": "bbb_label", "random_state": 42, "sequence_col": "sequence", "struct_manifest_path": "../dataset/data/processed/peptides_struct_manifest.parquet", "tabular_exclude": [ "peptide_id", "sequence", "bbb_label", "split", "source_split", "source_db", "label_tier", "is_gold", "cluster_id", "external_test", "source_id", "assay_method", "reference", "organism", "fold_id", "is_augmented", "parent_peptide_id", "sample_weight" ], "test_size": 0.2, "three_d_columns": [] }, "exp_cfg": { "esm": { "cache_dir": "artifacts/cache/esm2" }, "features": { "use_3d": false, "use_esm": true, "use_gnn": false, "use_tabular": true }, "mixup": { "alpha": 0.2, "enabled": true, "prob": 0.5 }, "model": { "dropout": 0.2, "esm_dim": 128, "hidden_dim": 256 }, "model_type": "esm_tab_mlp", "name": "exp06_esm_tab_mlp_aug" }, "tab_cols": [ "length", "mw", "ext_coef_reduced", "ext_coef_oxidized", "hydrophobic_ratio_pct", "pi", "net_charge_ph7", "total_charge", "mean_hydrophobicity", "hydrophobicity_ph7", "hydrophilic_ratio", "aliphatic_index", "boman_index", "aromaticity", "instability_index", "gravy", "charge_density", "aa_basic_pct", "aa_acidic_pct", "aa_aromatic_pct", "aa_hydrophobic_pct", "aa_polar_pct", "hydrophobic_moment", "mw_pyteomics", "mw_delta_abs" ], "train_cfg": { "calibration": { "enabled": true, "method": "isotonic" }, "maximize_metric": true, "output": { "keep_top_k": 1, "root": "artifacts", "save_periodic_every": 5 }, "primary_metric": "pr_auc", "run_name": "default", "secondary_metric": "mcc", "seed": 42, "tracking": { "mlflow": true, "mlflow_experiment": "bbb_classifier", "tensorboard": true }, "training": { "batch_size": 128, "epochs": 50, "eval_every": 1, "grad_clip": 1.0, "log_every": 5, "lr": 0.001, "num_workers": 4, "patience": 8, "weight_decay": 0.0001 } } }