Tabular Classification
Scikit-learn
Joblib
genomics
structural-variants
short-tandem-repeats
variant-calling
confidence-calibration
random-forest
Instructions to use khyeom/SVSTR-Score with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Scikit-learn
How to use khyeom/SVSTR-Score with Scikit-learn:
```python
from huggingface_hub import hf_hub_download
import joblib

model = joblib.load(
    hf_hub_download("khyeom/SVSTR-Score", "sklearn_model.joblib")
)
# only load pickle files from sources you trust
# read more about it here https://skops.readthedocs.io/en/stable/persistence.html
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "variant": "sv", | |
| "feature_cols": [ | |
| "is_pass", | |
| "svtype_DEL", | |
| "svtype_DUP", | |
| "svtype_INS", | |
| "svtype_INV", | |
| "svtype_BND", | |
| "svlen_log", | |
| "cipos_width", | |
| "ciend_width", | |
| "is_imprecise", | |
| "pe_support", | |
| "sr_support", | |
| "total_support", | |
| "vaf", | |
| "gt_hom", | |
| "gq", | |
| "qual_norm", | |
| "local_depth", | |
| "gc_min", | |
| "gc_max", | |
| "entropy_min", | |
| "microhom_max", | |
| "in_segdup_either", | |
| "in_segdup_both", | |
| "in_difficult_either", | |
| "in_difficult_both", | |
| "in_lowmap_either", | |
| "in_tandem_either", | |
| "in_Alu_either", | |
| "in_L1_either", | |
| "in_SVA_either", | |
| "in_LTR_either", | |
| "frac_span_repeat", | |
| "n_neighbors", | |
| "nn_log_dist" | |
| ], | |
| "n_features": 35, | |
| "tier_edges": [ | |
| 0.3, | |
| 0.5, | |
| 0.7 | |
| ], | |
| "tier_names": [ | |
| "LOW", | |
| "Warning", | |
| "Moderate", | |
| "High" | |
| ], | |
| "missing_sentinel": -99999.0, | |
| "rf_params": { | |
| "bootstrap": true, | |
| "ccp_alpha": 0.0, | |
| "class_weight": "balanced_subsample", | |
| "criterion": "gini", | |
| "max_depth": null, | |
| "max_features": "sqrt", | |
| "max_leaf_nodes": null, | |
| "max_samples": null, | |
| "min_impurity_decrease": 0.0, | |
| "min_samples_leaf": 20, | |
| "min_samples_split": 2, | |
| "min_weight_fraction_leaf": 0.0, | |
| "monotonic_cst": null, | |
| "n_estimators": 400, | |
| "n_jobs": -1, | |
| "oob_score": false, | |
| "random_state": 42, | |
| "verbose": 0, | |
| "warm_start": false | |
| }, | |
| "n_train_rows": 2575116, | |
| "n_samples": 208, | |
| "qc": { | |
| "label_rows_raw": 2782190, | |
| "label_dist_raw": { | |
| "concordant": 1530286, | |
| "discordant": 1251904 | |
| }, | |
| "label_rows_usable": 2782190, | |
| "ambiguous_keys_dropped": 9462, | |
| "ambiguous_feat_rows": 18568, | |
| "ambiguous_label_rows": 19114, | |
| "dup_keys_feature": 18568, | |
| "dup_keys_label": 19114, | |
| "merged_rows": 2575116, | |
| "match_rate_vs_labels": 0.9255715820989939, | |
| "match_rate_vs_features": 1.0, | |
| "class_balance": { | |
| "concordant": 1511906, | |
| "discordant": 1063210 | |
| }, | |
| "concordant_rate": 0.5871215121959554 | |
| }, | |
| "importances": { | |
| "impurity": [ | |
| { | |
| "feature": "svlen_log", | |
| "impurity_importance": 0.14231107100051552 | |
| }, | |
| { | |
| "feature": "svtype_BND", | |
| "impurity_importance": 0.13513441637259968 | |
| }, | |
| { | |
| "feature": "nn_log_dist", | |
| "impurity_importance": 0.06888043456708229 | |
| }, | |
| { | |
| "feature": "svtype_DUP", | |
| "impurity_importance": 0.05967552431932549 | |
| }, | |
| { | |
| "feature": "cipos_width", | |
| "impurity_importance": 0.05563805020798381 | |
| }, | |
| { | |
| "feature": "svtype_DEL", | |
| "impurity_importance": 0.053612638587210104 | |
| }, | |
| { | |
| "feature": "sr_support", | |
| "impurity_importance": 0.04950031891627467 | |
| }, | |
| { | |
| "feature": "vaf", | |
| "impurity_importance": 0.04822805852199238 | |
| }, | |
| { | |
| "feature": "qual_norm", | |
| "impurity_importance": 0.04321008187665855 | |
| }, | |
| { | |
| "feature": "svtype_INS", | |
| "impurity_importance": 0.03671741257598638 | |
| }, | |
| { | |
| "feature": "ciend_width", | |
| "impurity_importance": 0.027655467190250325 | |
| }, | |
| { | |
| "feature": "local_depth", | |
| "impurity_importance": 0.024667786835611386 | |
| }, | |
| { | |
| "feature": "microhom_max", | |
| "impurity_importance": 0.023198612318248754 | |
| }, | |
| { | |
| "feature": "is_imprecise", | |
| "impurity_importance": 0.02244493530457544 | |
| }, | |
| { | |
| "feature": "frac_span_repeat", | |
| "impurity_importance": 0.02223685870094091 | |
| }, | |
| { | |
| "feature": "entropy_min", | |
| "impurity_importance": 0.02149966456515826 | |
| }, | |
| { | |
| "feature": "pe_support", | |
| "impurity_importance": 0.018807543132767727 | |
| }, | |
| { | |
| "feature": "gc_min", | |
| "impurity_importance": 0.018609267758191137 | |
| }, | |
| { | |
| "feature": "gq", | |
| "impurity_importance": 0.017999043161167707 | |
| }, | |
| { | |
| "feature": "gc_max", | |
| "impurity_importance": 0.01691329606031783 | |
| }, | |
| { | |
| "feature": "total_support", | |
| "impurity_importance": 0.01639193545906872 | |
| }, | |
| { | |
| "feature": "gt_hom", | |
| "impurity_importance": 0.014227746565587479 | |
| }, | |
| { | |
| "feature": "n_neighbors", | |
| "impurity_importance": 0.013414404592739407 | |
| }, | |
| { | |
| "feature": "in_difficult_both", | |
| "impurity_importance": 0.01186932037949349 | |
| }, | |
| { | |
| "feature": "is_pass", | |
| "impurity_importance": 0.011331347970027637 | |
| }, | |
| { | |
| "feature": "in_tandem_either", | |
| "impurity_importance": 0.006678373764632643 | |
| }, | |
| { | |
| "feature": "in_lowmap_either", | |
| "impurity_importance": 0.004352768263433798 | |
| }, | |
| { | |
| "feature": "in_Alu_either", | |
| "impurity_importance": 0.004250882794571496 | |
| }, | |
| { | |
| "feature": "in_difficult_either", | |
| "impurity_importance": 0.004067397992242813 | |
| }, | |
| { | |
| "feature": "in_segdup_either", | |
| "impurity_importance": 0.001790844691729305 | |
| }, | |
| { | |
| "feature": "in_segdup_both", | |
| "impurity_importance": 0.001567017346771133 | |
| }, | |
| { | |
| "feature": "in_L1_either", | |
| "impurity_importance": 0.0014931685060102253 | |
| }, | |
| { | |
| "feature": "in_LTR_either", | |
| "impurity_importance": 0.001243669659616864 | |
| }, | |
| { | |
| "feature": "in_SVA_either", | |
| "impurity_importance": 0.00038064004121650836 | |
| }, | |
| { | |
| "feature": "svtype_INV", | |
| "impurity_importance": 0.0 | |
| } | |
| ], | |
| "permutation": [ | |
| { | |
| "feature": "svlen_log", | |
| "perm_importance_mean": 0.04194058803806115, | |
| "perm_importance_std": 0.0007089186317830076 | |
| }, | |
| { | |
| "feature": "nn_log_dist", | |
| "perm_importance_mean": 0.019778079687185944, | |
| "perm_importance_std": 0.0002553374546546104 | |
| }, | |
| { | |
| "feature": "svtype_DEL", | |
| "perm_importance_mean": 0.018689927317770305, | |
| "perm_importance_std": 0.0002501511162311443 | |
| }, | |
| { | |
| "feature": "cipos_width", | |
| "perm_importance_mean": 0.017400205941047363, | |
| "perm_importance_std": 0.00021672163272084185 | |
| }, | |
| { | |
| "feature": "svtype_BND", | |
| "perm_importance_mean": 0.015007739432103828, | |
| "perm_importance_std": 0.00026380208237979455 | |
| }, | |
| { | |
| "feature": "qual_norm", | |
| "perm_importance_mean": 0.013299944084231186, | |
| "perm_importance_std": 0.00013420815060181905 | |
| }, | |
| { | |
| "feature": "gc_min", | |
| "perm_importance_mean": 0.012263946411167393, | |
| "perm_importance_std": 0.0001367518392425017 | |
| }, | |
| { | |
| "feature": "entropy_min", | |
| "perm_importance_mean": 0.01175411732205851, | |
| "perm_importance_std": 0.00010827028744123835 | |
| }, | |
| { | |
| "feature": "pe_support", | |
| "perm_importance_mean": 0.011431666717043187, | |
| "perm_importance_std": 0.0002522902431662052 | |
| }, | |
| { | |
| "feature": "vaf", | |
| "perm_importance_mean": 0.010811854209153338, | |
| "perm_importance_std": 0.000206278576011294 | |
| }, | |
| { | |
| "feature": "svtype_DUP", | |
| "perm_importance_mean": 0.010256370264851777, | |
| "perm_importance_std": 0.00011705190707458093 | |
| }, | |
| { | |
| "feature": "frac_span_repeat", | |
| "perm_importance_mean": 0.009963778341341434, | |
| "perm_importance_std": 0.0003054705160430916 | |
| }, | |
| { | |
| "feature": "local_depth", | |
| "perm_importance_mean": 0.009591312749510661, | |
| "perm_importance_std": 9.987579143240777e-05 | |
| }, | |
| { | |
| "feature": "gc_max", | |
| "perm_importance_mean": 0.009472834004097907, | |
| "perm_importance_std": 3.73573580102502e-05 | |
| }, | |
| { | |
| "feature": "gq", | |
| "perm_importance_mean": 0.009203283859545164, | |
| "perm_importance_std": 0.00010130110270414088 | |
| }, | |
| { | |
| "feature": "sr_support", | |
| "perm_importance_mean": 0.008346937601715121, | |
| "perm_importance_std": 9.766321902716232e-05 | |
| }, | |
| { | |
| "feature": "microhom_max", | |
| "perm_importance_mean": 0.0074945231035745685, | |
| "perm_importance_std": 3.858741211472431e-05 | |
| }, | |
| { | |
| "feature": "svtype_INS", | |
| "perm_importance_mean": 0.007191278115888, | |
| "perm_importance_std": 6.25382140306982e-05 | |
| }, | |
| { | |
| "feature": "total_support", | |
| "perm_importance_mean": 0.0071782291459881135, | |
| "perm_importance_std": 9.233685802608801e-05 | |
| }, | |
| { | |
| "feature": "is_pass", | |
| "perm_importance_mean": 0.006096510294509483, | |
| "perm_importance_std": 0.00018164506103297094 | |
| }, | |
| { | |
| "feature": "n_neighbors", | |
| "perm_importance_mean": 0.005750802897597151, | |
| "perm_importance_std": 5.2407508065749236e-05 | |
| }, | |
| { | |
| "feature": "in_difficult_both", | |
| "perm_importance_mean": 0.005015233708107925, | |
| "perm_importance_std": 0.00018000694143725676 | |
| }, | |
| { | |
| "feature": "ciend_width", | |
| "perm_importance_mean": 0.004891217221742616, | |
| "perm_importance_std": 8.693371224059806e-05 | |
| }, | |
| { | |
| "feature": "in_tandem_either", | |
| "perm_importance_mean": 0.0043522978742952965, | |
| "perm_importance_std": 0.00013096877220331791 | |
| }, | |
| { | |
| "feature": "gt_hom", | |
| "perm_importance_mean": 0.00323902471619224, | |
| "perm_importance_std": 6.580316549205161e-05 | |
| }, | |
| { | |
| "feature": "in_lowmap_either", | |
| "perm_importance_mean": 0.002848785493209416, | |
| "perm_importance_std": 6.035487488575199e-05 | |
| }, | |
| { | |
| "feature": "in_Alu_either", | |
| "perm_importance_mean": 0.002534492148327061, | |
| "perm_importance_std": 8.942851729207139e-05 | |
| }, | |
| { | |
| "feature": "in_difficult_either", | |
| "perm_importance_mean": 0.002091988241603948, | |
| "perm_importance_std": 5.002148003217409e-05 | |
| }, | |
| { | |
| "feature": "is_imprecise", | |
| "perm_importance_mean": 0.001979962861476592, | |
| "perm_importance_std": 6.0060264184685146e-05 | |
| }, | |
| { | |
| "feature": "in_L1_either", | |
| "perm_importance_mean": 0.0011150058316057754, | |
| "perm_importance_std": 2.43935928349277e-05 | |
| }, | |
| { | |
| "feature": "in_LTR_either", | |
| "perm_importance_mean": 0.0006375153501665843, | |
| "perm_importance_std": 3.5907563733425047e-05 | |
| }, | |
| { | |
| "feature": "in_segdup_either", | |
| "perm_importance_mean": 0.0006168866779678206, | |
| "perm_importance_std": 2.936960388188349e-05 | |
| }, | |
| { | |
| "feature": "in_segdup_both", | |
| "perm_importance_mean": 0.0005383371585652164, | |
| "perm_importance_std": 3.300168720103858e-05 | |
| }, | |
| { | |
| "feature": "in_SVA_either", | |
| "perm_importance_mean": 0.00017570394306039018, | |
| "perm_importance_std": 3.211137729753612e-06 | |
| }, | |
| { | |
| "feature": "svtype_INV", | |
| "perm_importance_mean": 0.0, | |
| "perm_importance_std": 0.0 | |
| } | |
| ] | |
| }, | |
| "finalized_unix": 1782044045, | |
| "cv_report": { | |
| "overall": { | |
| "n": 2575116, | |
| "pos_rate": 0.5871215121959554, | |
| "auroc": 0.9501778843150939, | |
| "auprc": 0.9511739760033938, | |
| "brier": 0.07706030782330338, | |
| "logloss": 0.2628003224452437 | |
| }, | |
| "calibration": [ | |
| { | |
| "bin": "[0.0,0.1)", | |
| "n": 701606, | |
| "mean_pred": 0.018278732155845624, | |
| "obs_rate": 0.008842569761376044 | |
| }, | |
| { | |
| "bin": "[0.1,0.2)", | |
| "n": 77383, | |
| "mean_pred": 0.1443051591972469, | |
| "obs_rate": 0.10029334608376517 | |
| }, | |
| { | |
| "bin": "[0.2,0.3)", | |
| "n": 53690, | |
| "mean_pred": 0.24934861469329547, | |
| "obs_rate": 0.19802570311044887 | |
| }, | |
| { | |
| "bin": "[0.3,0.4)", | |
| "n": 57370, | |
| "mean_pred": 0.35138865912279116, | |
| "obs_rate": 0.31784905002614605 | |
| }, | |
| { | |
| "bin": "[0.4,0.5)", | |
| "n": 70890, | |
| "mean_pred": 0.4521905700361762, | |
| "obs_rate": 0.45171392297926366 | |
| }, | |
| { | |
| "bin": "[0.5,0.6)", | |
| "n": 98358, | |
| "mean_pred": 0.5530042682672152, | |
| "obs_rate": 0.5941153744484434 | |
| }, | |
| { | |
| "bin": "[0.6,0.7)", | |
| "n": 158080, | |
| "mean_pred": 0.6545151929144646, | |
| "obs_rate": 0.7394863360323887 | |
| }, | |
| { | |
| "bin": "[0.7,0.8)", | |
| "n": 264297, | |
| "mean_pred": 0.7534285409368959, | |
| "obs_rate": 0.8542737904705691 | |
| }, | |
| { | |
| "bin": "[0.8,0.9)", | |
| "n": 407540, | |
| "mean_pred": 0.8554391726312989, | |
| "obs_rate": 0.922547480001963 | |
| }, | |
| { | |
| "bin": "[0.9,1.0)", | |
| "n": 685902, | |
| "mean_pred": 0.9410329493878745, | |
| "obs_rate": 0.962179728299378 | |
| } | |
| ], | |
| "per_sample_auroc": { | |
| "n_samples": 208, | |
| "median": 0.9812468623755227, | |
| "p25": 0.9541872315128539, | |
| "p75": 0.9849148282479339, | |
| "min": 0.8241255348590714, | |
| "max": 0.9884727126524584 | |
| }, | |
| "by_svtype": { | |
| "BND": { | |
| "n": 545562, | |
| "pos_rate": 0.05967241120165994, | |
| "auroc": 0.9578724482896165, | |
| "auprc": 0.7205291737443336, | |
| "brier": 0.03197870532515856, | |
| "logloss": 0.11268068083789429 | |
| }, | |
| "DEL": { | |
| "n": 1168706, | |
| "pos_rate": 0.7785747655954535, | |
| "auroc": 0.8919672267722084, | |
| "auprc": 0.9557196435784474, | |
| "brier": 0.0922761808544916, | |
| "logloss": 0.3149783791403593 | |
| }, | |
| "DUP": { | |
| "n": 156534, | |
| "pos_rate": 0.023419832113151136, | |
| "auroc": 0.9936692829891886, | |
| "auprc": 0.8545445334945798, | |
| "brier": 0.009443816981288416, | |
| "logloss": 0.03587112293286734 | |
| }, | |
| "INS": { | |
| "n": 704314, | |
| "pos_rate": 0.8032780833548673, | |
| "auroc": 0.8549802852596324, | |
| "auprc": 0.9494948614191011, | |
| "brier": 0.1017598124373945, | |
| "logloss": 0.3429363119373415 | |
| } | |
| }, | |
| "by_size": { | |
| "1-10kb": { | |
| "n": 171605, | |
| "pos_rate": 0.7607121004632732, | |
| "auroc": 0.9296938425020418, | |
| "auprc": 0.967200346507, | |
| "brier": 0.06806650111282099, | |
| "logloss": 0.2455626050274475 | |
| }, | |
| "10-100kb": { | |
| "n": 42645, | |
| "pos_rate": 0.21177160276703014, | |
| "auroc": 0.9920116885561145, | |
| "auprc": 0.9676826349150152, | |
| "brier": 0.029729192191828648, | |
| "logloss": 0.10986227030572825 | |
| }, | |
| "100bp-1kb": { | |
| "n": 906987, | |
| "pos_rate": 0.7867466678133204, | |
| "auroc": 0.8976865414893258, | |
| "auprc": 0.9590485952985468, | |
| "brier": 0.08302330275831495, | |
| "logloss": 0.28853539649876037 | |
| }, | |
| "<100bp": { | |
| "n": 733616, | |
| "pos_rate": 0.7488359032518375, | |
| "auroc": 0.867906684781433, | |
| "auprc": 0.93767984701013, | |
| "brier": 0.11261501240225187, | |
| "logloss": 0.3687373180284347 | |
| }, | |
| ">100kb": { | |
| "n": 54796, | |
| "pos_rate": 0.00839477334111979, | |
| "auroc": 0.9897200830900804, | |
| "auprc": 0.788119068363777, | |
| "brier": 0.004907638846204342, | |
| "logloss": 0.029204468344063542 | |
| }, | |
| "NA": { | |
| "n": 665467, | |
| "pos_rate": 0.16371360262792894, | |
| "auroc": 0.976441017719166, | |
| "auprc": 0.9070303608956529, | |
| "brier": 0.04103092730468184, | |
| "logloss": 0.1444199784010817 | |
| } | |
| } | |
| } | |
| } |