SVSTR-Score / sv_model_meta.json
khyeom's picture
Release v1.0: HPRC-trained 35/21-feature calibrated SV+STR models (#1)
3c7d0d1
Raw
History Blame Contribute Delete
15.6 kB
{
"variant": "sv",
"feature_cols": [
"is_pass",
"svtype_DEL",
"svtype_DUP",
"svtype_INS",
"svtype_INV",
"svtype_BND",
"svlen_log",
"cipos_width",
"ciend_width",
"is_imprecise",
"pe_support",
"sr_support",
"total_support",
"vaf",
"gt_hom",
"gq",
"qual_norm",
"local_depth",
"gc_min",
"gc_max",
"entropy_min",
"microhom_max",
"in_segdup_either",
"in_segdup_both",
"in_difficult_either",
"in_difficult_both",
"in_lowmap_either",
"in_tandem_either",
"in_Alu_either",
"in_L1_either",
"in_SVA_either",
"in_LTR_either",
"frac_span_repeat",
"n_neighbors",
"nn_log_dist"
],
"n_features": 35,
"tier_edges": [
0.3,
0.5,
0.7
],
"tier_names": [
"LOW",
"Warning",
"Moderate",
"High"
],
"missing_sentinel": -99999.0,
"rf_params": {
"bootstrap": true,
"ccp_alpha": 0.0,
"class_weight": "balanced_subsample",
"criterion": "gini",
"max_depth": null,
"max_features": "sqrt",
"max_leaf_nodes": null,
"max_samples": null,
"min_impurity_decrease": 0.0,
"min_samples_leaf": 20,
"min_samples_split": 2,
"min_weight_fraction_leaf": 0.0,
"monotonic_cst": null,
"n_estimators": 400,
"n_jobs": -1,
"oob_score": false,
"random_state": 42,
"verbose": 0,
"warm_start": false
},
"n_train_rows": 2575116,
"n_samples": 208,
"qc": {
"label_rows_raw": 2782190,
"label_dist_raw": {
"concordant": 1530286,
"discordant": 1251904
},
"label_rows_usable": 2782190,
"ambiguous_keys_dropped": 9462,
"ambiguous_feat_rows": 18568,
"ambiguous_label_rows": 19114,
"dup_keys_feature": 18568,
"dup_keys_label": 19114,
"merged_rows": 2575116,
"match_rate_vs_labels": 0.9255715820989939,
"match_rate_vs_features": 1.0,
"class_balance": {
"concordant": 1511906,
"discordant": 1063210
},
"concordant_rate": 0.5871215121959554
},
"importances": {
"impurity": [
{
"feature": "svlen_log",
"impurity_importance": 0.14231107100051552
},
{
"feature": "svtype_BND",
"impurity_importance": 0.13513441637259968
},
{
"feature": "nn_log_dist",
"impurity_importance": 0.06888043456708229
},
{
"feature": "svtype_DUP",
"impurity_importance": 0.05967552431932549
},
{
"feature": "cipos_width",
"impurity_importance": 0.05563805020798381
},
{
"feature": "svtype_DEL",
"impurity_importance": 0.053612638587210104
},
{
"feature": "sr_support",
"impurity_importance": 0.04950031891627467
},
{
"feature": "vaf",
"impurity_importance": 0.04822805852199238
},
{
"feature": "qual_norm",
"impurity_importance": 0.04321008187665855
},
{
"feature": "svtype_INS",
"impurity_importance": 0.03671741257598638
},
{
"feature": "ciend_width",
"impurity_importance": 0.027655467190250325
},
{
"feature": "local_depth",
"impurity_importance": 0.024667786835611386
},
{
"feature": "microhom_max",
"impurity_importance": 0.023198612318248754
},
{
"feature": "is_imprecise",
"impurity_importance": 0.02244493530457544
},
{
"feature": "frac_span_repeat",
"impurity_importance": 0.02223685870094091
},
{
"feature": "entropy_min",
"impurity_importance": 0.02149966456515826
},
{
"feature": "pe_support",
"impurity_importance": 0.018807543132767727
},
{
"feature": "gc_min",
"impurity_importance": 0.018609267758191137
},
{
"feature": "gq",
"impurity_importance": 0.017999043161167707
},
{
"feature": "gc_max",
"impurity_importance": 0.01691329606031783
},
{
"feature": "total_support",
"impurity_importance": 0.01639193545906872
},
{
"feature": "gt_hom",
"impurity_importance": 0.014227746565587479
},
{
"feature": "n_neighbors",
"impurity_importance": 0.013414404592739407
},
{
"feature": "in_difficult_both",
"impurity_importance": 0.01186932037949349
},
{
"feature": "is_pass",
"impurity_importance": 0.011331347970027637
},
{
"feature": "in_tandem_either",
"impurity_importance": 0.006678373764632643
},
{
"feature": "in_lowmap_either",
"impurity_importance": 0.004352768263433798
},
{
"feature": "in_Alu_either",
"impurity_importance": 0.004250882794571496
},
{
"feature": "in_difficult_either",
"impurity_importance": 0.004067397992242813
},
{
"feature": "in_segdup_either",
"impurity_importance": 0.001790844691729305
},
{
"feature": "in_segdup_both",
"impurity_importance": 0.001567017346771133
},
{
"feature": "in_L1_either",
"impurity_importance": 0.0014931685060102253
},
{
"feature": "in_LTR_either",
"impurity_importance": 0.001243669659616864
},
{
"feature": "in_SVA_either",
"impurity_importance": 0.00038064004121650836
},
{
"feature": "svtype_INV",
"impurity_importance": 0.0
}
],
"permutation": [
{
"feature": "svlen_log",
"perm_importance_mean": 0.04194058803806115,
"perm_importance_std": 0.0007089186317830076
},
{
"feature": "nn_log_dist",
"perm_importance_mean": 0.019778079687185944,
"perm_importance_std": 0.0002553374546546104
},
{
"feature": "svtype_DEL",
"perm_importance_mean": 0.018689927317770305,
"perm_importance_std": 0.0002501511162311443
},
{
"feature": "cipos_width",
"perm_importance_mean": 0.017400205941047363,
"perm_importance_std": 0.00021672163272084185
},
{
"feature": "svtype_BND",
"perm_importance_mean": 0.015007739432103828,
"perm_importance_std": 0.00026380208237979455
},
{
"feature": "qual_norm",
"perm_importance_mean": 0.013299944084231186,
"perm_importance_std": 0.00013420815060181905
},
{
"feature": "gc_min",
"perm_importance_mean": 0.012263946411167393,
"perm_importance_std": 0.0001367518392425017
},
{
"feature": "entropy_min",
"perm_importance_mean": 0.01175411732205851,
"perm_importance_std": 0.00010827028744123835
},
{
"feature": "pe_support",
"perm_importance_mean": 0.011431666717043187,
"perm_importance_std": 0.0002522902431662052
},
{
"feature": "vaf",
"perm_importance_mean": 0.010811854209153338,
"perm_importance_std": 0.000206278576011294
},
{
"feature": "svtype_DUP",
"perm_importance_mean": 0.010256370264851777,
"perm_importance_std": 0.00011705190707458093
},
{
"feature": "frac_span_repeat",
"perm_importance_mean": 0.009963778341341434,
"perm_importance_std": 0.0003054705160430916
},
{
"feature": "local_depth",
"perm_importance_mean": 0.009591312749510661,
"perm_importance_std": 9.987579143240777e-05
},
{
"feature": "gc_max",
"perm_importance_mean": 0.009472834004097907,
"perm_importance_std": 3.73573580102502e-05
},
{
"feature": "gq",
"perm_importance_mean": 0.009203283859545164,
"perm_importance_std": 0.00010130110270414088
},
{
"feature": "sr_support",
"perm_importance_mean": 0.008346937601715121,
"perm_importance_std": 9.766321902716232e-05
},
{
"feature": "microhom_max",
"perm_importance_mean": 0.0074945231035745685,
"perm_importance_std": 3.858741211472431e-05
},
{
"feature": "svtype_INS",
"perm_importance_mean": 0.007191278115888,
"perm_importance_std": 6.25382140306982e-05
},
{
"feature": "total_support",
"perm_importance_mean": 0.0071782291459881135,
"perm_importance_std": 9.233685802608801e-05
},
{
"feature": "is_pass",
"perm_importance_mean": 0.006096510294509483,
"perm_importance_std": 0.00018164506103297094
},
{
"feature": "n_neighbors",
"perm_importance_mean": 0.005750802897597151,
"perm_importance_std": 5.2407508065749236e-05
},
{
"feature": "in_difficult_both",
"perm_importance_mean": 0.005015233708107925,
"perm_importance_std": 0.00018000694143725676
},
{
"feature": "ciend_width",
"perm_importance_mean": 0.004891217221742616,
"perm_importance_std": 8.693371224059806e-05
},
{
"feature": "in_tandem_either",
"perm_importance_mean": 0.0043522978742952965,
"perm_importance_std": 0.00013096877220331791
},
{
"feature": "gt_hom",
"perm_importance_mean": 0.00323902471619224,
"perm_importance_std": 6.580316549205161e-05
},
{
"feature": "in_lowmap_either",
"perm_importance_mean": 0.002848785493209416,
"perm_importance_std": 6.035487488575199e-05
},
{
"feature": "in_Alu_either",
"perm_importance_mean": 0.002534492148327061,
"perm_importance_std": 8.942851729207139e-05
},
{
"feature": "in_difficult_either",
"perm_importance_mean": 0.002091988241603948,
"perm_importance_std": 5.002148003217409e-05
},
{
"feature": "is_imprecise",
"perm_importance_mean": 0.001979962861476592,
"perm_importance_std": 6.0060264184685146e-05
},
{
"feature": "in_L1_either",
"perm_importance_mean": 0.0011150058316057754,
"perm_importance_std": 2.43935928349277e-05
},
{
"feature": "in_LTR_either",
"perm_importance_mean": 0.0006375153501665843,
"perm_importance_std": 3.5907563733425047e-05
},
{
"feature": "in_segdup_either",
"perm_importance_mean": 0.0006168866779678206,
"perm_importance_std": 2.936960388188349e-05
},
{
"feature": "in_segdup_both",
"perm_importance_mean": 0.0005383371585652164,
"perm_importance_std": 3.300168720103858e-05
},
{
"feature": "in_SVA_either",
"perm_importance_mean": 0.00017570394306039018,
"perm_importance_std": 3.211137729753612e-06
},
{
"feature": "svtype_INV",
"perm_importance_mean": 0.0,
"perm_importance_std": 0.0
}
]
},
"finalized_unix": 1782044045,
"cv_report": {
"overall": {
"n": 2575116,
"pos_rate": 0.5871215121959554,
"auroc": 0.9501778843150939,
"auprc": 0.9511739760033938,
"brier": 0.07706030782330338,
"logloss": 0.2628003224452437
},
"calibration": [
{
"bin": "[0.0,0.1)",
"n": 701606,
"mean_pred": 0.018278732155845624,
"obs_rate": 0.008842569761376044
},
{
"bin": "[0.1,0.2)",
"n": 77383,
"mean_pred": 0.1443051591972469,
"obs_rate": 0.10029334608376517
},
{
"bin": "[0.2,0.3)",
"n": 53690,
"mean_pred": 0.24934861469329547,
"obs_rate": 0.19802570311044887
},
{
"bin": "[0.3,0.4)",
"n": 57370,
"mean_pred": 0.35138865912279116,
"obs_rate": 0.31784905002614605
},
{
"bin": "[0.4,0.5)",
"n": 70890,
"mean_pred": 0.4521905700361762,
"obs_rate": 0.45171392297926366
},
{
"bin": "[0.5,0.6)",
"n": 98358,
"mean_pred": 0.5530042682672152,
"obs_rate": 0.5941153744484434
},
{
"bin": "[0.6,0.7)",
"n": 158080,
"mean_pred": 0.6545151929144646,
"obs_rate": 0.7394863360323887
},
{
"bin": "[0.7,0.8)",
"n": 264297,
"mean_pred": 0.7534285409368959,
"obs_rate": 0.8542737904705691
},
{
"bin": "[0.8,0.9)",
"n": 407540,
"mean_pred": 0.8554391726312989,
"obs_rate": 0.922547480001963
},
{
"bin": "[0.9,1.0)",
"n": 685902,
"mean_pred": 0.9410329493878745,
"obs_rate": 0.962179728299378
}
],
"per_sample_auroc": {
"n_samples": 208,
"median": 0.9812468623755227,
"p25": 0.9541872315128539,
"p75": 0.9849148282479339,
"min": 0.8241255348590714,
"max": 0.9884727126524584
},
"by_svtype": {
"BND": {
"n": 545562,
"pos_rate": 0.05967241120165994,
"auroc": 0.9578724482896165,
"auprc": 0.7205291737443336,
"brier": 0.03197870532515856,
"logloss": 0.11268068083789429
},
"DEL": {
"n": 1168706,
"pos_rate": 0.7785747655954535,
"auroc": 0.8919672267722084,
"auprc": 0.9557196435784474,
"brier": 0.0922761808544916,
"logloss": 0.3149783791403593
},
"DUP": {
"n": 156534,
"pos_rate": 0.023419832113151136,
"auroc": 0.9936692829891886,
"auprc": 0.8545445334945798,
"brier": 0.009443816981288416,
"logloss": 0.03587112293286734
},
"INS": {
"n": 704314,
"pos_rate": 0.8032780833548673,
"auroc": 0.8549802852596324,
"auprc": 0.9494948614191011,
"brier": 0.1017598124373945,
"logloss": 0.3429363119373415
}
},
"by_size": {
"1-10kb": {
"n": 171605,
"pos_rate": 0.7607121004632732,
"auroc": 0.9296938425020418,
"auprc": 0.967200346507,
"brier": 0.06806650111282099,
"logloss": 0.2455626050274475
},
"10-100kb": {
"n": 42645,
"pos_rate": 0.21177160276703014,
"auroc": 0.9920116885561145,
"auprc": 0.9676826349150152,
"brier": 0.029729192191828648,
"logloss": 0.10986227030572825
},
"100bp-1kb": {
"n": 906987,
"pos_rate": 0.7867466678133204,
"auroc": 0.8976865414893258,
"auprc": 0.9590485952985468,
"brier": 0.08302330275831495,
"logloss": 0.28853539649876037
},
"<100bp": {
"n": 733616,
"pos_rate": 0.7488359032518375,
"auroc": 0.867906684781433,
"auprc": 0.93767984701013,
"brier": 0.11261501240225187,
"logloss": 0.3687373180284347
},
">100kb": {
"n": 54796,
"pos_rate": 0.00839477334111979,
"auroc": 0.9897200830900804,
"auprc": 0.788119068363777,
"brier": 0.004907638846204342,
"logloss": 0.029204468344063542
},
"NA": {
"n": 665467,
"pos_rate": 0.16371360262792894,
"auroc": 0.976441017719166,
"auprc": 0.9070303608956529,
"brier": 0.04103092730468184,
"logloss": 0.1444199784010817
}
}
}
}