{ "variant": "str", "created_unix": 1782043477, "feature_cols": [ "is_pass", "motif_len", "ref_copynum", "gt_repcn_max", "gt_repcn_min", "expansion_over_ref", "repci_width_max", "spanning_reads", "flanking_reads", "inrepeat_reads", "locus_depth", "gt_hom", "ref_tract_bp", "spanning_frac", "allele_vs_readlen", "motif_is_homopolymer", "gc_flank", "entropy_flank", "in_segdup", "in_difficult", "flank_lowmap" ], "n_features": 21, "tier_edges": [ 0.3, 0.5, 0.7 ], "tier_names": [ "LOW", "Warning", "Moderate", "High" ], "missing_sentinel": -99999.0, "rf_params": { "bootstrap": true, "ccp_alpha": 0.0, "class_weight": "balanced_subsample", "criterion": "gini", "max_depth": null, "max_features": "sqrt", "max_leaf_nodes": null, "max_samples": 2000000, "min_impurity_decrease": 0.0, "min_samples_leaf": 50, "min_samples_split": 2, "min_weight_fraction_leaf": 0.0, "monotonic_cst": null, "n_estimators": 300, "n_jobs": -1, "oob_score": false, "random_state": 42, "verbose": 0, "warm_start": false }, "n_train_rows": 22651133, "n_samples": 208, "qc": { "label_rows_raw": 36254400, "label_dist_raw": { "concordant": 21350382, "discordant": 13838163, "unlabeled": 1065855 }, "label_rows_usable": 35188545, "ambiguous_keys_dropped": 0, "ambiguous_feat_rows": 0, "ambiguous_label_rows": 0, "dup_keys_feature": 0, "dup_keys_label": 0, "merged_rows": 22651133, "match_rate_vs_labels": 0.6437075758602693, "match_rate_vs_features": 0.9832673629385175, "class_balance": { "concordant": 13960015, "discordant": 8691118 }, "concordant_rate": 0.6163053742168217 }, "cv_folds": 5, "cv_fold_metrics": [ { "n": 4469639, "pos_rate": 0.6172603648751052, "auroc": 0.8345731588413778, "auprc": 0.8868311937682424, "brier": 0.16715199887480572, "logloss": 0.505031384190826, "fold": 0, "seconds": 404.5 }, { "n": 4469658, "pos_rate": 0.6172628867801518, "auroc": 0.8348793797657998, "auprc": 0.8871277104956028, "brier": 0.16710046702995693, "logloss": 0.5048207582711781, "fold": 1, "seconds": 457.3 }, { "n": 4569998, "pos_rate": 0.6173429397562099, "auroc": 0.8345632397054213, "auprc": 0.8867279699640327, "brier": 0.16717765756008388, "logloss": 0.5050623605875793, "fold": 2, "seconds": 480.6 }, { "n": 4570859, "pos_rate": 0.6168989242503433, "auroc": 0.8350534258010407, "auprc": 0.8870572426757822, "brier": 0.1669604630273807, "logloss": 0.5044600822147348, "fold": 3, "seconds": 546.9 }, { "n": 4570979, "pos_rate": 0.6128043904817765, "auroc": 0.8317845587452297, "auprc": 0.8823297885222531, "brier": 0.16790578845588436, "logloss": 0.5066430427730261, "fold": 4, "seconds": 558.6 } ], "cv_report": { "overall": { "n": 22651133, "pos_rate": 0.6163053742168217, "auroc": 0.8341539493365068, "auprc": 0.885996637709877, "brier": 0.16726047042063633, "logloss": 0.5052060179718258 }, "calibration": [ { "bin": "[0.0,0.1)", "n": 759079, "mean_pred": 0.06623314081824806, "obs_rate": 0.027333123429840636 }, { "bin": "[0.1,0.2)", "n": 1807689, "mean_pred": 0.15353118408631086, "obs_rate": 0.1398288090484591 }, { "bin": "[0.2,0.3)", "n": 2278662, "mean_pred": 0.250703986073481, "obs_rate": 0.2854271497922904 }, { "bin": "[0.3,0.4)", "n": 2401825, "mean_pred": 0.35114505321433914, "obs_rate": 0.4219845325950059 }, { "bin": "[0.4,0.5)", "n": 2503890, "mean_pred": 0.4496778698066448, "obs_rate": 0.5559477453083003 }, { "bin": "[0.5,0.6)", "n": 2743182, "mean_pred": 0.5514420283736253, "obs_rate": 0.6633803371413198 }, { "bin": "[0.6,0.7)", "n": 3201411, "mean_pred": 0.6513120336728542, "obs_rate": 0.7673941271520589 }, { "bin": "[0.7,0.8)", "n": 2972899, "mean_pred": 0.7478180823491758, "obs_rate": 0.8596629081579966 }, { "bin": "[0.8,0.9)", "n": 2979925, "mean_pred": 0.8513437073854806, "obs_rate": 0.9412015403072225 }, { "bin": "[0.9,1.0)", "n": 1002571, "mean_pred": 0.9221679799864609, "obs_rate": 0.9910769411842154 } ], "per_sample_auroc": { "n_samples": 208, "median": 0.8353140721290141, "p25": 0.8326614184016954, "p75": 0.8373927525350378, "min": 0.740174387702103, "max": 0.8401855333526593 }, "by_homopolymer": { "homopolymer": { "n": 176, "pos_rate": 0.0, "auroc": null, "auprc": null, "brier": 0.12461994174893026 }, "other": { "n": 22650957, "pos_rate": 0.6163101629657414, "auroc": 0.8341526308855854, "auprc": 0.8859973231761953, "brier": 0.16726080174142982, "logloss": 0.5052065639352175 } }, "by_is_pass": { "PASS": { "n": 22645309, "pos_rate": 0.6163365225000904, "auroc": 0.8341536917536043, "auprc": 0.8860084593752011, "brier": 0.1672574382686718, "logloss": 0.505198302627369 }, "nonPASS": { "n": 5824, "pos_rate": 0.4951923076923077, "auroc": 0.821139738835895, "auprc": 0.8249088115206255, "brier": 0.17905030870563365, "logloss": 0.5352053928461165 } } }, "importances": { "impurity": [ { "feature": "entropy_flank", "impurity_importance": 0.28992320685730033 }, { "feature": "motif_len", "impurity_importance": 0.15078304844246473 }, { "feature": "gc_flank", "impurity_importance": 0.11765967510912077 }, { "feature": "ref_tract_bp", "impurity_importance": 0.09594543197447271 }, { "feature": "allele_vs_readlen", "impurity_importance": 0.06304989891121958 }, { "feature": "ref_copynum", "impurity_importance": 0.06281644250839796 }, { "feature": "gt_repcn_max", "impurity_importance": 0.045375808024477604 }, { "feature": "gt_repcn_min", "impurity_importance": 0.04503548319154128 }, { "feature": "flanking_reads", "impurity_importance": 0.04081082547154657 }, { "feature": "spanning_frac", "impurity_importance": 0.02788421749138721 }, { "feature": "expansion_over_ref", "impurity_importance": 0.017739812221077934 }, { "feature": "locus_depth", "impurity_importance": 0.014556405292958223 }, { "feature": "spanning_reads", "impurity_importance": 0.011672664495590936 }, { "feature": "in_difficult", "impurity_importance": 0.009656418449637608 }, { "feature": "gt_hom", "impurity_importance": 0.0024291103645865167 }, { "feature": "in_segdup", "impurity_importance": 0.001648983588740384 }, { "feature": "flank_lowmap", "impurity_importance": 0.001477948437034436 }, { "feature": "repci_width_max", "impurity_importance": 0.0012018133362063474 }, { "feature": "inrepeat_reads", "impurity_importance": 0.00033183288445321743 }, { "feature": "is_pass", "impurity_importance": 9.729477856164029e-07 }, { "feature": "motif_is_homopolymer", "impurity_importance": 0.0 } ], "permutation": [ { "feature": "entropy_flank", "perm_importance_mean": 0.13934060781658777, "perm_importance_std": 0.0006361765279266924 }, { "feature": "motif_len", "perm_importance_mean": 0.1232472127797279, "perm_importance_std": 0.0005893220011599711 }, { "feature": "gc_flank", "perm_importance_mean": 0.06320217026546789, "perm_importance_std": 0.00039522027338993824 }, { "feature": "ref_tract_bp", "perm_importance_mean": 0.056776687651067095, "perm_importance_std": 0.00015236878123781785 }, { "feature": "ref_copynum", "perm_importance_mean": 0.02267318905161917, "perm_importance_std": 0.00014989102435837524 }, { "feature": "allele_vs_readlen", "perm_importance_mean": 0.020529595235711205, "perm_importance_std": 0.00017190103816491447 }, { "feature": "gt_repcn_min", "perm_importance_mean": 0.01731383830567197, "perm_importance_std": 0.000195043199990813 }, { "feature": "gt_repcn_max", "perm_importance_mean": 0.014405902490600276, "perm_importance_std": 0.00013955774976049523 }, { "feature": "expansion_over_ref", "perm_importance_mean": 0.008579439049389648, "perm_importance_std": 8.141211169349268e-05 }, { "feature": "flanking_reads", "perm_importance_mean": 0.005908979701386818, "perm_importance_std": 8.933000723756271e-05 }, { "feature": "spanning_frac", "perm_importance_mean": 0.005236130437139996, "perm_importance_std": 4.831785228506296e-05 }, { "feature": "in_difficult", "perm_importance_mean": 0.003852866555695589, "perm_importance_std": 2.129084797378384e-05 }, { "feature": "spanning_reads", "perm_importance_mean": 0.0029217009056680563, "perm_importance_std": 4.176582259464099e-05 }, { "feature": "gt_hom", "perm_importance_mean": 0.002172501389781667, "perm_importance_std": 8.3379119655914e-06 }, { "feature": "locus_depth", "perm_importance_mean": 0.0020709165127682284, "perm_importance_std": 2.549011860464095e-05 }, { "feature": "in_segdup", "perm_importance_mean": 0.0009386532858458585, "perm_importance_std": 1.750671402431846e-05 }, { "feature": "flank_lowmap", "perm_importance_mean": 0.0005812032061902617, "perm_importance_std": 1.3028115550094254e-05 }, { "feature": "repci_width_max", "perm_importance_mean": 0.00026026760399893155, "perm_importance_std": 1.492427417015547e-05 }, { "feature": "inrepeat_reads", "perm_importance_mean": 5.1632608300478114e-05, "perm_importance_std": 5.166444569830962e-06 }, { "feature": "is_pass", "perm_importance_mean": 5.758337677796987e-08, "perm_importance_std": 3.3427425855204445e-08 }, { "feature": "motif_is_homopolymer", "perm_importance_mean": 0.0, "perm_importance_std": 0.0 } ] } }