Tabular Classification
Scikit-learn
Joblib
genomics
structural-variants
short-tandem-repeats
variant-calling
confidence-calibration
random-forest
Instructions to use khyeom/SVSTR-Score with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Scikit-learn
How to use khyeom/SVSTR-Score with Scikit-learn:
```python
from huggingface_hub import hf_hub_download
import joblib

model = joblib.load(
    hf_hub_download("khyeom/SVSTR-Score", "sklearn_model.joblib")
)
# only load pickle files from sources you trust
# read more about it here https://skops.readthedocs.io/en/stable/persistence.html
```
- Notebooks
- Google Colab
- Kaggle
| """Command-line interface for SV-SPR. | |
| Usage | |
| ----- | |
| # Score every SV in a VCF | |
| python -m svspr.cli --vcf input.vcf --ref GRCh38.fa --out scored.tsv | |
| # Score a single SV | |
| python -m svspr.cli --one --chrom chr1 --pos 100000 --end 101000 \\ | |
| --svtype DEL --svlen 1000 --alt-support 15 --ref GRCh38.fa | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import json | |
| import sys | |
| from .model import SVSPR, _DEFAULT_MODEL_PATH | |
def build_parser() -> argparse.ArgumentParser:
    """Construct the argument parser for the ``svspr`` command line.

    The parser always requires ``--ref`` and exactly one of ``--vcf``
    (batch mode) or ``--one`` (single-SV mode). The single-SV flags
    (``--chrom``, ``--pos``, ``--end``, ``--svtype``, ``--svlen``) are
    declared as optional here; nothing in this function enforces that they
    accompany ``--one``.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(
        prog='svspr',
        description='SV-SPR — caller-agnostic SV confidence scorer.',
    )

    # Options shared by both modes.
    parser.add_argument(
        '--ref',
        required=True,
        help='Reference FASTA (e.g. GRCh38). Must be indexed (.fai).',
    )
    parser.add_argument(
        '--model',
        default=str(_DEFAULT_MODEL_PATH),
        help='Path to model pkl. Default: bundled svspr_v14_seq.pkl.',
    )

    # Exactly one of --vcf / --one must be given.
    mode_group = parser.add_mutually_exclusive_group(required=True)
    mode_group.add_argument(
        '--vcf',
        help='Input VCF to score (batch mode).',
    )
    mode_group.add_argument(
        '--one',
        action='store_true',
        help='Score one SV (use --chrom / --pos / --end / --svtype / --svlen).',
    )

    parser.add_argument(
        '--out',
        default='-',
        help='Output TSV path. "-" for stdout (batch mode only).',
    )

    # Single-SV args, registered in the order they appear in --help.
    single_sv_options = (
        ('--chrom', {}),
        ('--pos', {'type': int}),
        ('--end', {'type': int}),
        ('--svtype', {'choices': ['DEL', 'INS', 'DUP', 'BND']}),
        ('--svlen', {'type': int}),
    )
    for flag, options in single_sv_options:
        parser.add_argument(flag, **options)

    alt_support_help = (
        'Generic alt read support (PR+SR for Manta, AD for Delly...). '
        'Defaults to 0 if unknown (model is sequence-only by default).'
    )
    parser.add_argument(
        '--alt-support',
        type=float,
        default=0.0,
        help=alt_support_help,
    )
    return parser
def main(argv=None):
    """Run the SV-SPR command-line interface.

    In ``--one`` mode a single SV is scored with ``predict_one`` and the
    result is printed to stdout as indented JSON. Otherwise the VCF given by
    ``--vcf`` is scored with ``predict_vcf`` and written as a tab-separated
    table to ``--out`` (``"-"`` means stdout).

    Args:
        argv: Argument list to parse. ``None`` is passed straight to
            ``argparse``, which then reads ``sys.argv[1:]``.

    Returns:
        ``0`` on success.

    Raises:
        SystemExit: via ``sys.exit`` with an explanatory message when
            ``--one`` is given without one of ``--chrom``, ``--pos``,
            ``--end``, ``--svtype`` or ``--svlen`` (and by ``argparse``
            itself on malformed command lines).
    """
    args = build_parser().parse_args(argv)

    if args.one:
        # Check the single-SV flags before constructing the model, so an
        # incomplete command line is rejected without loading the model.
        # `is None` (not truthiness) keeps legitimate zero values valid.
        for k in ('chrom', 'pos', 'end', 'svtype', 'svlen'):
            if getattr(args, k) is None:
                sys.exit(f'--one mode requires --{k}')

    model = SVSPR(args.model)

    if args.one:
        result = model.predict_one(
            chrom=args.chrom, pos=args.pos, end=args.end,
            svtype=args.svtype, svlen=args.svlen,
            total_alt_support=args.alt_support, ref_path=args.ref)
        print(json.dumps(result, indent=2))
        return 0

    # Batch mode. NOTE(review): `df` is presumably a pandas DataFrame (it is
    # used via `.columns`, column-list indexing and `.to_csv`) — confirm
    # against SVSPR.predict_vcf.
    df = model.predict_vcf(args.vcf, args.ref)
    cols = ['chrom', 'pos', 'end', 'svtype', 'svlen', 'CS', 'tier']
    # Narrow to the standard columns only when all of them are present;
    # otherwise emit the table exactly as returned.
    out_df = df[cols] if all(c in df.columns for c in cols) else df
    if args.out == '-':
        out_df.to_csv(sys.stdout, sep='\t', index=False)
    else:
        out_df.to_csv(args.out, sep='\t', index=False)
        # Status goes to stderr so it never mixes with data on stdout.
        print(f'Wrote {len(out_df):,} rows to {args.out}', file=sys.stderr)
    return 0
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == '__main__':
    raise SystemExit(main())