Tabular Classification
Scikit-learn
Joblib
genomics
structural-variants
short-tandem-repeats
variant-calling
confidence-calibration
random-forest
Instructions to use khyeom/SVSTR-Score with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Scikit-learn
How to use khyeom/SVSTR-Score with Scikit-learn:
from huggingface_hub import hf_hub_download
import joblib

model = joblib.load(
    hf_hub_download("khyeom/SVSTR-Score", "sklearn_model.joblib")
)
# only load pickle files from sources you trust
# read more about it here https://skops.readthedocs.io/en/stable/persistence.html
- Notebooks
- Google Colab
- Kaggle
File size: 2,789 Bytes
"""Command-line interface for SV-SPR.

Usage
-----
    # Score every SV in a VCF
    python -m svspr.cli --vcf input.vcf --ref GRCh38.fa --out scored.tsv

    # Score a single SV
    python -m svspr.cli --one --chrom chr1 --pos 100000 --end 101000 \\
        --svtype DEL --svlen 1000 --alt-support 15 --ref GRCh38.fa
"""
from __future__ import annotations
import argparse
import json
import sys
from .model import SVSPR, _DEFAULT_MODEL_PATH
def build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the ``svspr`` command line.

    The parser requires ``--ref`` and exactly one of two modes:

    * ``--vcf PATH`` -- batch mode, scores an input VCF;
    * ``--one`` -- single-SV mode, described by ``--chrom``, ``--pos``,
      ``--end``, ``--svtype`` and ``--svlen`` (plus optional
      ``--alt-support``).

    The single-SV options are deliberately *not* marked ``required`` here,
    because they are only meaningful together with ``--one``; the caller is
    expected to check them after parsing.

    Returns
    -------
    argparse.ArgumentParser
        A configured parser; call ``parse_args`` on it to get the namespace.
    """
    parser = argparse.ArgumentParser(
        prog='svspr',
        description='SV-SPR — caller-agnostic SV confidence scorer.')

    # Options shared by both modes.
    parser.add_argument(
        '--ref', required=True,
        help='Reference FASTA (e.g. GRCh38). Must be indexed (.fai).')
    parser.add_argument(
        '--model', default=str(_DEFAULT_MODEL_PATH),
        help='Path to model pkl. Default: bundled svspr_v14_seq.pkl.')

    # Exactly one of --vcf / --one must be supplied.
    mode = parser.add_mutually_exclusive_group(required=True)
    mode.add_argument('--vcf', help='Input VCF to score (batch mode).')
    mode.add_argument(
        '--one', action='store_true',
        help='Score one SV (use --chrom / --pos / --end / --svtype / --svlen).')

    parser.add_argument(
        '--out', default='-',
        help='Output TSV path. "-" for stdout (batch mode only).')

    # Single-SV args. Grouping only changes how --help is laid out; parsing
    # is identical to registering them directly on the parser.
    single = parser.add_argument_group('single-SV options (used with --one)')
    single.add_argument('--chrom', help='Chromosome / contig name, e.g. chr1.')
    single.add_argument('--pos', type=int,
                        help='SV start position, e.g. 100000.')
    single.add_argument('--end', type=int,
                        help='SV end position, e.g. 101000.')
    single.add_argument('--svtype', choices=['DEL', 'INS', 'DUP', 'BND'],
                        help='SV type.')
    single.add_argument('--svlen', type=int, help='SV length, e.g. 1000.')
    single.add_argument(
        '--alt-support', type=float, default=0.0,
        help='Generic alt read support (PR+SR for Manta, AD for Delly...). '
             'Defaults to 0 if unknown (model is sequence-only by default).')
    return parser
def main(argv=None) -> int:
    """Run the SV-SPR command-line interface.

    In ``--one`` mode a single SV is scored with ``SVSPR.predict_one`` and
    the result is printed to stdout as indented JSON. Otherwise the VCF given
    by ``--vcf`` is scored with ``SVSPR.predict_vcf`` and written as a
    tab-separated table to ``--out`` (stdout when ``--out`` is ``-``).

    Parameters
    ----------
    argv : list of str, optional
        Command-line arguments without the program name. ``None`` is passed
        straight to ``parse_args``, which then reads ``sys.argv[1:]``.

    Returns
    -------
    int
        ``0`` on success, suitable as a process exit status.

    Raises
    ------
    SystemExit
        On argparse errors, or when ``--one`` is given without one of
        ``--chrom``, ``--pos``, ``--end``, ``--svtype`` or ``--svlen``.
    """
    args = build_parser().parse_args(argv)

    if args.one:
        # Check the single-SV arguments *before* constructing the model, so a
        # malformed invocation fails immediately and is not masked by (or
        # delayed behind) anything SVSPR does with the model path.
        # `is None` rather than truthiness: 0 is a legitimate parsed value.
        for name in ('chrom', 'pos', 'end', 'svtype', 'svlen'):
            if getattr(args, name) is None:
                sys.exit(f'--one mode requires --{name}')

    model = SVSPR(args.model)

    if args.one:
        result = model.predict_one(
            chrom=args.chrom, pos=args.pos, end=args.end,
            svtype=args.svtype, svlen=args.svlen,
            total_alt_support=args.alt_support, ref_path=args.ref)
        print(json.dumps(result, indent=2))
        return 0

    # Batch mode. predict_vcf's result is used like a pandas DataFrame
    # (.columns / .to_csv) -- presumably it is one; confirm in .model.
    df = model.predict_vcf(args.vcf, args.ref)
    cols = ['chrom', 'pos', 'end', 'svtype', 'svlen', 'CS', 'tier']
    # Emit the compact column set only when every column is present;
    # otherwise fall back to writing the frame unchanged.
    out_df = df[cols] if all(c in df.columns for c in cols) else df
    if args.out == '-':
        out_df.to_csv(sys.stdout, sep='\t', index=False)
    else:
        out_df.to_csv(args.out, sep='\t', index=False)
        # Status goes to stderr so it never mixes with data on stdout.
        print(f'Wrote {len(out_df):,} rows to {args.out}', file=sys.stderr)
    return 0
if __name__ == '__main__':
    # Script entry point: hand main()'s return value to the interpreter as
    # the process exit status.
    raise SystemExit(main())
|