SVSTR-Score / seqonly /src /cli.py
khyeom's picture
Add sequence-only headline model (svspr_v14_seq, 11-feature) + inference package
90d0b4b verified
Raw
History Blame Contribute Delete
2.79 kB
"""Command-line interface for SV-SPR.
Usage
-----
    # Score every SV in a VCF
    python -m svspr.cli --vcf input.vcf --ref GRCh38.fa --out scored.tsv

    # Score a single SV
    python -m svspr.cli --one --chrom chr1 --pos 100000 --end 101000 \\
        --svtype DEL --svlen 1000 --alt-support 15 --ref GRCh38.fa
"""
from __future__ import annotations
import argparse
import json
import sys
from .model import SVSPR, _DEFAULT_MODEL_PATH
def build_parser() -> argparse.ArgumentParser:
    """Construct the argument parser for the ``svspr`` command.

    The parser requires ``--ref`` and exactly one of ``--vcf`` (batch mode)
    or ``--one`` (single-SV mode). The single-SV coordinate options are
    declared as optional here; whether they were supplied is left to the
    caller to check.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(
        prog='svspr',
        description='SV-SPR — caller-agnostic SV confidence scorer.',
    )

    # Options shared by both modes.
    parser.add_argument(
        '--ref',
        required=True,
        help='Reference FASTA (e.g. GRCh38). Must be indexed (.fai).',
    )
    parser.add_argument(
        '--model',
        default=str(_DEFAULT_MODEL_PATH),
        help='Path to model pkl. Default: bundled svspr_v14_seq.pkl.',
    )

    # Exactly one run mode must be selected.
    run_mode = parser.add_mutually_exclusive_group(required=True)
    run_mode.add_argument('--vcf', help='Input VCF to score (batch mode).')
    run_mode.add_argument(
        '--one',
        action='store_true',
        help='Score one SV (use --chrom / --pos / --end / --svtype / --svlen).',
    )

    parser.add_argument(
        '--out',
        default='-',
        help='Output TSV path. "-" for stdout (batch mode only).',
    )

    # Single-SV options, registered in the same order as they are listed in
    # the --one help text.
    single_sv_options = (
        ('--chrom', {}),
        ('--pos', {'type': int}),
        ('--end', {'type': int}),
        ('--svtype', {'choices': ['DEL', 'INS', 'DUP', 'BND']}),
        ('--svlen', {'type': int}),
    )
    for flag, options in single_sv_options:
        parser.add_argument(flag, **options)

    parser.add_argument(
        '--alt-support',
        type=float,
        default=0.0,
        help='Generic alt read support (PR+SR for Manta, AD for Delly...). '
             'Defaults to 0 if unknown (model is sequence-only by default).',
    )
    return parser
def main(argv: list[str] | None = None) -> int:
    """Run the SV-SPR command-line interface.

    In ``--one`` mode a single SV is scored and the result is printed to
    stdout as indented JSON. Otherwise the VCF given by ``--vcf`` is scored
    and written as a tab-separated table to ``--out`` (``-`` means stdout).

    Args:
        argv: Argument list to parse. ``None`` is passed through to
            ``ArgumentParser.parse_args``, which then falls back to the
            process command line.

    Returns:
        ``0`` on success.

    Raises:
        SystemExit: On argparse usage errors, or when ``--one`` is given
            without one of ``--chrom``, ``--pos``, ``--end``, ``--svtype``
            or ``--svlen``.
    """
    args = build_parser().parse_args(argv)

    if args.one:
        # Check the single-SV arguments before constructing the scorer, so a
        # usage error is reported immediately and is not hidden behind (or
        # delayed by) building the model object.
        for k in ('chrom', 'pos', 'end', 'svtype', 'svlen'):
            if getattr(args, k) is None:
                sys.exit(f'--one mode requires --{k}')

    model = SVSPR(args.model)

    if args.one:
        result = model.predict_one(
            chrom=args.chrom, pos=args.pos, end=args.end,
            svtype=args.svtype, svlen=args.svlen,
            total_alt_support=args.alt_support, ref_path=args.ref)
        print(json.dumps(result, indent=2))
        return 0

    # Batch mode. NOTE(review): predict_vcf is assumed to return a
    # pandas-style DataFrame (it is used via .columns and .to_csv) — confirm.
    df = model.predict_vcf(args.vcf, args.ref)
    cols = ['chrom', 'pos', 'end', 'svtype', 'svlen', 'CS', 'tier']
    # Emit the compact column set only when every column is present;
    # otherwise fall back to writing the frame unchanged.
    out_df = df[cols] if all(c in df.columns for c in cols) else df
    if args.out == '-':
        out_df.to_csv(sys.stdout, sep='\t', index=False)
    else:
        out_df.to_csv(args.out, sep='\t', index=False)
        # Status goes to stderr so it never mixes with data on stdout.
        print(f'Wrote {len(out_df):,} rows to {args.out}', file=sys.stderr)
    return 0
if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())