File size: 2,789 Bytes
90d0b4b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
"""Command-line interface for SV-SPR.

Usage
-----
    # Score every SV in a VCF
    python -m svspr.cli --vcf input.vcf --ref GRCh38.fa --out scored.tsv

    # Score a single SV
    python -m svspr.cli --one --chrom chr1 --pos 100000 --end 101000 \\
        --svtype DEL --svlen 1000 --alt-support 15 --ref GRCh38.fa
"""
from __future__ import annotations

import argparse
import json
import sys

from .model import SVSPR, _DEFAULT_MODEL_PATH


def build_parser() -> argparse.ArgumentParser:
    """Construct the argument parser for the ``svspr`` command.

    ``--ref`` is always required, together with exactly one of ``--vcf``
    (batch mode) or ``--one`` (single-SV mode). The single-SV options
    (``--chrom``, ``--pos``, ``--end``, ``--svtype``, ``--svlen``) are
    declared as optional here; this function does not enforce their
    presence when ``--one`` is given.

    Returns
    -------
    argparse.ArgumentParser
        The configured parser. Nothing is parsed by this function.
    """
    parser = argparse.ArgumentParser(
        prog='svspr',
        description='SV-SPR — caller-agnostic SV confidence scorer.',
    )

    # Options shared by both modes.
    parser.add_argument(
        '--ref',
        required=True,
        help='Reference FASTA (e.g. GRCh38). Must be indexed (.fai).',
    )
    parser.add_argument(
        '--model',
        default=str(_DEFAULT_MODEL_PATH),
        help='Path to model pkl. Default: bundled svspr_v14_seq.pkl.',
    )

    # Exactly one input mode has to be selected.
    input_mode = parser.add_mutually_exclusive_group(required=True)
    input_mode.add_argument(
        '--vcf',
        help='Input VCF to score (batch mode).',
    )
    input_mode.add_argument(
        '--one',
        action='store_true',
        help='Score one SV (use --chrom / --pos / --end / --svtype / --svlen).',
    )

    parser.add_argument(
        '--out',
        default='-',
        help='Output TSV path. "-" for stdout (batch mode only).',
    )

    # Single-SV args: declared without help text, in this exact order.
    single_sv_options = (
        ('--chrom', {}),
        ('--pos', {'type': int}),
        ('--end', {'type': int}),
        ('--svtype', {'choices': ['DEL', 'INS', 'DUP', 'BND']}),
        ('--svlen', {'type': int}),
    )
    for flag, extra_kwargs in single_sv_options:
        parser.add_argument(flag, **extra_kwargs)

    alt_support_help = (
        'Generic alt read support (PR+SR for Manta, AD for Delly...). '
        'Defaults to 0 if unknown (model is sequence-only by default).'
    )
    parser.add_argument(
        '--alt-support',
        type=float,
        default=0.0,
        help=alt_support_help,
    )

    return parser


def main(argv=None):
    """Run the SV-SPR command-line interface.

    Parses ``argv`` with :func:`build_parser` and then runs one of two modes:

    * ``--one``: scores a single SV via ``SVSPR.predict_one`` and prints the
      result to stdout as indented JSON. ``--out`` is not consulted in this
      mode.
    * ``--vcf``: scores the VCF via ``SVSPR.predict_vcf`` and writes a
      tab-separated table to ``--out`` (``"-"`` means stdout).

    Parameters
    ----------
    argv : list[str] | None
        Argument vector without the program name. ``None`` is passed straight
        to ``parse_args``, which then falls back to ``sys.argv[1:]``.

    Returns
    -------
    int
        ``0`` on success.

    Raises
    ------
    SystemExit
        Raised by argparse for invalid arguments, or with the message
        ``--one mode requires --<name>`` when a single-SV argument is missing.
    """
    args = build_parser().parse_args(argv)

    if args.one:
        # Validate BEFORE constructing the model so that a plain usage error
        # is reported immediately and is never hidden behind (or delayed by)
        # model construction.
        for k in ('chrom', 'pos', 'end', 'svtype', 'svlen'):
            if getattr(args, k) is None:
                sys.exit(f'--one mode requires --{k}')
        model = SVSPR(args.model)
        result = model.predict_one(
            chrom=args.chrom, pos=args.pos, end=args.end,
            svtype=args.svtype, svlen=args.svlen,
            total_alt_support=args.alt_support, ref_path=args.ref)
        # NOTE(review): assumes predict_one returns only JSON-serialisable
        # values — confirm against the model implementation.
        print(json.dumps(result, indent=2))
        return 0

    model = SVSPR(args.model)
    # presumably a pandas DataFrame (uses .columns / .to_csv) — verify in model.
    df = model.predict_vcf(args.vcf, args.ref)
    cols = ['chrom', 'pos', 'end', 'svtype', 'svlen', 'CS', 'tier']
    # Emit the compact column set only when all of its columns are present;
    # otherwise fall back to writing the full table unchanged.
    out_df = df[cols] if all(c in df.columns for c in cols) else df
    if args.out == '-':
        out_df.to_csv(sys.stdout, sep='\t', index=False)
    else:
        out_df.to_csv(args.out, sep='\t', index=False)
        # Status goes to stderr so it never mixes with data on stdout.
        print(f'Wrote {len(out_df):,} rows to {args.out}', file=sys.stderr)
    return 0


if __name__ == '__main__':
    # Script entry point: propagate main()'s return value as the exit status.
    raise SystemExit(main())