File size: 1,901 Bytes
082d661
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/env python3
"""Run every available OCR backend against real scanned samples and write a
tracked report (backend/evals/ocr_backend_report.json).

    python scripts/ocr_smoke.py

Reads backend/.env, so configured backends (e.g. MiniCPM) are exercised live.
Unavailable backends (missing deps/keys) are recorded with the reason.
"""
from __future__ import annotations

import json
import sys
from pathlib import Path

# Repository root: two directory levels above this script's resolved location.
ROOT = Path(__file__).resolve().parents[1]
# Put <ROOT>/backend first on the import path so the `app.*` imports that
# follow resolve when this file is run directly as a script.
sys.path.insert(0, str(ROOT.joinpath("backend")))

from app.config import get_settings  # noqa: E402
from app.ocr.backends import build_ocr_registry  # noqa: E402
from app.ocr.backends.healthcheck import run_ocr_backend_tests  # noqa: E402

# Destination of the JSON report written by main().
REPORT_PATH = ROOT.joinpath("backend", "evals", "ocr_backend_report.json")


def _format_row(b: dict) -> str:
    """Render one entry of ``report["backends"]`` as a fixed-width table row."""
    available = b["available"]
    if available:
        # Only the first recorded case is summarised in the table.
        case = b["cases"][0] if b["cases"] else {}
        detail = f"{case.get('engine','')}  ({case.get('chars',0)} chars, {case.get('latency_ms',0)}ms)"
        func = "✓ yes" if b["ok"] else "✗ no"
    else:
        # For unavailable backends the "requires" field is shown as the reason.
        detail = b["requires"]
        func = "—"
    # Coerce to str before truncating so a non-string "requires" value (its
    # type is not visible from this script) is still cut to the column width.
    return f" {b['name']:<12}{b['tier']:<8}{('yes' if available else 'no'):<11}{func:<11}{str(detail)[:42]}"


def main() -> None:
    """Run the OCR backend tests, save the JSON report, and print a summary.

    The report returned by ``run_ocr_backend_tests`` is written to
    ``REPORT_PATH`` (pretty-printed JSON, UTF-8) and then summarised on stdout
    as one table row per backend.
    """
    settings = get_settings()
    registry = build_ocr_registry(settings)
    report = run_ocr_backend_tests(settings, registry)

    # Create the report directory on demand so a fresh checkout without
    # backend/evals/ does not fail with FileNotFoundError.
    REPORT_PATH.parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding: do not depend on the platform's locale default.
    REPORT_PATH.write_text(json.dumps(report, indent=2), encoding="utf-8")

    heavy_rule = "=" * 78
    light_rule = "-" * 78

    print("\n" + heavy_rule)
    print(f" OCR BACKEND REAL-EXTRACTION REPORT   (mode={report['mode']})")
    print(heavy_rule)
    print(f" {'backend':<12}{'tier':<8}{'available':<11}{'functional':<11}{'engine / reason'}")
    print(light_rule)
    for backend in report["backends"]:
        print(_format_row(backend))
    print(light_rule)
    print(f" available : {report['available_backends']}")
    print(f" functional: {report['functional_backends']}")
    print(f" report → {REPORT_PATH}")
    print(heavy_rule + "\n")


# Script entry point: run the smoke test only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()