File size: 2,658 Bytes
082d661
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/env python3
"""Run the OCR output-quality + document-analysis benchmark across all available
backends (OpenBMB MiniCPM-V, Cohere Aya-Vision, Tesseract, sidecar) and PUBLISH the
results.

    python scripts/ocr_quality.py

Writes:
  backend/evals/ocr_quality_report.json   (committed, tracked)
  <writable>/metrics_snapshots/ocr_quality_<ts>.json  (published snapshot)
"""
from __future__ import annotations

import json
import sys
import time
from pathlib import Path

# ROOT is the parent of the directory that contains this file (the repository
# root when the script is run as scripts/ocr_quality.py).
ROOT = Path(__file__).resolve().parent.parent
# Prepend ROOT/backend to sys.path ahead of the `app.*` imports below
# (presumably where the `app` package lives — confirm). The imports have to
# come after this line, hence the E402 suppressions.
sys.path.insert(0, str(ROOT / "backend"))

from app.config import get_settings  # noqa: E402
from app.db import Database  # noqa: E402
from app.metrics import MetricsStore  # noqa: E402
from app.ocr.backends import build_ocr_registry  # noqa: E402
from app.ocr.quality import run_ocr_quality  # noqa: E402
from app.providers import build_registry  # noqa: E402
from app.rag_store import VectorStore  # noqa: E402
from app.router import ModelRouter  # noqa: E402

# Location of the JSON report that main() writes.
REPORT = ROOT / "backend" / "evals" / "ocr_quality_report.json"


def _pct(value: float | None) -> str:
    """Format a fraction as a one-decimal percentage, or ``n/a`` when it is None."""
    return "n/a" if value is None else f"{value*100:.1f}%"


def _print_summary(report: dict) -> None:
    """Print the per-backend comparison table and the two "best" lines.

    Reads ``report["backends"]`` (one row per entry), ``report["best_ocr_quality"]``
    and ``report["best_document_analysis"]``; a missing key raises ``KeyError``.
    """
    heavy_rule = "=" * 90
    light_rule = "-" * 90

    print("\n" + heavy_rule)
    print(" OCR OUTPUT QUALITY + DOCUMENT ANALYSIS  (smaller CER/WER = better; higher field-acc = better)")
    print(heavy_rule)
    print(f" {'backend':<11}{'model':<17}{'params':>7}{'CER':>8}{'WER':>8}{'field-exact':>13}{'F1':>8}{'lat(ms)':>9}{'$/doc':>9}")
    print(light_rule)
    for row in report["backends"]:
        # `params_b` is optional; a missing or falsy value is shown as a dash.
        params_b = row.get("params_b")
        params = f"{params_b}B" if params_b else "—"
        # Model names are cut to 16 characters so the column stays aligned.
        model = (row.get("model") or "")[:16]
        # Latency and cost may be None/0; both are rendered as zero.
        latency = row["avg_latency_ms"] or 0
        cost = row["avg_cost_usd"] or 0
        print(f" {row['backend']:<11}{model:<17}{params:>7}"
              f"{_pct(row['cer']):>8}{_pct(row['wer']):>8}{_pct(row['field_exact_match']):>13}"
              f"{_pct(row['field_f1']):>8}{latency:>9.0f}{cost:>9.5f}")
    print(light_rule)
    print(f" best OCR text quality   : {report['best_ocr_quality']}")
    print(f" best document analysis  : {report['best_document_analysis']}")
    print(f" published → {REPORT}")
    print(heavy_rule + "\n")


def main() -> None:
    """Run the OCR quality benchmark, publish the report, and print a summary.

    Builds the metrics store, model router, OCR registry, database and vector
    store from the application settings, passes them to ``run_ocr_quality``,
    then writes the resulting report as indented JSON to ``REPORT`` and to a
    timestamped file under ``<writable_dir>/metrics_snapshots``. Finally prints
    a comparison table to stdout.
    """
    settings = get_settings()
    metrics = MetricsStore(settings.metrics_db_path)
    router = ModelRouter(build_registry(settings), settings, metrics)
    ocr = build_ocr_registry(settings)
    db = Database(settings.app_db_path)
    rag = VectorStore(settings.rag_db_path)

    report = run_ocr_quality(settings, ocr, router, metrics, db=db, rag_store=rag)

    # Serialize once and reuse the text for both published copies.
    payload = json.dumps(report, indent=2)

    # Create the report directory the same way the snapshot directory is
    # created, so a fresh checkout without it does not fail after the run.
    REPORT.parent.mkdir(parents=True, exist_ok=True)
    REPORT.write_text(payload, encoding="utf-8")

    snap_dir = settings.writable_dir / "metrics_snapshots"
    snap_dir.mkdir(parents=True, exist_ok=True)
    # The snapshot name uses local time at one-second resolution.
    stamp = time.strftime('%Y%m%dT%H%M%S')
    (snap_dir / f"ocr_quality_{stamp}.json").write_text(payload, encoding="utf-8")

    _print_summary(report)


# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()