Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """Run every available OCR backend against real scanned samples and write a | |
| tracked report (backend/evals/ocr_backend_report.json). | |
| python scripts/ocr_smoke.py | |
| Reads backend/.env, so configured backends (e.g. MiniCPM) are exercised live. | |
| Unavailable backends (missing deps/keys) are recorded with the reason. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import sys | |
| from pathlib import Path | |
# Repository root: two directory levels above this file (the script is run as
# scripts/ocr_smoke.py according to the module docstring).
ROOT = Path(__file__).resolve().parent.parent

# Put backend/ at the front of the import path so the ``app`` imports below
# resolve when the script is launched directly; this is why they carry E402.
sys.path.insert(0, str(ROOT.joinpath("backend")))

from app.config import get_settings  # noqa: E402
from app.ocr.backends import build_ocr_registry  # noqa: E402
from app.ocr.backends.healthcheck import run_ocr_backend_tests  # noqa: E402

# Destination of the JSON report written by main().
REPORT_PATH = ROOT.joinpath("backend", "evals", "ocr_backend_report.json")
def _format_backend_row(backend: dict) -> str:
    """Return the summary-table line for one entry of ``report["backends"]``.

    Available backends show the engine, character count and latency of their
    first recorded case (blank/zero placeholders when there are no cases).
    Unavailable backends show their ``requires`` value instead.
    """
    if backend["available"]:
        cases = backend["cases"]
        first_case = cases[0] if cases else {}
        detail = (
            f"{first_case.get('engine','')} "
            f"({first_case.get('chars',0)} chars, {first_case.get('latency_ms',0)}ms)"
        )
        available = "yes"
        functional = "✓ yes" if backend["ok"] else "✗ no"
    else:
        # Coerce to str so the truncation below is safe even if ``requires``
        # is not a plain string (its exact type is not visible from here).
        detail = str(backend["requires"])
        available = "no"
        functional = "—"
    # The detail column is capped at 42 characters to keep the table narrow.
    return (
        f" {backend['name']:<12}{backend['tier']:<8}"
        f"{available:<11}{functional:<11}{detail[:42]}"
    )


def main() -> None:
    """Run the OCR backend checks, save the JSON report and print a summary.

    Loads the settings, builds the OCR registry from them, passes both to
    ``run_ocr_backend_tests`` and writes the returned report to
    ``REPORT_PATH`` as indented JSON. A table with one row per entry in
    ``report["backends"]`` is then printed to stdout.
    """
    settings = get_settings()
    registry = build_ocr_registry(settings)
    report = run_ocr_backend_tests(settings, registry)

    # Create the target directory if it is missing so the write cannot fail
    # with FileNotFoundError.
    REPORT_PATH.parent.mkdir(parents=True, exist_ok=True)
    REPORT_PATH.write_text(json.dumps(report, indent=2), encoding="utf-8")

    width = 78
    heavy_rule = "=" * width
    light_rule = "-" * width

    print("\n" + heavy_rule)
    print(f" OCR BACKEND REAL-EXTRACTION REPORT (mode={report['mode']})")
    print(heavy_rule)
    print(f" {'backend':<12}{'tier':<8}{'available':<11}{'functional':<11}{'engine / reason'}")
    print(light_rule)
    for backend in report["backends"]:
        print(_format_backend_row(backend))
    print(light_rule)
    print(f" available : {report['available_backends']}")
    print(f" functional: {report['functional_backends']}")
    print(f" report → {REPORT_PATH}")
    print(heavy_rule + "\n")


if __name__ == "__main__":
    main()