ERP-DocIQ

Running

App Files Files Community

ERP-DocIQ / scripts /ocr_smoke.py

kenmandal

Deploy latest: ERP DocIQ NLQ chatbot + reasoning models (MiniCPM3-4B/Command R7B) + ERP fine-tuning + extreme OCR docs

082d661 verified 17 days ago

Raw

History Blame Contribute Delete

1.9 kB

	#!/usr/bin/env python3
	"""Run every available OCR backend against real scanned samples and write a
	tracked report (backend/evals/ocr_backend_report.json).

	python scripts/ocr_smoke.py

	Reads backend/.env, so configured backends (e.g. MiniCPM) are exercised live.
	Unavailable backends (missing deps/keys) are recorded with the reason.
	"""
	from __future__ import annotations

	import json
	import sys
	from pathlib import Path

	ROOT = Path(__file__).resolve().parent.parent
	sys.path.insert(0, str(ROOT / "backend"))

	from app.config import get_settings # noqa: E402
	from app.ocr.backends import build_ocr_registry # noqa: E402
	from app.ocr.backends.healthcheck import run_ocr_backend_tests # noqa: E402

	# Destination of the JSON report that main() writes; it lives under
	# backend/evals, the location the module docstring names for the tracked report.
	REPORT_PATH = ROOT / "backend" / "evals" / "ocr_backend_report.json"


	def _backend_row(b: dict) -> str:
	    """Format one entry of ``report["backends"]`` as a fixed-width table row.

	    For an available backend the detail column summarises its first recorded
	    case (engine, character count, latency); with no cases the summary falls
	    back to empty/zero placeholders. For an unavailable backend the detail
	    column shows the ``requires`` value instead.
	    """
	    if b["available"]:
	        cases = b["cases"]
	        case = cases[0] if cases else {}
	        detail = f"{case.get('engine','')} ({case.get('chars',0)} chars, {case.get('latency_ms',0)}ms)"
	        func = "✓ yes" if b["ok"] else "✗ no"
	    else:
	        # NOTE(review): the type of "requires" is not visible from this script.
	        # Coerce to text so the 42-character truncation below is always a
	        # string slice (a None would otherwise raise, and a list would be
	        # truncated by element instead of by column width).
	        requires = b["requires"]
	        detail = "" if requires is None else str(requires)
	        func = "—"
	    avail = "yes" if b["available"] else "no"
	    # The detail column is clipped to 42 characters to keep rows near the rule width.
	    return f" {b['name']:<12}{b['tier']:<8}{avail:<11}{func:<11}{detail[:42]}"


	def main() -> None:
	    """Run the OCR backend checks, save the JSON report, and print a summary.

	    Loads settings, builds the OCR registry, and passes both to
	    ``run_ocr_backend_tests``. The returned report is serialised to
	    ``REPORT_PATH`` and then rendered on stdout as one table row per backend,
	    followed by the lists of available and functional backends.
	    """
	    settings = get_settings()
	    registry = build_ocr_registry(settings)
	    report = run_ocr_backend_tests(settings, registry)

	    # Create the target directory first so a fresh checkout without
	    # backend/evals does not fail on the write; pin the encoding so the file
	    # does not depend on the platform default.
	    REPORT_PATH.parent.mkdir(parents=True, exist_ok=True)
	    REPORT_PATH.write_text(json.dumps(report, indent=2), encoding="utf-8")

	    heavy_rule = "=" * 78
	    light_rule = "-" * 78

	    print("\n" + heavy_rule)
	    print(f" OCR BACKEND REAL-EXTRACTION REPORT (mode={report['mode']})")
	    print(heavy_rule)
	    print(f" {'backend':<12}{'tier':<8}{'available':<11}{'functional':<11}{'engine / reason'}")
	    print(light_rule)
	    for b in report["backends"]:
	        print(_backend_row(b))
	    print(light_rule)
	    print(f" available : {report['available_backends']}")
	    print(f" functional: {report['functional_backends']}")
	    print(f" report → {REPORT_PATH}")
	    print(heavy_rule + "\n")


	# Script entry point: run the smoke check only when this file is executed
	# directly (python scripts/ocr_smoke.py), not when it is imported.
	if __name__ == "__main__":
	main()