jw-search / scripts /run-ocr-benchmark.py
jw-tools's picture
deploy: latest main (lazy-ML cold start, durable launcher, web-image search, scene search) + full-app data refresh
7ea1851 verified
#!/usr/bin/env python3
"""
Score OCR predictions against a ground-truth annotation file.
This gives us a stable benchmark before wiring OCR into production indexing.
"""
from __future__ import annotations
import argparse
import json
import sys
from pathlib import Path
# The project root is the parent of the directory that contains this file.
ROOT_DIR = Path(__file__).resolve().parents[1]
BACKEND_DIR = ROOT_DIR.joinpath("backend")
# Put backend/ at the front of the import path, but only if it is not
# already listed, so the import below can find `ocr_ground_truth`.
if not sys.path.count(str(BACKEND_DIR)):
    sys.path.insert(0, str(BACKEND_DIR))
from ocr_ground_truth import build_benchmark_report, load_ground_truth, load_predictions # noqa: E402
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the OCR benchmark script.

    Options registered:
        --ground-truth: required path to the ground-truth JSON file.
        --predictions: required path to the predictions JSON file.
        --report: path the report JSON is written to; defaults to
            ``docs/reports/ocr-benchmark-report.json`` under ``ROOT_DIR``.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    cli = argparse.ArgumentParser(description="Score OCR predictions against Search-UI ground truth.")

    # Both input files are mandatory and are parsed into Path objects.
    required_inputs = (
        ("--ground-truth", "Ground-truth JSON file"),
        ("--predictions", "Predictions JSON file"),
    )
    for flag, help_text in required_inputs:
        cli.add_argument(flag, required=True, type=Path, help=help_text)

    default_report = ROOT_DIR / "docs" / "reports" / "ocr-benchmark-report.json"
    cli.add_argument("--report", type=Path, default=default_report, help="Where to write the report JSON")
    return cli
def main() -> int:
    """Score predictions against ground truth and write the benchmark report.

    Parses the command-line arguments, loads both input files, builds the
    report, writes it as indented UTF-8 JSON to ``--report`` (creating the
    parent directory when needed), then prints a one-line summary followed
    by the report path.

    Returns:
        0 on completion. Nothing is caught here, so any failure propagates
        as an exception.
    """
    args = build_parser().parse_args()

    truth = load_ground_truth(args.ground_truth)
    predicted = load_predictions(args.predictions)
    report = build_benchmark_report(truth, predicted)

    # Ensure the output directory exists before serialising and writing.
    args.report.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(report, indent=2, ensure_ascii=False)
    args.report.write_text(payload, encoding="utf-8")

    summary = report["summary"]
    headline = (
        "OCR benchmark:"
        f" segments={summary['segments']}"
        f" exact_match_rate={summary['exact_match_rate']}"
        f" avg_similarity={summary['average_normalized_edit_similarity']}"
        f" junk_rate={summary['junk_prediction_rate']}"
        f" missing_rate={summary['missing_prediction_rate']}"
    )
    print(headline)
    print(f"Report: {args.report}")
    return 0
# Run the benchmark only when executed as a script; the return value of
# main() becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())