"""Regenerate the research reports and write a JSON readiness summary.

The module rewrites the watchlist, recommendation, decision-card and
Hugging Face metadata reports through the ``research_watchlist`` and
``check_research_sources`` helpers, then records whether the combined
evidence is "ready" in a JSON report.
"""

from __future__ import annotations

import argparse
import json
import sys
from pathlib import Path
from typing import Any

ROOT_DIR = Path(__file__).resolve().parent.parent
# Put the repository root on sys.path before importing ``scripts`` below
# (presumably so the file can be executed directly as a script).
if str(ROOT_DIR) not in sys.path:
    sys.path.insert(0, str(ROOT_DIR))

from scripts import check_research_sources, research_watchlist

# Single source of truth for the defaults shared by the function signature
# and the command-line parser, so the two cannot drift apart.
_DOCS_DIR = ROOT_DIR / "docs"
DEFAULT_WATCHLIST_REPORT = _DOCS_DIR / "research-watchlist.md"
DEFAULT_RECOMMENDATION_REPORT = _DOCS_DIR / "recommended-free-stack.md"
DEFAULT_DECISION_CARD_JSON = _DOCS_DIR / "recommended-decision-card.json"
DEFAULT_DECISION_CARD_REPORT = _DOCS_DIR / "recommended-decision-card.md"
DEFAULT_METADATA_REPORT = _DOCS_DIR / "huggingface-model-metadata.md"
DEFAULT_REPORT = ROOT_DIR / "outputs" / "research-refresh.json"
DEFAULT_TIMEOUT = 12.0


def refresh_research_evidence(
    *,
    watchlist_report: Path = DEFAULT_WATCHLIST_REPORT,
    recommendation_report: Path = DEFAULT_RECOMMENDATION_REPORT,
    decision_card_json: Path = DEFAULT_DECISION_CARD_JSON,
    decision_card_report: Path = DEFAULT_DECISION_CARD_REPORT,
    metadata_report: Path = DEFAULT_METADATA_REPORT,
    report_out: Path = DEFAULT_REPORT,
    require_live_hf_metadata: bool = False,
    timeout: float = DEFAULT_TIMEOUT,
) -> dict[str, Any]:
    """Rewrite every research report and return the readiness summary.

    Args:
        watchlist_report: Destination passed to ``write_report``.
        recommendation_report: Destination passed to
            ``write_recommendation_report``.
        decision_card_json: Destination passed to
            ``write_decision_card_json``.
        decision_card_report: Destination passed to
            ``write_decision_card_report``.
        metadata_report: Destination of the Hugging Face metadata report;
            it is also handed to ``collect_checks`` as ``metadata_path``.
        report_out: Path of the JSON summary written by this function. Its
            parent directory is created when missing.
        require_live_hf_metadata: When true, any failed live Hugging Face
            metadata check makes the overall result not ready.
        timeout: Forwarded to ``collect_huggingface_metadata_checks``.

    Returns:
        The summary dictionary. It holds the same content that was written
        to ``report_out`` plus a ``"report"`` key with that path, which is
        added only after the file has been written.
    """
    candidates = research_watchlist.CANDIDATES

    # Regenerate the documents derived from the candidate list.
    research_watchlist.write_report(watchlist_report, candidates)
    research_watchlist.write_recommendation_report(recommendation_report, candidates)
    research_watchlist.write_decision_card_json(decision_card_json, candidates)
    research_watchlist.write_decision_card_report(decision_card_report, candidates)

    # The metadata report is written before collect_checks() runs because
    # that call receives the same path as its metadata_path argument.
    hf_checks = check_research_sources.collect_huggingface_metadata_checks(timeout=timeout)
    check_research_sources.write_huggingface_metadata_report(metadata_report, hf_checks)

    source_checks = check_research_sources.collect_checks(metadata_path=metadata_report)
    source_checks.extend(check_research_sources.collect_key_source_checks())
    source_summary = check_research_sources.summarize(source_checks)

    license_violations = research_watchlist.license_policy_violations(candidates)
    live_hf_failures = [check for check in hf_checks if not check.ok]

    # Live metadata failures only block readiness when explicitly required.
    live_hf_acceptable = not require_live_hf_metadata or not live_hf_failures
    ready = bool(source_summary["ready"] and not license_violations and live_hf_acceptable)

    result: dict[str, Any] = {
        "ready": ready,
        "watchlistReport": str(watchlist_report),
        "recommendationReport": str(recommendation_report),
        "decisionCardJson": str(decision_card_json),
        "decisionCardReport": str(decision_card_report),
        "metadataReport": str(metadata_report),
        "sourceSummary": source_summary,
        "licensePolicy": {
            "ready": not license_violations,
            "violations": license_violations,
        },
        "liveHfMetadata": {
            "required": require_live_hf_metadata,
            "counts": {
                "PASS": sum(1 for check in hf_checks if check.ok),
                "FAIL": len(live_hf_failures),
            },
            "failures": [check.__dict__ for check in live_hf_failures],
        },
    }

    report_out.parent.mkdir(parents=True, exist_ok=True)
    report_out.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8")

    # Added after serialization, so the file on disk does not name itself.
    result["report"] = str(report_out)
    return result


def print_summary(result: dict[str, Any]) -> None:
    """Print a line-per-field, human-readable view of a refresh result.

    Args:
        result: A dictionary shaped like the return value of
            ``refresh_research_evidence`` (including the ``"report"`` key).
    """
    source_counts = result["sourceSummary"].get("counts", {})
    live_counts = result["liveHfMetadata"].get("counts", {})

    print(f"Research refresh: {'ready' if result['ready'] else 'not ready'}")
    print(f"Watchlist: {result['watchlistReport']}")
    print(f"Recommendation: {result['recommendationReport']}")
    print(f"Decision card JSON: {result['decisionCardJson']}")
    print(f"Decision card report: {result['decisionCardReport']}")
    print(f"Metadata: {result['metadataReport']}")
    print(f"Source checks: {source_counts}")
    print(f"License violations: {len(result['licensePolicy']['violations'])}")
    print(
        "Live HF metadata: "
        f"required={result['liveHfMetadata']['required']} "
        f"pass={live_counts.get('PASS', 0)} fail={live_counts.get('FAIL', 0)}"
    )
    print(f"Report: {result['report']}")


def main(argv: list[str] | None = None) -> None:
    """Parse command-line options, run the refresh and report the outcome.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Raises:
        SystemExit: With status 1 when the refresh result is not ready.
    """
    parser = argparse.ArgumentParser(description="Refresh Arabic OCR/TTS research evidence and decision reports.")
    parser.add_argument("--watchlist-report", type=Path, default=DEFAULT_WATCHLIST_REPORT)
    parser.add_argument("--recommendation-report", type=Path, default=DEFAULT_RECOMMENDATION_REPORT)
    parser.add_argument("--decision-card-json", type=Path, default=DEFAULT_DECISION_CARD_JSON)
    parser.add_argument("--decision-card-report", type=Path, default=DEFAULT_DECISION_CARD_REPORT)
    parser.add_argument("--metadata-report", type=Path, default=DEFAULT_METADATA_REPORT)
    parser.add_argument("--report-out", type=Path, default=DEFAULT_REPORT)
    parser.add_argument(
        "--require-live-hf-metadata",
        action="store_true",
        help="Treat failed live Hugging Face metadata fetches as refresh failures.",
    )
    parser.add_argument("--timeout", type=float, default=DEFAULT_TIMEOUT)
    parser.add_argument("--json", action="store_true")
    args = parser.parse_args(argv)

    result = refresh_research_evidence(
        watchlist_report=args.watchlist_report,
        recommendation_report=args.recommendation_report,
        decision_card_json=args.decision_card_json,
        decision_card_report=args.decision_card_report,
        metadata_report=args.metadata_report,
        report_out=args.report_out,
        require_live_hf_metadata=args.require_live_hf_metadata,
        timeout=args.timeout,
    )

    if args.json:
        print(json.dumps(result, ensure_ascii=False, indent=2))
    else:
        print_summary(result)

    # A non-zero exit status signals "not ready" to the calling process.
    if not result["ready"]:
        raise SystemExit(1)


if __name__ == "__main__":
    main()