"""Write a local readiness proof report for the Arabic audio reader.

The script gathers the research-source, license-policy, deployment and goal
audit summaries (plus an optional research refresh and an optional
PDF-to-audio smoke test), writes them to one JSON report and exits with
status 1 when the combined result is not ready.
"""

from __future__ import annotations

import argparse
import json
import sys
from pathlib import Path
from typing import Any

ROOT_DIR = Path(__file__).resolve().parent.parent
# Put the repository root on sys.path so the ``scripts`` package below can be
# imported when this file is executed directly.
if str(ROOT_DIR) not in sys.path:
    sys.path.insert(0, str(ROOT_DIR))

from scripts import (  # noqa: E402  (must follow the sys.path adjustment)
    audit_goal_readiness,
    check_deployment_readiness,
    check_research_sources,
    refresh_research_evidence,
    research_watchlist,
)


def _run_audio_smoke_test(
    out_dir: Path,
    voice_id: str,
    audio_format: str,
    max_speech_chars: int,
) -> dict[str, Any]:
    """Run the PDF-to-audio smoke test and describe the outcome without raising.

    Returns ``{"ok": True, "result": ...}`` on success and
    ``{"ok": False, "error": ...}`` when anything in the pipeline fails.
    """
    try:
        # Imported inside the try block so that a failing import is recorded
        # as a pipeline failure instead of aborting the whole report.
        from scripts.verify_pipeline import create_sample_pdf, verify_pipeline

        sample_pdf = create_sample_pdf(out_dir / "local-readiness-sample.pdf")
        audio_out = out_dir / f"local-readiness-audio.{audio_format}"
        return {
            "ok": True,
            "result": verify_pipeline(
                sample_pdf,
                voice_id,
                audio_out,
                audio_format=audio_format,
                max_speech_chars=max_speech_chars,
            ),
        }
    except Exception as exc:  # noqa: BLE001 - deliberate best-effort boundary
        # Exceptions raised without a message stringify to "", which would
        # leave the report with a blank error; fall back to the repr then.
        return {"ok": False, "error": str(exc) or repr(exc)}


def prove_local_readiness(
    out: Path = ROOT_DIR / "outputs" / "local-readiness.json",
    check_representative_links: bool = False,
    check_key_links: bool = False,
    check_hf_metadata: bool = False,
    hf_metadata_report: Path | None = None,
    verify_audio_pipeline: bool = False,
    voice_id: str = "espeak-ar-clear",
    audio_format: str = "wav",
    max_speech_chars: int = 240,
    refresh_research: bool = False,
    research_refresh_report: Path | None = None,
) -> dict[str, Any]:
    """Collect every local readiness check and write the combined JSON report.

    Args:
        out: Path of the JSON report; its parent directory is created if
            missing and is also where default side reports are placed.
        check_representative_links: Forwarded to the research source checks.
        check_key_links: Forwarded to the research source checks.
        check_hf_metadata: Forwarded to the research source checks; also
            enables writing the Hugging Face metadata report.
        hf_metadata_report: Markdown path for the metadata report. Only used
            when ``check_hf_metadata`` is true; defaults to
            ``hf-model-metadata.md`` beside ``out``.
        verify_audio_pipeline: Run the PDF-to-audio smoke test.
        voice_id: Voice id passed to the smoke test.
        audio_format: Audio format (and file extension) for the smoke test.
        max_speech_chars: Character limit passed to the smoke test.
        refresh_research: Refresh the research evidence before the checks.
        research_refresh_report: JSON path for the refresh report; defaults
            to ``research-refresh.json`` beside ``out``.

    Returns:
        The report dictionary that was written to ``out``, with an extra
        ``"report"`` key holding the report path as a string.
    """
    out.parent.mkdir(parents=True, exist_ok=True)

    research_refresh: dict[str, Any] | None = None
    if refresh_research:
        research_refresh_report = research_refresh_report or out.parent / "research-refresh.json"
        research_refresh = refresh_research_evidence.refresh_research_evidence(
            report_out=research_refresh_report
        )

    if check_hf_metadata:
        if hf_metadata_report is None:
            hf_metadata_report = out.parent / "hf-model-metadata.md"
    else:
        # No metadata report is requested in this run, so do not advertise a
        # path for one in the result even if the caller supplied it.
        hf_metadata_report = None

    research_checks = check_research_sources.collect_command_checks(
        check_key_links=check_key_links,
        check_representative_links=check_representative_links,
        check_hf_metadata=check_hf_metadata,
        write_hf_metadata_report=hf_metadata_report,
    )
    research = check_research_sources.summarize(research_checks)

    license_policy_violations = research_watchlist.license_policy_violations(research_watchlist.CANDIDATES)
    license_policy = {
        "ready": not license_policy_violations,
        "violations": license_policy_violations,
    }

    deployment_checks = check_deployment_readiness.collect_checks()
    deployment = check_deployment_readiness.summarize(deployment_checks)

    audit_checks = audit_goal_readiness.collect_checks()
    audit = audit_goal_readiness.summarize(audit_checks)

    pipeline: dict[str, Any] | None = None
    if verify_audio_pipeline:
        pipeline = _run_audio_smoke_test(out.parent, voice_id, audio_format, max_speech_chars)

    # Optional stages (research refresh, audio pipeline) only count against
    # readiness when they were actually run.
    ready = bool(
        research["ready"]
        and license_policy["ready"]
        and deployment["ready"]
        and audit["ready"]
        and (research_refresh is None or research_refresh["ready"])
        and (pipeline is None or pipeline["ok"])
    )

    result = {
        "ready": ready,
        "complete": bool(audit["complete"]),
        "notes": [
            "This proves local/repo readiness. Final completion still requires deployed Vercel site and worker reports.",
            "Run scripts/prove_live_deployment.py after the Vercel site and worker are live.",
        ],
        "research": research,
        "researchRefresh": research_refresh,
        "licensePolicy": license_policy,
        "deployment": deployment,
        "audit": audit,
        "pipeline": pipeline,
        "hfMetadataReport": str(hf_metadata_report) if hf_metadata_report is not None else None,
    }
    out.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8")
    # Added after writing, so the report file itself does not contain its own path.
    result["report"] = str(out)
    return result


def print_summary(result: dict[str, Any]) -> None:
    """Print a human-readable summary of a ``prove_local_readiness`` result."""
    print(f"Local readiness: {'ready' if result['ready'] else 'not ready'}")
    print(f"Goal complete: {'yes' if result['complete'] else 'no, live site and worker reports still required'}")
    print(f"Report: {result['report']}")
    print()

    for section in ["research", "licensePolicy", "deployment", "audit"]:
        summary = result[section]
        counts = summary.get("counts", {})
        print(f"{section}: ready={summary.get('ready')} counts={counts}")

    # The remaining sections are optional and are printed only when present.
    if result.get("researchRefresh"):
        refresh = result["researchRefresh"]
        source_counts = refresh.get("sourceSummary", {}).get("counts", {})
        live_counts = refresh.get("liveHfMetadata", {}).get("counts", {})
        print(
            "researchRefresh: "
            f"ready={refresh.get('ready')} source={source_counts} "
            f"liveHf={live_counts} report={refresh.get('report')}"
        )

    pipeline = result.get("pipeline")
    if pipeline is not None:
        print(f"pipeline: ok={pipeline.get('ok')}")
        if not pipeline.get("ok"):
            print(f"pipeline error: {pipeline.get('error')}")

    if result.get("hfMetadataReport"):
        print(f"Hugging Face metadata report: {result['hfMetadataReport']}")


def main() -> None:
    """Parse command-line options, write the report and exit 1 when not ready."""
    parser = argparse.ArgumentParser(description="Write a local readiness proof report for the Arabic audio reader.")
    parser.add_argument("--out", type=Path, default=ROOT_DIR / "outputs" / "local-readiness.json")
    parser.add_argument("--check-representative-links", action="store_true", help="Fetch representative research links.")
    parser.add_argument("--check-key-links", action="store_true", help="Fetch exact key OCR/TTS/hosting source links.")
    parser.add_argument("--check-hf-metadata", action="store_true", help="Fetch Hugging Face model metadata.")
    parser.add_argument(
        "--hf-metadata-report",
        type=Path,
        help="Markdown output path for --check-hf-metadata. Defaults beside the readiness report.",
    )
    parser.add_argument("--verify-audio-pipeline", action="store_true", help="Run a short local PDF-to-audio smoke test.")
    parser.add_argument("--voice-id", default="espeak-ar-clear", help="Voice id for --verify-audio-pipeline.")
    parser.add_argument("--format", choices=["wav", "mp3"], default="wav", help="Audio format for --verify-audio-pipeline.")
    parser.add_argument("--max-speech-chars", type=int, default=240, help="Short smoke-test character limit.")
    parser.add_argument(
        "--refresh-research",
        action="store_true",
        help="Refresh research watchlist/recommendation/metadata reports before auditing local readiness.",
    )
    parser.add_argument(
        "--research-refresh-report",
        type=Path,
        help="JSON report path for --refresh-research. Defaults beside the readiness report.",
    )
    parser.add_argument("--json", action="store_true", help="Print JSON.")
    args = parser.parse_args()

    result = prove_local_readiness(
        out=args.out,
        check_representative_links=args.check_representative_links,
        check_key_links=args.check_key_links,
        check_hf_metadata=args.check_hf_metadata,
        hf_metadata_report=args.hf_metadata_report,
        verify_audio_pipeline=args.verify_audio_pipeline,
        voice_id=args.voice_id,
        audio_format=args.format,
        max_speech_chars=args.max_speech_chars,
        refresh_research=args.refresh_research,
        research_refresh_report=args.research_refresh_report,
    )

    if args.json:
        print(json.dumps(result, ensure_ascii=False, indent=2))
    else:
        print_summary(result)

    # A non-zero exit status lets callers such as CI detect "not ready".
    if not result["ready"]:
        raise SystemExit(1)


if __name__ == "__main__":
    main()