arabic-audio-reader-worker / scripts /prove_local_readiness.py
Syncre's picture
Deploy Arabic Audio Reader worker
2e1a095 verified
from __future__ import annotations
import argparse
import json
import sys
from pathlib import Path
from typing import Any
# ROOT_DIR is the directory that contains this file's own directory. It is
# placed at the front of sys.path (once) before the `from scripts import ...`
# line that follows — presumably so the `scripts` package resolves when this
# file is executed directly rather than imported as part of the package.
ROOT_DIR = Path(__file__).resolve().parent.parent
if str(ROOT_DIR) not in sys.path:
    sys.path.insert(0, str(ROOT_DIR))
from scripts import audit_goal_readiness, check_deployment_readiness, check_research_sources, refresh_research_evidence, research_watchlist
def prove_local_readiness(
    out: Path = ROOT_DIR / "outputs" / "local-readiness.json",
    check_representative_links: bool = False,
    check_key_links: bool = False,
    check_hf_metadata: bool = False,
    hf_metadata_report: Path | None = None,
    verify_audio_pipeline: bool = False,
    voice_id: str = "espeak-ar-clear",
    audio_format: str = "wav",
    max_speech_chars: int = 240,
    refresh_research: bool = False,
    research_refresh_report: Path | None = None,
) -> dict[str, Any]:
    """Run the local readiness checks and write a combined JSON report.

    The research, license-policy, deployment and audit checks always run.
    The research refresh and the audio pipeline smoke test are opt-in.

    Args:
        out: Path of the JSON report. Its parent directory is created if
            missing and is also where default side artifacts are placed.
        check_representative_links: Forwarded to the research source checks.
        check_key_links: Forwarded to the research source checks.
        check_hf_metadata: Forwarded to the research source checks; also
            enables passing ``hf_metadata_report`` to them.
        hf_metadata_report: Path handed to the research checks when
            ``check_hf_metadata`` is set. Defaults to
            ``hf-model-metadata.md`` beside ``out``. Ignored otherwise.
        verify_audio_pipeline: Run the sample-PDF-to-audio smoke test.
        voice_id: Voice id passed to the smoke test.
        audio_format: Audio format passed to the smoke test; also used as
            the suffix of the smoke-test audio file.
        max_speech_chars: Character limit passed to the smoke test.
        refresh_research: Refresh research evidence before the checks.
        research_refresh_report: Report path for the refresh. Defaults to
            ``research-refresh.json`` beside ``out``.

    Returns:
        The report dictionary. ``ready`` is true only when every section
        that ran reports ready/ok. The returned dictionary additionally
        carries ``report`` (the path of the written file); that key is added
        after writing and is therefore not part of the file on disk.
    """
    out.parent.mkdir(parents=True, exist_ok=True)

    research_refresh: dict[str, Any] | None = None
    if refresh_research:
        research_refresh_report = research_refresh_report or out.parent / "research-refresh.json"
        research_refresh = refresh_research_evidence.refresh_research_evidence(report_out=research_refresh_report)

    if not check_hf_metadata:
        # The path is only handed to the research checks when the metadata
        # check is enabled. Dropping it here keeps the report from
        # advertising a metadata file that this run never requested.
        hf_metadata_report = None
    elif hf_metadata_report is None:
        hf_metadata_report = out.parent / "hf-model-metadata.md"

    research_checks = check_research_sources.collect_command_checks(
        check_key_links=check_key_links,
        check_representative_links=check_representative_links,
        check_hf_metadata=check_hf_metadata,
        write_hf_metadata_report=hf_metadata_report,
    )
    research = check_research_sources.summarize(research_checks)

    license_policy_violations = research_watchlist.license_policy_violations(research_watchlist.CANDIDATES)
    license_policy = {
        "ready": not license_policy_violations,
        "violations": license_policy_violations,
    }

    deployment_checks = check_deployment_readiness.collect_checks()
    deployment = check_deployment_readiness.summarize(deployment_checks)

    audit_checks = audit_goal_readiness.collect_checks()
    audit = audit_goal_readiness.summarize(audit_checks)

    pipeline: dict[str, Any] | None = None
    if verify_audio_pipeline:
        # Deliberately broad: any failure of the smoke test (including a
        # failed import of its module) is recorded in the report as
        # "not ok" instead of aborting the whole readiness run.
        try:
            from scripts.verify_pipeline import create_sample_pdf, verify_pipeline

            sample_pdf = create_sample_pdf(out.parent / "local-readiness-sample.pdf")
            audio_out = out.parent / f"local-readiness-audio.{audio_format}"
            pipeline = {
                "ok": True,
                "result": verify_pipeline(
                    sample_pdf,
                    voice_id,
                    audio_out,
                    audio_format=audio_format,
                    max_speech_chars=max_speech_chars,
                ),
            }
        except Exception as exc:
            # Some exceptions stringify to "", which would leave the report
            # without any diagnostic; fall back to the repr in that case.
            pipeline = {"ok": False, "error": str(exc) or repr(exc)}

    # Optional sections (refresh, pipeline) only count when they actually ran.
    ready = bool(
        research["ready"]
        and license_policy["ready"]
        and deployment["ready"]
        and audit["ready"]
        and (research_refresh is None or research_refresh["ready"])
        and (pipeline is None or pipeline["ok"])
    )

    result = {
        "ready": ready,
        "complete": bool(audit["complete"]),
        "notes": [
            "This proves local/repo readiness. Final completion still requires deployed Vercel site and worker reports.",
            "Run scripts/prove_live_deployment.py after the Vercel site and worker are live.",
        ],
        "research": research,
        "researchRefresh": research_refresh,
        "licensePolicy": license_policy,
        "deployment": deployment,
        "audit": audit,
        "pipeline": pipeline,
        "hfMetadataReport": str(hf_metadata_report) if hf_metadata_report else None,
    }
    out.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8")

    # Added after writing so the file on disk does not reference itself.
    result["report"] = str(out)
    return result
def print_summary(result: dict[str, Any]) -> None:
    """Print a human-readable digest of a readiness report to stdout.

    Expects the dictionary returned by ``prove_local_readiness``: the
    ``ready``, ``complete`` and ``report`` keys plus the four always-present
    sections are required; ``researchRefresh``, ``pipeline`` and
    ``hfMetadataReport`` are printed only when present and set.
    """
    readiness_label = "ready" if result["ready"] else "not ready"
    print(f"Local readiness: {readiness_label}")
    completion_label = "yes" if result["complete"] else "no, live site and worker reports still required"
    print(f"Goal complete: {completion_label}")
    print(f"Report: {result['report']}")
    print()

    # One line per mandatory section; sections without counts show "{}".
    for section_name in ("research", "licensePolicy", "deployment", "audit"):
        section = result[section_name]
        section_counts = section.get("counts", {})
        print(f"{section_name}: ready={section.get('ready')} counts={section_counts}")

    refresh = result.get("researchRefresh")
    if refresh:
        source_summary = refresh.get("sourceSummary", {})
        live_metadata = refresh.get("liveHfMetadata", {})
        source_counts = source_summary.get("counts", {})
        live_counts = live_metadata.get("counts", {})
        print(
            f"researchRefresh: ready={refresh.get('ready')} source={source_counts} "
            f"liveHf={live_counts} report={refresh.get('report')}"
        )

    pipeline = result.get("pipeline")
    if pipeline is not None:
        pipeline_ok = pipeline.get("ok")
        print(f"pipeline: ok={pipeline_ok}")
        if not pipeline_ok:
            print(f"pipeline error: {pipeline.get('error')}")

    hf_report = result.get("hfMetadataReport")
    if hf_report:
        print(f"Hugging Face metadata report: {hf_report}")
def main() -> None:
    """Command-line entry point: parse flags, run the proof, print the outcome.

    Prints either the raw JSON result (``--json``) or the text summary, and
    exits with status 1 when the result is not ready.
    """
    cli = argparse.ArgumentParser(description="Write a local readiness proof report for the Arabic audio reader.")
    add = cli.add_argument

    # Registration order is kept stable because it determines --help order.
    add("--out", type=Path, default=ROOT_DIR / "outputs" / "local-readiness.json")
    add("--check-representative-links", action="store_true", help="Fetch representative research links.")
    add("--check-key-links", action="store_true", help="Fetch exact key OCR/TTS/hosting source links.")
    add("--check-hf-metadata", action="store_true", help="Fetch Hugging Face model metadata.")
    add(
        "--hf-metadata-report",
        type=Path,
        help="Markdown output path for --check-hf-metadata. Defaults beside the readiness report.",
    )
    add("--verify-audio-pipeline", action="store_true", help="Run a short local PDF-to-audio smoke test.")
    add("--voice-id", default="espeak-ar-clear", help="Voice id for --verify-audio-pipeline.")
    add("--format", choices=["wav", "mp3"], default="wav", help="Audio format for --verify-audio-pipeline.")
    add("--max-speech-chars", type=int, default=240, help="Short smoke-test character limit.")
    add(
        "--refresh-research",
        action="store_true",
        help="Refresh research watchlist/recommendation/metadata reports before auditing local readiness.",
    )
    add(
        "--research-refresh-report",
        type=Path,
        help="JSON report path for --refresh-research. Defaults beside the readiness report.",
    )
    add("--json", action="store_true", help="Print JSON.")

    options = cli.parse_args()

    outcome = prove_local_readiness(
        out=options.out,
        check_representative_links=options.check_representative_links,
        check_key_links=options.check_key_links,
        check_hf_metadata=options.check_hf_metadata,
        hf_metadata_report=options.hf_metadata_report,
        verify_audio_pipeline=options.verify_audio_pipeline,
        voice_id=options.voice_id,
        audio_format=options.format,
        max_speech_chars=options.max_speech_chars,
        refresh_research=options.refresh_research,
        research_refresh_report=options.research_refresh_report,
    )

    if not options.json:
        print_summary(outcome)
    else:
        print(json.dumps(outcome, ensure_ascii=False, indent=2))

    # A non-ready result is reported to the shell as a failing exit status.
    if outcome["ready"]:
        return
    raise SystemExit(1)
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()