Spaces:

Syncre
/

arabic-audio-reader-worker

Running

App Files Files Community

arabic-audio-reader-worker / scripts /prove_local_readiness.py

Syncre

Deploy Arabic Audio Reader worker

2e1a095 verified 1 day ago

raw

history blame contribute delete

7.72 kB

	from __future__ import annotations

	import argparse
	import json
	import sys
	from pathlib import Path
	from typing import Any

	# Directory two levels above this file (the parent of the folder holding this script).
	ROOT_DIR = Path(__file__).resolve().parent.parent
	# Put that directory on sys.path so the `from scripts import ...` line below
	# resolves when this file is executed directly; skip if it is already listed.
	if str(ROOT_DIR) not in sys.path:
	    sys.path.insert(0, str(ROOT_DIR))

	from scripts import audit_goal_readiness, check_deployment_readiness, check_research_sources, refresh_research_evidence, research_watchlist

	def prove_local_readiness(
	    out: Path = ROOT_DIR / "outputs" / "local-readiness.json",
	    check_representative_links: bool = False,
	    check_key_links: bool = False,
	    check_hf_metadata: bool = False,
	    hf_metadata_report: Path | None = None,
	    verify_audio_pipeline: bool = False,
	    voice_id: str = "espeak-ar-clear",
	    audio_format: str = "wav",
	    max_speech_chars: int = 240,
	    refresh_research: bool = False,
	    research_refresh_report: Path | None = None,
	) -> dict[str, Any]:
	    """Run the local readiness checks and write a JSON proof report to *out*.

	    The report combines the research-source, license-policy, deployment and
	    goal-audit summaries with the optional research refresh and audio
	    pipeline smoke test. The overall ``"ready"`` flag is true only when every
	    section that ran reports ready (or ``ok`` for the pipeline).

	    Args:
	        out: Path of the JSON report; its parent directory is created if
	            missing and also receives the auxiliary files written here.
	        check_representative_links: Forwarded to
	            ``check_research_sources.collect_command_checks``.
	        check_key_links: Forwarded to ``collect_command_checks``.
	        check_hf_metadata: Forwarded to ``collect_command_checks``; also
	            enables the Hugging Face metadata report path.
	        hf_metadata_report: Report path used when ``check_hf_metadata`` is
	            true. Defaults to ``hf-model-metadata.md`` beside *out*. Ignored
	            when ``check_hf_metadata`` is false.
	        verify_audio_pipeline: When true, create a sample PDF beside *out*
	            and run ``scripts.verify_pipeline.verify_pipeline`` on it.
	        voice_id: Voice id passed to ``verify_pipeline``.
	        audio_format: Audio format passed to ``verify_pipeline``; also used
	            as the output file extension.
	        max_speech_chars: Character limit passed to ``verify_pipeline``.
	        refresh_research: When true, call
	            ``refresh_research_evidence.refresh_research_evidence`` first.
	        research_refresh_report: Report path for the refresh. Defaults to
	            ``research-refresh.json`` beside *out*.

	    Returns:
	        The result dict that was written to *out*, plus a ``"report"`` key
	        holding ``str(out)``. That key is added after the file is written,
	        so the file on disk does not contain it.
	    """
	    out.parent.mkdir(parents=True, exist_ok=True)

	    research_refresh: dict[str, Any] | None = None
	    if refresh_research:
	        research_refresh_report = research_refresh_report or out.parent / "research-refresh.json"
	        research_refresh = refresh_research_evidence.refresh_research_evidence(report_out=research_refresh_report)

	    # Track a metadata report path only when the metadata check really runs,
	    # so the result never advertises a file that this run did not request.
	    hf_report_path: Path | None = None
	    if check_hf_metadata:
	        hf_report_path = (
	            hf_metadata_report
	            if hf_metadata_report is not None
	            else out.parent / "hf-model-metadata.md"
	        )
	    research_checks = check_research_sources.collect_command_checks(
	        check_key_links=check_key_links,
	        check_representative_links=check_representative_links,
	        check_hf_metadata=check_hf_metadata,
	        write_hf_metadata_report=hf_report_path,
	    )
	    research = check_research_sources.summarize(research_checks)
	    license_policy_violations = research_watchlist.license_policy_violations(research_watchlist.CANDIDATES)
	    license_policy = {
	        "ready": not license_policy_violations,
	        "violations": license_policy_violations,
	    }

	    deployment_checks = check_deployment_readiness.collect_checks()
	    deployment = check_deployment_readiness.summarize(deployment_checks)

	    audit_checks = audit_goal_readiness.collect_checks()
	    audit = audit_goal_readiness.summarize(audit_checks)

	    pipeline: dict[str, Any] | None = None
	    if verify_audio_pipeline:
	        # Best-effort smoke test: the import sits inside the try so that a
	        # failing import is recorded as a pipeline failure like any other
	        # error instead of aborting the whole report.
	        try:
	            from scripts.verify_pipeline import create_sample_pdf, verify_pipeline

	            sample_pdf = create_sample_pdf(out.parent / "local-readiness-sample.pdf")
	            audio_out = out.parent / f"local-readiness-audio.{audio_format}"
	            pipeline = {
	                "ok": True,
	                "result": verify_pipeline(
	                    sample_pdf,
	                    voice_id,
	                    audio_out,
	                    audio_format=audio_format,
	                    max_speech_chars=max_speech_chars,
	                ),
	            }
	        except Exception as exc:
	            pipeline = {"ok": False, "error": str(exc)}

	    # Optional sections (refresh, pipeline) only count when they actually ran.
	    ready = bool(
	        research["ready"]
	        and license_policy["ready"]
	        and deployment["ready"]
	        and audit["ready"]
	        and (research_refresh is None or research_refresh["ready"])
	        and (pipeline is None or pipeline["ok"])
	    )
	    result = {
	        "ready": ready,
	        "complete": bool(audit["complete"]),
	        "notes": [
	            "This proves local/repo readiness. Final completion still requires deployed Vercel site and worker reports.",
	            "Run scripts/prove_live_deployment.py after the Vercel site and worker are live.",
	        ],
	        "research": research,
	        "researchRefresh": research_refresh,
	        "licensePolicy": license_policy,
	        "deployment": deployment,
	        "audit": audit,
	        "pipeline": pipeline,
	        "hfMetadataReport": str(hf_report_path) if hf_report_path is not None else None,
	    }
	    out.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8")
	    # Added after writing: the in-memory result knows where the file went.
	    result["report"] = str(out)
	    return result


	def print_summary(result: dict[str, Any]) -> None:
	    """Print a human-readable summary of a readiness result to stdout.

	    Args:
	        result: Readiness result dict. It must contain ``"ready"``,
	            ``"complete"``, ``"report"`` and the four section summaries
	            (``"research"``, ``"licensePolicy"``, ``"deployment"``,
	            ``"audit"``). ``"researchRefresh"``, ``"pipeline"`` and
	            ``"hfMetadataReport"`` are optional and printed only when set.

	    Raises:
	        KeyError: If one of the required keys is missing.
	    """
	    readiness_label = "ready" if result["ready"] else "not ready"
	    completion_label = "yes" if result["complete"] else "no, live site and worker reports still required"
	    print(f"Local readiness: {readiness_label}")
	    print(f"Goal complete: {completion_label}")
	    print(f"Report: {result['report']}")
	    print()

	    # One line per always-present section; a section without "counts"
	    # prints an empty dict.
	    for section in ["research", "licensePolicy", "deployment", "audit"]:
	        summary = result[section]
	        counts = summary.get("counts", {})
	        print(f"{section}: ready={summary.get('ready')} counts={counts}")

	    refresh = result.get("researchRefresh")
	    if refresh:
	        source_counts = refresh.get("sourceSummary", {}).get("counts", {})
	        live_counts = refresh.get("liveHfMetadata", {}).get("counts", {})
	        print(
	            "researchRefresh: "
	            f"ready={refresh.get('ready')} source={source_counts} "
	            f"liveHf={live_counts} report={refresh.get('report')}"
	        )

	    pipeline = result.get("pipeline")
	    if pipeline is not None:
	        print(f"pipeline: ok={pipeline.get('ok')}")
	        if not pipeline.get("ok"):
	            print(f"pipeline error: {pipeline.get('error')}")

	    if result.get("hfMetadataReport"):
	        print(f"Hugging Face metadata report: {result['hfMetadataReport']}")


	def main(argv: list[str] | None = None) -> None:
	    """Command-line entry point: run the readiness proof and report it.

	    Args:
	        argv: Arguments to parse instead of ``sys.argv[1:]``. ``None`` (the
	            default) keeps the normal command-line behavior.

	    Raises:
	        SystemExit: With status 1 when the result is not ready. argparse may
	            also exit on invalid arguments or ``--help``.
	    """
	    parser = argparse.ArgumentParser(description="Write a local readiness proof report for the Arabic audio reader.")
	    parser.add_argument("--out", type=Path, default=ROOT_DIR / "outputs" / "local-readiness.json")
	    parser.add_argument("--check-representative-links", action="store_true", help="Fetch representative research links.")
	    parser.add_argument("--check-key-links", action="store_true", help="Fetch exact key OCR/TTS/hosting source links.")
	    parser.add_argument("--check-hf-metadata", action="store_true", help="Fetch Hugging Face model metadata.")
	    parser.add_argument(
	        "--hf-metadata-report",
	        type=Path,
	        help="Markdown output path for --check-hf-metadata. Defaults beside the readiness report.",
	    )
	    parser.add_argument("--verify-audio-pipeline", action="store_true", help="Run a short local PDF-to-audio smoke test.")
	    parser.add_argument("--voice-id", default="espeak-ar-clear", help="Voice id for --verify-audio-pipeline.")
	    parser.add_argument("--format", choices=["wav", "mp3"], default="wav", help="Audio format for --verify-audio-pipeline.")
	    parser.add_argument("--max-speech-chars", type=int, default=240, help="Short smoke-test character limit.")
	    parser.add_argument("--refresh-research", action="store_true", help="Refresh research watchlist/recommendation/metadata reports before auditing local readiness.")
	    parser.add_argument("--research-refresh-report", type=Path, help="JSON report path for --refresh-research. Defaults beside the readiness report.")
	    parser.add_argument("--json", action="store_true", help="Print JSON.")
	    # parse_args(None) reads sys.argv[1:], so the default call is unchanged.
	    args = parser.parse_args(argv)

	    result = prove_local_readiness(
	        out=args.out,
	        check_representative_links=args.check_representative_links,
	        check_key_links=args.check_key_links,
	        check_hf_metadata=args.check_hf_metadata,
	        hf_metadata_report=args.hf_metadata_report,
	        verify_audio_pipeline=args.verify_audio_pipeline,
	        voice_id=args.voice_id,
	        audio_format=args.format,
	        max_speech_chars=args.max_speech_chars,
	        refresh_research=args.refresh_research,
	        research_refresh_report=args.research_refresh_report,
	    )
	    if args.json:
	        print(json.dumps(result, ensure_ascii=False, indent=2))
	    else:
	        print_summary(result)
	    # The exit status reflects readiness for both output modes.
	    if not result["ready"]:
	        raise SystemExit(1)


	# Run the CLI only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()