arabic-audio-reader-worker / scripts /refresh_research_evidence.py
Syncre's picture
Deploy Arabic Audio Reader worker
2e1a095 verified
from __future__ import annotations
import argparse
import json
import sys
from pathlib import Path
from typing import Any
# Repository root: two directory levels above this file's resolved location.
ROOT_DIR = Path(__file__).resolve().parent.parent
# Put the root on sys.path (only once) so the `scripts` package import below
# resolves — presumably needed when this file is run directly as a script.
if str(ROOT_DIR) not in sys.path:
    sys.path.insert(0, str(ROOT_DIR))
from scripts import check_research_sources, research_watchlist
# Default destination of the JSON summary written by refresh_research_evidence.
DEFAULT_REPORT = ROOT_DIR / "outputs" / "research-refresh.json"
def refresh_research_evidence(
    *,
    watchlist_report: Path = ROOT_DIR / "docs" / "research-watchlist.md",
    recommendation_report: Path = ROOT_DIR / "docs" / "recommended-free-stack.md",
    decision_card_json: Path = ROOT_DIR / "docs" / "recommended-decision-card.json",
    decision_card_report: Path = ROOT_DIR / "docs" / "recommended-decision-card.md",
    metadata_report: Path = ROOT_DIR / "docs" / "huggingface-model-metadata.md",
    report_out: Path = DEFAULT_REPORT,
    require_live_hf_metadata: bool = False,
    timeout: float = 12.0,
) -> dict[str, Any]:
    """Regenerate the research reports, run the source checks, and save a summary.

    The function first asks ``research_watchlist`` to write the four
    candidate-derived reports, then collects Hugging Face metadata checks,
    writes the metadata report, runs and summarizes the source checks, and
    evaluates the license policy. The combined outcome is written to
    ``report_out`` as UTF-8 JSON.

    Args:
        watchlist_report: Path handed to ``research_watchlist.write_report``.
        recommendation_report: Path handed to
            ``research_watchlist.write_recommendation_report``.
        decision_card_json: Path handed to
            ``research_watchlist.write_decision_card_json``.
        decision_card_report: Path handed to
            ``research_watchlist.write_decision_card_report``.
        metadata_report: Path handed to
            ``check_research_sources.write_huggingface_metadata_report`` and
            then passed as ``metadata_path`` to ``collect_checks``.
        report_out: Destination of the JSON summary; missing parent
            directories are created.
        require_live_hf_metadata: When true, any Hugging Face metadata check
            whose ``ok`` attribute is falsy makes the overall result not ready.
        timeout: Forwarded unchanged to
            ``collect_huggingface_metadata_checks`` (presumably seconds —
            confirm against that helper).

    Returns:
        The summary dict. Its ``"report"`` key (the stringified ``report_out``)
        is added only after the file is written, so the JSON on disk does not
        contain it.
    """
    candidates = research_watchlist.CANDIDATES

    # Each writer takes (destination, candidates); the call order is kept fixed.
    report_writers = (
        (research_watchlist.write_report, watchlist_report),
        (research_watchlist.write_recommendation_report, recommendation_report),
        (research_watchlist.write_decision_card_json, decision_card_json),
        (research_watchlist.write_decision_card_report, decision_card_report),
    )
    for write, destination in report_writers:
        write(destination, candidates)

    hf_results = check_research_sources.collect_huggingface_metadata_checks(timeout=timeout)
    check_research_sources.write_huggingface_metadata_report(metadata_report, hf_results)

    all_source_checks = check_research_sources.collect_checks(metadata_path=metadata_report)
    all_source_checks.extend(check_research_sources.collect_key_source_checks())
    summary = check_research_sources.summarize(all_source_checks)

    violations = research_watchlist.license_policy_violations(candidates)

    failed_hf = [item for item in hf_results if not item.ok]
    passed_hf_count = sum(1 for item in hf_results if item.ok)

    # Live HF failures only block readiness when the caller demanded live data.
    live_gate_ok = not (require_live_hf_metadata and failed_hf)
    is_ready = bool(summary["ready"]) and not violations and live_gate_ok

    payload: dict[str, Any] = {
        "ready": is_ready,
        "watchlistReport": str(watchlist_report),
        "recommendationReport": str(recommendation_report),
        "decisionCardJson": str(decision_card_json),
        "decisionCardReport": str(decision_card_report),
        "metadataReport": str(metadata_report),
        "sourceSummary": summary,
        "licensePolicy": {
            "ready": not violations,
            "violations": violations,
        },
        "liveHfMetadata": {
            "required": require_live_hf_metadata,
            "counts": {
                "PASS": passed_hf_count,
                "FAIL": len(failed_hf),
            },
            "failures": [item.__dict__ for item in failed_hf],
        },
    }

    report_out.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
    report_out.write_text(serialized, encoding="utf-8")

    # Added after writing: the on-disk report does not record its own path.
    payload["report"] = str(report_out)
    return payload
def print_summary(result: dict[str, Any]) -> None:
    """Print a line-per-field, human-readable summary of a refresh result.

    Args:
        result: Mapping shaped like the return value of
            ``refresh_research_evidence``. It must provide ``ready``, the five
            report-path keys, ``sourceSummary``, ``licensePolicy``,
            ``liveHfMetadata`` and ``report``.
    """
    # A missing "counts" entry falls back to an empty mapping.
    source_counts = result["sourceSummary"].get("counts", {})
    live_counts = result["liveHfMetadata"].get("counts", {})

    status = "ready" if result["ready"] else "not ready"
    print(f"Research refresh: {status}")

    # Plain "label: value" lines, emitted in this fixed order.
    labelled_keys = (
        ("Watchlist", "watchlistReport"),
        ("Recommendation", "recommendationReport"),
        ("Decision card JSON", "decisionCardJson"),
        ("Decision card report", "decisionCardReport"),
        ("Metadata", "metadataReport"),
    )
    for label, key in labelled_keys:
        print(f"{label}: {result[key]}")

    print(f"Source checks: {source_counts}")

    violation_count = len(result["licensePolicy"]["violations"])
    print(f"License violations: {violation_count}")

    required = result["liveHfMetadata"]["required"]
    passed = live_counts.get("PASS", 0)
    failed = live_counts.get("FAIL", 0)
    print(f"Live HF metadata: required={required} pass={passed} fail={failed}")

    print(f"Report: {result['report']}")
def main() -> None:
    """Command-line entry point for the research evidence refresh.

    Parses the CLI options, runs ``refresh_research_evidence`` with them,
    prints the result either as JSON (``--json``) or as the text summary, and
    exits with status 1 when the result is not ready.

    Raises:
        SystemExit: With code 1 if ``result["ready"]`` is falsy.
    """
    docs_dir = ROOT_DIR / "docs"
    parser = argparse.ArgumentParser(description="Refresh Arabic OCR/TTS research evidence and decision reports.")

    # Path-valued options share the same shape, so they are registered in a loop.
    path_options = (
        ("--watchlist-report", docs_dir / "research-watchlist.md"),
        ("--recommendation-report", docs_dir / "recommended-free-stack.md"),
        ("--decision-card-json", docs_dir / "recommended-decision-card.json"),
        ("--decision-card-report", docs_dir / "recommended-decision-card.md"),
        ("--metadata-report", docs_dir / "huggingface-model-metadata.md"),
        ("--report-out", DEFAULT_REPORT),
    )
    for flag, default_path in path_options:
        parser.add_argument(flag, type=Path, default=default_path)

    parser.add_argument(
        "--require-live-hf-metadata",
        action="store_true",
        help="Treat failed live Hugging Face metadata fetches as refresh failures.",
    )
    parser.add_argument("--timeout", type=float, default=12.0)
    parser.add_argument("--json", action="store_true")

    # Every parsed option except --json maps 1:1 onto a keyword argument of
    # refresh_research_evidence (argparse turns dashes into underscores).
    options = dict(vars(parser.parse_args()))
    emit_json = options.pop("json")
    result = refresh_research_evidence(**options)

    if emit_json:
        print(json.dumps(result, ensure_ascii=False, indent=2))
    else:
        print_summary(result)

    if result["ready"]:
        return
    raise SystemExit(1)
# Run the CLI only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()