| from __future__ import annotations |
|
|
| import argparse |
| import json |
| import sys |
| from pathlib import Path |
| from typing import Any |
|
|
|
|
# Directory two levels above this file (presumably the repository root — confirm).
# It is put at the front of sys.path, if not already present, before the
# `from scripts import ...` line that follows, so that import can resolve
# when this file is executed directly.
ROOT_DIR = Path(__file__).resolve().parent.parent
if str(ROOT_DIR) not in sys.path:
    sys.path.insert(0, str(ROOT_DIR))
|
|
| from scripts import check_research_sources, research_watchlist |
|
|
|
|
# Default destination of the JSON summary written by refresh_research_evidence
# (also the default of the --report-out command-line option).
DEFAULT_REPORT = ROOT_DIR / "outputs" / "research-refresh.json"
|
|
|
|
def refresh_research_evidence(
    *,
    watchlist_report: Path = ROOT_DIR / "docs" / "research-watchlist.md",
    recommendation_report: Path = ROOT_DIR / "docs" / "recommended-free-stack.md",
    decision_card_json: Path = ROOT_DIR / "docs" / "recommended-decision-card.json",
    decision_card_report: Path = ROOT_DIR / "docs" / "recommended-decision-card.md",
    metadata_report: Path = ROOT_DIR / "docs" / "huggingface-model-metadata.md",
    report_out: Path = DEFAULT_REPORT,
    require_live_hf_metadata: bool = False,
    timeout: float = 12.0,
) -> dict[str, Any]:
    """Regenerate the research reports, re-run the checks, and write a JSON summary.

    The steps run in this order:

    1. Write the watchlist, recommendation, and decision-card outputs from
       ``research_watchlist.CANDIDATES``.
    2. Collect the Hugging Face metadata checks and write them to
       ``metadata_report``.
    3. Collect the source checks (given the metadata report path) plus the
       key-source checks, and summarize them.
    4. Evaluate the license policy and combine everything into ``ready``.
    5. Serialize the result to ``report_out`` (parent directories are created).

    Args:
        watchlist_report: Destination passed to ``research_watchlist.write_report``.
        recommendation_report: Destination passed to
            ``research_watchlist.write_recommendation_report``.
        decision_card_json: Destination passed to
            ``research_watchlist.write_decision_card_json``.
        decision_card_report: Destination passed to
            ``research_watchlist.write_decision_card_report``.
        metadata_report: Destination of the Hugging Face metadata report; the
            same path is then handed to ``check_research_sources.collect_checks``.
        report_out: Path of the JSON summary file.
        require_live_hf_metadata: When true, any Hugging Face check whose
            ``ok`` attribute is falsy makes the overall result not ready.
        timeout: Forwarded to ``collect_huggingface_metadata_checks``.

    Returns:
        The summary dictionary. Its ``"report"`` entry (the summary path as a
        string) is added only after the file has been written, so the file on
        disk does not contain that key.
    """
    # Each writer receives its destination and the shared candidate list.
    report_writers = (
        (research_watchlist.write_report, watchlist_report),
        (research_watchlist.write_recommendation_report, recommendation_report),
        (research_watchlist.write_decision_card_json, decision_card_json),
        (research_watchlist.write_decision_card_report, decision_card_report),
    )
    for write, destination in report_writers:
        write(destination, research_watchlist.CANDIDATES)

    # The metadata report must exist before collect_checks is pointed at it.
    hf_checks = check_research_sources.collect_huggingface_metadata_checks(timeout=timeout)
    check_research_sources.write_huggingface_metadata_report(metadata_report, hf_checks)

    all_source_checks = check_research_sources.collect_checks(metadata_path=metadata_report)
    all_source_checks.extend(check_research_sources.collect_key_source_checks())
    source_summary = check_research_sources.summarize(all_source_checks)

    violations = research_watchlist.license_policy_violations(research_watchlist.CANDIDATES)
    failed_hf = [item for item in hf_checks if not item.ok]

    license_ok = not violations
    # Live failures only count against readiness when the caller requires them.
    live_ok = not (require_live_hf_metadata and failed_hf)
    ready = bool(source_summary["ready"]) and license_ok and live_ok

    live_section = {
        "required": require_live_hf_metadata,
        "counts": {
            "PASS": sum(bool(item.ok) for item in hf_checks),
            "FAIL": len(failed_hf),
        },
        "failures": [vars(item) for item in failed_hf],
    }
    result: dict[str, Any] = {
        "ready": ready,
        "watchlistReport": str(watchlist_report),
        "recommendationReport": str(recommendation_report),
        "decisionCardJson": str(decision_card_json),
        "decisionCardReport": str(decision_card_report),
        "metadataReport": str(metadata_report),
        "sourceSummary": source_summary,
        "licensePolicy": {"ready": license_ok, "violations": violations},
        "liveHfMetadata": live_section,
    }

    report_out.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(result, ensure_ascii=False, indent=2)
    report_out.write_text(serialized, encoding="utf-8")

    # Added after serialization on purpose: the file does not reference itself.
    result["report"] = str(report_out)
    return result
|
|
|
|
def print_summary(result: dict[str, Any]) -> None:
    """Print a human-readable, line-per-item digest of a refresh result.

    Args:
        result: Dictionary shaped like the return value of
            ``refresh_research_evidence``. A missing ``"counts"`` entry in
            ``"sourceSummary"`` or ``"liveHfMetadata"`` is treated as empty.
    """
    # Resolve both count tables before printing anything, so a malformed
    # result fails without producing partial output.
    source_counts = result["sourceSummary"].get("counts", {})
    live_counts = result["liveHfMetadata"].get("counts", {})

    status = "ready" if result["ready"] else "not ready"
    print(f"Research refresh: {status}")

    labelled_paths = (
        ("Watchlist", "watchlistReport"),
        ("Recommendation", "recommendationReport"),
        ("Decision card JSON", "decisionCardJson"),
        ("Decision card report", "decisionCardReport"),
        ("Metadata", "metadataReport"),
    )
    for label, key in labelled_paths:
        print(f"{label}: {result[key]}")

    print(f"Source checks: {source_counts}")

    violation_count = len(result["licensePolicy"]["violations"])
    print(f"License violations: {violation_count}")

    required = result["liveHfMetadata"]["required"]
    passed = live_counts.get("PASS", 0)
    failed = live_counts.get("FAIL", 0)
    print(f"Live HF metadata: required={required} pass={passed} fail={failed}")

    print(f"Report: {result['report']}")
|
|
|
|
def main() -> None:
    """Command-line entry point: parse options, run the refresh, report, and exit.

    Prints the result as JSON when ``--json`` is given, otherwise as a text
    summary.

    Raises:
        SystemExit: With status 1 when the refresh result is not ready.
    """
    docs_dir = ROOT_DIR / "docs"
    parser = argparse.ArgumentParser(
        description="Refresh Arabic OCR/TTS research evidence and decision reports."
    )

    # Path-valued options, registered in the order they appear in --help.
    path_options = (
        ("--watchlist-report", docs_dir / "research-watchlist.md"),
        ("--recommendation-report", docs_dir / "recommended-free-stack.md"),
        ("--decision-card-json", docs_dir / "recommended-decision-card.json"),
        ("--decision-card-report", docs_dir / "recommended-decision-card.md"),
        ("--metadata-report", docs_dir / "huggingface-model-metadata.md"),
        ("--report-out", DEFAULT_REPORT),
    )
    for flag, default_path in path_options:
        parser.add_argument(flag, type=Path, default=default_path)

    parser.add_argument(
        "--require-live-hf-metadata",
        action="store_true",
        help="Treat failed live Hugging Face metadata fetches as refresh failures.",
    )
    parser.add_argument("--timeout", type=float, default=12.0)
    parser.add_argument("--json", action="store_true")
    options = parser.parse_args()

    result = refresh_research_evidence(
        watchlist_report=options.watchlist_report,
        recommendation_report=options.recommendation_report,
        decision_card_json=options.decision_card_json,
        decision_card_report=options.decision_card_report,
        metadata_report=options.metadata_report,
        report_out=options.report_out,
        require_live_hf_metadata=options.require_live_hf_metadata,
        timeout=options.timeout,
    )

    if not options.json:
        print_summary(result)
    else:
        print(json.dumps(result, ensure_ascii=False, indent=2))

    if result["ready"]:
        return
    # A non-zero exit status lets callers detect a failed refresh.
    raise SystemExit(1)
|
|
|
|
# Run the refresh only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|