"""Generate markdown resource views and search index from catalog JSON.

Usage:
    python scripts/generate_resource_views.py
"""
from __future__ import annotations

import json
from pathlib import Path
from typing import Any
# Maps each catalog "category" value to its (output markdown path, page title).
# Iterated in main() to emit one per-category README under resources/.
CATEGORY_CONFIG = {
    "dataset": ("resources/datasets/README.md", "Datasets"),
    "model": ("resources/models/README.md", "Models"),
    "benchmark": ("resources/benchmarks/README.md", "Benchmarks"),
    "tool": ("resources/tools/README.md", "Tools"),
    "paper": ("resources/papers/README.md", "Papers"),
    "project": ("resources/projects/README.md", "Projects"),
    "code": ("resources/codes/README.md", "Code"),
}
| def _load_catalog(path: Path) -> dict[str, Any]: | |
| return json.loads(path.read_text(encoding="utf-8")) | |
| def _escape_cell(value: str) -> str: | |
| return value.replace("|", "\\|").strip() | |
| def _marker_text(markers: list[str]) -> str: | |
| return ", ".join(f"`{marker}`" for marker in markers) | |
def _resource_row(resource: dict[str, Any]) -> str:
    """Format one verified resource as a markdown table row.

    Every free-text cell (title, source label, evidence text, primary use)
    goes through ``_escape_cell`` so embedded pipes or newlines cannot break
    the table layout. Evidence markers, when present, are appended to the
    evidence link text as inline code spans.
    """
    evidence = resource["pashto_evidence"]
    evidence_text = _escape_cell(evidence["evidence_text"])
    markers = _marker_text(evidence["markers"])
    if markers:
        evidence_text = f"{evidence_text} ({markers})"
    return (
        f"| {_escape_cell(resource['title'])} | "
        # Escape the source label too — previously it was interpolated raw,
        # so a pipe in the source name would corrupt the row.
        f"[{_escape_cell(resource['source'])}]({resource['url']}) | "
        f"[{evidence_text}]({evidence['evidence_url']}) | "
        f"{_escape_cell(resource['primary_use'])} |"
    )
| def _write_markdown_table(path: Path, title: str, resources: list[dict[str, Any]]) -> None: | |
| lines = [ | |
| f"# {title}", | |
| "", | |
| "## Verified Pashto Resources", | |
| "", | |
| "| Resource | Link | Pashto Evidence | Primary Use |", | |
| "|---|---|---|---|", | |
| ] | |
| if resources: | |
| lines.extend(_resource_row(resource) for resource in resources) | |
| else: | |
| lines.append("| _None yet_ | - | - | - |") | |
| lines.extend( | |
| [ | |
| "", | |
| "## Maintenance", | |
| "- Source of truth: [../catalog/resources.json](../catalog/resources.json)", | |
| "- Validation: [../../scripts/validate_resource_catalog.py](../../scripts/validate_resource_catalog.py)", | |
| "- Generated by: [../../scripts/generate_resource_views.py](../../scripts/generate_resource_views.py)", | |
| "", | |
| ] | |
| ) | |
| path.write_text("\n".join(lines), encoding="utf-8") | |
| def _write_resources_home(path: Path, counts: dict[str, int], total_verified: int) -> None: | |
| lines = [ | |
| "# Resources", | |
| "", | |
| "Structured, Pashto-focused resource tracking lives in this folder.", | |
| "", | |
| "## Sections", | |
| f"- Datasets ({counts.get('dataset', 0)}): [datasets/README.md](datasets/README.md)", | |
| f"- Models ({counts.get('model', 0)}): [models/README.md](models/README.md)", | |
| f"- Benchmarks ({counts.get('benchmark', 0)}): [benchmarks/README.md](benchmarks/README.md)", | |
| f"- Tools ({counts.get('tool', 0)}): [tools/README.md](tools/README.md)", | |
| f"- Papers ({counts.get('paper', 0)}): [papers/README.md](papers/README.md)", | |
| f"- Projects ({counts.get('project', 0)}): [projects/README.md](projects/README.md)", | |
| f"- Code ({counts.get('code', 0)}): [codes/README.md](codes/README.md)", | |
| "", | |
| "## Machine-Readable Catalog", | |
| "- Canonical catalog: [catalog/resources.json](catalog/resources.json)", | |
| "- Candidate feed: [catalog/pending_candidates.json](catalog/pending_candidates.json)", | |
| "- Schema: [schema/resource.schema.json](schema/resource.schema.json)", | |
| "", | |
| "## Update Rule", | |
| "- Add only validated resources with explicit Pashto relevance.", | |
| "- Keep every external reference clickable using markdown links.", | |
| "- Run `python scripts/validate_resource_catalog.py` before opening a PR.", | |
| "- Run `python scripts/generate_resource_views.py` after catalog changes.", | |
| "", | |
| f"Verified resource count: `{total_verified}`", | |
| "", | |
| ] | |
| path.write_text("\n".join(lines), encoding="utf-8") | |
| def _build_search_payload(resources: list[dict[str, Any]], updated_on: str) -> dict[str, Any]: | |
| search_items: list[dict[str, Any]] = [] | |
| for resource in resources: | |
| evidence = resource["pashto_evidence"] | |
| search_items.append( | |
| { | |
| "id": resource["id"], | |
| "title": resource["title"], | |
| "url": resource["url"], | |
| "category": resource["category"], | |
| "source": resource["source"], | |
| "status": resource["status"], | |
| "summary": resource["summary"], | |
| "primary_use": resource["primary_use"], | |
| "tasks": resource.get("tasks", []), | |
| "tags": resource["tags"], | |
| "evidence_text": evidence["evidence_text"], | |
| "evidence_url": evidence["evidence_url"], | |
| "markers": evidence["markers"], | |
| } | |
| ) | |
| return { | |
| "generated_on": f"{updated_on}T00:00:00Z", | |
| "count": len(search_items), | |
| "resources": search_items, | |
| } | |
def main() -> int:
    """Regenerate all markdown views and the search index from the catalog.

    Reads resources/catalog/resources.json, writes one README per category,
    the resources home README, and docs/search/resources.json. Returns 0.
    """
    catalog = _load_catalog(Path("resources/catalog/resources.json"))
    resources: list[dict[str, Any]] = catalog.get("resources", [])
    updated_on = catalog.get("updated_on", "1970-01-01")

    verified = [item for item in resources if item.get("status") == "verified"]

    # Bucket verified entries by known category; unknown categories are dropped.
    grouped: dict[str, list[dict[str, Any]]] = {key: [] for key in CATEGORY_CONFIG}
    for item in verified:
        bucket = grouped.get(item.get("category"))
        if bucket is not None:
            bucket.append(item)

    for key, (target, heading) in CATEGORY_CONFIG.items():
        destination = Path(target)
        destination.parent.mkdir(parents=True, exist_ok=True)
        ordered = sorted(grouped[key], key=lambda entry: entry["title"].lower())
        _write_markdown_table(destination, heading, ordered)

    _write_resources_home(
        Path("resources/README.md"),
        {key: len(items) for key, items in grouped.items()},
        len(verified),
    )

    # The search index covers every catalog entry, not just verified ones.
    index_path = Path("docs/search/resources.json")
    index_path.parent.mkdir(parents=True, exist_ok=True)
    index_path.write_text(
        json.dumps(_build_search_payload(resources, updated_on), ensure_ascii=False, indent=2) + "\n",
        encoding="utf-8",
    )
    print(
        "Generated resources markdown and search index: "
        f"{len(verified)} verified resources, {len(resources)} total resources"
    )
    return 0
# Script entry point: exit with main()'s return code when run directly.
if __name__ == "__main__":
    raise SystemExit(main())