| from scripts.validate_resource_catalog import validate_catalog | |
def _minimal_catalog() -> dict:
    """Build a fresh catalog dict containing a single dataset resource.

    Every call constructs new dict and list objects, so callers may mutate
    the result without affecting catalogs returned by other calls.
    """
    evidence = {
        "evidence_text": "Mentions Pashto in title.",
        "evidence_url": "https://example.org/dataset",
        "markers": ["Pashto"],
    }
    resource = {
        "id": "dataset-example",
        "title": "Example Dataset",
        "url": "https://example.org/dataset",
        "category": "dataset",
        "source": "other",
        "status": "verified",
        "summary": "Useful Pashto example dataset for testing the validator.",
        "primary_use": "Testing",
        "pashto_evidence": evidence,
        "tags": ["pashto", "test"],
    }
    return {
        "version": "1.0.0",
        "updated_on": "2026-02-15",
        "resources": [resource],
    }
def test_validate_catalog_passes_for_minimal_valid_catalog() -> None:
    """The unmodified minimal catalog is expected to yield an empty error list."""
    catalog = _minimal_catalog()
    assert validate_catalog(catalog) == []
def test_validate_catalog_fails_for_duplicate_ids() -> None:
    """Two resources sharing one id are expected to yield a duplicate-id error."""
    catalog = _minimal_catalog()
    entries = catalog["resources"]
    # Append a shallow copy of the first entry so the same "id" appears twice.
    entries.append(entries[0].copy())
    problems = validate_catalog(catalog)
    assert any("duplicate resource id" in message for message in problems)
def test_validate_catalog_fails_for_invalid_evidence_url() -> None:
    """A non-URL value in pashto_evidence.evidence_url is expected to be reported."""
    catalog = _minimal_catalog()
    evidence = catalog["resources"][0]["pashto_evidence"]
    evidence["evidence_url"] = "not-a-url"
    problems = validate_catalog(catalog)
    # Only checks that some error message mentions the offending field name.
    assert any("evidence_url" in message for message in problems)
def test_validate_catalog_fails_for_non_pashto_centric_model() -> None:
    """A model resource whose title, evidence text and markers never mention
    Pashto is expected to yield a "must be Pashto-centric" error.
    """
    catalog = _minimal_catalog()
    resource = catalog["resources"][0]
    # Turn the dataset fixture into a generic multilingual model entry.
    resource.update(
        {
            "category": "model",
            "title": "Generic Multilingual Model",
            "url": "https://example.org/model",
        }
    )
    # Replace the evidence with values that contain no Pashto reference.
    resource["pashto_evidence"].update(
        {
            "evidence_text": "Language support listed in docs.",
            "evidence_url": "https://example.org/model-docs",
            "markers": ["multilingual"],
        }
    )
    problems = validate_catalog(catalog)
    assert any("must be Pashto-centric" in message for message in problems)
def test_validate_catalog_allows_pashto_centric_model() -> None:
    """A model resource with "Pashto" in its title, keeping the fixture's
    Pashto evidence, is expected to validate without errors.
    """
    catalog = _minimal_catalog()
    resource = catalog["resources"][0]
    resource.update(
        {
            "category": "model",
            "title": "Pashto ASR Model",
            "url": "https://example.org/pashto-model",
        }
    )
    assert validate_catalog(catalog) == []
def test_validate_catalog_allows_multilingual_model_with_pashto_evidence() -> None:
    """A model with a generic multilingual title is expected to validate
    cleanly when its evidence text and markers explicitly name Pashto.
    """
    catalog = _minimal_catalog()
    resource = catalog["resources"][0]
    # The title carries no Pashto reference.
    resource.update(
        {
            "category": "model",
            "title": "Generic Multilingual Model",
            "url": "https://example.org/model",
        }
    )
    # The evidence text and markers do name Pashto.
    resource["pashto_evidence"].update(
        {
            "evidence_text": "Language table explicitly includes Pashto.",
            "evidence_url": "https://example.org/model/languages",
            "markers": ["Pashto", "ps"],
        }
    )
    assert validate_catalog(catalog) == []