"""Lookup helpers for the source-level bias registry (``source_bias.json``)."""

import json
from functools import lru_cache
from pathlib import Path
from typing import Any, Optional

# <parent of this file's directory>/data/source_bias.json
REGISTRY_PATH = Path(__file__).resolve().parents[1] / "data" / "source_bias.json"


@lru_cache(maxsize=1)
def load_source_registry() -> dict[str, Any]:
    """Parse and return the registry JSON stored at ``REGISTRY_PATH``.

    The parsed object is cached, so the file is read at most once per process
    and every caller receives the same object; treat it as read-only.

    Raises:
        OSError: If the registry file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with REGISTRY_PATH.open("r", encoding="utf-8") as f:
        return json.load(f)


def normalize_source_name(source: str) -> str:
    """Return *source* stripped, lower-cased, with whitespace runs collapsed.

    A falsy value (``None`` or ``""``) normalizes to the empty string.
    """
    return " ".join((source or "").strip().lower().split())


def _resolve_source(
    source: str,
    sources: dict[str, Any],
    aliases: dict[str, str],
) -> Optional[tuple[str, dict[str, Any]]]:
    """Return ``(canonical_name, record)`` for *source*, or ``None`` if unmatched."""
    normalized = normalize_source_name(source)

    # Primary lookup: alias table keyed by normalized name, else the raw name.
    canonical = aliases.get(normalized, source)
    record = sources.get(canonical)
    if record is not None:
        return canonical, record

    # Fallback: the raw name may differ from a registry key only by case or
    # whitespace (e.g. "  reuters " vs "Reuters"), so compare normalized forms.
    # This linear scan only runs when the primary lookup has already missed.
    if normalized:
        for key, candidate in sources.items():
            if candidate is not None and normalize_source_name(key) == normalized:
                return key, candidate
    return None


def get_source_record(
    source: str,
    *,
    registry: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
    """Return the registry record for *source* as a new dict.

    Args:
        source: Source name as it appears in the input data; may be empty or
            ``None``.
        registry: Optional already-parsed registry with ``"sources"`` and
            ``"aliases"`` mappings. When omitted, ``load_source_registry()``
            is used.

    Returns:
        ``{"name": <canonical name>, **record}`` when a match is found (a
        ``"name"`` key inside the record takes precedence). Otherwise a
        placeholder record with ``bias`` set to ``"Unknown"`` and
        ``provenance`` set to ``"unmatched"``.
    """
    if registry is None:
        registry = load_source_registry()
    sources = registry.get("sources", {})
    aliases = registry.get("aliases", {})

    match = _resolve_source(source, sources, aliases)
    if match is None:
        return {
            "name": source or "Unknown",
            "bias": "Unknown",
            "provenance": "unmatched",
            "source_url": None,
            "article_count": None,
            "label_counts": None,
            "notes": "No source-level registry match found.",
        }

    canonical, record = match
    return {
        "name": canonical,
        **record,
    }


def get_source_bias(
    source: str,
    *,
    registry: Optional[dict[str, Any]] = None,
) -> str:
    """Return the bias label for *source*, or ``"Unknown"`` when unavailable.

    Args:
        source: Source name to look up; may be empty or ``None``.
        registry: Optional already-parsed registry, forwarded to
            ``get_source_record``.
    """
    bias = get_source_record(source, registry=registry).get("bias")
    # A null bias stored in the registry must not surface as the string "None".
    return "Unknown" if bias is None else str(bias)