File size: 1,248 Bytes
208266a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import json
from functools import lru_cache
from pathlib import Path
from typing import Any


# Location of the source-bias registry JSON: ``data/source_bias.json`` under
# the directory one level above the directory that contains this file
# (``parents[0]`` is this file's directory, ``parents[1]`` is its parent).
REGISTRY_PATH = Path(__file__).resolve().parents[1] / "data" / "source_bias.json"


@lru_cache(maxsize=1)
def load_source_registry() -> dict[str, Any]:
    """Read and parse the registry JSON file at ``REGISTRY_PATH``.

    The result is memoized by ``lru_cache``, so after the first successful
    call the file is not read again and every caller receives the same
    parsed object (until the cache is cleared).

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the file cannot be opened or read.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    raw_text = REGISTRY_PATH.read_text(encoding="utf-8")
    return json.loads(raw_text)


def normalize_source_name(source: str) -> str:
    """Return *source* lower-cased with whitespace collapsed.

    Leading and trailing whitespace is removed and every internal run of
    whitespace becomes a single space. A falsy value (``None`` or ``""``)
    yields the empty string.
    """
    if not source:
        return ""
    words = source.strip().lower().split()
    return " ".join(words)


def get_source_record(source: str) -> dict[str, Any]:
    """Return the registry record for *source*, or a placeholder if unmatched.

    Lookup order:

    1. The normalized name is looked up in the registry's ``"aliases"``
       mapping; if present, the alias target is used as the canonical key.
    2. Otherwise *source* itself is used as the key into ``"sources"``.
    3. If that still finds nothing, the registry keys are compared to the
       input in normalized form (case-insensitive, whitespace-collapsed),
       so that e.g. ``"  cnn "`` can match a ``"CNN"`` entry even when no
       alias is defined for it.

    Args:
        source: Source name as it appears in the input data. May be empty
            or ``None``.

    Returns:
        A new dict. On a match it holds ``"name"`` (the canonical registry
        key) followed by the record's own fields; a ``"name"`` field inside
        the record takes precedence. On a miss it is a placeholder with
        ``"bias": "Unknown"`` and ``"provenance": "unmatched"``.
    """
    registry = load_source_registry()
    sources = registry.get("sources", {})
    aliases = registry.get("aliases", {})

    normalized = normalize_source_name(source)
    canonical = aliases.get(normalized, source)
    record = sources.get(canonical)

    if record is None and normalized:
        # The direct lookup is exact-match only. Retry by comparing
        # normalized forms so case/whitespace differences do not cause a
        # miss. Runs only after the exact lookup fails, so every previously
        # matching input resolves exactly as before.
        for key, candidate in sources.items():
            if candidate is not None and normalize_source_name(key) == normalized:
                canonical, record = key, candidate
                break

    if record is None:
        return {
            "name": source or "Unknown",
            "bias": "Unknown",
            "provenance": "unmatched",
            "source_url": None,
            "article_count": None,
            "label_counts": None,
            "notes": "No source-level registry match found.",
        }

    return {
        "name": canonical,
        **record,
    }


def get_source_bias(source: str) -> str:
    """Return the bias label for *source* as a string.

    The label is the ``"bias"`` field of the record returned by
    ``get_source_record``; ``"Unknown"`` is used when that field is absent.
    """
    record = get_source_record(source)
    bias_label = record.get("bias", "Unknown")
    return str(bias_label)