newslens / src /analysis /source_bias.py
Jitender20's picture
Add NewsLens Streamlit app
208266a
import json
from functools import lru_cache
from pathlib import Path
from typing import Any
# Location of the bundled source-bias registry: the "data" directory that sits
# one level above this module's own directory, file "source_bias.json".
REGISTRY_PATH = Path(__file__).resolve().parents[1].joinpath("data", "source_bias.json")
@lru_cache(maxsize=1)
def load_source_registry() -> dict[str, Any]:
    """Parse the JSON registry stored at ``REGISTRY_PATH`` and return it.

    The function takes no arguments and is wrapped in ``lru_cache``, so a
    successful load is memoized: later calls return the same parsed object
    without touching the file again.

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the registry file cannot be opened or read.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    raw_text = REGISTRY_PATH.read_text(encoding="utf-8")
    return json.loads(raw_text)
def normalize_source_name(source: str) -> str:
    """Return *source* lower-cased with whitespace trimmed and collapsed.

    Leading and trailing whitespace is removed and every internal run of
    whitespace becomes a single space. A falsy value (``None`` or ``""``)
    yields the empty string.
    """
    text = source if source else ""
    tokens = text.strip().lower().split()
    return " ".join(tokens)
def get_source_record(source: str) -> dict[str, Any]:
    """Look up the registry record for a news source name.

    Resolution order:

    1. The normalized name is looked up in the registry's ``"aliases"``
       mapping; a hit gives the canonical key into ``"sources"``.
    2. Without an alias, the name is tried verbatim as a ``"sources"`` key.
    3. If that still misses, the normalized name is compared against the
       normalized form of every ``"sources"`` key, so differences in case
       or surrounding/internal whitespace no longer prevent a match.

    Args:
        source: Source name as it appears on an article. May be empty or
            ``None``.

    Returns:
        A new dict with ``"name"`` set to the matched registry key followed
        by the fields of the registry record (a ``"name"`` field inside the
        record takes precedence). When nothing matches, a placeholder dict
        with ``"bias": "Unknown"`` and ``"provenance": "unmatched"`` is
        returned instead.
    """
    registry = load_source_registry()
    sources = registry.get("sources", {})
    aliases = registry.get("aliases", {})

    normalized = normalize_source_name(source)
    canonical = aliases.get(normalized, source)
    record = sources.get(canonical)

    if record is None and normalized:
        # The direct lookup uses the raw string, so "bbc news" or " BBC News "
        # would miss a "BBC News" entry. Retry on normalized keys; this path
        # only runs after the original lookup failed, so previously matching
        # inputs resolve exactly as before.
        for key, candidate in sources.items():
            if candidate is not None and normalize_source_name(key) == normalized:
                canonical, record = key, candidate
                break

    if record is None:
        return {
            "name": source or "Unknown",
            "bias": "Unknown",
            "provenance": "unmatched",
            "source_url": None,
            "article_count": None,
            "label_counts": None,
            "notes": "No source-level registry match found.",
        }

    return {
        "name": canonical,
        **record,
    }
def get_source_bias(source: str) -> str:
    """Return the bias label recorded for *source* as a string.

    Reads the ``"bias"`` field of the record produced by
    ``get_source_record``; ``"Unknown"`` is used when the field is absent.
    """
    record = get_source_record(source)
    bias = record.get("bias", "Unknown")
    return str(bias)