Jitender20 committed on
Commit
208266a
·
1 Parent(s): 9bb094d

Add NewsLens Streamlit app

Browse files
Files changed (50) hide show
  1. Dockerfile +3 -9
  2. requirements.txt +16 -2
  3. src/__init__.py +0 -0
  4. src/__pycache__/__init__.cpython-313.pyc +0 -0
  5. src/__pycache__/config.cpython-313.pyc +0 -0
  6. src/analysis/__init__.py +0 -0
  7. src/analysis/__pycache__/__init__.cpython-313.pyc +0 -0
  8. src/analysis/__pycache__/rag_pipeline.cpython-313.pyc +0 -0
  9. src/analysis/__pycache__/source_bias.cpython-313.pyc +0 -0
  10. src/analysis/rag_pipeline.py +74 -0
  11. src/analysis/source_bias.py +47 -0
  12. src/api/__init__.py +0 -0
  13. src/api/__pycache__/__init__.cpython-313.pyc +0 -0
  14. src/api/__pycache__/main.cpython-313.pyc +0 -0
  15. src/api/__pycache__/models.cpython-313.pyc +0 -0
  16. src/api/__pycache__/routes.cpython-313.pyc +0 -0
  17. src/api/main.py +29 -0
  18. src/api/models.py +45 -0
  19. src/api/routes.py +68 -0
  20. src/config.py +35 -0
  21. src/data/source_bias.generated.json +1136 -0
  22. src/data/source_bias.json +202 -0
  23. src/db/__init__.py +0 -0
  24. src/db/__pycache__/__init__.cpython-313.pyc +0 -0
  25. src/db/__pycache__/vector_store.cpython-313.pyc +0 -0
  26. src/db/vector_store.py +120 -0
  27. src/ingestion/__init__.py +0 -0
  28. src/ingestion/__pycache__/__init__.cpython-313.pyc +0 -0
  29. src/ingestion/__pycache__/newsapi_client.cpython-313.pyc +0 -0
  30. src/ingestion/newsapi_client.py +49 -0
  31. src/models/__pycache__/dataset_prep.cpython-313.pyc +0 -0
  32. src/models/__pycache__/test_inference.cpython-313.pyc +0 -0
  33. src/models/__pycache__/train_model.cpython-313.pyc +0 -0
  34. src/models/dataset_prep.py +19 -0
  35. src/models/test_inference.py +105 -0
  36. src/models/train_model.py +114 -0
  37. src/ui/__init__.py +0 -0
  38. src/ui/__pycache__/__init__.cpython-313.pyc +0 -0
  39. src/ui/__pycache__/app.cpython-313.pyc +0 -0
  40. src/ui/app.py +518 -0
  41. src/ui/components/__init__.py +0 -0
  42. src/ui/components/__pycache__/__init__.cpython-313.pyc +0 -0
  43. src/ui/components/__pycache__/article_card.cpython-313.pyc +0 -0
  44. src/ui/components/__pycache__/charts.cpython-313.pyc +0 -0
  45. src/ui/components/article_card.py +172 -0
  46. src/ui/components/charts.py +142 -0
  47. src/ui/services/__init__.py +0 -0
  48. src/ui/services/__pycache__/__init__.cpython-313.pyc +0 -0
  49. src/ui/services/__pycache__/api_client.cpython-313.pyc +0 -0
  50. src/ui/services/api_client.py +53 -0
Dockerfile CHANGED
@@ -1,20 +1,14 @@
1
- FROM python:3.13.5-slim
2
-
3
  WORKDIR /app
4
-
5
  RUN apt-get update && apt-get install -y \
6
  build-essential \
7
  curl \
8
  git \
9
  && rm -rf /var/lib/apt/lists/*
10
-
11
  COPY requirements.txt ./
12
  COPY src/ ./src/
13
-
14
  RUN pip3 install -r requirements.txt
15
-
16
  EXPOSE 8501
17
-
18
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
19
-
20
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
1
+ FROM python:3.13-slim
 
2
  WORKDIR /app
 
3
  RUN apt-get update && apt-get install -y \
4
  build-essential \
5
  curl \
6
  git \
7
  && rm -rf /var/lib/apt/lists/*
 
8
  COPY requirements.txt ./
9
  COPY src/ ./src/
 
10
  RUN pip3 install -r requirements.txt
 
11
  EXPOSE 8501
12
+ ENV PYTHONPATH=/app
13
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
14
+ ENTRYPOINT ["streamlit", "run", "src/ui/app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
requirements.txt CHANGED
@@ -1,3 +1,17 @@
1
- altair
 
 
 
 
 
2
  pandas
3
- streamlit
 
 
 
 
 
 
 
 
 
 
1
+ accelerate
2
+ chromadb
3
+ datasets
4
+ fastapi
5
+ newsapi-python
6
+ numpy
7
  pandas
8
+ peft
9
+ plotly
10
+ python-dotenv
11
+ requests
12
+ scikit-learn
13
+ sentence-transformers
14
+ streamlit
15
+ torch
16
+ transformers
17
+ uvicorn
src/__init__.py ADDED
File without changes
src/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (124 Bytes). View file
 
src/__pycache__/config.cpython-313.pyc ADDED
Binary file (2.22 kB). View file
 
src/analysis/__init__.py ADDED
File without changes
src/analysis/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (133 Bytes). View file
 
src/analysis/__pycache__/rag_pipeline.cpython-313.pyc ADDED
Binary file (3.97 kB). View file
 
src/analysis/__pycache__/source_bias.cpython-313.pyc ADDED
Binary file (2.29 kB). View file
 
src/analysis/rag_pipeline.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.db.vector_store import NewsVectorStore
2
+ from src.models.test_inference import BiasPredictor
3
+ from src.analysis.source_bias import get_source_bias, get_source_record
4
+ from collections import defaultdict
5
+
6
+
7
class NewsAnalysisPipeline:
    """Retrieve stored articles for a topic and label each one for bias.

    The pipeline owns two collaborators created in ``__init__``: a
    ``NewsVectorStore`` used for retrieval and a ``BiasPredictor`` used to
    classify article text. Source-level leaning is looked up through
    ``get_source_record``.
    """

    def __init__(self):
        """Build the vector store and the bias predictor, logging progress."""
        print("Initializing NewsLens pipeline...")
        self.vector_store = NewsVectorStore()
        self.bias_predictor = BiasPredictor()
        print("Pipeline ready.")

    def analyze(self, topic: str, top_k: int = 10) -> dict:
        """Analyse the ``top_k`` stored articles most relevant to ``topic``.

        Args:
            topic: Free-text query forwarded to the vector store.
            top_k: Maximum number of articles requested from the store.

        Returns:
            A dict with keys ``topic``, ``results`` (one dict per article)
            and ``summary`` (per-source counts of each text label). When the
            store returns nothing, ``results`` is ``[]`` and ``summary`` is
            ``{}``.
        """
        hits = self.vector_store.query(topic, top_k=top_k)
        if not hits:
            return {"topic": topic, "results": [], "summary": {}}

        # Classify all article bodies in a single batch call.
        predictions = self.bias_predictor.predict_batch(
            [hit["text"] for hit in hits]
        )

        results = []
        for hit, pred in zip(hits, predictions):
            registry_entry = get_source_record(hit["source"])
            row = {
                "source": hit["source"],
                "source_bias": registry_entry["bias"],
                "source_bias_provenance": registry_entry["provenance"],
                "url": hit["url"],
                "title": hit.get("title", ""),
                "description": hit.get("description", ""),
                "publishedAt": hit.get("publishedAt", ""),
                "text": hit["text"],
                "text_label": pred["label"],
                "confidence": pred["confidence"],
                "probabilities": {
                    # Index 0 is reported as "Not Biased", index 1 as "Biased".
                    "Not Biased": round(pred["probabilities"][0], 4),
                    "Biased": round(pred["probabilities"][1], 4),
                },
                "similarity_score": hit["similarity_score"],
            }
            results.append(row)

        # Aggregate per source: count how many articles received each label.
        per_source = {}
        for row in results:
            bucket = per_source.setdefault(
                row["source"],
                {"source_bias": "Unknown", "Biased": 0, "Not Biased": 0, "total": 0},
            )
            bucket["source_bias"] = row["source_bias"]
            bucket[row["text_label"]] += 1
            bucket["total"] += 1

        return {"topic": topic, "results": results, "summary": per_source}
62
+
63
+
64
if __name__ == "__main__":
    # Manual smoke run: analyse a fixed topic and dump the outcome to stdout.
    pipeline = NewsAnalysisPipeline()
    output = pipeline.analyze(topic="climate change", top_k=10)

    # One line per analysed article (text truncated to 80 characters).
    print(f"\n=== Results for: '{output['topic']}' ===")
    for r in output["results"]:
        print(f"[{r['text_label']}] ({r['confidence']:.2f}) | Source lean: {r['source_bias']} — {r['source']}: {r['text'][:80]}...")

    # One line per source with its label counts.
    print("\n=== Source Summary ===")
    for source, counts in output["summary"].items():
        print(f"{source} ({counts['source_bias']}): Biased={counts['Biased']}, Not Biased={counts['Not Biased']}, Total={counts['total']}")
src/analysis/source_bias.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from functools import lru_cache
3
+ from pathlib import Path
4
+ from typing import Any
5
+
6
+
7
# Location of the source-bias registry: ``data/source_bias.json`` inside the
# directory that sits one level above this module's own directory.
REGISTRY_PATH = Path(__file__).resolve().parent.parent / "data" / "source_bias.json"
8
+
9
+
10
@lru_cache(maxsize=1)
def load_source_registry() -> dict[str, Any]:
    """Parse the JSON registry at ``REGISTRY_PATH`` and return it.

    The result is memoised by ``lru_cache``, so the file is read once per
    process; later calls return the same parsed object.
    """
    raw_text = REGISTRY_PATH.read_text(encoding="utf-8")
    return json.loads(raw_text)
14
+
15
+
16
def normalize_source_name(source: str) -> str:
    """Return ``source`` lower-cased with whitespace runs collapsed to one space.

    Falsy input (``None`` or an empty string) yields an empty string.
    """
    if not source:
        return ""
    # ``split()`` with no argument already discards leading/trailing whitespace.
    words = source.lower().split()
    return " ".join(words)
18
+
19
+
20
def get_source_record(source: str) -> dict[str, Any]:
    """Look up the registry record for a news source name.

    Resolution order:
      1. the normalised name is looked up in the registry's ``aliases`` map;
      2. if there is no alias, the name is tried verbatim as a ``sources`` key;
      3. if that also fails, the normalised name is compared against the
         normalised form of every ``sources`` key, so that case or whitespace
         variants of a registered name still match even without an alias.

    Args:
        source: Source name as reported by the article (may be empty/None).

    Returns:
        ``{"name": <canonical name>, **record}`` for a match, otherwise a
        placeholder record whose ``bias`` is ``"Unknown"`` and whose
        ``provenance`` is ``"unmatched"``.
    """
    registry = load_source_registry()
    sources = registry.get("sources", {})
    aliases = registry.get("aliases", {})

    normalized = normalize_source_name(source)
    canonical = aliases.get(normalized, source)
    record = sources.get(canonical)

    if record is None and normalized:
        # No alias and no exact key: fall back to a normalised comparison
        # against the registered names.
        for known_name, known_record in sources.items():
            if normalize_source_name(known_name) == normalized:
                canonical, record = known_name, known_record
                break

    if record is None:
        return {
            "name": source or "Unknown",
            "bias": "Unknown",
            "provenance": "unmatched",
            "source_url": None,
            "article_count": None,
            "label_counts": None,
            "notes": "No source-level registry match found.",
        }

    # Keys present in the record take precedence over the injected "name".
    return {
        "name": canonical,
        **record,
    }
44
+
45
+
46
def get_source_bias(source: str) -> str:
    """Return only the ``bias`` value of the registry record for ``source``.

    Falls back to ``"Unknown"`` when the record carries no ``bias`` key; the
    value is always converted to ``str``.
    """
    record = get_source_record(source)
    bias = record.get("bias", "Unknown")
    return str(bias)
src/api/__init__.py ADDED
File without changes
src/api/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (128 Bytes). View file
 
src/api/__pycache__/main.cpython-313.pyc ADDED
Binary file (1.2 kB). View file
 
src/api/__pycache__/models.cpython-313.pyc ADDED
Binary file (2.91 kB). View file
 
src/api/__pycache__/routes.cpython-313.pyc ADDED
Binary file (3.71 kB). View file
 
src/api/main.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from contextlib import asynccontextmanager
2
+ from fastapi import FastAPI
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ from src.analysis.rag_pipeline import NewsAnalysisPipeline
5
+ from src.api import routes
6
+
7
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook.

    Before the app starts serving, a ``NewsAnalysisPipeline`` is created and
    stored on ``app.state.pipeline``; a message is printed again once the
    context resumes at shutdown.
    """
    print("Loading pipeline at startup...")
    pipeline = NewsAnalysisPipeline()
    app.state.pipeline = pipeline
    print("Pipeline ready.")
    yield
    print("Shutting down.")
14
+
15
# ASGI application; ``lifespan`` attaches the analysis pipeline at startup.
app = FastAPI(
    lifespan=lifespan,
    title="NewsLens API",
    description="Bias analysis for news articles",
    version="1.0.0",
)

# CORS is fully open: every origin, method and header is accepted.
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])

# Register the endpoints declared in ``src.api.routes``.
app.include_router(routes.router)
src/api/models.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import Optional
3
+
4
class AnalyzeRequest(BaseModel):
    # Request body of POST /analyze.
    # (A `#` comment is used instead of a docstring so the generated schema
    # description stays unchanged.)

    # Required search topic, 1-200 characters.
    topic: str = Field(default=..., min_length=1, max_length=200)
    # Number of articles to analyse, 1-20 (10 when omitted).
    top_k: int = Field(10, ge=1, le=20)
7
+
8
class ArticleResult(BaseModel):
    # One analysed article inside an /analyze response.
    # (A `#` comment is used instead of a docstring so the generated schema
    # description stays unchanged.)

    # --- source-level information ---
    source: str
    source_bias: str
    source_bias_provenance: str | None = None

    # --- article metadata ---
    url: str
    title: str | None = None
    description: str | None = None
    publishedAt: str | None = None
    text: str

    # --- classifier / retrieval output ---
    text_label: str
    confidence: float
    similarity_score: float
    probabilities: dict
21
+
22
class SourceSummary(BaseModel):
    # Per-source label counts inside an /analyze response.
    # (A `#` comment is used instead of a docstring so the generated schema
    # description stays unchanged.)

    # Pydantic v2 configuration: allow populating ``Not_Biased`` by its field
    # name as well as by its "Not Biased" alias. This replaces the deprecated
    # class-based ``Config``.
    model_config = {"populate_by_name": True}

    source_bias: str
    Biased: int
    # The incoming summary dict uses the key "Not Biased" (with a space).
    Not_Biased: int = Field(alias="Not Biased")
    total: int
30
+
31
class AnalyzeResponse(BaseModel):
    # Response body of POST /analyze.
    # (A `#` comment is used instead of a docstring so the generated schema
    # description stays unchanged.)

    # Topic that was analysed.
    topic: str
    # Number of entries in ``results``.
    total_articles: int
    # Per-article analysis rows.
    results: list[ArticleResult]
    # Label counts keyed by source name.
    summary: dict[str, SourceSummary]
36
+
37
class IngestRequest(BaseModel):
    # Request body of POST /ingest.
    # (A `#` comment is used instead of a docstring so the generated schema
    # description stays unchanged.)

    # Required topic to fetch news for, 1-200 characters.
    topic: str = Field(default=..., min_length=1, max_length=200)
    # Number of articles to request, 1-50 (10 when omitted).
    page_size: int = Field(10, ge=1, le=50)
40
+
41
class IngestResponse(BaseModel):
    # Response body of POST /ingest.
    # (A `#` comment is used instead of a docstring so the generated schema
    # description stays unchanged.)

    # Topic that was ingested.
    topic: str
    # Number of articles returned by the news fetch.
    articles_fetched: int
    # Number of articles reported as stored.
    articles_stored: int
    # Outcome marker for the ingest call.
    status: str
src/api/routes.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException, Request
2
+ import time
3
+ from src.ingestion.newsapi_client import fetch_news
4
+ from src.api.models import AnalyzeRequest, AnalyzeResponse, IngestRequest, IngestResponse
5
+
6
# Router collecting every endpoint declared in this module.
router = APIRouter()

# Lifetime of a cached /analyze response, in seconds.
CACHE_TTL_SECONDS = 300
# In-process cache of /analyze responses, keyed by (normalised topic, top_k);
# each value holds the response under "data" and its creation time under
# "timestamp".
_cache: dict = {}
10
+
11
@router.get("/health")
def health():
    """Health probe: always answers with a constant ``{"status": "ok"}``."""
    status_payload = {"status": "ok"}
    return status_payload
14
+
15
@router.post("/analyze", response_model=AnalyzeResponse)
def analyze(request: Request, payload: AnalyzeRequest):
    """Run the bias analysis for a topic, serving from cache when possible.

    Responses are cached in-process for ``CACHE_TTL_SECONDS`` under the key
    ``(lower-cased stripped topic, top_k)``. Results are sorted by classifier
    confidence, highest first.

    Raises:
        HTTPException 400: the topic is blank after stripping whitespace.
        HTTPException 503: no pipeline is attached to the application state.
        HTTPException 500: the pipeline raised while analysing.
    """
    if not payload.topic.strip():
        raise HTTPException(status_code=400, detail="Topic cannot be empty.")

    cache_key = (payload.topic.lower().strip(), payload.top_k)
    now = time.time()

    cached = _cache.get(cache_key)
    if cached is not None:
        if now - cached["timestamp"] < CACHE_TTL_SECONDS:
            print(f"Cache hit for: {payload.topic}")
            return cached["data"]
        # Entry is stale: drop it so it does not linger if the refresh fails.
        _cache.pop(cache_key, None)

    # ``app.state`` raises AttributeError for attributes that were never set,
    # so read it defensively to make the 503 branch reachable.
    pipeline = getattr(request.app.state, "pipeline", None)
    if pipeline is None:
        raise HTTPException(status_code=503, detail="Pipeline not initialized.")

    try:
        raw = pipeline.analyze(payload.topic, top_k=payload.top_k)
    except Exception as e:
        # API boundary: convert any pipeline failure into a 500, keeping the
        # original exception as the cause for tracebacks.
        raise HTTPException(status_code=500, detail=f"Pipeline error: {str(e)}") from e

    sorted_results = sorted(raw["results"], key=lambda x: x["confidence"], reverse=True)
    response = AnalyzeResponse(
        topic=raw["topic"],
        total_articles=len(sorted_results),
        results=sorted_results,
        summary=raw["summary"],
    )

    _cache[cache_key] = {"data": response, "timestamp": now}
    return response
48
+
49
@router.post("/ingest", response_model=IngestResponse)
def ingest(request: Request, payload: IngestRequest):
    """Fetch news for a topic and store the articles in the vector store.

    A successful ingest clears the /analyze response cache so that later
    analyses see the new articles.

    Raises:
        HTTPException 503: no pipeline is attached to the application state,
            or ``fetch_news`` raised ``RuntimeError``.
        HTTPException 404: the fetch returned no articles.
    """
    # Check for the pipeline first: ``app.state`` raises AttributeError for
    # unset attributes, and there is no point fetching articles that cannot
    # be stored.
    pipeline = getattr(request.app.state, "pipeline", None)
    if pipeline is None:
        raise HTTPException(status_code=503, detail="Pipeline not initialized.")

    try:
        articles = fetch_news(topic=payload.topic, page_size=payload.page_size)
    except RuntimeError as exc:
        raise HTTPException(status_code=503, detail=str(exc)) from exc

    if not articles:
        raise HTTPException(status_code=404, detail=f"No articles found for topic: {payload.topic}")

    pipeline.vector_store.store_articles(articles)
    # Cached analyses may now be out of date.
    _cache.clear()

    return IngestResponse(
        topic=payload.topic,
        articles_fetched=len(articles),
        articles_stored=len(articles),
        status="success",
    )
src/config.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+
4
# python-dotenv is optional: when it is missing, a minimal .env reader below
# takes its place.
try:
    from dotenv import load_dotenv
except ImportError:
    load_dotenv = None


if load_dotenv is None:
    # Fallback: read KEY=VALUE lines from ./.env (current working directory).
    env_path = Path.cwd() / ".env"
    if env_path.exists():
        for line in env_path.read_text(encoding="utf-8").splitlines():
            line = line.strip()
            # Only non-empty, non-comment lines containing "=" are assignments.
            if line and not line.startswith("#") and "=" in line:
                key, value = line.split("=", maxsplit=1)
                # setdefault: variables already in the environment win.
                os.environ.setdefault(key.strip(), value.strip().strip('"').strip("'"))
else:
    load_dotenv()

_bias_model_env = os.environ.get("NEWSLENS_BIAS_MODEL_PATH")

# Filesystem layout; each location can be overridden through its env variable.
BASE_DIR = Path(__file__).resolve().parent.parent
DATA_DIR = Path(os.environ.get("NEWSLENS_DATA_DIR", BASE_DIR / "data"))
CHROMA_DB_PATH = Path(os.environ.get("NEWSLENS_CHROMA_DB_PATH", DATA_DIR / "chromadb"))
MODEL_DIR = Path(os.environ.get("NEWSLENS_MODEL_DIR", DATA_DIR / "models"))

# A non-empty env override is kept as the raw string; otherwise the default
# adapter directory under MODEL_DIR is used (a Path).
BIAS_MODEL_PATH = (
    _bias_model_env
    if _bias_model_env
    else MODEL_DIR / "bias_lora_20260503_010859"
)

# Service endpoints and credentials, all read from the environment.
HF_ENDPOINT = os.environ.get("NEWSLENS_HF_ENDPOINT")
HF_TOKEN = os.environ.get("HF_TOKEN")
NEWS_API_KEY = os.environ.get("NEWSAPI_KEY")
API_BASE_URL = os.environ.get("NEWSLENS_API_BASE_URL", "http://localhost:8000")
src/data/source_bias.generated.json ADDED
@@ -0,0 +1,1136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "aliases": {
3
+ "al jazeera": "Al Jazeera",
4
+ "allysia finley (wall street journal)": "Allysia Finley (Wall Street Journal)",
5
+ "ann coulter": "Ann Coulter",
6
+ "ben shapiro": "Ben Shapiro",
7
+ "brent bozell": "Brent Bozell",
8
+ "business insider": "Business Insider",
9
+ "buzzfeed news": "BuzzFeed News",
10
+ "cbn": "CBN",
11
+ "cbs news": "CBS News",
12
+ "charles krauthammer": "Charles Krauthammer",
13
+ "chicago sun-times": "Chicago Sun-Times",
14
+ "christian science monitor": "Christian Science Monitor",
15
+ "cnn (web news)": "CNN (Web News)",
16
+ "cnn - editorial": "CNN - Editorial",
17
+ "daily beast": "Daily Beast",
18
+ "daily kos": "Daily Kos",
19
+ "daily mail": "Daily Mail",
20
+ "damon linker": "Damon Linker",
21
+ "democracy now": "Democracy Now",
22
+ "elizabeth warren": "Elizabeth Warren",
23
+ "ezra klein": "Ezra Klein",
24
+ "fox news": "Fox News",
25
+ "fox news (online)": "Fox News (Online)",
26
+ "fox news opinion": "Fox News Opinion",
27
+ "fox online news": "Fox Online News",
28
+ "george will": "George Will",
29
+ "guest writer": "Guest Writer",
30
+ "guest writer - center": "Guest Writer - Center",
31
+ "guest writer - left": "Guest Writer - Left",
32
+ "guest writer - right": "Guest Writer - Right",
33
+ "hotair": "HotAir",
34
+ "howard kurtz": "Howard Kurtz",
35
+ "international business times": "International Business Times",
36
+ "jacobin": "Jacobin",
37
+ "john fund": "John Fund",
38
+ "john stossel": "John Stossel",
39
+ "jon terbush": "Jon Terbush",
40
+ "jonah goldberg": "Jonah Goldberg",
41
+ "juan williams": "Juan Williams",
42
+ "julian zelizer": "Julian Zelizer",
43
+ "marketwatch": "MarketWatch",
44
+ "media matters": "Media Matters",
45
+ "media research center": "Media Research Center",
46
+ "michael barone": "Michael Barone",
47
+ "michael brendan dougherty": "Michael Brendan Dougherty",
48
+ "michael goodwin": "Michael Goodwin",
49
+ "michelle malkin": "Michelle Malkin",
50
+ "mother jones": "Mother Jones",
51
+ "national review": "National Review",
52
+ "nbc news (online)": "NBC News (Online)",
53
+ "nbcnews.com": "NBCNews.com",
54
+ "new york post": "New York Post",
55
+ "new york post (news)": "New York Post (News)",
56
+ "new york post (opinion)": "New York Post (Opinion)",
57
+ "newsbusters": "NewsBusters",
58
+ "newt gingrich": "Newt Gingrich",
59
+ "npr editorial": "NPR Editorial",
60
+ "npr online news": "NPR Online News",
61
+ "pew research center": "Pew Research Center",
62
+ "politico": "Politico",
63
+ "rand paul": "Rand Paul",
64
+ "rich lowry": "Rich Lowry",
65
+ "ryan cooper": "Ryan Cooper",
66
+ "s.e. cupp": "S.E. Cupp",
67
+ "scientific american": "Scientific American",
68
+ "slate": "Slate",
69
+ "the atlantic": "The Atlantic",
70
+ "the boston globe": "The Boston Globe",
71
+ "the daily wire": "The Daily Wire",
72
+ "the economist": "The Economist",
73
+ "the flip side": "The Flip Side",
74
+ "the hill": "The Hill",
75
+ "the intercept": "The Intercept",
76
+ "the marshall project": "The Marshall Project",
77
+ "the nation": "The Nation",
78
+ "the new yorker": "The New Yorker",
79
+ "the week - news": "The Week - News",
80
+ "the week - opinion": "The Week - Opinion",
81
+ "theblaze.com": "TheBlaze.com",
82
+ "thinkprogress": "ThinkProgress",
83
+ "thomas sowell": "Thomas Sowell",
84
+ "time magazine": "Time Magazine",
85
+ "townhall": "Townhall",
86
+ "usa today": "USA TODAY",
87
+ "vanity fair": "Vanity Fair",
88
+ "vice": "Vice",
89
+ "victor hanson": "Victor Hanson",
90
+ "vox": "Vox",
91
+ "wall street journal - editorial": "Wall Street Journal - Editorial",
92
+ "wall street journal - news": "Wall Street Journal - News",
93
+ "washington post": "Washington Post",
94
+ "washington times": "Washington Times",
95
+ "yahoo! news": "Yahoo! News",
96
+ "yahoo! the 360": "Yahoo! The 360"
97
+ },
98
+ "sources": {
99
+ "Al Jazeera": {
100
+ "article_count": 142,
101
+ "bias": "Left",
102
+ "label_counts": {
103
+ "Left": 141,
104
+ "Right": 1
105
+ },
106
+ "majority_share": 0.993,
107
+ "notes": "Generated by aggregating article-level political bias labels by source.",
108
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
109
+ "source_url": "www.aljazeera.com"
110
+ },
111
+ "Allysia Finley (Wall Street Journal)": {
112
+ "article_count": 4,
113
+ "bias": "Center",
114
+ "label_counts": {
115
+ "Center": 4
116
+ },
117
+ "majority_share": 1.0,
118
+ "notes": "Generated by aggregating article-level political bias labels by source.",
119
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
120
+ "source_url": "www.cnn.com"
121
+ },
122
+ "Ann Coulter": {
123
+ "article_count": 6,
124
+ "bias": "Center",
125
+ "label_counts": {
126
+ "Center": 6
127
+ },
128
+ "majority_share": 1.0,
129
+ "notes": "Generated by aggregating article-level political bias labels by source.",
130
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
131
+ "source_url": "www.townhall.com"
132
+ },
133
+ "Ben Shapiro": {
134
+ "article_count": 26,
135
+ "bias": "Center",
136
+ "label_counts": {
137
+ "Center": 26
138
+ },
139
+ "majority_share": 1.0,
140
+ "notes": "Generated by aggregating article-level political bias labels by source.",
141
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
142
+ "source_url": "www.dailywire.com"
143
+ },
144
+ "Brent Bozell": {
145
+ "article_count": 5,
146
+ "bias": "Center",
147
+ "label_counts": {
148
+ "Center": 5
149
+ },
150
+ "majority_share": 1.0,
151
+ "notes": "Generated by aggregating article-level political bias labels by source.",
152
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
153
+ "source_url": "www.foxnews.com"
154
+ },
155
+ "Business Insider": {
156
+ "article_count": 74,
157
+ "bias": "Right",
158
+ "label_counts": {
159
+ "Right": 74
160
+ },
161
+ "majority_share": 1.0,
162
+ "notes": "Generated by aggregating article-level political bias labels by source.",
163
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
164
+ "source_url": "www.businessinsider.com"
165
+ },
166
+ "BuzzFeed News": {
167
+ "article_count": 64,
168
+ "bias": "Left",
169
+ "label_counts": {
170
+ "Left": 64
171
+ },
172
+ "majority_share": 1.0,
173
+ "notes": "Generated by aggregating article-level political bias labels by source.",
174
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
175
+ "source_url": "www.buzzfeednews.com"
176
+ },
177
+ "CBN": {
178
+ "article_count": 27,
179
+ "bias": "Center",
180
+ "label_counts": {
181
+ "Center": 27
182
+ },
183
+ "majority_share": 1.0,
184
+ "notes": "Generated by aggregating article-level political bias labels by source.",
185
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
186
+ "source_url": "www.cbn.com"
187
+ },
188
+ "CBS News": {
189
+ "article_count": 163,
190
+ "bias": "Left",
191
+ "label_counts": {
192
+ "Left": 163
193
+ },
194
+ "majority_share": 1.0,
195
+ "notes": "Generated by aggregating article-level political bias labels by source.",
196
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
197
+ "source_url": "www.cbsnews.com"
198
+ },
199
+ "CNN (Web News)": {
200
+ "article_count": 2485,
201
+ "bias": "Left",
202
+ "label_counts": {
203
+ "Left": 2485
204
+ },
205
+ "majority_share": 1.0,
206
+ "notes": "Generated by aggregating article-level political bias labels by source.",
207
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
208
+ "source_url": "www.cnn.com"
209
+ },
210
+ "CNN - Editorial": {
211
+ "article_count": 87,
212
+ "bias": "Left",
213
+ "label_counts": {
214
+ "Left": 87
215
+ },
216
+ "majority_share": 1.0,
217
+ "notes": "Generated by aggregating article-level political bias labels by source.",
218
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
219
+ "source_url": "www.cnn.com"
220
+ },
221
+ "Charles Krauthammer": {
222
+ "article_count": 9,
223
+ "bias": "Center",
224
+ "label_counts": {
225
+ "Center": 9
226
+ },
227
+ "majority_share": 1.0,
228
+ "notes": "Generated by aggregating article-level political bias labels by source.",
229
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
230
+ "source_url": "www.nationalreview.com"
231
+ },
232
+ "Chicago Sun-Times": {
233
+ "article_count": 83,
234
+ "bias": "Left",
235
+ "label_counts": {
236
+ "Left": 83
237
+ },
238
+ "majority_share": 1.0,
239
+ "notes": "Generated by aggregating article-level political bias labels by source.",
240
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
241
+ "source_url": "www.chicago.suntimes.com"
242
+ },
243
+ "Christian Science Monitor": {
244
+ "article_count": 1300,
245
+ "bias": "Right",
246
+ "label_counts": {
247
+ "Right": 1300
248
+ },
249
+ "majority_share": 1.0,
250
+ "notes": "Generated by aggregating article-level political bias labels by source.",
251
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
252
+ "source_url": "www.csmonitor.com"
253
+ },
254
+ "Daily Beast": {
255
+ "article_count": 240,
256
+ "bias": "Left",
257
+ "label_counts": {
258
+ "Left": 240
259
+ },
260
+ "majority_share": 1.0,
261
+ "notes": "Generated by aggregating article-level political bias labels by source.",
262
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
263
+ "source_url": "www.thedailybeast.com"
264
+ },
265
+ "Daily Kos": {
266
+ "article_count": 127,
267
+ "bias": "Left",
268
+ "label_counts": {
269
+ "Left": 127
270
+ },
271
+ "majority_share": 1.0,
272
+ "notes": "Generated by aggregating article-level political bias labels by source.",
273
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
274
+ "source_url": "www.dailykos.com"
275
+ },
276
+ "Daily Mail": {
277
+ "article_count": 46,
278
+ "bias": "Center",
279
+ "label_counts": {
280
+ "Center": 46
281
+ },
282
+ "majority_share": 1.0,
283
+ "notes": "Generated by aggregating article-level political bias labels by source.",
284
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
285
+ "source_url": "www.dailymail.co.uk"
286
+ },
287
+ "Damon Linker": {
288
+ "article_count": 14,
289
+ "bias": "Left",
290
+ "label_counts": {
291
+ "Left": 14
292
+ },
293
+ "majority_share": 1.0,
294
+ "notes": "Generated by aggregating article-level political bias labels by source.",
295
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
296
+ "source_url": "www.theweek.com"
297
+ },
298
+ "Democracy Now": {
299
+ "article_count": 75,
300
+ "bias": "Left",
301
+ "label_counts": {
302
+ "Left": 75
303
+ },
304
+ "majority_share": 1.0,
305
+ "notes": "Generated by aggregating article-level political bias labels by source.",
306
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
307
+ "source_url": "www.democracynow.org"
308
+ },
309
+ "Elizabeth Warren": {
310
+ "article_count": 4,
311
+ "bias": "Left",
312
+ "label_counts": {
313
+ "Left": 4
314
+ },
315
+ "majority_share": 1.0,
316
+ "notes": "Generated by aggregating article-level political bias labels by source.",
317
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
318
+ "source_url": "www.time.com"
319
+ },
320
+ "Ezra Klein": {
321
+ "article_count": 10,
322
+ "bias": "Left",
323
+ "label_counts": {
324
+ "Left": 10
325
+ },
326
+ "majority_share": 1.0,
327
+ "notes": "Generated by aggregating article-level political bias labels by source.",
328
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
329
+ "source_url": "www.npr.org"
330
+ },
331
+ "Fox News": {
332
+ "article_count": 1353,
333
+ "bias": "Center",
334
+ "label_counts": {
335
+ "Center": 1353
336
+ },
337
+ "majority_share": 1.0,
338
+ "notes": "Generated by aggregating article-level political bias labels by source.",
339
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
340
+ "source_url": "www.foxnews.com"
341
+ },
342
+ "Fox News (Online)": {
343
+ "article_count": 86,
344
+ "bias": "Center",
345
+ "label_counts": {
346
+ "Center": 86
347
+ },
348
+ "majority_share": 1.0,
349
+ "notes": "Generated by aggregating article-level political bias labels by source.",
350
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
351
+ "source_url": "www.foxnews.com"
352
+ },
353
+ "Fox News Opinion": {
354
+ "article_count": 58,
355
+ "bias": "Center",
356
+ "label_counts": {
357
+ "Center": 58
358
+ },
359
+ "majority_share": 1.0,
360
+ "notes": "Generated by aggregating article-level political bias labels by source.",
361
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
362
+ "source_url": "www.foxnews.com"
363
+ },
364
+ "Fox Online News": {
365
+ "article_count": 2035,
366
+ "bias": "Center",
367
+ "label_counts": {
368
+ "Center": 2035
369
+ },
370
+ "majority_share": 1.0,
371
+ "notes": "Generated by aggregating article-level political bias labels by source.",
372
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
373
+ "source_url": "www.foxnews.com"
374
+ },
375
+ "George Will": {
376
+ "article_count": 14,
377
+ "bias": "Center",
378
+ "label_counts": {
379
+ "Center": 14
380
+ },
381
+ "majority_share": 1.0,
382
+ "notes": "Generated by aggregating article-level political bias labels by source.",
383
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
384
+ "source_url": "www.nationalreview.com"
385
+ },
386
+ "Guest Writer": {
387
+ "article_count": 84,
388
+ "bias": "Right",
389
+ "label_counts": {
390
+ "Right": 84
391
+ },
392
+ "majority_share": 1.0,
393
+ "notes": "Generated by aggregating article-level political bias labels by source.",
394
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
395
+ "source_url": "www.washingtontimes.com"
396
+ },
397
+ "Guest Writer - Center": {
398
+ "article_count": 3,
399
+ "bias": "Right",
400
+ "label_counts": {
401
+ "Right": 3
402
+ },
403
+ "majority_share": 1.0,
404
+ "notes": "Generated by aggregating article-level political bias labels by source.",
405
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
406
+ "source_url": "www.theatlantic.com"
407
+ },
408
+ "Guest Writer - Left": {
409
+ "article_count": 109,
410
+ "bias": "Left",
411
+ "label_counts": {
412
+ "Left": 109
413
+ },
414
+ "majority_share": 1.0,
415
+ "notes": "Generated by aggregating article-level political bias labels by source.",
416
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
417
+ "source_url": "www.cnn.com"
418
+ },
419
+ "Guest Writer - Right": {
420
+ "article_count": 385,
421
+ "bias": "Center",
422
+ "label_counts": {
423
+ "Center": 385
424
+ },
425
+ "majority_share": 1.0,
426
+ "notes": "Generated by aggregating article-level political bias labels by source.",
427
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
428
+ "source_url": "www.nationalreview.com"
429
+ },
430
+ "HotAir": {
431
+ "article_count": 64,
432
+ "bias": "Center",
433
+ "label_counts": {
434
+ "Center": 64
435
+ },
436
+ "majority_share": 1.0,
437
+ "notes": "Generated by aggregating article-level political bias labels by source.",
438
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
439
+ "source_url": "www.hotair.com"
440
+ },
441
+ "Howard Kurtz": {
442
+ "article_count": 14,
443
+ "bias": "Right",
444
+ "label_counts": {
445
+ "Right": 14
446
+ },
447
+ "majority_share": 1.0,
448
+ "notes": "Generated by aggregating article-level political bias labels by source.",
449
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
450
+ "source_url": "www.foxnews.com"
451
+ },
452
+ "International Business Times": {
453
+ "article_count": 48,
454
+ "bias": "Right",
455
+ "label_counts": {
456
+ "Right": 48
457
+ },
458
+ "majority_share": 1.0,
459
+ "notes": "Generated by aggregating article-level political bias labels by source.",
460
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
461
+ "source_url": "www.ibtimes.com"
462
+ },
463
+ "Jacobin": {
464
+ "article_count": 23,
465
+ "bias": "Left",
466
+ "label_counts": {
467
+ "Left": 23
468
+ },
469
+ "majority_share": 1.0,
470
+ "notes": "Generated by aggregating article-level political bias labels by source.",
471
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
472
+ "source_url": "www.jacobinmag.com"
473
+ },
474
+ "John Fund": {
475
+ "article_count": 16,
476
+ "bias": "Center",
477
+ "label_counts": {
478
+ "Center": 16
479
+ },
480
+ "majority_share": 1.0,
481
+ "notes": "Generated by aggregating article-level political bias labels by source.",
482
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
483
+ "source_url": "www.nationalreview.com"
484
+ },
485
+ "John Stossel": {
486
+ "article_count": 26,
487
+ "bias": "Center",
488
+ "label_counts": {
489
+ "Center": 26
490
+ },
491
+ "majority_share": 1.0,
492
+ "notes": "Generated by aggregating article-level political bias labels by source.",
493
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
494
+ "source_url": "www.foxnews.com"
495
+ },
496
+ "Jon Terbush": {
497
+ "article_count": 3,
498
+ "bias": "Left",
499
+ "label_counts": {
500
+ "Left": 3
501
+ },
502
+ "majority_share": 1.0,
503
+ "notes": "Generated by aggregating article-level political bias labels by source.",
504
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
505
+ "source_url": "www.theweek.com"
506
+ },
507
+ "Jonah Goldberg": {
508
+ "article_count": 8,
509
+ "bias": "Center",
510
+ "label_counts": {
511
+ "Center": 8
512
+ },
513
+ "majority_share": 1.0,
514
+ "notes": "Generated by aggregating article-level political bias labels by source.",
515
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
516
+ "source_url": "www.nationalreview.com"
517
+ },
518
+ "Juan Williams": {
519
+ "article_count": 10,
520
+ "bias": "Left",
521
+ "label_counts": {
522
+ "Left": 10
523
+ },
524
+ "majority_share": 1.0,
525
+ "notes": "Generated by aggregating article-level political bias labels by source.",
526
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
527
+ "source_url": "www.foxnews.com"
528
+ },
529
+ "Julian Zelizer": {
530
+ "article_count": 10,
531
+ "bias": "Left",
532
+ "label_counts": {
533
+ "Left": 10
534
+ },
535
+ "majority_share": 1.0,
536
+ "notes": "Generated by aggregating article-level political bias labels by source.",
537
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
538
+ "source_url": "www.cnn.com"
539
+ },
540
+ "MarketWatch": {
541
+ "article_count": 106,
542
+ "bias": "Center",
543
+ "label_counts": {
544
+ "Center": 106
545
+ },
546
+ "majority_share": 1.0,
547
+ "notes": "Generated by aggregating article-level political bias labels by source.",
548
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
549
+ "source_url": "www.marketwatch.com"
550
+ },
551
+ "Media Matters": {
552
+ "article_count": 107,
553
+ "bias": "Left",
554
+ "label_counts": {
555
+ "Left": 107
556
+ },
557
+ "majority_share": 1.0,
558
+ "notes": "Generated by aggregating article-level political bias labels by source.",
559
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
560
+ "source_url": "www.mediamatters.org"
561
+ },
562
+ "Media Research Center": {
563
+ "article_count": 22,
564
+ "bias": "Center",
565
+ "label_counts": {
566
+ "Center": 22
567
+ },
568
+ "majority_share": 1.0,
569
+ "notes": "Generated by aggregating article-level political bias labels by source.",
570
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
571
+ "source_url": "www.newsbusters.org"
572
+ },
573
+ "Michael Barone": {
574
+ "article_count": 4,
575
+ "bias": "Center",
576
+ "label_counts": {
577
+ "Center": 4
578
+ },
579
+ "majority_share": 1.0,
580
+ "notes": "Generated by aggregating article-level political bias labels by source.",
581
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
582
+ "source_url": "www.townhall.com"
583
+ },
584
+ "Michael Brendan Dougherty": {
585
+ "article_count": 8,
586
+ "bias": "Center",
587
+ "label_counts": {
588
+ "Center": 8
589
+ },
590
+ "majority_share": 1.0,
591
+ "notes": "Generated by aggregating article-level political bias labels by source.",
592
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
593
+ "source_url": "www.theweek.com"
594
+ },
595
+ "Michael Goodwin": {
596
+ "article_count": 4,
597
+ "bias": "Center",
598
+ "label_counts": {
599
+ "Center": 4
600
+ },
601
+ "majority_share": 1.0,
602
+ "notes": "Generated by aggregating article-level political bias labels by source.",
603
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
604
+ "source_url": "www.nypost.com"
605
+ },
606
+ "Michelle Malkin": {
607
+ "article_count": 12,
608
+ "bias": "Center",
609
+ "label_counts": {
610
+ "Center": 12
611
+ },
612
+ "majority_share": 1.0,
613
+ "notes": "Generated by aggregating article-level political bias labels by source.",
614
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
615
+ "source_url": "www.townhall.com"
616
+ },
617
+ "Mother Jones": {
618
+ "article_count": 114,
619
+ "bias": "Left",
620
+ "label_counts": {
621
+ "Left": 114
622
+ },
623
+ "majority_share": 1.0,
624
+ "notes": "Generated by aggregating article-level political bias labels by source.",
625
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
626
+ "source_url": "www.motherjones.com"
627
+ },
628
+ "NBC News (Online)": {
629
+ "article_count": 38,
630
+ "bias": "Left",
631
+ "label_counts": {
632
+ "Left": 38
633
+ },
634
+ "majority_share": 1.0,
635
+ "notes": "Generated by aggregating article-level political bias labels by source.",
636
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
637
+ "source_url": "www.nbcnews.com"
638
+ },
639
+ "NBCNews.com": {
640
+ "article_count": 14,
641
+ "bias": "Left",
642
+ "label_counts": {
643
+ "Left": 14
644
+ },
645
+ "majority_share": 1.0,
646
+ "notes": "Generated by aggregating article-level political bias labels by source.",
647
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
648
+ "source_url": "www.nbcnews.com"
649
+ },
650
+ "NPR Editorial": {
651
+ "article_count": 8,
652
+ "bias": "Left",
653
+ "label_counts": {
654
+ "Left": 8
655
+ },
656
+ "majority_share": 1.0,
657
+ "notes": "Generated by aggregating article-level political bias labels by source.",
658
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
659
+ "source_url": "www.npr.org"
660
+ },
661
+ "NPR Online News": {
662
+ "article_count": 2007,
663
+ "bias": "Right",
664
+ "label_counts": {
665
+ "Right": 2007
666
+ },
667
+ "majority_share": 1.0,
668
+ "notes": "Generated by aggregating article-level political bias labels by source.",
669
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
670
+ "source_url": "www.npr.org"
671
+ },
672
+ "National Review": {
673
+ "article_count": 1013,
674
+ "bias": "Center",
675
+ "label_counts": {
676
+ "Center": 1013
677
+ },
678
+ "majority_share": 1.0,
679
+ "notes": "Generated by aggregating article-level political bias labels by source.",
680
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
681
+ "source_url": "www.nationalreview.com"
682
+ },
683
+ "New York Post": {
684
+ "article_count": 175,
685
+ "bias": "Center",
686
+ "label_counts": {
687
+ "Center": 175
688
+ },
689
+ "majority_share": 1.0,
690
+ "notes": "Generated by aggregating article-level political bias labels by source.",
691
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
692
+ "source_url": "www.nypost.com"
693
+ },
694
+ "New York Post (News)": {
695
+ "article_count": 5,
696
+ "bias": "Center",
697
+ "label_counts": {
698
+ "Center": 5
699
+ },
700
+ "majority_share": 1.0,
701
+ "notes": "Generated by aggregating article-level political bias labels by source.",
702
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
703
+ "source_url": "www.nypost.com"
704
+ },
705
+ "New York Post (Opinion)": {
706
+ "article_count": 5,
707
+ "bias": "Center",
708
+ "label_counts": {
709
+ "Center": 5
710
+ },
711
+ "majority_share": 1.0,
712
+ "notes": "Generated by aggregating article-level political bias labels by source.",
713
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
714
+ "source_url": "www.nypost.com"
715
+ },
716
+ "NewsBusters": {
717
+ "article_count": 44,
718
+ "bias": "Center",
719
+ "label_counts": {
720
+ "Center": 44
721
+ },
722
+ "majority_share": 1.0,
723
+ "notes": "Generated by aggregating article-level political bias labels by source.",
724
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
725
+ "source_url": "www.newsbusters.org"
726
+ },
727
+ "Newt Gingrich": {
728
+ "article_count": 14,
729
+ "bias": "Center",
730
+ "label_counts": {
731
+ "Center": 14
732
+ },
733
+ "majority_share": 1.0,
734
+ "notes": "Generated by aggregating article-level political bias labels by source.",
735
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
736
+ "source_url": "www.washingtontimes.com"
737
+ },
738
+ "Pew Research Center": {
739
+ "article_count": 27,
740
+ "bias": "Right",
741
+ "label_counts": {
742
+ "Right": 27
743
+ },
744
+ "majority_share": 1.0,
745
+ "notes": "Generated by aggregating article-level political bias labels by source.",
746
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
747
+ "source_url": "www.pewresearch.org"
748
+ },
749
+ "Politico": {
750
+ "article_count": 2493,
751
+ "bias": "Left",
752
+ "label_counts": {
753
+ "Left": 2493
754
+ },
755
+ "majority_share": 1.0,
756
+ "notes": "Generated by aggregating article-level political bias labels by source.",
757
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
758
+ "source_url": "www.politico.com"
759
+ },
760
+ "Rand Paul": {
761
+ "article_count": 8,
762
+ "bias": "Center",
763
+ "label_counts": {
764
+ "Center": 8
765
+ },
766
+ "majority_share": 1.0,
767
+ "notes": "Generated by aggregating article-level political bias labels by source.",
768
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
769
+ "source_url": "www.washingtontimes.com"
770
+ },
771
+ "Rich Lowry": {
772
+ "article_count": 44,
773
+ "bias": "Center",
774
+ "label_counts": {
775
+ "Center": 44
776
+ },
777
+ "majority_share": 1.0,
778
+ "notes": "Generated by aggregating article-level political bias labels by source.",
779
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
780
+ "source_url": "www.nationalreview.com"
781
+ },
782
+ "Ryan Cooper": {
783
+ "article_count": 6,
784
+ "bias": "Left",
785
+ "label_counts": {
786
+ "Left": 6
787
+ },
788
+ "majority_share": 1.0,
789
+ "notes": "Generated by aggregating article-level political bias labels by source.",
790
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
791
+ "source_url": "www.theweek.com"
792
+ },
793
+ "S.E. Cupp": {
794
+ "article_count": 4,
795
+ "bias": "Center",
796
+ "label_counts": {
797
+ "Center": 4
798
+ },
799
+ "majority_share": 1.0,
800
+ "notes": "Generated by aggregating article-level political bias labels by source.",
801
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
802
+ "source_url": "www.chicago.suntimes.com"
803
+ },
804
+ "Scientific American": {
805
+ "article_count": 35,
806
+ "bias": "Right",
807
+ "label_counts": {
808
+ "Left": 2,
809
+ "Right": 33
810
+ },
811
+ "majority_share": 0.9429,
812
+ "notes": "Generated by aggregating article-level political bias labels by source.",
813
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
814
+ "source_url": "www.scientificamerican.com"
815
+ },
816
+ "Slate": {
817
+ "article_count": 158,
818
+ "bias": "Left",
819
+ "label_counts": {
820
+ "Left": 158
821
+ },
822
+ "majority_share": 1.0,
823
+ "notes": "Generated by aggregating article-level political bias labels by source.",
824
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
825
+ "source_url": "www.slate.com"
826
+ },
827
+ "The Atlantic": {
828
+ "article_count": 172,
829
+ "bias": "Left",
830
+ "label_counts": {
831
+ "Left": 172
832
+ },
833
+ "majority_share": 1.0,
834
+ "notes": "Generated by aggregating article-level political bias labels by source.",
835
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
836
+ "source_url": "www.theatlantic.com"
837
+ },
838
+ "The Boston Globe": {
839
+ "article_count": 24,
840
+ "bias": "Left",
841
+ "label_counts": {
842
+ "Left": 24
843
+ },
844
+ "majority_share": 1.0,
845
+ "notes": "Generated by aggregating article-level political bias labels by source.",
846
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
847
+ "source_url": "www.bostonglobe.com"
848
+ },
849
+ "The Daily Wire": {
850
+ "article_count": 122,
851
+ "bias": "Center",
852
+ "label_counts": {
853
+ "Center": 122
854
+ },
855
+ "majority_share": 1.0,
856
+ "notes": "Generated by aggregating article-level political bias labels by source.",
857
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
858
+ "source_url": "www.dailywire.com"
859
+ },
860
+ "The Economist": {
861
+ "article_count": 28,
862
+ "bias": "Left",
863
+ "label_counts": {
864
+ "Left": 28
865
+ },
866
+ "majority_share": 1.0,
867
+ "notes": "Generated by aggregating article-level political bias labels by source.",
868
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
869
+ "source_url": "www.economist.com"
870
+ },
871
+ "The Flip Side": {
872
+ "article_count": 239,
873
+ "bias": "Right",
874
+ "label_counts": {
875
+ "Right": 239
876
+ },
877
+ "majority_share": 1.0,
878
+ "notes": "Generated by aggregating article-level political bias labels by source.",
879
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
880
+ "source_url": "www.theflipside.io"
881
+ },
882
+ "The Hill": {
883
+ "article_count": 1377,
884
+ "bias": "Right",
885
+ "label_counts": {
886
+ "Right": 1377
887
+ },
888
+ "majority_share": 1.0,
889
+ "notes": "Generated by aggregating article-level political bias labels by source.",
890
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
891
+ "source_url": "www.thehill.com"
892
+ },
893
+ "The Intercept": {
894
+ "article_count": 43,
895
+ "bias": "Left",
896
+ "label_counts": {
897
+ "Left": 43
898
+ },
899
+ "majority_share": 1.0,
900
+ "notes": "Generated by aggregating article-level political bias labels by source.",
901
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
902
+ "source_url": "www.theintercept.com"
903
+ },
904
+ "The Marshall Project": {
905
+ "article_count": 27,
906
+ "bias": "Right",
907
+ "label_counts": {
908
+ "Right": 27
909
+ },
910
+ "majority_share": 1.0,
911
+ "notes": "Generated by aggregating article-level political bias labels by source.",
912
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
913
+ "source_url": "www.themarshallproject.org"
914
+ },
915
+ "The Nation": {
916
+ "article_count": 32,
917
+ "bias": "Left",
918
+ "label_counts": {
919
+ "Left": 32
920
+ },
921
+ "majority_share": 1.0,
922
+ "notes": "Generated by aggregating article-level political bias labels by source.",
923
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
924
+ "source_url": "www.thenation.com"
925
+ },
926
+ "The New Yorker": {
927
+ "article_count": 21,
928
+ "bias": "Left",
929
+ "label_counts": {
930
+ "Left": 21
931
+ },
932
+ "majority_share": 1.0,
933
+ "notes": "Generated by aggregating article-level political bias labels by source.",
934
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
935
+ "source_url": "www.newyorker.com"
936
+ },
937
+ "The Week - News": {
938
+ "article_count": 119,
939
+ "bias": "Right",
940
+ "label_counts": {
941
+ "Right": 119
942
+ },
943
+ "majority_share": 1.0,
944
+ "notes": "Generated by aggregating article-level political bias labels by source.",
945
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
946
+ "source_url": "www.theweek.com"
947
+ },
948
+ "The Week - Opinion": {
949
+ "article_count": 24,
950
+ "bias": "Left",
951
+ "label_counts": {
952
+ "Left": 24
953
+ },
954
+ "majority_share": 1.0,
955
+ "notes": "Generated by aggregating article-level political bias labels by source.",
956
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
957
+ "source_url": "www.theweek.com"
958
+ },
959
+ "TheBlaze.com": {
960
+ "article_count": 219,
961
+ "bias": "Center",
962
+ "label_counts": {
963
+ "Center": 219
964
+ },
965
+ "majority_share": 1.0,
966
+ "notes": "Generated by aggregating article-level political bias labels by source.",
967
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
968
+ "source_url": "www.theblaze.com"
969
+ },
970
+ "ThinkProgress": {
971
+ "article_count": 33,
972
+ "bias": "Left",
973
+ "label_counts": {
974
+ "Left": 33
975
+ },
976
+ "majority_share": 1.0,
977
+ "notes": "Generated by aggregating article-level political bias labels by source.",
978
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
979
+ "source_url": "www.thinkprogress.org"
980
+ },
981
+ "Thomas Sowell": {
982
+ "article_count": 3,
983
+ "bias": "Center",
984
+ "label_counts": {
985
+ "Center": 3
986
+ },
987
+ "majority_share": 1.0,
988
+ "notes": "Generated by aggregating article-level political bias labels by source.",
989
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
990
+ "source_url": "www.townhall.com"
991
+ },
992
+ "Time Magazine": {
993
+ "article_count": 70,
994
+ "bias": "Left",
995
+ "label_counts": {
996
+ "Left": 70
997
+ },
998
+ "majority_share": 1.0,
999
+ "notes": "Generated by aggregating article-level political bias labels by source.",
1000
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
1001
+ "source_url": "www.time.com"
1002
+ },
1003
+ "Townhall": {
1004
+ "article_count": 1273,
1005
+ "bias": "Center",
1006
+ "label_counts": {
1007
+ "Center": 1273
1008
+ },
1009
+ "majority_share": 1.0,
1010
+ "notes": "Generated by aggregating article-level political bias labels by source.",
1011
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
1012
+ "source_url": "www.townhall.com"
1013
+ },
1014
+ "USA TODAY": {
1015
+ "article_count": 1785,
1016
+ "bias": "Right",
1017
+ "label_counts": {
1018
+ "Right": 1785
1019
+ },
1020
+ "majority_share": 1.0,
1021
+ "notes": "Generated by aggregating article-level political bias labels by source.",
1022
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
1023
+ "source_url": "www.usatoday.com"
1024
+ },
1025
+ "Vanity Fair": {
1026
+ "article_count": 157,
1027
+ "bias": "Left",
1028
+ "label_counts": {
1029
+ "Left": 157
1030
+ },
1031
+ "majority_share": 1.0,
1032
+ "notes": "Generated by aggregating article-level political bias labels by source.",
1033
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
1034
+ "source_url": "www.vanityfair.com"
1035
+ },
1036
+ "Vice": {
1037
+ "article_count": 67,
1038
+ "bias": "Left",
1039
+ "label_counts": {
1040
+ "Left": 67
1041
+ },
1042
+ "majority_share": 1.0,
1043
+ "notes": "Generated by aggregating article-level political bias labels by source.",
1044
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
1045
+ "source_url": "www.vice.com"
1046
+ },
1047
+ "Victor Hanson": {
1048
+ "article_count": 62,
1049
+ "bias": "Center",
1050
+ "label_counts": {
1051
+ "Center": 62
1052
+ },
1053
+ "majority_share": 1.0,
1054
+ "notes": "Generated by aggregating article-level political bias labels by source.",
1055
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
1056
+ "source_url": "www.nationalreview.com"
1057
+ },
1058
+ "Vox": {
1059
+ "article_count": 1460,
1060
+ "bias": "Left",
1061
+ "label_counts": {
1062
+ "Left": 1460
1063
+ },
1064
+ "majority_share": 1.0,
1065
+ "notes": "Generated by aggregating article-level political bias labels by source.",
1066
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
1067
+ "source_url": "www.vox.com"
1068
+ },
1069
+ "Wall Street Journal - Editorial": {
1070
+ "article_count": 7,
1071
+ "bias": "Center",
1072
+ "label_counts": {
1073
+ "Center": 7
1074
+ },
1075
+ "majority_share": 1.0,
1076
+ "notes": "Generated by aggregating article-level political bias labels by source.",
1077
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
1078
+ "source_url": "www.wsj.com"
1079
+ },
1080
+ "Wall Street Journal - News": {
1081
+ "article_count": 255,
1082
+ "bias": "Right",
1083
+ "label_counts": {
1084
+ "Right": 255
1085
+ },
1086
+ "majority_share": 1.0,
1087
+ "notes": "Generated by aggregating article-level political bias labels by source.",
1088
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
1089
+ "source_url": "www.wsj.com"
1090
+ },
1091
+ "Washington Post": {
1092
+ "article_count": 108,
1093
+ "bias": "Left",
1094
+ "label_counts": {
1095
+ "Left": 108
1096
+ },
1097
+ "majority_share": 1.0,
1098
+ "notes": "Generated by aggregating article-level political bias labels by source.",
1099
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
1100
+ "source_url": "www.washingtonpost.com"
1101
+ },
1102
+ "Washington Times": {
1103
+ "article_count": 2883,
1104
+ "bias": "Center",
1105
+ "label_counts": {
1106
+ "Center": 2883
1107
+ },
1108
+ "majority_share": 1.0,
1109
+ "notes": "Generated by aggregating article-level political bias labels by source.",
1110
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
1111
+ "source_url": "www.washingtontimes.com"
1112
+ },
1113
+ "Yahoo! News": {
1114
+ "article_count": 11,
1115
+ "bias": "Left",
1116
+ "label_counts": {
1117
+ "Left": 11
1118
+ },
1119
+ "majority_share": 1.0,
1120
+ "notes": "Generated by aggregating article-level political bias labels by source.",
1121
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
1122
+ "source_url": "www.news.yahoo.com"
1123
+ },
1124
+ "Yahoo! The 360": {
1125
+ "article_count": 80,
1126
+ "bias": "Right",
1127
+ "label_counts": {
1128
+ "Right": 80
1129
+ },
1130
+ "majority_share": 1.0,
1131
+ "notes": "Generated by aggregating article-level political bias labels by source.",
1132
+ "provenance": "siddharthmb/article-bias-prediction-media-splits",
1133
+ "source_url": "www.news.yahoo.com"
1134
+ }
1135
+ }
1136
+ }
src/data/source_bias.json ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sources": {
3
+ "Fox News": {
4
+ "bias": "Right",
5
+ "provenance": "manual_demo",
6
+ "source_url": null,
7
+ "article_count": null,
8
+ "label_counts": null,
9
+ "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
10
+ },
11
+ "Breitbart": {
12
+ "bias": "Right",
13
+ "provenance": "manual_demo",
14
+ "source_url": null,
15
+ "article_count": null,
16
+ "label_counts": null,
17
+ "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
18
+ },
19
+ "The Daily Wire": {
20
+ "bias": "Right",
21
+ "provenance": "manual_demo",
22
+ "source_url": null,
23
+ "article_count": null,
24
+ "label_counts": null,
25
+ "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
26
+ },
27
+ "New York Post": {
28
+ "bias": "Right",
29
+ "provenance": "manual_demo",
30
+ "source_url": null,
31
+ "article_count": null,
32
+ "label_counts": null,
33
+ "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
34
+ },
35
+ "TechRadar": {
36
+ "bias": "Right",
37
+ "provenance": "manual_demo",
38
+ "source_url": null,
39
+ "article_count": null,
40
+ "label_counts": null,
41
+ "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
42
+ },
43
+ "BBC News": {
44
+ "bias": "Center",
45
+ "provenance": "manual_demo",
46
+ "source_url": null,
47
+ "article_count": null,
48
+ "label_counts": null,
49
+ "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
50
+ },
51
+ "Reuters": {
52
+ "bias": "Center",
53
+ "provenance": "manual_demo",
54
+ "source_url": null,
55
+ "article_count": null,
56
+ "label_counts": null,
57
+ "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
58
+ },
59
+ "Associated Press": {
60
+ "bias": "Center",
61
+ "provenance": "manual_demo",
62
+ "source_url": null,
63
+ "article_count": null,
64
+ "label_counts": null,
65
+ "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
66
+ },
67
+ "Mental Floss": {
68
+ "bias": "Center",
69
+ "provenance": "manual_demo",
70
+ "source_url": null,
71
+ "article_count": null,
72
+ "label_counts": null,
73
+ "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
74
+ },
75
+ "New Scientist": {
76
+ "bias": "Center",
77
+ "provenance": "manual_demo",
78
+ "source_url": null,
79
+ "article_count": null,
80
+ "label_counts": null,
81
+ "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
82
+ },
83
+ "Nature.com": {
84
+ "bias": "Center",
85
+ "provenance": "manual_demo",
86
+ "source_url": null,
87
+ "article_count": null,
88
+ "label_counts": null,
89
+ "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
90
+ },
91
+ "Futurity: Research News": {
92
+ "bias": "Center",
93
+ "provenance": "manual_demo",
94
+ "source_url": null,
95
+ "article_count": null,
96
+ "label_counts": null,
97
+ "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
98
+ },
99
+ "Yahoo Entertainment": {
100
+ "bias": "Center",
101
+ "provenance": "manual_demo",
102
+ "source_url": null,
103
+ "article_count": null,
104
+ "label_counts": null,
105
+ "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
106
+ },
107
+ "NPR": {
108
+ "bias": "Center-Left",
109
+ "provenance": "manual_demo",
110
+ "source_url": null,
111
+ "article_count": null,
112
+ "label_counts": null,
113
+ "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
114
+ },
115
+ "The Guardian": {
116
+ "bias": "Center-Left",
117
+ "provenance": "manual_demo",
118
+ "source_url": null,
119
+ "article_count": null,
120
+ "label_counts": null,
121
+ "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
122
+ },
123
+ "Techdirt": {
124
+ "bias": "Center-Left",
125
+ "provenance": "manual_demo",
126
+ "source_url": null,
127
+ "article_count": null,
128
+ "label_counts": null,
129
+ "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
130
+ },
131
+ "Vox": {
132
+ "bias": "Center-Left",
133
+ "provenance": "manual_demo",
134
+ "source_url": null,
135
+ "article_count": null,
136
+ "label_counts": null,
137
+ "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
138
+ },
139
+ "Wired": {
140
+ "bias": "Center-Left",
141
+ "provenance": "manual_demo",
142
+ "source_url": null,
143
+ "article_count": null,
144
+ "label_counts": null,
145
+ "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
146
+ },
147
+ "Al Jazeera English": {
148
+ "bias": "Left",
149
+ "provenance": "manual_demo",
150
+ "source_url": null,
151
+ "article_count": null,
152
+ "label_counts": null,
153
+ "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
154
+ },
155
+ "Jezebel": {
156
+ "bias": "Left",
157
+ "provenance": "manual_demo",
158
+ "source_url": null,
159
+ "article_count": null,
160
+ "label_counts": null,
161
+ "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
162
+ },
163
+ "Gizmodo.com": {
164
+ "bias": "Left",
165
+ "provenance": "manual_demo",
166
+ "source_url": null,
167
+ "article_count": null,
168
+ "label_counts": null,
169
+ "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
170
+ },
171
+ "Gothamist": {
172
+ "bias": "Left",
173
+ "provenance": "manual_demo",
174
+ "source_url": null,
175
+ "article_count": null,
176
+ "label_counts": null,
177
+ "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
178
+ },
179
+ "The Intercept": {
180
+ "bias": "Left",
181
+ "provenance": "manual_demo",
182
+ "source_url": null,
183
+ "article_count": null,
184
+ "label_counts": null,
185
+ "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
186
+ }
187
+ },
188
+ "aliases": {
189
+ "ap news": "Associated Press",
190
+ "associated press": "Associated Press",
191
+ "bbc": "BBC News",
192
+ "bbc news": "BBC News",
193
+ "fox": "Fox News",
194
+ "fox news": "Fox News",
195
+ "gizmodo": "Gizmodo.com",
196
+ "npr": "NPR",
197
+ "reuters": "Reuters",
198
+ "the guardian": "The Guardian",
199
+ "wired": "Wired",
200
+ "yahoo entertainment": "Yahoo Entertainment"
201
+ }
202
+ }
src/db/__init__.py ADDED
File without changes
src/db/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (127 Bytes). View file
 
src/db/__pycache__/vector_store.cpython-313.pyc ADDED
Binary file (5.81 kB). View file
 
src/db/vector_store.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import hashlib
2
+ import chromadb
3
+ from sentence_transformers import SentenceTransformer
4
+ from src.config import CHROMA_DB_PATH, HF_TOKEN
5
+
6
+
7
+ CHROMA_DB_PATH.mkdir(parents=True, exist_ok=True)
8
+
9
+
10
class NewsVectorStore:
    """ChromaDB-backed store of news articles with sentence-transformer embeddings.

    The embedding model is cached on the class, so creating several stores in
    one process loads it only once.
    """

    # Process-wide cache for the SentenceTransformer instance.
    _model = None

    def __init__(self, collection_name = "news_articles"):
        """Open (or create) ``collection_name`` and make sure the embedding model is loaded."""
        print(f"Initializing ChromaDB at {CHROMA_DB_PATH}...")
        self.client = chromadb.PersistentClient(path=str(CHROMA_DB_PATH))
        self.collection = self.client.get_or_create_collection(
            name=collection_name,
            metadata={"hnsw:space": "cosine"}
        )

        if NewsVectorStore._model is None:
            print("Loading embedding model (this takes a few seconds)...")
            NewsVectorStore._model = SentenceTransformer(
                'all-MiniLM-L6-v2',
                token=HF_TOKEN,
            )

        self.embedding_model = NewsVectorStore._model
        print("ChromaDB initialized and embedding model loaded.")

    def store_articles(self, articles_data):
        """
        Embed and upsert a list of NewsAPI article dictionaries.

        Articles without a URL, or with almost no text, are skipped. The
        document id is the MD5 hex digest of the URL, so re-ingesting an
        article overwrites its previous entry instead of duplicating it.
        """
        if not articles_data:
            print("No articles to store.")
            return

        documents = []
        metadatas = []
        ids = []
        seen_ids = set()

        for article in articles_data:
            url = article.get('url')
            if not url:
                continue

            title = article.get('title') or ""
            desc = article.get('description') or ""
            content = article.get("content") or ""
            text_to_embed = f"{title}. {desc}. {content}"
            if len(text_to_embed.strip()) <= 5:
                continue

            # A single upsert call must not contain the same id twice; keep
            # the first occurrence of each URL within this batch.
            doc_id = hashlib.md5(url.encode()).hexdigest()
            if doc_id in seen_ids:
                continue
            seen_ids.add(doc_id)

            # "source" may be missing or null in the payload; metadata values
            # must never be None, so every field falls back to a string.
            source = article.get('source') or {}

            documents.append(text_to_embed)
            # Store metadata so we can display it later in the UI
            metadatas.append({
                "source": source.get('name') or 'Unknown',
                "url": url,
                "publishedAt": article.get('publishedAt') or '',
                "title": title,
                "description": desc
            })
            ids.append(doc_id)

        if not documents:
            print("No valid documents to store.")
            return

        # Generate embeddings
        print(f"Generating embeddings for {len(documents)} articles...")
        embeddings = self.embedding_model.encode(documents,batch_size=32).tolist()

        # Insert into ChromaDB
        self.collection.upsert(
            embeddings=embeddings,
            documents=documents,
            metadatas=metadatas,
            ids=ids
        )
        print(f"Successfully stored {len(documents)} articles in ChromaDB!")

    def query(self, topic: str, top_k: int = 10) -> list[dict]:
        """
        Embed the query topic and retrieve the top-k most similar articles.

        Returns a list of dicts with the stored text, the display metadata and
        ``similarity_score`` (``1 - distance``, rounded to 4 decimals). The
        list is empty when the collection is empty or ``top_k`` is not positive.
        """
        print(f"querying chromaDB for the topic: '{topic}'")

        # Never ask for more neighbours than the collection holds.
        n_results = min(top_k, self.collection.count())
        if n_results <= 0:
            print("Retrieved 0 articles.")
            return []

        query_embedding = self.embedding_model.encode([topic]).tolist()
        results = self.collection.query(
            query_embeddings=query_embedding,
            n_results=n_results,
            include=["documents", "metadatas", "distances"]
        )

        articles = []
        # Each result field holds one list per query; only one query was sent.
        for doc, meta, dist in zip(
            results["documents"][0],
            results["metadatas"][0],
            results["distances"][0]
        ):
            articles.append({
                "text": doc,
                "source": meta.get("source", "Unknown"),
                "url": meta.get("url", ""),
                "publishedAt": meta.get("publishedAt", ""),
                "similarity_score": round(1 - dist, 4),
                "title": meta.get("title", ""),
                "description": meta.get("description", ""),
            })

        print(f"Retrieved {len(articles)} articles.")
        return articles
113
+
114
if __name__ == "__main__":
    # Manual smoke check: report the collection size, then list every stored URL.
    db = NewsVectorStore()
    print(f"Total documents in collection: {db.collection.count()}")
    stored = db.collection.get()
    stored_urls = list(map(lambda meta: meta.get("url"), stored["metadatas"]))
    for stored_url in stored_urls:
        print(stored_url)
src/ingestion/__init__.py ADDED
File without changes
src/ingestion/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (134 Bytes). View file
 
src/ingestion/__pycache__/newsapi_client.cpython-313.pyc ADDED
Binary file (2.11 kB). View file
 
src/ingestion/newsapi_client.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.db.vector_store import NewsVectorStore
2
+ from newsapi import NewsApiClient
3
+ from src.config import NEWS_API_KEY
4
+
5
+
6
def fetch_news(topic="AI regulation", lang="en", page_size=10):
    """Fetch articles about ``topic`` from NewsAPI's "everything" endpoint.

    Args:
        topic: Search query sent to NewsAPI.
        lang: Language code used to filter results.
        page_size: Maximum number of articles to request.

    Returns:
        The list of article dictionaries from the response. An empty list is
        returned when nothing matched, when the API reported an error, or when
        the request itself failed, so callers always receive a list.

    Raises:
        RuntimeError: If ``NEWS_API_KEY`` is not configured.
    """
    if not NEWS_API_KEY:
        raise RuntimeError("NEWSAPI_KEY is not configured. Add it to .env before using /ingest.")

    news_instance = NewsApiClient(api_key=NEWS_API_KEY)
    try:
        print("Fetching latest articles...")
        response = news_instance.get_everything(q=topic, language=lang, sort_by='relevancy', page_size=page_size)

        if response.get('status') != 'ok':
            print(f"API Error: {response.get('message', 'Unknown error')}")
            return []

        articles = response.get('articles') or []
        if not articles:
            print("No articles found.")
            return []

        print(f"Successfully fetched {len(articles)} articles.")
        print("-" * 40)
        return articles

    except Exception as e:
        # Ingestion is best-effort: report the failure and hand back an empty
        # result instead of propagating network/client errors to the caller.
        print(f"Pipeline failed: {str(e)}")
        return []
32
+
33
def run_pipeline():
    """Fetch articles for the default topic and persist them in the vector store."""
    print("Fetching articles...")
    fetched = fetch_news()

    if fetched:
        print(f"Fetched {len(fetched)} articles.")

        store = NewsVectorStore()
        store.store_articles(fetched)

        print("Pipeline complete.")
    else:
        # Nothing came back, so there is nothing to embed or store.
        print("No articles found.")
47
+
48
+ if __name__ == "__main__":
49
+ run_pipeline()
src/models/__pycache__/dataset_prep.cpython-313.pyc ADDED
Binary file (1.01 kB). View file
 
src/models/__pycache__/test_inference.cpython-313.pyc ADDED
Binary file (5.48 kB). View file
 
src/models/__pycache__/train_model.cpython-313.pyc ADDED
Binary file (4.4 kB). View file
 
src/models/dataset_prep.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from datasets import load_dataset
3
+ from collections import Counter
4
+ from src.config import HF_ENDPOINT, HF_TOKEN
5
+
6
+
7
+ if HF_ENDPOINT:
8
+ os.environ["HF_ENDPOINT"] = HF_ENDPOINT
9
+
10
def fetch_and_inspect_data():
    """Load the BABE dataset and print the value counts of its two label columns.

    Any failure while loading or reading the dataset is reported on stdout
    rather than raised.
    """
    try:
        dataset = load_dataset("mediabiasgroup/BABE", token=HF_TOKEN)
        for column in ("label", "label_opinion"):
            print(Counter(dataset["train"][column]))
    except Exception as e:
        print(f"Failed to load dataset: {e}")
17
+
18
+ if __name__ == "__main__":
19
+ fetch_and_inspect_data()
src/models/test_inference.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ from transformers import RobertaTokenizer, RobertaForSequenceClassification
4
+ import torch.nn.functional as F
5
+ from src.config import BIAS_MODEL_PATH, HF_ENDPOINT, HF_TOKEN
6
+
7
+ if HF_ENDPOINT:
8
+ os.environ["HF_ENDPOINT"] = HF_ENDPOINT
9
+
10
class BiasPredictor:
    """Text-bias classifier backed by a RoBERTa sequence-classification model.

    The tokenizer and model are loaded once in ``__init__``; ``predict`` and
    ``predict_batch`` then run inference without gradient tracking.
    """

    def __init__(self, model_dir=BIAS_MODEL_PATH, base_model_name="roberta-base"):
        """Load tokenizer and model from ``model_dir`` onto GPU when available, else CPU.

        ``base_model_name`` is kept for backward compatibility; this
        constructor does not read it.
        """
        print("Loading model and tokenizer once...")
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = RobertaTokenizer.from_pretrained(str(model_dir), token=HF_TOKEN)
        self.model = RobertaForSequenceClassification.from_pretrained(str(model_dir), token=HF_TOKEN)
        self.model.to(self.device)
        self.model.eval()

        # Diagnostic dump of the classification-head parameters (name,
        # requires_grad flag, mean value) to help check the loaded weights.
        print("\n--- CLASSIFIER PARAM CHECK ---")
        for name, param in self.model.named_parameters():
            if "classifier" in name:
                print(name, param.requires_grad, param.data.mean().item())
        print("--- END CHECK ---\n")

        self.label_map = {
            0: "Not Biased",
            1: "Biased"
        }

    def predict(self, text):
        """Classify one text; returns the same dict ``predict_batch`` yields per item."""
        return self.predict_batch([text])[0]

    def predict_batch(self, texts: list[str]) -> list[dict]:
        """Classify several texts in a single forward pass.

        Args:
            texts: Texts to classify; each is truncated to 128 tokens.

        Returns:
            One dict per input, in order, with keys ``text``, ``class_id``,
            ``label``, ``confidence`` (probability of the predicted class) and
            ``probabilities`` (softmax over all classes). An empty input
            yields an empty list without touching the tokenizer or model.
        """
        texts = list(texts)
        if not texts:
            return []

        inputs = self.tokenizer(
            texts,
            return_tensors="pt",
            truncation=True,
            max_length=128,
            padding=True
        ).to(self.device)

        with torch.no_grad():
            outputs = self.model(**inputs)
            probs = F.softmax(outputs.logits, dim=-1)

        # Convert to plain Python values once instead of indexing the tensor per row.
        class_ids = probs.argmax(dim=-1).tolist()
        prob_rows = probs.tolist()

        results = []
        for text, class_id, row in zip(texts, class_ids, prob_rows):
            results.append({
                "text": text,
                "class_id": class_id,
                "label": self.label_map.get(class_id, "Unknown"),
                "confidence": row[class_id],
                "probabilities": row
            })

        return results
81
+
82
if __name__ == "__main__":
    predictor = BiasPredictor()

    # The same sample sentences feed both the batched and the one-at-a-time run.
    texts = [
        "The government brutally crushed the peaceful protesters.",
        "The government deployed police officers to the protest site.",
        "Scientists warn of accelerating climate change impacts.",
        "Climate alarmists continue pushing their radical agenda."
    ]

    print("\n--- BATCH TEST ---")
    results = predictor.predict_batch(texts)
    for outcome in results:
        print(f"[{outcome['label']}] ({outcome['confidence']:.4f}) {outcome['text'][:60]}")

    print("\n ------- Single pass test for each text seprately ----------")
    for sample in texts:
        outcome = predictor.predict(sample)
        print(f"[{outcome['label']}] ({outcome['confidence']:.4f}) {outcome['text'][:60]}")
src/models/train_model.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from datasets import load_dataset
3
+ from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments
4
+ from peft import LoraConfig, get_peft_model, TaskType
5
+ from sklearn.metrics import accuracy_score, f1_score
6
+ import torch
7
+ import numpy as np
8
+ from transformers import set_seed
9
+ from transformers import DataCollatorWithPadding
10
+ from datetime import datetime
11
+ from src.config import HF_ENDPOINT, HF_TOKEN, MODEL_DIR
12
+
13
+
14
+ if HF_ENDPOINT:
15
+ os.environ["HF_ENDPOINT"] = HF_ENDPOINT
16
+
17
+
18
+ set_seed(42)
19
+ np.random.seed(42)
20
+ torch.manual_seed(42)
21
+
22
+ output_dir=os.path.join(MODEL_DIR, "bias_checkpoints")
23
+ os.makedirs(MODEL_DIR, exist_ok=True)
24
+
25
+
26
def main():
    """Fine-tune ``roberta-base`` with LoRA on the BABE dataset and save the merged model.

    Steps: load BABE, split its train set 80/20, tokenize to at most 128
    tokens, train a 2-label classifier with LoRA adapters on the attention
    query/value projections for 3 epochs, then merge the adapters into the
    base weights and save model and tokenizer under a timestamped directory
    inside ``MODEL_DIR``.
    """
    dataset = load_dataset("mediabiasgroup/BABE", token=HF_TOKEN)
    dataset = dataset["train"].train_test_split(test_size=0.2, seed=42)
    model_name = "roberta-base"
    tokenizer = RobertaTokenizer.from_pretrained(model_name, token=HF_TOKEN)

    def tokenize_function(examples):
        # Padding is left to the data collator so each batch is padded only
        # to the length of its own longest example.
        return tokenizer(examples["text"], truncation=True, max_length=128)

    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
    tokenized_datasets = dataset.map(tokenize_function, batched = True)
    tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
    tokenized_datasets.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

    model = RobertaForSequenceClassification.from_pretrained(
        model_name,
        num_labels=2,
        token=HF_TOKEN,
    )

    peft_config = LoraConfig(
        task_type=TaskType.SEQ_CLS,
        r=8,
        lora_alpha=32,
        lora_dropout=0.1,
        target_modules=["query", "value"]
    )

    model = get_peft_model(model, peft_config)
    model.print_trainable_parameters()

    training_args = TrainingArguments(
        output_dir=output_dir,
        learning_rate=2e-4,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=3,
        eval_strategy="epoch",
        save_strategy="epoch",
        logging_steps=10,
        report_to="none"
    )

    def compute_metrics(eval_pred):
        # Reports accuracy and weighted F1 on the held-out split after each epoch.
        logits, labels = eval_pred
        preds = np.argmax(logits, axis=1)

        return {
            "accuracy": accuracy_score(labels, preds),
            "f1_weighted": f1_score(labels, preds, average="weighted")
        }

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_datasets["train"],
        eval_dataset=tokenized_datasets["test"],
        compute_metrics=compute_metrics,
        data_collator=data_collator
    )

    trainer.train()

    # Fold the LoRA adapter weights into the base model before saving, so the
    # saved directory holds a plain model rather than an adapter checkpoint.
    model = model.merge_and_unload()

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    save_path = os.path.join(MODEL_DIR, f"bias_lora_{timestamp}")

    model.save_pretrained(save_path)
    tokenizer.save_pretrained(save_path)
112
+
113
+ if __name__ == "__main__":
114
+ main()
src/ui/__init__.py ADDED
File without changes
src/ui/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (127 Bytes). View file
 
src/ui/__pycache__/app.cpython-313.pyc ADDED
Binary file (20.5 kB). View file
 
src/ui/app.py ADDED
@@ -0,0 +1,518 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections import defaultdict
2
+ from html import escape
3
+
4
+ import streamlit as st
5
+
6
+ from src.ui.components.article_card import inject_article_card_styles, render_article_card
7
+ from src.ui.components.charts import build_bias_distribution_chart, build_lean_bias_chart
8
+ from src.ui.services.api_client import NewsLensClient
9
+ from src.ui.services.api_client import DirectPipelineClient
10
+
11
+ MODEL_EVAL = {
12
+ "eval_accuracy": 0.8544,
13
+ "eval_f1_weighted": 0.8546,
14
+ "eval_loss": 0.3933,
15
+ "train_loss": 0.3888,
16
+ "epochs": 3,
17
+ }
18
+
19
+
20
+ st.set_page_config(
21
+ page_title="NewsLens",
22
+ layout="wide",
23
+ initial_sidebar_state="expanded",
24
+ )
25
+
26
+
27
+ def inject_styles() -> None:
28
+ st.markdown(
29
+ """
30
+ <style>
31
+ :root {
32
+ --nl-ink: #15202b;
33
+ --nl-muted: #64748b;
34
+ --nl-line: #d8dee9;
35
+ --nl-panel: #ffffff;
36
+ --nl-soft: #f6f8fb;
37
+ --nl-blue: #2457c5;
38
+ --nl-teal: #087f8c;
39
+ --nl-red: #c24138;
40
+ --nl-green: #247857;
41
+ }
42
+
43
+ .block-container {
44
+ padding-top: 1.4rem;
45
+ padding-bottom: 2rem;
46
+ max-width: 1240px;
47
+ }
48
+
49
+ [data-testid="stSidebar"] {
50
+ background: #f7f9fc;
51
+ border-right: 1px solid var(--nl-line);
52
+ }
53
+
54
+ [data-testid="stSidebar"] h1,
55
+ [data-testid="stSidebar"] h2,
56
+ [data-testid="stSidebar"] h3 {
57
+ color: var(--nl-ink);
58
+ }
59
+
60
+ h1, h2, h3 {
61
+ letter-spacing: 0;
62
+ }
63
+
64
+ .nl-topbar {
65
+ border-bottom: 1px solid var(--nl-line);
66
+ padding: 0 0 1rem 0;
67
+ margin-bottom: 1.2rem;
68
+ }
69
+
70
+ .nl-kicker {
71
+ color: var(--nl-teal);
72
+ font-size: 0.78rem;
73
+ font-weight: 800;
74
+ letter-spacing: 0.08em;
75
+ text-transform: uppercase;
76
+ margin-bottom: 0.25rem;
77
+ }
78
+
79
+ .nl-title {
80
+ color: var(--nl-ink);
81
+ font-size: 2.25rem;
82
+ font-weight: 800;
83
+ line-height: 1.1;
84
+ margin: 0;
85
+ }
86
+
87
+ .nl-subtitle {
88
+ color: var(--nl-muted);
89
+ max-width: 780px;
90
+ margin-top: 0.55rem;
91
+ font-size: 1rem;
92
+ line-height: 1.55;
93
+ }
94
+
95
+ .nl-empty {
96
+ background: linear-gradient(135deg, #f7f9fc 0%, #eef6f2 100%);
97
+ border: 1px solid var(--nl-line);
98
+ border-radius: 8px;
99
+ padding: 2.2rem;
100
+ margin-top: 1rem;
101
+ }
102
+
103
+ .nl-empty h3 {
104
+ color: var(--nl-ink);
105
+ margin: 0 0 0.5rem 0;
106
+ }
107
+
108
+ .nl-empty p {
109
+ color: var(--nl-muted);
110
+ margin: 0;
111
+ line-height: 1.6;
112
+ }
113
+
114
+ .nl-section-heading {
115
+ color: var(--nl-ink);
116
+ font-size: 1.05rem;
117
+ font-weight: 800;
118
+ margin: 1.1rem 0 0.45rem 0;
119
+ }
120
+
121
+ .nl-source-heading {
122
+ border-top: 1px solid var(--nl-line);
123
+ color: var(--nl-ink);
124
+ display: flex;
125
+ justify-content: space-between;
126
+ align-items: center;
127
+ gap: 1rem;
128
+ padding-top: 1rem;
129
+ margin: 1.1rem 0 0.5rem 0;
130
+ }
131
+
132
+ .nl-source-heading h3 {
133
+ font-size: 1.05rem;
134
+ margin: 0;
135
+ }
136
+
137
+ .nl-source-meta {
138
+ color: var(--nl-muted);
139
+ font-size: 0.85rem;
140
+ white-space: nowrap;
141
+ }
142
+
143
+ .nl-insight {
144
+ border-left: 4px solid var(--nl-teal);
145
+ background: #f5fbfa;
146
+ padding: 0.9rem 1rem;
147
+ color: var(--nl-ink);
148
+ margin: 0.25rem 0 0.9rem 0;
149
+ }
150
+
151
+ .nl-insight strong {
152
+ color: var(--nl-teal);
153
+ }
154
+
155
+ .nl-model-panel {
156
+ background: #f7f9fc;
157
+ border: 1px solid var(--nl-line);
158
+ border-radius: 8px;
159
+ padding: 1rem;
160
+ margin-top: 0.8rem;
161
+ }
162
+
163
+ .nl-model-panel h3 {
164
+ color: var(--nl-ink);
165
+ font-size: 1rem;
166
+ margin: 0 0 0.6rem 0;
167
+ }
168
+
169
+ .nl-model-grid {
170
+ display: grid;
171
+ gap: 0.65rem;
172
+ grid-template-columns: repeat(4, minmax(0, 1fr));
173
+ }
174
+
175
+ .nl-model-stat {
176
+ background: #ffffff;
177
+ border: 1px solid var(--nl-line);
178
+ border-radius: 8px;
179
+ padding: 0.75rem;
180
+ }
181
+
182
+ .nl-model-stat span {
183
+ color: var(--nl-muted);
184
+ display: block;
185
+ font-size: 0.72rem;
186
+ font-weight: 800;
187
+ letter-spacing: 0.04em;
188
+ text-transform: uppercase;
189
+ }
190
+
191
+ .nl-model-stat strong {
192
+ color: var(--nl-ink);
193
+ display: block;
194
+ font-size: 1.25rem;
195
+ margin-top: 0.2rem;
196
+ }
197
+
198
+ div[data-testid="stMetric"] {
199
+ background: var(--nl-panel);
200
+ border: 1px solid var(--nl-line);
201
+ border-radius: 8px;
202
+ padding: 0.85rem 1rem;
203
+ }
204
+
205
+ div[data-testid="stMetric"] label {
206
+ color: var(--nl-muted);
207
+ }
208
+
209
+ .stButton > button {
210
+ background: var(--nl-blue);
211
+ border: 1px solid var(--nl-blue);
212
+ color: #ffffff;
213
+ font-weight: 700;
214
+ min-height: 2.6rem;
215
+ width: 100%;
216
+ }
217
+
218
+ .stButton > button:hover {
219
+ background: #1f4dac;
220
+ border-color: #1f4dac;
221
+ color: #ffffff;
222
+ }
223
+
224
+ @media (max-width: 760px) {
225
+ .nl-title {
226
+ font-size: 1.75rem;
227
+ }
228
+
229
+ .nl-empty {
230
+ padding: 1.4rem;
231
+ }
232
+
233
+ .nl-source-heading {
234
+ align-items: flex-start;
235
+ flex-direction: column;
236
+ gap: 0.2rem;
237
+ }
238
+
239
+ .nl-model-grid {
240
+ grid-template-columns: repeat(2, minmax(0, 1fr));
241
+ }
242
+ }
243
+ </style>
244
+ """,
245
+ unsafe_allow_html=True,
246
+ )
247
+
248
+
249
def summarize_bias(summary: dict) -> tuple[int, int, float]:
    """Aggregate per-source counts into overall totals.

    Args:
        summary: Mapping of source name to a stats dict; the ``"total"`` and
            ``"Biased"`` entries are read, each defaulting to 0 when absent.

    Returns:
        ``(total_articles, biased_articles, biased_ratio)``. The ratio is
        always a float and is ``0.0`` when there are no articles.
    """
    total = 0
    biased = 0
    for source in summary.values():
        total += source.get("total", 0)
        biased += source.get("Biased", 0)

    # Guard the division so an empty result set does not raise ZeroDivisionError.
    ratio = biased / total if total else 0.0
    return total, biased, ratio
254
+
255
+
256
def insight_copy(ratio: float) -> str:
    """Build the HTML insight sentence for a biased-article ratio.

    The headline shows the ratio as a whole-number percentage; the trailing
    sentence depends on whether the ratio is at least 0.6, at most 0.4, or
    in between.
    """
    percent = int(round(ratio * 100))
    headline = f"<strong>{percent}% biased coverage.</strong>"

    if ratio >= 0.6:
        verdict = "The retrieved articles lean noticeably toward biased framing."
    elif ratio <= 0.4:
        verdict = "The article set is mostly neutral by the current model."
    else:
        verdict = "The result set is mixed and worth comparing source by source."

    return f"{headline} {verdict}"
263
+
264
+
265
+ def render_model_panel() -> None:
266
+ st.markdown(
267
+ f"""
268
+ <div class="nl-model-panel">
269
+ <h3>Model Snapshot</h3>
270
+ <div class="nl-model-grid">
271
+ <div class="nl-model-stat">
272
+ <span>Eval Accuracy</span>
273
+ <strong>{MODEL_EVAL["eval_accuracy"]:.1%}</strong>
274
+ </div>
275
+ <div class="nl-model-stat">
276
+ <span>Weighted F1</span>
277
+ <strong>{MODEL_EVAL["eval_f1_weighted"]:.1%}</strong>
278
+ </div>
279
+ <div class="nl-model-stat">
280
+ <span>Eval Loss</span>
281
+ <strong>{MODEL_EVAL["eval_loss"]:.3f}</strong>
282
+ </div>
283
+ <div class="nl-model-stat">
284
+ <span>Epochs</span>
285
+ <strong>{MODEL_EVAL["epochs"]}</strong>
286
+ </div>
287
+ </div>
288
+ </div>
289
+ """,
290
+ unsafe_allow_html=True,
291
+ )
292
+
293
+
294
+ def render_empty_state() -> None:
295
+ st.markdown(
296
+ """
297
+ <div class="nl-empty">
298
+ <h3>Run a topic analysis</h3>
299
+ <p>
300
+ Search a public issue, company, policy, or event to compare retrieved articles by source,
301
+ model label, and confidence. Results will appear as a dashboard with source-level evidence.
302
+ </p>
303
+ </div>
304
+ """,
305
+ unsafe_allow_html=True,
306
+ )
307
+
308
+
309
+ inject_styles()
310
+ inject_article_card_styles()
311
+ client = DirectPipelineClient()
312
+
313
+ if "analysis" not in st.session_state:
314
+ st.session_state.analysis = None
315
+ if "last_ingest" not in st.session_state:
316
+ st.session_state.last_ingest = None
317
+
318
+ with st.sidebar:
319
+ st.title("NewsLens")
320
+ st.caption("News bias analysis dashboard")
321
+
322
+ topic = st.text_input("Topic", value="climate change", max_chars=120)
323
+ top_k = st.slider("Articles to retrieve", min_value=1, max_value=20, value=10)
324
+ page_size = st.slider("Articles to ingest", min_value=5, max_value=50, value=15, step=5)
325
+
326
+ with st.expander("Advanced", expanded=False):
327
+ debug = st.checkbox("Show model internals", value=False)
328
+
329
+ ingest = st.button("Ingest latest articles")
330
+ analyze = st.button("Analyze topic", type="primary")
331
+
332
+ if st.session_state.last_ingest:
333
+ st.success(
334
+ f"Stored {st.session_state.last_ingest['articles_stored']} "
335
+ f"article(s) for {st.session_state.last_ingest['topic']}."
336
+ )
337
+
338
+ st.divider()
339
+ st.caption("Suggested searches")
340
+ sample_topics = ["climate change", "electric vehicles", "AI regulation", "public health"]
341
+ selected_sample = st.selectbox(
342
+ "Sample topics",
343
+ ["Use typed topic"] + sample_topics,
344
+ label_visibility="collapsed",
345
+ )
346
+
347
+ if selected_sample != "Use typed topic":
348
+ topic = selected_sample
349
+
350
+ st.markdown(
351
+ """
352
+ <div class="nl-topbar">
353
+ <div class="nl-kicker">Media Intelligence</div>
354
+ <h1 class="nl-title">NewsLens Bias Analyzer</h1>
355
+ <div class="nl-subtitle">
356
+ Compare how news sources frame a topic using retrieval, source metadata, and a text-bias classifier.
357
+ </div>
358
+ </div>
359
+ """,
360
+ unsafe_allow_html=True,
361
+ )
362
+
363
+ if analyze:
364
+ if not topic.strip():
365
+ st.error("Topic cannot be empty.")
366
+ st.stop()
367
+
368
+ with st.spinner("Analyzing coverage..."):
369
+ try:
370
+ st.session_state.analysis = client.analyze(topic.strip(), top_k)
371
+ except Exception as exc:
372
+ st.error(str(exc))
373
+ st.stop()
374
+
375
+ if ingest:
376
+ if not topic.strip():
377
+ st.error("Topic cannot be empty.")
378
+ st.stop()
379
+
380
+ with st.spinner("Fetching and indexing articles..."):
381
+ try:
382
+ st.session_state.last_ingest = client.ingest(topic.strip(), page_size)
383
+ st.session_state.analysis = client.analyze(topic.strip(), top_k)
384
+ except Exception as exc:
385
+ st.error(str(exc))
386
+ st.stop()
387
+
388
+ data = st.session_state.analysis
389
+
390
+ if data is None:
391
+ render_empty_state()
392
+ st.stop()
393
+
394
+ summary = data.get("summary", {})
395
+ results = data.get("results", [])
396
+ total, biased, bias_ratio = summarize_bias(summary)
397
+ neutral = max(total - biased, 0)
398
+ source_count = len(summary)
399
+
400
+ metric_cols = st.columns(4)
401
+ metric_cols[0].metric("Articles", total)
402
+ metric_cols[1].metric("Sources", source_count)
403
+ metric_cols[2].metric("Biased", biased)
404
+ metric_cols[3].metric("Not biased", neutral)
405
+
406
+ st.markdown(
407
+ f"""<div class="nl-insight">{insight_copy(bias_ratio)}</div>""",
408
+ unsafe_allow_html=True,
409
+ )
410
+
411
+ tab_overview, tab_articles, tab_model = st.tabs(["Overview", "Articles", "Model"])
412
+
413
+ with tab_overview:
414
+ st.markdown('<div class="nl-section-heading">Bias Distribution by Source</div>', unsafe_allow_html=True)
415
+ chart = build_bias_distribution_chart(summary)
416
+ if chart:
417
+ st.plotly_chart(chart, use_container_width=True, config={"displayModeBar": False})
418
+ else:
419
+ st.warning("No chart data available.")
420
+
421
+ st.markdown('<div class="nl-section-heading">Bias by Political Lean</div>', unsafe_allow_html=True)
422
+ st.caption("Are left-leaning or right-leaning sources more biased on this topic?")
423
+ lean_chart = build_lean_bias_chart(results)
424
+ if lean_chart:
425
+ st.plotly_chart(lean_chart, use_container_width=True, config={"displayModeBar": False})
426
+ else:
427
+ st.warning("Not enough source lean data.")
428
+
429
+ with tab_articles:
430
+ st.markdown('<div class="nl-section-heading">Evidence Articles</div>', unsafe_allow_html=True)
431
+
432
+ if not results:
433
+ st.warning("No articles found.")
434
+ else:
435
+ labels = sorted({article.get("text_label", "Unknown") for article in results})
436
+ leans = sorted({article.get("source_bias", "Unknown") for article in results})
437
+
438
+ filter_cols = st.columns([1, 1, 1])
439
+ selected_label = filter_cols[0].selectbox("Classification", ["All"] + labels)
440
+ selected_lean = filter_cols[1].selectbox("Source lean", ["All"] + leans)
441
+ sort_by = filter_cols[2].selectbox(
442
+ "Sort by",
443
+ ["Confidence", "Similarity", "Source"],
444
+ )
445
+
446
+ filtered_results = results
447
+ if selected_label != "All":
448
+ filtered_results = [
449
+ article for article in filtered_results
450
+ if article.get("text_label", "Unknown") == selected_label
451
+ ]
452
+ if selected_lean != "All":
453
+ filtered_results = [
454
+ article for article in filtered_results
455
+ if article.get("source_bias", "Unknown") == selected_lean
456
+ ]
457
+
458
+ if sort_by == "Confidence":
459
+ filtered_results = sorted(
460
+ filtered_results,
461
+ key=lambda article: article.get("confidence", 0),
462
+ reverse=True,
463
+ )
464
+ elif sort_by == "Similarity":
465
+ filtered_results = sorted(
466
+ filtered_results,
467
+ key=lambda article: article.get("similarity_score", 0),
468
+ reverse=True,
469
+ )
470
+ else:
471
+ filtered_results = sorted(
472
+ filtered_results,
473
+ key=lambda article: article.get("source", "Unknown source"),
474
+ )
475
+
476
+ st.caption(f"Showing {len(filtered_results)} of {len(results)} retrieved articles.")
477
+
478
+ if not filtered_results:
479
+ st.warning("No articles match the selected filters.")
480
+ else:
481
+ grouped = defaultdict(list)
482
+ for article in filtered_results:
483
+ grouped[article.get("source", "Unknown source")].append(article)
484
+
485
+ for source, articles in grouped.items():
486
+ source_bias = articles[0].get("source_bias", "Unknown")
487
+ st.markdown(
488
+ f"""
489
+ <div class="nl-source-heading">
490
+ <h3>{escape(str(source))}</h3>
491
+ <div class="nl-source-meta">{escape(str(source_bias))} source bias | {len(articles)} article(s)</div>
492
+ </div>
493
+ """,
494
+ unsafe_allow_html=True,
495
+ )
496
+ for article in articles:
497
+ render_article_card(article, debug=debug)
498
+
499
+ with tab_model:
500
+ render_model_panel()
501
+ st.markdown('<div class="nl-section-heading">Training Run</div>', unsafe_allow_html=True)
502
+ st.write(
503
+ "RoBERTa was fine-tuned for binary text-bias classification with LoRA. "
504
+ "The best supplied run finished at 85.44% evaluation accuracy and 85.46% weighted F1."
505
+ )
506
+ st.dataframe(
507
+ [
508
+ {"Epoch": 1, "Eval loss": 0.3576, "Accuracy": 0.8432, "Weighted F1": 0.8434},
509
+ {"Epoch": 2, "Eval loss": 0.3656, "Accuracy": 0.8512, "Weighted F1": 0.8512},
510
+ {"Epoch": 3, "Eval loss": 0.3933, "Accuracy": 0.8544, "Weighted F1": 0.8546},
511
+ ],
512
+ hide_index=True,
513
+ use_container_width=True,
514
+ )
515
+ st.info(
516
+ "Use these labels as decision support, not ground truth. Bias classification is sensitive "
517
+ "to dataset definitions, article excerpts, and source coverage."
518
+ )
src/ui/components/__init__.py ADDED
File without changes
src/ui/components/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (138 Bytes). View file
 
src/ui/components/__pycache__/article_card.cpython-313.pyc ADDED
Binary file (6.52 kB). View file
 
src/ui/components/__pycache__/charts.cpython-313.pyc ADDED
Binary file (4.32 kB). View file
 
src/ui/components/article_card.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from html import escape
2
+ import streamlit as st
3
+
4
+ ARTICLE_CARD_STYLES = """
5
+ <style>
6
+ .nl-article-card {
7
+ background: #ffffff;
8
+ border: 1px solid #d8dee9;
9
+ border-radius: 8px;
10
+ padding: 1rem;
11
+ margin: 0.65rem 0 0.9rem 0;
12
+ }
13
+
14
+ .nl-article-header {
15
+ display: flex;
16
+ align-items: flex-start;
17
+ justify-content: space-between;
18
+ gap: 1rem;
19
+ }
20
+
21
+ .nl-article-source {
22
+ color: #64748b;
23
+ font-size: 0.78rem;
24
+ font-weight: 700;
25
+ letter-spacing: 0.04em;
26
+ margin-bottom: 0.25rem;
27
+ text-transform: uppercase;
28
+ }
29
+
30
+ .nl-article-card h4 {
31
+ color: #15202b;
32
+ font-size: 1rem;
33
+ line-height: 1.35;
34
+ margin: 0;
35
+ }
36
+
37
+ .nl-article-card p {
38
+ color: #475569;
39
+ line-height: 1.55;
40
+ margin: 0.65rem 0 0.8rem 0;
41
+ }
42
+
43
+ .nl-label {
44
+ border: 1px solid;
45
+ border-radius: 999px;
46
+ font-size: 0.75rem;
47
+ font-weight: 800;
48
+ padding: 0.25rem 0.55rem;
49
+ white-space: nowrap;
50
+ }
51
+
52
+ .nl-confidence-row {
53
+ color: #64748b;
54
+ display: flex;
55
+ justify-content: space-between;
56
+ font-size: 0.82rem;
57
+ margin-bottom: 0.3rem;
58
+ }
59
+
60
+ .nl-confidence-row strong {
61
+ color: #15202b;
62
+ }
63
+
64
+ .nl-confidence-track {
65
+ background: #eef2f7;
66
+ border-radius: 999px;
67
+ height: 0.45rem;
68
+ overflow: hidden;
69
+ width: 100%;
70
+ }
71
+
72
+ .nl-confidence-track div {
73
+ height: 100%;
74
+ }
75
+
76
+ .nl-read-link {
77
+ color: #2457c5;
78
+ display: inline-block;
79
+ font-weight: 800;
80
+ margin-top: 0.75rem;
81
+ text-decoration: none;
82
+ }
83
+
84
+ .nl-read-link:hover {
85
+ color: #1f4dac;
86
+ text-decoration: underline;
87
+ }
88
+
89
+ @media (max-width: 760px) {
90
+ .nl-article-header {
91
+ flex-direction: column;
92
+ gap: 0.5rem;
93
+ }
94
+ }
95
+ </style>
96
+ """
97
+
98
+
99
def inject_article_card_styles() -> None:
    """Write the article-card stylesheet into the current Streamlit page."""
    st.markdown(body=ARTICLE_CARD_STYLES, unsafe_allow_html=True)
101
+
102
+
103
def _safe_text(value: object, fallback: str = "") -> str:
    """Return ``value`` as a stripped string, or ``fallback`` when it is None or blank."""
    cleaned = "" if value is None else str(value).strip()
    return cleaned if cleaned else fallback
108
+
109
+
110
def _label_style(label: str) -> tuple[str, str]:
    """Return the ``(accent, soft background)`` colour pair for a classification label.

    "biased" (in any letter case) gets the red pair; every other label gets
    the green pair.
    """
    is_biased = label.lower() == "biased"
    return ("#c24138", "#fff4f2") if is_biased else ("#247857", "#effaf5")
114
+
115
def smart_truncate(text, limit=80):
    """Shorten ``text`` to at most ``limit`` characters plus a trailing "...".

    Text that already fits is returned unchanged. Otherwise the first
    ``limit`` characters are cut back to the last space (when there is one)
    and "..." is appended.
    """
    if len(text) > limit:
        head, *_ = text[:limit].rsplit(" ", 1)
        return head + "..."
    return text
119
+
120
def _safe_href(url: str) -> str:
    """Return ``url`` when it is an absolute http(s) link, otherwise the inert ``"#"``."""
    from urllib.parse import urlsplit

    try:
        parts = urlsplit(url)
    except ValueError:
        return "#"
    if parts.scheme.lower() in ("http", "https") and parts.netloc:
        return url
    return "#"


def render_article_card(article: dict, debug: bool = False) -> None:
    """Render one analysed article as an HTML card in the Streamlit page.

    Args:
        article: Result dict. The keys read here are ``text_label``,
            ``confidence``, ``source``, ``source_bias``,
            ``source_bias_provenance``, ``url``, ``description``, ``text``,
            ``title`` and, in debug mode, ``similarity_score`` and
            ``probabilities``. Missing or blank values get display fallbacks.
        debug: When true, add an expander showing the similarity score and
            the class probabilities.
    """
    label = _safe_text(article.get("text_label"), "Unknown")
    confidence = float(article.get("confidence", 0) or 0)
    source = _safe_text(article.get("source"), "Unknown source")
    source_bias = _safe_text(article.get("source_bias"), "Unknown bias")
    source_bias_provenance = _safe_text(article.get("source_bias_provenance"))
    source_meta = f"{source} / {source_bias}"
    # The demo provenance tag is not shown; any other provenance is appended.
    if source_bias_provenance and source_bias_provenance != "manual_demo":
        source_meta = f"{source_meta} / {source_bias_provenance}"

    # The URL comes from external article data. HTML-escaping alone does not
    # neutralise schemes such as javascript:, so only http(s) links are kept.
    url = _safe_href(_safe_text(article.get("url"), "#"))

    description = _safe_text(article.get("description"))
    fallback_text = _safe_text(article.get("text"))[:280]
    excerpt = description or fallback_text or "No article excerpt was returned by the API."
    title = _safe_text(article.get("title")) or smart_truncate(excerpt, 80)

    accent, soft = _label_style(label)
    # Clamp to [0, 1] so the bar width stays within 0-100%.
    confidence_pct = max(0, min(confidence, 1)) * 100

    # The markup is kept flush-left so Markdown can never read it as an
    # indented code block.
    st.markdown(
        f"""
<div class="nl-article-card">
<div class="nl-article-header">
<div>
<div class="nl-article-source">{escape(source_meta)}</div>
<h4>{escape(title)}</h4>
</div>
<span class="nl-label" style="color:{accent}; background:{soft}; border-color:{accent};">
{escape(label)}
</span>
</div>
<p>{escape(excerpt)}</p>
<div class="nl-confidence-row">
<span>Confidence</span>
<strong>{confidence:.2f}</strong>
</div>
<div class="nl-confidence-track">
<div style="width:{confidence_pct:.0f}%; background:{accent};"></div>
</div>
<a class="nl-read-link" href="{escape(url)}" target="_blank" rel="noopener noreferrer">
Read article
</a>
</div>
""",
        unsafe_allow_html=True,
    )

    if debug:
        with st.expander("Model internals", expanded=False):
            similarity = article.get("similarity_score")
            # A null score cannot be formatted with :.4f, so it is skipped.
            if similarity is not None:
                st.caption(f"Similarity score: {similarity:.4f}")
            if "probabilities" in article:
                st.json(article["probabilities"])
src/ui/components/charts.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import plotly.express as px
3
+
4
+
5
def build_bias_distribution_chart(summary: dict):
    """Build a grouped bar chart of biased vs. not-biased counts per source.

    Args:
        summary: Mapping of source name to a stats mapping. Each stats mapping
            is read for ``"Biased"``, ``"Not Biased"`` (falling back to
            ``"Not_Biased"``) and ``"total"``; missing counts default to 0 and
            a missing total defaults to the sum of the two counts.

    Returns:
        A Plotly figure with sources ordered by descending total, or ``None``
        when ``summary`` yields no rows.
    """

    def _as_row(name, counts):
        n_biased = counts.get("Biased", 0)
        n_not_biased = counts.get("Not Biased", counts.get("Not_Biased", 0))
        return {
            "Source": name,
            "Biased": n_biased,
            "Not biased": n_not_biased,
            "Total": counts.get("total", n_biased + n_not_biased),
        }

    frame = pd.DataFrame([_as_row(name, counts) for name, counts in summary.items()])
    if frame.empty:
        return None

    # Sort wide, then reshape to long form so each classification is its own bar.
    long_frame = frame.sort_values("Total", ascending=False).melt(
        id_vars=["Source", "Total"],
        value_vars=["Biased", "Not biased"],
        var_name="Classification",
        value_name="Articles",
    )

    palette = {
        "Biased": "#c24138",
        "Not biased": "#247857",
    }
    chart = px.bar(
        long_frame,
        x="Source",
        y="Articles",
        color="Classification",
        barmode="group",
        text="Articles",
        color_discrete_map=palette,
    )

    chart.update_traces(
        textposition="outside",
        marker_line_width=0,
        cliponaxis=False,
    )

    legend_style = {
        "orientation": "h",
        "yanchor": "bottom",
        "y": 1.02,
        "xanchor": "right",
        "x": 1,
        "title": None,
    }
    x_axis_style = {
        "title": None,
        "tickangle": -20,
        "showgrid": False,
        "linecolor": "#d8dee9",
    }
    y_axis_style = {
        "title": "Articles",
        "gridcolor": "#e8edf4",
        "zeroline": False,
    }
    chart.update_layout(
        height=430,
        margin={"l": 12, "r": 12, "t": 24, "b": 12},
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        bargap=0.26,
        legend=legend_style,
        xaxis=x_axis_style,
        yaxis=y_axis_style,
        font={"color": "#15202b", "family": "Arial, sans-serif"},
    )

    return chart
82
+
83
def _count_labels_by_lean(results):
    """Tally "Biased" / "Not biased" article counts per source lean."""
    from collections import defaultdict

    # Accept both spellings of the negative label, matching the fallback used
    # by build_bias_distribution_chart ("Not Biased" / "Not_Biased").
    label_to_column = {
        "Biased": "Biased",
        "Not Biased": "Not biased",
        "Not_Biased": "Not biased",
    }

    tallies = defaultdict(lambda: {"Biased": 0, "Not biased": 0})
    for article in results or []:
        # A present-but-empty/None lean is treated the same as a missing one.
        lean = article.get("source_bias") or "Unknown"
        column = label_to_column.get(article.get("text_label"))
        if column is not None:
            tallies[lean][column] += 1
    return tallies


def build_lean_bias_chart(results: list) -> object:
    """Build a grouped bar chart of biased vs. not-biased counts per source lean.

    Args:
        results: Article mappings. Each is read for ``"source_bias"`` (the
            lean; missing, ``None`` or empty values count as ``"Unknown"``)
            and ``"text_label"`` (``"Biased"``, ``"Not Biased"`` or
            ``"Not_Biased"``; any other label is ignored). ``None`` is treated
            as an empty list.

    Returns:
        A Plotly figure with leans ordered left-to-right, or ``None`` when no
        article carries a recognised label.
    """
    tallies = _count_labels_by_lean(results)
    if not tallies:
        return None

    rows = [
        {"Lean": lean, "Biased": counts["Biased"], "Not biased": counts["Not biased"]}
        for lean, counts in tallies.items()
    ]
    df = pd.DataFrame(rows)

    known_leans = ["Left", "Center-Left", "Center", "Center-Right", "Right"]
    # pd.Categorical maps values outside `categories` to NaN, which would drop
    # those bars from the chart. Unexpected leans are therefore added to the
    # ordering (after the known ones, with "Unknown" kept last).
    extra_leans = sorted(
        (lean for lean in tallies if lean not in known_leans and lean != "Unknown"),
        key=str,
    )
    lean_order = [*known_leans, *extra_leans, "Unknown"]
    df["Lean"] = pd.Categorical(df["Lean"], categories=lean_order, ordered=True)
    df = df.sort_values("Lean")

    df_melted = df.melt(
        id_vars="Lean",
        value_vars=["Biased", "Not biased"],
        var_name="Classification",
        value_name="Articles",
    )

    fig = px.bar(
        df_melted,
        x="Lean",
        y="Articles",
        color="Classification",
        barmode="group",
        text="Articles",
        color_discrete_map={"Biased": "#c24138", "Not biased": "#247857"},
    )

    fig.update_traces(textposition="outside", marker_line_width=0, cliponaxis=False)
    fig.update_layout(
        height=380,
        margin=dict(l=12, r=12, t=24, b=12),
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        bargap=0.3,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1, title=None),
        xaxis=dict(title=None, showgrid=False, linecolor="#d8dee9"),
        yaxis=dict(title="Articles", gridcolor="#e8edf4", zeroline=False),
        font=dict(color="#15202b", family="Arial, sans-serif"),
    )

    return fig
src/ui/services/__init__.py ADDED
File without changes
src/ui/services/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (136 Bytes). View file
 
src/ui/services/__pycache__/api_client.cpython-313.pyc ADDED
Binary file (3.41 kB). View file
 
src/ui/services/api_client.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from src.config import API_BASE_URL
3
+
4
class NewsLensClient:
    """HTTP client for the NewsLens API's ``/analyze`` and ``/ingest`` endpoints."""

    def __init__(self, base_url: str = API_BASE_URL):
        """Store the API root URL that endpoint paths are appended to."""
        self.base_url = base_url

    def _post(self, endpoint: str, payload: dict, timeout: int) -> dict:
        """POST ``payload`` as JSON to ``endpoint`` and return the decoded body.

        Raises:
            RuntimeError: If the request fails, the server answers with an
                error status, or the response body is not valid JSON. The
                underlying exception is attached as ``__cause__``.
        """
        # Tolerate a configured base URL with a trailing slash so the request
        # path never becomes "//analyze".
        url = f"{self.base_url.rstrip('/')}/{endpoint}"

        try:
            response = requests.post(url, json=payload, timeout=timeout)
            response.raise_for_status()
            return response.json()
        # ValueError covers JSON decode failures on requests releases where the
        # decode error is not a RequestException subclass.
        except (requests.exceptions.RequestException, ValueError) as e:
            raise RuntimeError(f"API request failed: {e}") from e

    def analyze(self, topic: str, top_k: int = 10) -> dict:
        """Request an analysis of ``topic`` limited to ``top_k`` results.

        Returns:
            The decoded JSON response from ``/analyze``.

        Raises:
            RuntimeError: If the API call fails.
        """
        return self._post("analyze", {"topic": topic, "top_k": top_k}, timeout=30)

    def ingest(self, topic: str, page_size: int = 10) -> dict:
        """Ask the API to ingest up to ``page_size`` articles for ``topic``.

        Returns:
            The decoded JSON response from ``/ingest``.

        Raises:
            RuntimeError: If the API call fails.
        """
        return self._post("ingest", {"topic": topic, "page_size": page_size}, timeout=45)
35
+
36
class DirectPipelineClient:
    """In-process client that calls the analysis pipeline directly.

    Exposes the same ``analyze`` / ``ingest`` methods as ``NewsLensClient``.
    """

    def __init__(self):
        """Create the pipeline; its module is imported here, not at load time."""
        from src.analysis.rag_pipeline import NewsAnalysisPipeline

        self.pipeline = NewsAnalysisPipeline()

    def analyze(self, topic: str, top_k: int = 10) -> dict:
        """Delegate to the pipeline's ``analyze`` and return its result."""
        outcome = self.pipeline.analyze(topic, top_k)
        return outcome

    def ingest(self, topic: str, page_size: int = 10) -> dict:
        """Fetch articles for ``topic`` and hand them to the vector store.

        Returns:
            A status dict with the topic and the number of fetched articles.
            ``articles_stored`` repeats the fetched count; the store's own
            result is not consulted.
        """
        from src.ingestion.newsapi_client import fetch_news

        fetched = fetch_news(topic=topic, page_size=page_size)
        self.pipeline.vector_store.store_articles(fetched)

        # Measured after storing, exactly as the counts were taken before.
        count = len(fetched)
        summary = {
            "topic": topic,
            "articles_fetched": count,
            "articles_stored": count,
            "status": "success",
        }
        return summary