Spaces:
Sleeping
Sleeping
Commit ·
208266a
1
Parent(s): 9bb094d
Add NewsLens Streamlit app
Browse files- Dockerfile +3 -9
- requirements.txt +16 -2
- src/__init__.py +0 -0
- src/__pycache__/__init__.cpython-313.pyc +0 -0
- src/__pycache__/config.cpython-313.pyc +0 -0
- src/analysis/__init__.py +0 -0
- src/analysis/__pycache__/__init__.cpython-313.pyc +0 -0
- src/analysis/__pycache__/rag_pipeline.cpython-313.pyc +0 -0
- src/analysis/__pycache__/source_bias.cpython-313.pyc +0 -0
- src/analysis/rag_pipeline.py +74 -0
- src/analysis/source_bias.py +47 -0
- src/api/__init__.py +0 -0
- src/api/__pycache__/__init__.cpython-313.pyc +0 -0
- src/api/__pycache__/main.cpython-313.pyc +0 -0
- src/api/__pycache__/models.cpython-313.pyc +0 -0
- src/api/__pycache__/routes.cpython-313.pyc +0 -0
- src/api/main.py +29 -0
- src/api/models.py +45 -0
- src/api/routes.py +68 -0
- src/config.py +35 -0
- src/data/source_bias.generated.json +1136 -0
- src/data/source_bias.json +202 -0
- src/db/__init__.py +0 -0
- src/db/__pycache__/__init__.cpython-313.pyc +0 -0
- src/db/__pycache__/vector_store.cpython-313.pyc +0 -0
- src/db/vector_store.py +120 -0
- src/ingestion/__init__.py +0 -0
- src/ingestion/__pycache__/__init__.cpython-313.pyc +0 -0
- src/ingestion/__pycache__/newsapi_client.cpython-313.pyc +0 -0
- src/ingestion/newsapi_client.py +49 -0
- src/models/__pycache__/dataset_prep.cpython-313.pyc +0 -0
- src/models/__pycache__/test_inference.cpython-313.pyc +0 -0
- src/models/__pycache__/train_model.cpython-313.pyc +0 -0
- src/models/dataset_prep.py +19 -0
- src/models/test_inference.py +105 -0
- src/models/train_model.py +114 -0
- src/ui/__init__.py +0 -0
- src/ui/__pycache__/__init__.cpython-313.pyc +0 -0
- src/ui/__pycache__/app.cpython-313.pyc +0 -0
- src/ui/app.py +518 -0
- src/ui/components/__init__.py +0 -0
- src/ui/components/__pycache__/__init__.cpython-313.pyc +0 -0
- src/ui/components/__pycache__/article_card.cpython-313.pyc +0 -0
- src/ui/components/__pycache__/charts.cpython-313.pyc +0 -0
- src/ui/components/article_card.py +172 -0
- src/ui/components/charts.py +142 -0
- src/ui/services/__init__.py +0 -0
- src/ui/services/__pycache__/__init__.cpython-313.pyc +0 -0
- src/ui/services/__pycache__/api_client.cpython-313.pyc +0 -0
- src/ui/services/api_client.py +53 -0
Dockerfile
CHANGED
|
@@ -1,20 +1,14 @@
|
|
| 1 |
-
FROM python:3.13
|
| 2 |
-
|
| 3 |
WORKDIR /app
|
| 4 |
-
|
| 5 |
RUN apt-get update && apt-get install -y \
|
| 6 |
build-essential \
|
| 7 |
curl \
|
| 8 |
git \
|
| 9 |
&& rm -rf /var/lib/apt/lists/*
|
| 10 |
-
|
| 11 |
COPY requirements.txt ./
|
| 12 |
COPY src/ ./src/
|
| 13 |
-
|
| 14 |
RUN pip3 install -r requirements.txt
|
| 15 |
-
|
| 16 |
EXPOSE 8501
|
| 17 |
-
|
| 18 |
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
|
| 19 |
-
|
| 20 |
-
ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
|
|
|
|
| 1 |
+
FROM python:3.13-slim

WORKDIR /app

# System packages; curl is required by the HEALTHCHECK below.
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    git \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies BEFORE copying the application source so that
# editing files under src/ does not invalidate the (slow) pip install layer.
COPY requirements.txt ./
RUN pip3 install --no-cache-dir -r requirements.txt

COPY src/ ./src/

# Puts /app on the import path so the `src.*` absolute imports used by the
# application modules resolve.
ENV PYTHONPATH=/app

EXPOSE 8501

HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health

ENTRYPOINT ["streamlit", "run", "src/ui/app.py", "--server.port=8501", "--server.address=0.0.0.0"]
|
|
|
requirements.txt
CHANGED
|
@@ -1,3 +1,17 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
pandas
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accelerate
|
| 2 |
+
chromadb
|
| 3 |
+
datasets
|
| 4 |
+
fastapi
|
| 5 |
+
newsapi-python
|
| 6 |
+
numpy
|
| 7 |
pandas
|
| 8 |
+
peft
|
| 9 |
+
plotly
|
| 10 |
+
python-dotenv
|
| 11 |
+
requests
|
| 12 |
+
scikit-learn
|
| 13 |
+
sentence-transformers
|
| 14 |
+
streamlit
|
| 15 |
+
torch
|
| 16 |
+
transformers
|
| 17 |
+
uvicorn
|
src/__init__.py
ADDED
|
File without changes
|
src/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (124 Bytes). View file
|
|
|
src/__pycache__/config.cpython-313.pyc
ADDED
|
Binary file (2.22 kB). View file
|
|
|
src/analysis/__init__.py
ADDED
|
File without changes
|
src/analysis/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (133 Bytes). View file
|
|
|
src/analysis/__pycache__/rag_pipeline.cpython-313.pyc
ADDED
|
Binary file (3.97 kB). View file
|
|
|
src/analysis/__pycache__/source_bias.cpython-313.pyc
ADDED
|
Binary file (2.29 kB). View file
|
|
|
src/analysis/rag_pipeline.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from src.db.vector_store import NewsVectorStore
|
| 2 |
+
from src.models.test_inference import BiasPredictor
|
| 3 |
+
from src.analysis.source_bias import get_source_bias, get_source_record
|
| 4 |
+
from collections import defaultdict
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class NewsAnalysisPipeline:
    """Retrieve stored articles for a topic and label each one for bias.

    Combines three collaborators: the vector store (retrieval), the text-level
    bias predictor, and the source-level bias registry lookup.
    """

    def __init__(self):
        """Create the vector store and bias predictor used by ``analyze``."""
        print("Initializing NewsLens pipeline...")
        self.vector_store = NewsVectorStore()
        self.bias_predictor = BiasPredictor()
        print("Pipeline ready.")

    def analyze(self, topic: str, top_k: int = 10) -> dict:
        """Return per-article bias results plus a per-source summary.

        Args:
            topic: Query text passed to the vector store.
            top_k: Number of articles requested from the vector store.

        Returns:
            A dict with keys ``topic``, ``results`` (one dict per retrieved
            article, in retrieval order) and ``summary`` (label counts keyed
            by source). When the store returns nothing, ``results`` is ``[]``
            and ``summary`` is ``{}``.
        """
        articles = self.vector_store.query(topic, top_k=top_k)
        if not articles:
            return {"topic": topic, "results": [], "summary": {}}

        # All texts go to the predictor in a single batched call.
        predictions = self.bias_predictor.predict_batch(
            [article["text"] for article in articles]
        )

        results = []
        for article, prediction in zip(articles, predictions):
            source_record = get_source_record(article["source"])
            probabilities = prediction["probabilities"]
            results.append({
                "source": article["source"],
                # Registry records are read defensively: a hand-edited entry
                # without these keys must not abort the whole analysis.
                "source_bias": source_record.get("bias", "Unknown"),
                "source_bias_provenance": source_record.get("provenance"),
                "url": article["url"],
                "title": article.get("title", ""),
                "description": article.get("description", ""),
                "publishedAt": article.get("publishedAt", ""),
                "text": article["text"],
                "text_label": prediction["label"],
                "confidence": prediction["confidence"],
                "probabilities": {
                    "Not Biased": round(probabilities[0], 4),
                    "Biased": round(probabilities[1], 4),
                },
                "similarity_score": article["similarity_score"],
            })

        return {
            "topic": topic,
            "results": results,
            "summary": self._summarize(results),
        }

    @staticmethod
    def _summarize(results: list) -> dict:
        """Count text labels per source across ``results``."""
        summary = defaultdict(lambda: {
            "source_bias": "Unknown",
            "Biased": 0,
            "Not Biased": 0,
            "total": 0,
        })
        for result in results:
            counts = summary[result["source"]]
            counts["source_bias"] = result["source_bias"]
            label = result["text_label"]
            # A label outside the two pre-seeded keys gets its own counter
            # instead of raising KeyError.
            counts[label] = counts.get(label, 0) + 1
            counts["total"] += 1
        return dict(summary)
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
if __name__ == "__main__":
    # Manual smoke run: analyze one fixed topic and print the outcome.
    demo = NewsAnalysisPipeline()
    report = demo.analyze("climate change", top_k=10)

    print(f"\n=== Results for: '{report['topic']}' ===")
    for item in report["results"]:
        print(f"[{item['text_label']}] ({item['confidence']:.2f}) | Source lean: {item['source_bias']} — {item['source']}: {item['text'][:80]}...")

    print("\n=== Source Summary ===")
    for outlet, tally in report["summary"].items():
        print(f"{outlet} ({tally['source_bias']}): Biased={tally['Biased']}, Not Biased={tally['Not Biased']}, Total={tally['total']}")
|
src/analysis/source_bias.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
from functools import lru_cache
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Any
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
# The registry JSON sits in the sibling "data" directory, one level above
# this module's own directory.
REGISTRY_PATH = Path(__file__).resolve().parent.parent.joinpath("data", "source_bias.json")


@lru_cache(maxsize=1)
def load_source_registry() -> dict[str, Any]:
    """Parse the source-bias registry file and memoize the parsed result.

    The file is read as UTF-8 JSON on the first call; later calls return the
    same cached object, so callers should treat it as read-only.
    """
    raw = REGISTRY_PATH.read_text(encoding="utf-8")
    return json.loads(raw)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def normalize_source_name(source: str) -> str:
    """Return ``source`` lower-cased with whitespace runs collapsed to one space.

    A falsy ``source`` (``None`` or ``""``) normalizes to the empty string.
    """
    text = source if source else ""
    tokens = text.strip().lower().split()
    return " ".join(tokens)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def get_source_record(source: str) -> dict[str, Any]:
    """Look up the registry record for a news source.

    The normalized name is resolved through the registry's ``aliases`` map;
    a name with no alias is looked up verbatim in ``sources``.

    Returns:
        The matching record with a ``name`` key added, or a placeholder
        record (bias ``"Unknown"``, provenance ``"unmatched"``) when the
        registry has no entry.
    """
    registry = load_source_registry()
    alias_map = registry.get("aliases", {})
    known_sources = registry.get("sources", {})

    canonical = alias_map.get(normalize_source_name(source), source)
    entry = known_sources.get(canonical)
    if entry is not None:
        return {"name": canonical, **entry}

    return {
        "name": source or "Unknown",
        "bias": "Unknown",
        "provenance": "unmatched",
        "source_url": None,
        "article_count": None,
        "label_counts": None,
        "notes": "No source-level registry match found.",
    }
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def get_source_bias(source: str) -> str:
    """Return the registry bias label for ``source`` as a string.

    Falls back to ``"Unknown"`` when the record carries no ``bias`` key.
    """
    record = get_source_record(source)
    bias = record.get("bias", "Unknown")
    return str(bias)
|
src/api/__init__.py
ADDED
|
File without changes
|
src/api/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (128 Bytes). View file
|
|
|
src/api/__pycache__/main.cpython-313.pyc
ADDED
|
Binary file (1.2 kB). View file
|
|
|
src/api/__pycache__/models.cpython-313.pyc
ADDED
|
Binary file (2.91 kB). View file
|
|
|
src/api/__pycache__/routes.cpython-313.pyc
ADDED
|
Binary file (3.71 kB). View file
|
|
|
src/api/main.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from contextlib import asynccontextmanager
|
| 2 |
+
from fastapi import FastAPI
|
| 3 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 4 |
+
from src.analysis.rag_pipeline import NewsAnalysisPipeline
|
| 5 |
+
from src.api import routes
|
| 6 |
+
|
| 7 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Build the analysis pipeline at startup and expose it on ``app.state``.

    Code before ``yield`` runs at startup; code after it runs at shutdown.
    """
    print("Loading pipeline at startup...")
    pipeline = NewsAnalysisPipeline()
    app.state.pipeline = pipeline
    print("Pipeline ready.")

    yield

    print("Shutting down.")
|
| 14 |
+
|
| 15 |
+
# Application object; `lifespan` attaches the pipeline to app.state.
app = FastAPI(
    lifespan=lifespan,
    title="NewsLens API",
    description="Bias analysis for news articles",
    version="1.0.0",
)

# CORS is fully open: any origin, method and header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

app.include_router(routes.router)
|
src/api/models.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel, Field
|
| 2 |
+
from typing import Optional
|
| 3 |
+
|
| 4 |
+
class AnalyzeRequest(BaseModel):
    # Request body accepted by the analyze endpoint.

    # Required query text, 1 to 200 characters long.
    topic: str = Field(min_length=1, max_length=200)
    # Number of articles to analyze; between 1 and 20, 10 when omitted.
    top_k: int = Field(10, ge=1, le=20)
|
| 7 |
+
|
| 8 |
+
class ArticleResult(BaseModel):
    # One analyzed article as returned inside AnalyzeResponse.results.

    source: str
    # Source-level bias label and where that label came from.
    source_bias: str
    source_bias_provenance: str | None = None
    url: str
    # Article metadata; each of these may be absent.
    title: str | None = None
    description: str | None = None
    publishedAt: str | None = None
    text: str
    # Text-level prediction: label, its confidence, and retrieval similarity.
    text_label: str
    confidence: float
    similarity_score: float
    # Label name -> probability.
    probabilities: dict
|
| 21 |
+
|
| 22 |
+
class SourceSummary(BaseModel):
    # Per-source label counts returned in AnalyzeResponse.summary.

    # `populate_by_name` is a pydantic-v2 setting, so it is declared through
    # v2's `model_config` rather than the deprecated nested `class Config`.
    # It lets the model be built with either the field name (`Not_Biased`)
    # or the alias (`"Not Biased"`).
    model_config = {"populate_by_name": True}

    source_bias: str
    Biased: int
    # The alias carries the space that a Python identifier cannot.
    Not_Biased: int = Field(alias="Not Biased")
    total: int
|
| 30 |
+
|
| 31 |
+
class AnalyzeResponse(BaseModel):
    # Response body of the analyze endpoint.

    topic: str
    # Number of entries in `results`.
    total_articles: int
    results: list[ArticleResult]
    # Source name -> label counts for that source.
    summary: dict[str, SourceSummary]
|
| 36 |
+
|
| 37 |
+
class IngestRequest(BaseModel):
    # Request body accepted by the ingest endpoint.

    # Required search topic, 1 to 200 characters long.
    topic: str = Field(min_length=1, max_length=200)
    # Number of articles to fetch; between 1 and 50, 10 when omitted.
    page_size: int = Field(10, ge=1, le=50)
|
| 40 |
+
|
| 41 |
+
class IngestResponse(BaseModel):
    # Response body of the ingest endpoint.

    topic: str
    # Counts of articles fetched from the news API and written to the store.
    articles_fetched: int
    articles_stored: int
    status: str
|
src/api/routes.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, HTTPException, Request
|
| 2 |
+
import time
|
| 3 |
+
from src.ingestion.newsapi_client import fetch_news
|
| 4 |
+
from src.api.models import AnalyzeRequest, AnalyzeResponse, IngestRequest, IngestResponse
|
| 5 |
+
|
| 6 |
+
router = APIRouter()

# In-process cache of analyze responses. Keys are (normalized topic, top_k);
# each value stores the response under "data" and its creation time under
# "timestamp".
_cache: dict = dict()
# Entries older than this many seconds (5 minutes) are treated as stale.
CACHE_TTL_SECONDS = 5 * 60
|
| 10 |
+
|
| 11 |
+
# Liveness probe: always answers with a fixed OK payload.
@router.get("/health")
def health():
    return dict(status="ok")
|
| 14 |
+
|
| 15 |
+
@router.post("/analyze", response_model=AnalyzeResponse)
def analyze(request: Request, payload: AnalyzeRequest):
    """Analyze stored articles for a topic, reusing recent identical requests."""
    topic = payload.topic.strip()
    if not topic:
        raise HTTPException(status_code=400, detail="Topic cannot be empty.")

    cache_key = (topic.lower(), payload.top_k)
    now = time.time()

    cached = _cache.get(cache_key)
    if cached is not None and now - cached["timestamp"] < CACHE_TTL_SECONDS:
        print(f"Cache hit for: {payload.topic}")
        return cached["data"]

    # Evict every expired entry on a miss so the cache cannot grow without
    # bound. Iterating a snapshot keeps the loop safe while entries are removed.
    for key, entry in list(_cache.items()):
        if now - entry["timestamp"] >= CACHE_TTL_SECONDS:
            _cache.pop(key, None)

    # `app.state` raises AttributeError for an attribute that was never set,
    # so a plain attribute read would never reach the 503 branch below.
    pipeline = getattr(request.app.state, "pipeline", None)
    if pipeline is None:
        raise HTTPException(status_code=503, detail="Pipeline not initialized.")

    try:
        # Query with the stripped topic so the result matches the cache key's
        # whitespace normalization.
        raw = pipeline.analyze(topic, top_k=payload.top_k)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Pipeline error: {str(e)}") from e

    # Most confident predictions first.
    sorted_results = sorted(raw["results"], key=lambda x: x["confidence"], reverse=True)
    response = AnalyzeResponse(
        topic=raw["topic"],
        total_articles=len(sorted_results),
        results=sorted_results,
        summary=raw["summary"],
    )

    _cache[cache_key] = {"data": response, "timestamp": now}
    return response
|
| 48 |
+
|
| 49 |
+
@router.post("/ingest", response_model=IngestResponse)
def ingest(request: Request, payload: IngestRequest):
    """Fetch articles for a topic from the news API and add them to the store."""
    # Same whitespace-only rejection as the analyze endpoint.
    if not payload.topic.strip():
        raise HTTPException(status_code=400, detail="Topic cannot be empty.")

    # Resolve the pipeline before calling the news API: without a store there
    # is nowhere to put the articles, so the external request would be wasted.
    pipeline = getattr(request.app.state, "pipeline", None)
    if pipeline is None:
        raise HTTPException(status_code=503, detail="Pipeline not initialized.")

    try:
        articles = fetch_news(topic=payload.topic, page_size=payload.page_size)
    except RuntimeError as exc:
        raise HTTPException(status_code=503, detail=str(exc)) from exc

    if not articles:
        raise HTTPException(status_code=404, detail=f"No articles found for topic: {payload.topic}")

    pipeline.vector_store.store_articles(articles)
    # Cached analyses may no longer reflect the store's contents.
    _cache.clear()

    return IngestResponse(
        topic=payload.topic,
        articles_fetched=len(articles),
        # NOTE(review): reports the fetched count; the return value of
        # store_articles is not consulted -- confirm the store never skips
        # or deduplicates articles.
        articles_stored=len(articles),
        status="success",
    )
|
src/config.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
|
| 4 |
+
try:
    from dotenv import load_dotenv
except ImportError:
    # python-dotenv is optional; a minimal parser below stands in for it.
    load_dotenv = None


if load_dotenv is None:
    # Fallback: read KEY=VALUE lines from .env in the current working
    # directory. Blank lines, comment lines and lines without "=" are
    # skipped, and variables already present in the environment win.
    env_path = Path.cwd() / ".env"
    if env_path.exists():
        for line in env_path.read_text(encoding="utf-8").splitlines():
            line = line.strip()
            if line and not line.startswith("#") and "=" in line:
                key, value = line.split("=", 1)
                os.environ.setdefault(key.strip(), value.strip().strip('"').strip("'"))
else:
    load_dotenv()

_bias_model_env = os.environ.get("NEWSLENS_BIAS_MODEL_PATH")

# Directory layout; every location can be overridden through the environment.
BASE_DIR = Path(__file__).resolve().parents[1]
DATA_DIR = Path(os.environ.get("NEWSLENS_DATA_DIR", BASE_DIR / "data"))
CHROMA_DB_PATH = Path(os.environ.get("NEWSLENS_CHROMA_DB_PATH", DATA_DIR / "chromadb"))
MODEL_DIR = Path(os.environ.get("NEWSLENS_MODEL_DIR", DATA_DIR / "models"))

# An override from the environment is kept as the raw string; only the
# built-in default is a Path.
BIAS_MODEL_PATH = _bias_model_env or MODEL_DIR / "bias_lora_20260503_010859"

# Service endpoints and credentials; None when the variable is unset.
HF_ENDPOINT = os.environ.get("NEWSLENS_HF_ENDPOINT")
HF_TOKEN = os.environ.get("HF_TOKEN")
NEWS_API_KEY = os.environ.get("NEWSAPI_KEY")
API_BASE_URL = os.environ.get("NEWSLENS_API_BASE_URL", "http://localhost:8000")
|
src/data/source_bias.generated.json
ADDED
|
@@ -0,0 +1,1136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"aliases": {
|
| 3 |
+
"al jazeera": "Al Jazeera",
|
| 4 |
+
"allysia finley (wall street journal)": "Allysia Finley (Wall Street Journal)",
|
| 5 |
+
"ann coulter": "Ann Coulter",
|
| 6 |
+
"ben shapiro": "Ben Shapiro",
|
| 7 |
+
"brent bozell": "Brent Bozell",
|
| 8 |
+
"business insider": "Business Insider",
|
| 9 |
+
"buzzfeed news": "BuzzFeed News",
|
| 10 |
+
"cbn": "CBN",
|
| 11 |
+
"cbs news": "CBS News",
|
| 12 |
+
"charles krauthammer": "Charles Krauthammer",
|
| 13 |
+
"chicago sun-times": "Chicago Sun-Times",
|
| 14 |
+
"christian science monitor": "Christian Science Monitor",
|
| 15 |
+
"cnn (web news)": "CNN (Web News)",
|
| 16 |
+
"cnn - editorial": "CNN - Editorial",
|
| 17 |
+
"daily beast": "Daily Beast",
|
| 18 |
+
"daily kos": "Daily Kos",
|
| 19 |
+
"daily mail": "Daily Mail",
|
| 20 |
+
"damon linker": "Damon Linker",
|
| 21 |
+
"democracy now": "Democracy Now",
|
| 22 |
+
"elizabeth warren": "Elizabeth Warren",
|
| 23 |
+
"ezra klein": "Ezra Klein",
|
| 24 |
+
"fox news": "Fox News",
|
| 25 |
+
"fox news (online)": "Fox News (Online)",
|
| 26 |
+
"fox news opinion": "Fox News Opinion",
|
| 27 |
+
"fox online news": "Fox Online News",
|
| 28 |
+
"george will": "George Will",
|
| 29 |
+
"guest writer": "Guest Writer",
|
| 30 |
+
"guest writer - center": "Guest Writer - Center",
|
| 31 |
+
"guest writer - left": "Guest Writer - Left",
|
| 32 |
+
"guest writer - right": "Guest Writer - Right",
|
| 33 |
+
"hotair": "HotAir",
|
| 34 |
+
"howard kurtz": "Howard Kurtz",
|
| 35 |
+
"international business times": "International Business Times",
|
| 36 |
+
"jacobin": "Jacobin",
|
| 37 |
+
"john fund": "John Fund",
|
| 38 |
+
"john stossel": "John Stossel",
|
| 39 |
+
"jon terbush": "Jon Terbush",
|
| 40 |
+
"jonah goldberg": "Jonah Goldberg",
|
| 41 |
+
"juan williams": "Juan Williams",
|
| 42 |
+
"julian zelizer": "Julian Zelizer",
|
| 43 |
+
"marketwatch": "MarketWatch",
|
| 44 |
+
"media matters": "Media Matters",
|
| 45 |
+
"media research center": "Media Research Center",
|
| 46 |
+
"michael barone": "Michael Barone",
|
| 47 |
+
"michael brendan dougherty": "Michael Brendan Dougherty",
|
| 48 |
+
"michael goodwin": "Michael Goodwin",
|
| 49 |
+
"michelle malkin": "Michelle Malkin",
|
| 50 |
+
"mother jones": "Mother Jones",
|
| 51 |
+
"national review": "National Review",
|
| 52 |
+
"nbc news (online)": "NBC News (Online)",
|
| 53 |
+
"nbcnews.com": "NBCNews.com",
|
| 54 |
+
"new york post": "New York Post",
|
| 55 |
+
"new york post (news)": "New York Post (News)",
|
| 56 |
+
"new york post (opinion)": "New York Post (Opinion)",
|
| 57 |
+
"newsbusters": "NewsBusters",
|
| 58 |
+
"newt gingrich": "Newt Gingrich",
|
| 59 |
+
"npr editorial": "NPR Editorial",
|
| 60 |
+
"npr online news": "NPR Online News",
|
| 61 |
+
"pew research center": "Pew Research Center",
|
| 62 |
+
"politico": "Politico",
|
| 63 |
+
"rand paul": "Rand Paul",
|
| 64 |
+
"rich lowry": "Rich Lowry",
|
| 65 |
+
"ryan cooper": "Ryan Cooper",
|
| 66 |
+
"s.e. cupp": "S.E. Cupp",
|
| 67 |
+
"scientific american": "Scientific American",
|
| 68 |
+
"slate": "Slate",
|
| 69 |
+
"the atlantic": "The Atlantic",
|
| 70 |
+
"the boston globe": "The Boston Globe",
|
| 71 |
+
"the daily wire": "The Daily Wire",
|
| 72 |
+
"the economist": "The Economist",
|
| 73 |
+
"the flip side": "The Flip Side",
|
| 74 |
+
"the hill": "The Hill",
|
| 75 |
+
"the intercept": "The Intercept",
|
| 76 |
+
"the marshall project": "The Marshall Project",
|
| 77 |
+
"the nation": "The Nation",
|
| 78 |
+
"the new yorker": "The New Yorker",
|
| 79 |
+
"the week - news": "The Week - News",
|
| 80 |
+
"the week - opinion": "The Week - Opinion",
|
| 81 |
+
"theblaze.com": "TheBlaze.com",
|
| 82 |
+
"thinkprogress": "ThinkProgress",
|
| 83 |
+
"thomas sowell": "Thomas Sowell",
|
| 84 |
+
"time magazine": "Time Magazine",
|
| 85 |
+
"townhall": "Townhall",
|
| 86 |
+
"usa today": "USA TODAY",
|
| 87 |
+
"vanity fair": "Vanity Fair",
|
| 88 |
+
"vice": "Vice",
|
| 89 |
+
"victor hanson": "Victor Hanson",
|
| 90 |
+
"vox": "Vox",
|
| 91 |
+
"wall street journal - editorial": "Wall Street Journal - Editorial",
|
| 92 |
+
"wall street journal - news": "Wall Street Journal - News",
|
| 93 |
+
"washington post": "Washington Post",
|
| 94 |
+
"washington times": "Washington Times",
|
| 95 |
+
"yahoo! news": "Yahoo! News",
|
| 96 |
+
"yahoo! the 360": "Yahoo! The 360"
|
| 97 |
+
},
|
| 98 |
+
"sources": {
|
| 99 |
+
"Al Jazeera": {
|
| 100 |
+
"article_count": 142,
|
| 101 |
+
"bias": "Left",
|
| 102 |
+
"label_counts": {
|
| 103 |
+
"Left": 141,
|
| 104 |
+
"Right": 1
|
| 105 |
+
},
|
| 106 |
+
"majority_share": 0.993,
|
| 107 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 108 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 109 |
+
"source_url": "www.aljazeera.com"
|
| 110 |
+
},
|
| 111 |
+
"Allysia Finley (Wall Street Journal)": {
|
| 112 |
+
"article_count": 4,
|
| 113 |
+
"bias": "Center",
|
| 114 |
+
"label_counts": {
|
| 115 |
+
"Center": 4
|
| 116 |
+
},
|
| 117 |
+
"majority_share": 1.0,
|
| 118 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 119 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 120 |
+
"source_url": "www.cnn.com"
|
| 121 |
+
},
|
| 122 |
+
"Ann Coulter": {
|
| 123 |
+
"article_count": 6,
|
| 124 |
+
"bias": "Center",
|
| 125 |
+
"label_counts": {
|
| 126 |
+
"Center": 6
|
| 127 |
+
},
|
| 128 |
+
"majority_share": 1.0,
|
| 129 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 130 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 131 |
+
"source_url": "www.townhall.com"
|
| 132 |
+
},
|
| 133 |
+
"Ben Shapiro": {
|
| 134 |
+
"article_count": 26,
|
| 135 |
+
"bias": "Center",
|
| 136 |
+
"label_counts": {
|
| 137 |
+
"Center": 26
|
| 138 |
+
},
|
| 139 |
+
"majority_share": 1.0,
|
| 140 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 141 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 142 |
+
"source_url": "www.dailywire.com"
|
| 143 |
+
},
|
| 144 |
+
"Brent Bozell": {
|
| 145 |
+
"article_count": 5,
|
| 146 |
+
"bias": "Center",
|
| 147 |
+
"label_counts": {
|
| 148 |
+
"Center": 5
|
| 149 |
+
},
|
| 150 |
+
"majority_share": 1.0,
|
| 151 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 152 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 153 |
+
"source_url": "www.foxnews.com"
|
| 154 |
+
},
|
| 155 |
+
"Business Insider": {
|
| 156 |
+
"article_count": 74,
|
| 157 |
+
"bias": "Right",
|
| 158 |
+
"label_counts": {
|
| 159 |
+
"Right": 74
|
| 160 |
+
},
|
| 161 |
+
"majority_share": 1.0,
|
| 162 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 163 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 164 |
+
"source_url": "www.businessinsider.com"
|
| 165 |
+
},
|
| 166 |
+
"BuzzFeed News": {
|
| 167 |
+
"article_count": 64,
|
| 168 |
+
"bias": "Left",
|
| 169 |
+
"label_counts": {
|
| 170 |
+
"Left": 64
|
| 171 |
+
},
|
| 172 |
+
"majority_share": 1.0,
|
| 173 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 174 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 175 |
+
"source_url": "www.buzzfeednews.com"
|
| 176 |
+
},
|
| 177 |
+
"CBN": {
|
| 178 |
+
"article_count": 27,
|
| 179 |
+
"bias": "Center",
|
| 180 |
+
"label_counts": {
|
| 181 |
+
"Center": 27
|
| 182 |
+
},
|
| 183 |
+
"majority_share": 1.0,
|
| 184 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 185 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 186 |
+
"source_url": "www.cbn.com"
|
| 187 |
+
},
|
| 188 |
+
"CBS News": {
|
| 189 |
+
"article_count": 163,
|
| 190 |
+
"bias": "Left",
|
| 191 |
+
"label_counts": {
|
| 192 |
+
"Left": 163
|
| 193 |
+
},
|
| 194 |
+
"majority_share": 1.0,
|
| 195 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 196 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 197 |
+
"source_url": "www.cbsnews.com"
|
| 198 |
+
},
|
| 199 |
+
"CNN (Web News)": {
|
| 200 |
+
"article_count": 2485,
|
| 201 |
+
"bias": "Left",
|
| 202 |
+
"label_counts": {
|
| 203 |
+
"Left": 2485
|
| 204 |
+
},
|
| 205 |
+
"majority_share": 1.0,
|
| 206 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 207 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 208 |
+
"source_url": "www.cnn.com"
|
| 209 |
+
},
|
| 210 |
+
"CNN - Editorial": {
|
| 211 |
+
"article_count": 87,
|
| 212 |
+
"bias": "Left",
|
| 213 |
+
"label_counts": {
|
| 214 |
+
"Left": 87
|
| 215 |
+
},
|
| 216 |
+
"majority_share": 1.0,
|
| 217 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 218 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 219 |
+
"source_url": "www.cnn.com"
|
| 220 |
+
},
|
| 221 |
+
"Charles Krauthammer": {
|
| 222 |
+
"article_count": 9,
|
| 223 |
+
"bias": "Center",
|
| 224 |
+
"label_counts": {
|
| 225 |
+
"Center": 9
|
| 226 |
+
},
|
| 227 |
+
"majority_share": 1.0,
|
| 228 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 229 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 230 |
+
"source_url": "www.nationalreview.com"
|
| 231 |
+
},
|
| 232 |
+
"Chicago Sun-Times": {
|
| 233 |
+
"article_count": 83,
|
| 234 |
+
"bias": "Left",
|
| 235 |
+
"label_counts": {
|
| 236 |
+
"Left": 83
|
| 237 |
+
},
|
| 238 |
+
"majority_share": 1.0,
|
| 239 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 240 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 241 |
+
"source_url": "www.chicago.suntimes.com"
|
| 242 |
+
},
|
| 243 |
+
"Christian Science Monitor": {
|
| 244 |
+
"article_count": 1300,
|
| 245 |
+
"bias": "Right",
|
| 246 |
+
"label_counts": {
|
| 247 |
+
"Right": 1300
|
| 248 |
+
},
|
| 249 |
+
"majority_share": 1.0,
|
| 250 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 251 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 252 |
+
"source_url": "www.csmonitor.com"
|
| 253 |
+
},
|
| 254 |
+
"Daily Beast": {
|
| 255 |
+
"article_count": 240,
|
| 256 |
+
"bias": "Left",
|
| 257 |
+
"label_counts": {
|
| 258 |
+
"Left": 240
|
| 259 |
+
},
|
| 260 |
+
"majority_share": 1.0,
|
| 261 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 262 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 263 |
+
"source_url": "www.thedailybeast.com"
|
| 264 |
+
},
|
| 265 |
+
"Daily Kos": {
|
| 266 |
+
"article_count": 127,
|
| 267 |
+
"bias": "Left",
|
| 268 |
+
"label_counts": {
|
| 269 |
+
"Left": 127
|
| 270 |
+
},
|
| 271 |
+
"majority_share": 1.0,
|
| 272 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 273 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 274 |
+
"source_url": "www.dailykos.com"
|
| 275 |
+
},
|
| 276 |
+
"Daily Mail": {
|
| 277 |
+
"article_count": 46,
|
| 278 |
+
"bias": "Center",
|
| 279 |
+
"label_counts": {
|
| 280 |
+
"Center": 46
|
| 281 |
+
},
|
| 282 |
+
"majority_share": 1.0,
|
| 283 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 284 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 285 |
+
"source_url": "www.dailymail.co.uk"
|
| 286 |
+
},
|
| 287 |
+
"Damon Linker": {
|
| 288 |
+
"article_count": 14,
|
| 289 |
+
"bias": "Left",
|
| 290 |
+
"label_counts": {
|
| 291 |
+
"Left": 14
|
| 292 |
+
},
|
| 293 |
+
"majority_share": 1.0,
|
| 294 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 295 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 296 |
+
"source_url": "www.theweek.com"
|
| 297 |
+
},
|
| 298 |
+
"Democracy Now": {
|
| 299 |
+
"article_count": 75,
|
| 300 |
+
"bias": "Left",
|
| 301 |
+
"label_counts": {
|
| 302 |
+
"Left": 75
|
| 303 |
+
},
|
| 304 |
+
"majority_share": 1.0,
|
| 305 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 306 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 307 |
+
"source_url": "www.democracynow.org"
|
| 308 |
+
},
|
| 309 |
+
"Elizabeth Warren": {
|
| 310 |
+
"article_count": 4,
|
| 311 |
+
"bias": "Left",
|
| 312 |
+
"label_counts": {
|
| 313 |
+
"Left": 4
|
| 314 |
+
},
|
| 315 |
+
"majority_share": 1.0,
|
| 316 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 317 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 318 |
+
"source_url": "www.time.com"
|
| 319 |
+
},
|
| 320 |
+
"Ezra Klein": {
|
| 321 |
+
"article_count": 10,
|
| 322 |
+
"bias": "Left",
|
| 323 |
+
"label_counts": {
|
| 324 |
+
"Left": 10
|
| 325 |
+
},
|
| 326 |
+
"majority_share": 1.0,
|
| 327 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 328 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 329 |
+
"source_url": "www.npr.org"
|
| 330 |
+
},
|
| 331 |
+
"Fox News": {
|
| 332 |
+
"article_count": 1353,
|
| 333 |
+
"bias": "Center",
|
| 334 |
+
"label_counts": {
|
| 335 |
+
"Center": 1353
|
| 336 |
+
},
|
| 337 |
+
"majority_share": 1.0,
|
| 338 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 339 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 340 |
+
"source_url": "www.foxnews.com"
|
| 341 |
+
},
|
| 342 |
+
"Fox News (Online)": {
|
| 343 |
+
"article_count": 86,
|
| 344 |
+
"bias": "Center",
|
| 345 |
+
"label_counts": {
|
| 346 |
+
"Center": 86
|
| 347 |
+
},
|
| 348 |
+
"majority_share": 1.0,
|
| 349 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 350 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 351 |
+
"source_url": "www.foxnews.com"
|
| 352 |
+
},
|
| 353 |
+
"Fox News Opinion": {
|
| 354 |
+
"article_count": 58,
|
| 355 |
+
"bias": "Center",
|
| 356 |
+
"label_counts": {
|
| 357 |
+
"Center": 58
|
| 358 |
+
},
|
| 359 |
+
"majority_share": 1.0,
|
| 360 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 361 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 362 |
+
"source_url": "www.foxnews.com"
|
| 363 |
+
},
|
| 364 |
+
"Fox Online News": {
|
| 365 |
+
"article_count": 2035,
|
| 366 |
+
"bias": "Center",
|
| 367 |
+
"label_counts": {
|
| 368 |
+
"Center": 2035
|
| 369 |
+
},
|
| 370 |
+
"majority_share": 1.0,
|
| 371 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 372 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 373 |
+
"source_url": "www.foxnews.com"
|
| 374 |
+
},
|
| 375 |
+
"George Will": {
|
| 376 |
+
"article_count": 14,
|
| 377 |
+
"bias": "Center",
|
| 378 |
+
"label_counts": {
|
| 379 |
+
"Center": 14
|
| 380 |
+
},
|
| 381 |
+
"majority_share": 1.0,
|
| 382 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 383 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 384 |
+
"source_url": "www.nationalreview.com"
|
| 385 |
+
},
|
| 386 |
+
"Guest Writer": {
|
| 387 |
+
"article_count": 84,
|
| 388 |
+
"bias": "Right",
|
| 389 |
+
"label_counts": {
|
| 390 |
+
"Right": 84
|
| 391 |
+
},
|
| 392 |
+
"majority_share": 1.0,
|
| 393 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 394 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 395 |
+
"source_url": "www.washingtontimes.com"
|
| 396 |
+
},
|
| 397 |
+
"Guest Writer - Center": {
|
| 398 |
+
"article_count": 3,
|
| 399 |
+
"bias": "Right",
|
| 400 |
+
"label_counts": {
|
| 401 |
+
"Right": 3
|
| 402 |
+
},
|
| 403 |
+
"majority_share": 1.0,
|
| 404 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 405 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 406 |
+
"source_url": "www.theatlantic.com"
|
| 407 |
+
},
|
| 408 |
+
"Guest Writer - Left": {
|
| 409 |
+
"article_count": 109,
|
| 410 |
+
"bias": "Left",
|
| 411 |
+
"label_counts": {
|
| 412 |
+
"Left": 109
|
| 413 |
+
},
|
| 414 |
+
"majority_share": 1.0,
|
| 415 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 416 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 417 |
+
"source_url": "www.cnn.com"
|
| 418 |
+
},
|
| 419 |
+
"Guest Writer - Right": {
|
| 420 |
+
"article_count": 385,
|
| 421 |
+
"bias": "Center",
|
| 422 |
+
"label_counts": {
|
| 423 |
+
"Center": 385
|
| 424 |
+
},
|
| 425 |
+
"majority_share": 1.0,
|
| 426 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 427 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 428 |
+
"source_url": "www.nationalreview.com"
|
| 429 |
+
},
|
| 430 |
+
"HotAir": {
|
| 431 |
+
"article_count": 64,
|
| 432 |
+
"bias": "Center",
|
| 433 |
+
"label_counts": {
|
| 434 |
+
"Center": 64
|
| 435 |
+
},
|
| 436 |
+
"majority_share": 1.0,
|
| 437 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 438 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 439 |
+
"source_url": "www.hotair.com"
|
| 440 |
+
},
|
| 441 |
+
"Howard Kurtz": {
|
| 442 |
+
"article_count": 14,
|
| 443 |
+
"bias": "Right",
|
| 444 |
+
"label_counts": {
|
| 445 |
+
"Right": 14
|
| 446 |
+
},
|
| 447 |
+
"majority_share": 1.0,
|
| 448 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 449 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 450 |
+
"source_url": "www.foxnews.com"
|
| 451 |
+
},
|
| 452 |
+
"International Business Times": {
|
| 453 |
+
"article_count": 48,
|
| 454 |
+
"bias": "Right",
|
| 455 |
+
"label_counts": {
|
| 456 |
+
"Right": 48
|
| 457 |
+
},
|
| 458 |
+
"majority_share": 1.0,
|
| 459 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 460 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 461 |
+
"source_url": "www.ibtimes.com"
|
| 462 |
+
},
|
| 463 |
+
"Jacobin": {
|
| 464 |
+
"article_count": 23,
|
| 465 |
+
"bias": "Left",
|
| 466 |
+
"label_counts": {
|
| 467 |
+
"Left": 23
|
| 468 |
+
},
|
| 469 |
+
"majority_share": 1.0,
|
| 470 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 471 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 472 |
+
"source_url": "www.jacobinmag.com"
|
| 473 |
+
},
|
| 474 |
+
"John Fund": {
|
| 475 |
+
"article_count": 16,
|
| 476 |
+
"bias": "Center",
|
| 477 |
+
"label_counts": {
|
| 478 |
+
"Center": 16
|
| 479 |
+
},
|
| 480 |
+
"majority_share": 1.0,
|
| 481 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 482 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 483 |
+
"source_url": "www.nationalreview.com"
|
| 484 |
+
},
|
| 485 |
+
"John Stossel": {
|
| 486 |
+
"article_count": 26,
|
| 487 |
+
"bias": "Center",
|
| 488 |
+
"label_counts": {
|
| 489 |
+
"Center": 26
|
| 490 |
+
},
|
| 491 |
+
"majority_share": 1.0,
|
| 492 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 493 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 494 |
+
"source_url": "www.foxnews.com"
|
| 495 |
+
},
|
| 496 |
+
"Jon Terbush": {
|
| 497 |
+
"article_count": 3,
|
| 498 |
+
"bias": "Left",
|
| 499 |
+
"label_counts": {
|
| 500 |
+
"Left": 3
|
| 501 |
+
},
|
| 502 |
+
"majority_share": 1.0,
|
| 503 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 504 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 505 |
+
"source_url": "www.theweek.com"
|
| 506 |
+
},
|
| 507 |
+
"Jonah Goldberg": {
|
| 508 |
+
"article_count": 8,
|
| 509 |
+
"bias": "Center",
|
| 510 |
+
"label_counts": {
|
| 511 |
+
"Center": 8
|
| 512 |
+
},
|
| 513 |
+
"majority_share": 1.0,
|
| 514 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 515 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 516 |
+
"source_url": "www.nationalreview.com"
|
| 517 |
+
},
|
| 518 |
+
"Juan Williams": {
|
| 519 |
+
"article_count": 10,
|
| 520 |
+
"bias": "Left",
|
| 521 |
+
"label_counts": {
|
| 522 |
+
"Left": 10
|
| 523 |
+
},
|
| 524 |
+
"majority_share": 1.0,
|
| 525 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 526 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 527 |
+
"source_url": "www.foxnews.com"
|
| 528 |
+
},
|
| 529 |
+
"Julian Zelizer": {
|
| 530 |
+
"article_count": 10,
|
| 531 |
+
"bias": "Left",
|
| 532 |
+
"label_counts": {
|
| 533 |
+
"Left": 10
|
| 534 |
+
},
|
| 535 |
+
"majority_share": 1.0,
|
| 536 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 537 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 538 |
+
"source_url": "www.cnn.com"
|
| 539 |
+
},
|
| 540 |
+
"MarketWatch": {
|
| 541 |
+
"article_count": 106,
|
| 542 |
+
"bias": "Center",
|
| 543 |
+
"label_counts": {
|
| 544 |
+
"Center": 106
|
| 545 |
+
},
|
| 546 |
+
"majority_share": 1.0,
|
| 547 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 548 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 549 |
+
"source_url": "www.marketwatch.com"
|
| 550 |
+
},
|
| 551 |
+
"Media Matters": {
|
| 552 |
+
"article_count": 107,
|
| 553 |
+
"bias": "Left",
|
| 554 |
+
"label_counts": {
|
| 555 |
+
"Left": 107
|
| 556 |
+
},
|
| 557 |
+
"majority_share": 1.0,
|
| 558 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 559 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 560 |
+
"source_url": "www.mediamatters.org"
|
| 561 |
+
},
|
| 562 |
+
"Media Research Center": {
|
| 563 |
+
"article_count": 22,
|
| 564 |
+
"bias": "Center",
|
| 565 |
+
"label_counts": {
|
| 566 |
+
"Center": 22
|
| 567 |
+
},
|
| 568 |
+
"majority_share": 1.0,
|
| 569 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 570 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 571 |
+
"source_url": "www.newsbusters.org"
|
| 572 |
+
},
|
| 573 |
+
"Michael Barone": {
|
| 574 |
+
"article_count": 4,
|
| 575 |
+
"bias": "Center",
|
| 576 |
+
"label_counts": {
|
| 577 |
+
"Center": 4
|
| 578 |
+
},
|
| 579 |
+
"majority_share": 1.0,
|
| 580 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 581 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 582 |
+
"source_url": "www.townhall.com"
|
| 583 |
+
},
|
| 584 |
+
"Michael Brendan Dougherty": {
|
| 585 |
+
"article_count": 8,
|
| 586 |
+
"bias": "Center",
|
| 587 |
+
"label_counts": {
|
| 588 |
+
"Center": 8
|
| 589 |
+
},
|
| 590 |
+
"majority_share": 1.0,
|
| 591 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 592 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 593 |
+
"source_url": "www.theweek.com"
|
| 594 |
+
},
|
| 595 |
+
"Michael Goodwin": {
|
| 596 |
+
"article_count": 4,
|
| 597 |
+
"bias": "Center",
|
| 598 |
+
"label_counts": {
|
| 599 |
+
"Center": 4
|
| 600 |
+
},
|
| 601 |
+
"majority_share": 1.0,
|
| 602 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 603 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 604 |
+
"source_url": "www.nypost.com"
|
| 605 |
+
},
|
| 606 |
+
"Michelle Malkin": {
|
| 607 |
+
"article_count": 12,
|
| 608 |
+
"bias": "Center",
|
| 609 |
+
"label_counts": {
|
| 610 |
+
"Center": 12
|
| 611 |
+
},
|
| 612 |
+
"majority_share": 1.0,
|
| 613 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 614 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 615 |
+
"source_url": "www.townhall.com"
|
| 616 |
+
},
|
| 617 |
+
"Mother Jones": {
|
| 618 |
+
"article_count": 114,
|
| 619 |
+
"bias": "Left",
|
| 620 |
+
"label_counts": {
|
| 621 |
+
"Left": 114
|
| 622 |
+
},
|
| 623 |
+
"majority_share": 1.0,
|
| 624 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 625 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 626 |
+
"source_url": "www.motherjones.com"
|
| 627 |
+
},
|
| 628 |
+
"NBC News (Online)": {
|
| 629 |
+
"article_count": 38,
|
| 630 |
+
"bias": "Left",
|
| 631 |
+
"label_counts": {
|
| 632 |
+
"Left": 38
|
| 633 |
+
},
|
| 634 |
+
"majority_share": 1.0,
|
| 635 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 636 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 637 |
+
"source_url": "www.nbcnews.com"
|
| 638 |
+
},
|
| 639 |
+
"NBCNews.com": {
|
| 640 |
+
"article_count": 14,
|
| 641 |
+
"bias": "Left",
|
| 642 |
+
"label_counts": {
|
| 643 |
+
"Left": 14
|
| 644 |
+
},
|
| 645 |
+
"majority_share": 1.0,
|
| 646 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 647 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 648 |
+
"source_url": "www.nbcnews.com"
|
| 649 |
+
},
|
| 650 |
+
"NPR Editorial": {
|
| 651 |
+
"article_count": 8,
|
| 652 |
+
"bias": "Left",
|
| 653 |
+
"label_counts": {
|
| 654 |
+
"Left": 8
|
| 655 |
+
},
|
| 656 |
+
"majority_share": 1.0,
|
| 657 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 658 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 659 |
+
"source_url": "www.npr.org"
|
| 660 |
+
},
|
| 661 |
+
"NPR Online News": {
|
| 662 |
+
"article_count": 2007,
|
| 663 |
+
"bias": "Right",
|
| 664 |
+
"label_counts": {
|
| 665 |
+
"Right": 2007
|
| 666 |
+
},
|
| 667 |
+
"majority_share": 1.0,
|
| 668 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 669 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 670 |
+
"source_url": "www.npr.org"
|
| 671 |
+
},
|
| 672 |
+
"National Review": {
|
| 673 |
+
"article_count": 1013,
|
| 674 |
+
"bias": "Center",
|
| 675 |
+
"label_counts": {
|
| 676 |
+
"Center": 1013
|
| 677 |
+
},
|
| 678 |
+
"majority_share": 1.0,
|
| 679 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 680 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 681 |
+
"source_url": "www.nationalreview.com"
|
| 682 |
+
},
|
| 683 |
+
"New York Post": {
|
| 684 |
+
"article_count": 175,
|
| 685 |
+
"bias": "Center",
|
| 686 |
+
"label_counts": {
|
| 687 |
+
"Center": 175
|
| 688 |
+
},
|
| 689 |
+
"majority_share": 1.0,
|
| 690 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 691 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 692 |
+
"source_url": "www.nypost.com"
|
| 693 |
+
},
|
| 694 |
+
"New York Post (News)": {
|
| 695 |
+
"article_count": 5,
|
| 696 |
+
"bias": "Center",
|
| 697 |
+
"label_counts": {
|
| 698 |
+
"Center": 5
|
| 699 |
+
},
|
| 700 |
+
"majority_share": 1.0,
|
| 701 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 702 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 703 |
+
"source_url": "www.nypost.com"
|
| 704 |
+
},
|
| 705 |
+
"New York Post (Opinion)": {
|
| 706 |
+
"article_count": 5,
|
| 707 |
+
"bias": "Center",
|
| 708 |
+
"label_counts": {
|
| 709 |
+
"Center": 5
|
| 710 |
+
},
|
| 711 |
+
"majority_share": 1.0,
|
| 712 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 713 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 714 |
+
"source_url": "www.nypost.com"
|
| 715 |
+
},
|
| 716 |
+
"NewsBusters": {
|
| 717 |
+
"article_count": 44,
|
| 718 |
+
"bias": "Center",
|
| 719 |
+
"label_counts": {
|
| 720 |
+
"Center": 44
|
| 721 |
+
},
|
| 722 |
+
"majority_share": 1.0,
|
| 723 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 724 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 725 |
+
"source_url": "www.newsbusters.org"
|
| 726 |
+
},
|
| 727 |
+
"Newt Gingrich": {
|
| 728 |
+
"article_count": 14,
|
| 729 |
+
"bias": "Center",
|
| 730 |
+
"label_counts": {
|
| 731 |
+
"Center": 14
|
| 732 |
+
},
|
| 733 |
+
"majority_share": 1.0,
|
| 734 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 735 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 736 |
+
"source_url": "www.washingtontimes.com"
|
| 737 |
+
},
|
| 738 |
+
"Pew Research Center": {
|
| 739 |
+
"article_count": 27,
|
| 740 |
+
"bias": "Right",
|
| 741 |
+
"label_counts": {
|
| 742 |
+
"Right": 27
|
| 743 |
+
},
|
| 744 |
+
"majority_share": 1.0,
|
| 745 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 746 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 747 |
+
"source_url": "www.pewresearch.org"
|
| 748 |
+
},
|
| 749 |
+
"Politico": {
|
| 750 |
+
"article_count": 2493,
|
| 751 |
+
"bias": "Left",
|
| 752 |
+
"label_counts": {
|
| 753 |
+
"Left": 2493
|
| 754 |
+
},
|
| 755 |
+
"majority_share": 1.0,
|
| 756 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 757 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 758 |
+
"source_url": "www.politico.com"
|
| 759 |
+
},
|
| 760 |
+
"Rand Paul": {
|
| 761 |
+
"article_count": 8,
|
| 762 |
+
"bias": "Center",
|
| 763 |
+
"label_counts": {
|
| 764 |
+
"Center": 8
|
| 765 |
+
},
|
| 766 |
+
"majority_share": 1.0,
|
| 767 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 768 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 769 |
+
"source_url": "www.washingtontimes.com"
|
| 770 |
+
},
|
| 771 |
+
"Rich Lowry": {
|
| 772 |
+
"article_count": 44,
|
| 773 |
+
"bias": "Center",
|
| 774 |
+
"label_counts": {
|
| 775 |
+
"Center": 44
|
| 776 |
+
},
|
| 777 |
+
"majority_share": 1.0,
|
| 778 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 779 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 780 |
+
"source_url": "www.nationalreview.com"
|
| 781 |
+
},
|
| 782 |
+
"Ryan Cooper": {
|
| 783 |
+
"article_count": 6,
|
| 784 |
+
"bias": "Left",
|
| 785 |
+
"label_counts": {
|
| 786 |
+
"Left": 6
|
| 787 |
+
},
|
| 788 |
+
"majority_share": 1.0,
|
| 789 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 790 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 791 |
+
"source_url": "www.theweek.com"
|
| 792 |
+
},
|
| 793 |
+
"S.E. Cupp": {
|
| 794 |
+
"article_count": 4,
|
| 795 |
+
"bias": "Center",
|
| 796 |
+
"label_counts": {
|
| 797 |
+
"Center": 4
|
| 798 |
+
},
|
| 799 |
+
"majority_share": 1.0,
|
| 800 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 801 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 802 |
+
"source_url": "www.chicago.suntimes.com"
|
| 803 |
+
},
|
| 804 |
+
"Scientific American": {
|
| 805 |
+
"article_count": 35,
|
| 806 |
+
"bias": "Right",
|
| 807 |
+
"label_counts": {
|
| 808 |
+
"Left": 2,
|
| 809 |
+
"Right": 33
|
| 810 |
+
},
|
| 811 |
+
"majority_share": 0.9429,
|
| 812 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 813 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 814 |
+
"source_url": "www.scientificamerican.com"
|
| 815 |
+
},
|
| 816 |
+
"Slate": {
|
| 817 |
+
"article_count": 158,
|
| 818 |
+
"bias": "Left",
|
| 819 |
+
"label_counts": {
|
| 820 |
+
"Left": 158
|
| 821 |
+
},
|
| 822 |
+
"majority_share": 1.0,
|
| 823 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 824 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 825 |
+
"source_url": "www.slate.com"
|
| 826 |
+
},
|
| 827 |
+
"The Atlantic": {
|
| 828 |
+
"article_count": 172,
|
| 829 |
+
"bias": "Left",
|
| 830 |
+
"label_counts": {
|
| 831 |
+
"Left": 172
|
| 832 |
+
},
|
| 833 |
+
"majority_share": 1.0,
|
| 834 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 835 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 836 |
+
"source_url": "www.theatlantic.com"
|
| 837 |
+
},
|
| 838 |
+
"The Boston Globe": {
|
| 839 |
+
"article_count": 24,
|
| 840 |
+
"bias": "Left",
|
| 841 |
+
"label_counts": {
|
| 842 |
+
"Left": 24
|
| 843 |
+
},
|
| 844 |
+
"majority_share": 1.0,
|
| 845 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 846 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 847 |
+
"source_url": "www.bostonglobe.com"
|
| 848 |
+
},
|
| 849 |
+
"The Daily Wire": {
|
| 850 |
+
"article_count": 122,
|
| 851 |
+
"bias": "Center",
|
| 852 |
+
"label_counts": {
|
| 853 |
+
"Center": 122
|
| 854 |
+
},
|
| 855 |
+
"majority_share": 1.0,
|
| 856 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 857 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 858 |
+
"source_url": "www.dailywire.com"
|
| 859 |
+
},
|
| 860 |
+
"The Economist": {
|
| 861 |
+
"article_count": 28,
|
| 862 |
+
"bias": "Left",
|
| 863 |
+
"label_counts": {
|
| 864 |
+
"Left": 28
|
| 865 |
+
},
|
| 866 |
+
"majority_share": 1.0,
|
| 867 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 868 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 869 |
+
"source_url": "www.economist.com"
|
| 870 |
+
},
|
| 871 |
+
"The Flip Side": {
|
| 872 |
+
"article_count": 239,
|
| 873 |
+
"bias": "Right",
|
| 874 |
+
"label_counts": {
|
| 875 |
+
"Right": 239
|
| 876 |
+
},
|
| 877 |
+
"majority_share": 1.0,
|
| 878 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 879 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 880 |
+
"source_url": "www.theflipside.io"
|
| 881 |
+
},
|
| 882 |
+
"The Hill": {
|
| 883 |
+
"article_count": 1377,
|
| 884 |
+
"bias": "Right",
|
| 885 |
+
"label_counts": {
|
| 886 |
+
"Right": 1377
|
| 887 |
+
},
|
| 888 |
+
"majority_share": 1.0,
|
| 889 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 890 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 891 |
+
"source_url": "www.thehill.com"
|
| 892 |
+
},
|
| 893 |
+
"The Intercept": {
|
| 894 |
+
"article_count": 43,
|
| 895 |
+
"bias": "Left",
|
| 896 |
+
"label_counts": {
|
| 897 |
+
"Left": 43
|
| 898 |
+
},
|
| 899 |
+
"majority_share": 1.0,
|
| 900 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 901 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 902 |
+
"source_url": "www.theintercept.com"
|
| 903 |
+
},
|
| 904 |
+
"The Marshall Project": {
|
| 905 |
+
"article_count": 27,
|
| 906 |
+
"bias": "Right",
|
| 907 |
+
"label_counts": {
|
| 908 |
+
"Right": 27
|
| 909 |
+
},
|
| 910 |
+
"majority_share": 1.0,
|
| 911 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 912 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 913 |
+
"source_url": "www.themarshallproject.org"
|
| 914 |
+
},
|
| 915 |
+
"The Nation": {
|
| 916 |
+
"article_count": 32,
|
| 917 |
+
"bias": "Left",
|
| 918 |
+
"label_counts": {
|
| 919 |
+
"Left": 32
|
| 920 |
+
},
|
| 921 |
+
"majority_share": 1.0,
|
| 922 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 923 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 924 |
+
"source_url": "www.thenation.com"
|
| 925 |
+
},
|
| 926 |
+
"The New Yorker": {
|
| 927 |
+
"article_count": 21,
|
| 928 |
+
"bias": "Left",
|
| 929 |
+
"label_counts": {
|
| 930 |
+
"Left": 21
|
| 931 |
+
},
|
| 932 |
+
"majority_share": 1.0,
|
| 933 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 934 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 935 |
+
"source_url": "www.newyorker.com"
|
| 936 |
+
},
|
| 937 |
+
"The Week - News": {
|
| 938 |
+
"article_count": 119,
|
| 939 |
+
"bias": "Right",
|
| 940 |
+
"label_counts": {
|
| 941 |
+
"Right": 119
|
| 942 |
+
},
|
| 943 |
+
"majority_share": 1.0,
|
| 944 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 945 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 946 |
+
"source_url": "www.theweek.com"
|
| 947 |
+
},
|
| 948 |
+
"The Week - Opinion": {
|
| 949 |
+
"article_count": 24,
|
| 950 |
+
"bias": "Left",
|
| 951 |
+
"label_counts": {
|
| 952 |
+
"Left": 24
|
| 953 |
+
},
|
| 954 |
+
"majority_share": 1.0,
|
| 955 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 956 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 957 |
+
"source_url": "www.theweek.com"
|
| 958 |
+
},
|
| 959 |
+
"TheBlaze.com": {
|
| 960 |
+
"article_count": 219,
|
| 961 |
+
"bias": "Center",
|
| 962 |
+
"label_counts": {
|
| 963 |
+
"Center": 219
|
| 964 |
+
},
|
| 965 |
+
"majority_share": 1.0,
|
| 966 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 967 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 968 |
+
"source_url": "www.theblaze.com"
|
| 969 |
+
},
|
| 970 |
+
"ThinkProgress": {
|
| 971 |
+
"article_count": 33,
|
| 972 |
+
"bias": "Left",
|
| 973 |
+
"label_counts": {
|
| 974 |
+
"Left": 33
|
| 975 |
+
},
|
| 976 |
+
"majority_share": 1.0,
|
| 977 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 978 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 979 |
+
"source_url": "www.thinkprogress.org"
|
| 980 |
+
},
|
| 981 |
+
"Thomas Sowell": {
|
| 982 |
+
"article_count": 3,
|
| 983 |
+
"bias": "Center",
|
| 984 |
+
"label_counts": {
|
| 985 |
+
"Center": 3
|
| 986 |
+
},
|
| 987 |
+
"majority_share": 1.0,
|
| 988 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 989 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 990 |
+
"source_url": "www.townhall.com"
|
| 991 |
+
},
|
| 992 |
+
"Time Magazine": {
|
| 993 |
+
"article_count": 70,
|
| 994 |
+
"bias": "Left",
|
| 995 |
+
"label_counts": {
|
| 996 |
+
"Left": 70
|
| 997 |
+
},
|
| 998 |
+
"majority_share": 1.0,
|
| 999 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 1000 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 1001 |
+
"source_url": "www.time.com"
|
| 1002 |
+
},
|
| 1003 |
+
"Townhall": {
|
| 1004 |
+
"article_count": 1273,
|
| 1005 |
+
"bias": "Center",
|
| 1006 |
+
"label_counts": {
|
| 1007 |
+
"Center": 1273
|
| 1008 |
+
},
|
| 1009 |
+
"majority_share": 1.0,
|
| 1010 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 1011 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 1012 |
+
"source_url": "www.townhall.com"
|
| 1013 |
+
},
|
| 1014 |
+
"USA TODAY": {
|
| 1015 |
+
"article_count": 1785,
|
| 1016 |
+
"bias": "Right",
|
| 1017 |
+
"label_counts": {
|
| 1018 |
+
"Right": 1785
|
| 1019 |
+
},
|
| 1020 |
+
"majority_share": 1.0,
|
| 1021 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 1022 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 1023 |
+
"source_url": "www.usatoday.com"
|
| 1024 |
+
},
|
| 1025 |
+
"Vanity Fair": {
|
| 1026 |
+
"article_count": 157,
|
| 1027 |
+
"bias": "Left",
|
| 1028 |
+
"label_counts": {
|
| 1029 |
+
"Left": 157
|
| 1030 |
+
},
|
| 1031 |
+
"majority_share": 1.0,
|
| 1032 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 1033 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 1034 |
+
"source_url": "www.vanityfair.com"
|
| 1035 |
+
},
|
| 1036 |
+
"Vice": {
|
| 1037 |
+
"article_count": 67,
|
| 1038 |
+
"bias": "Left",
|
| 1039 |
+
"label_counts": {
|
| 1040 |
+
"Left": 67
|
| 1041 |
+
},
|
| 1042 |
+
"majority_share": 1.0,
|
| 1043 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 1044 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 1045 |
+
"source_url": "www.vice.com"
|
| 1046 |
+
},
|
| 1047 |
+
"Victor Hanson": {
|
| 1048 |
+
"article_count": 62,
|
| 1049 |
+
"bias": "Center",
|
| 1050 |
+
"label_counts": {
|
| 1051 |
+
"Center": 62
|
| 1052 |
+
},
|
| 1053 |
+
"majority_share": 1.0,
|
| 1054 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 1055 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 1056 |
+
"source_url": "www.nationalreview.com"
|
| 1057 |
+
},
|
| 1058 |
+
"Vox": {
|
| 1059 |
+
"article_count": 1460,
|
| 1060 |
+
"bias": "Left",
|
| 1061 |
+
"label_counts": {
|
| 1062 |
+
"Left": 1460
|
| 1063 |
+
},
|
| 1064 |
+
"majority_share": 1.0,
|
| 1065 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 1066 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 1067 |
+
"source_url": "www.vox.com"
|
| 1068 |
+
},
|
| 1069 |
+
"Wall Street Journal - Editorial": {
|
| 1070 |
+
"article_count": 7,
|
| 1071 |
+
"bias": "Center",
|
| 1072 |
+
"label_counts": {
|
| 1073 |
+
"Center": 7
|
| 1074 |
+
},
|
| 1075 |
+
"majority_share": 1.0,
|
| 1076 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 1077 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 1078 |
+
"source_url": "www.wsj.com"
|
| 1079 |
+
},
|
| 1080 |
+
"Wall Street Journal - News": {
|
| 1081 |
+
"article_count": 255,
|
| 1082 |
+
"bias": "Right",
|
| 1083 |
+
"label_counts": {
|
| 1084 |
+
"Right": 255
|
| 1085 |
+
},
|
| 1086 |
+
"majority_share": 1.0,
|
| 1087 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 1088 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 1089 |
+
"source_url": "www.wsj.com"
|
| 1090 |
+
},
|
| 1091 |
+
"Washington Post": {
|
| 1092 |
+
"article_count": 108,
|
| 1093 |
+
"bias": "Left",
|
| 1094 |
+
"label_counts": {
|
| 1095 |
+
"Left": 108
|
| 1096 |
+
},
|
| 1097 |
+
"majority_share": 1.0,
|
| 1098 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 1099 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 1100 |
+
"source_url": "www.washingtonpost.com"
|
| 1101 |
+
},
|
| 1102 |
+
"Washington Times": {
|
| 1103 |
+
"article_count": 2883,
|
| 1104 |
+
"bias": "Center",
|
| 1105 |
+
"label_counts": {
|
| 1106 |
+
"Center": 2883
|
| 1107 |
+
},
|
| 1108 |
+
"majority_share": 1.0,
|
| 1109 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 1110 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 1111 |
+
"source_url": "www.washingtontimes.com"
|
| 1112 |
+
},
|
| 1113 |
+
"Yahoo! News": {
|
| 1114 |
+
"article_count": 11,
|
| 1115 |
+
"bias": "Left",
|
| 1116 |
+
"label_counts": {
|
| 1117 |
+
"Left": 11
|
| 1118 |
+
},
|
| 1119 |
+
"majority_share": 1.0,
|
| 1120 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 1121 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 1122 |
+
"source_url": "www.news.yahoo.com"
|
| 1123 |
+
},
|
| 1124 |
+
"Yahoo! The 360": {
|
| 1125 |
+
"article_count": 80,
|
| 1126 |
+
"bias": "Right",
|
| 1127 |
+
"label_counts": {
|
| 1128 |
+
"Right": 80
|
| 1129 |
+
},
|
| 1130 |
+
"majority_share": 1.0,
|
| 1131 |
+
"notes": "Generated by aggregating article-level political bias labels by source.",
|
| 1132 |
+
"provenance": "siddharthmb/article-bias-prediction-media-splits",
|
| 1133 |
+
"source_url": "www.news.yahoo.com"
|
| 1134 |
+
}
|
| 1135 |
+
}
|
| 1136 |
+
}
|
src/data/source_bias.json
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sources": {
|
| 3 |
+
"Fox News": {
|
| 4 |
+
"bias": "Right",
|
| 5 |
+
"provenance": "manual_demo",
|
| 6 |
+
"source_url": null,
|
| 7 |
+
"article_count": null,
|
| 8 |
+
"label_counts": null,
|
| 9 |
+
"notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
|
| 10 |
+
},
|
| 11 |
+
"Breitbart": {
|
| 12 |
+
"bias": "Right",
|
| 13 |
+
"provenance": "manual_demo",
|
| 14 |
+
"source_url": null,
|
| 15 |
+
"article_count": null,
|
| 16 |
+
"label_counts": null,
|
| 17 |
+
"notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
|
| 18 |
+
},
|
| 19 |
+
"The Daily Wire": {
|
| 20 |
+
"bias": "Right",
|
| 21 |
+
"provenance": "manual_demo",
|
| 22 |
+
"source_url": null,
|
| 23 |
+
"article_count": null,
|
| 24 |
+
"label_counts": null,
|
| 25 |
+
"notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
|
| 26 |
+
},
|
| 27 |
+
"New York Post": {
|
| 28 |
+
"bias": "Right",
|
| 29 |
+
"provenance": "manual_demo",
|
| 30 |
+
"source_url": null,
|
| 31 |
+
"article_count": null,
|
| 32 |
+
"label_counts": null,
|
| 33 |
+
"notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
|
| 34 |
+
},
|
| 35 |
+
"TechRadar": {
|
| 36 |
+
"bias": "Right",
|
| 37 |
+
"provenance": "manual_demo",
|
| 38 |
+
"source_url": null,
|
| 39 |
+
"article_count": null,
|
| 40 |
+
"label_counts": null,
|
| 41 |
+
"notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
|
| 42 |
+
},
|
| 43 |
+
"BBC News": {
|
| 44 |
+
"bias": "Center",
|
| 45 |
+
"provenance": "manual_demo",
|
| 46 |
+
"source_url": null,
|
| 47 |
+
"article_count": null,
|
| 48 |
+
"label_counts": null,
|
| 49 |
+
"notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
|
| 50 |
+
},
|
| 51 |
+
"Reuters": {
|
| 52 |
+
"bias": "Center",
|
| 53 |
+
"provenance": "manual_demo",
|
| 54 |
+
"source_url": null,
|
| 55 |
+
"article_count": null,
|
| 56 |
+
"label_counts": null,
|
| 57 |
+
"notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
|
| 58 |
+
},
|
| 59 |
+
"Associated Press": {
|
| 60 |
+
"bias": "Center",
|
| 61 |
+
"provenance": "manual_demo",
|
| 62 |
+
"source_url": null,
|
| 63 |
+
"article_count": null,
|
| 64 |
+
"label_counts": null,
|
| 65 |
+
"notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
|
| 66 |
+
},
|
| 67 |
+
"Mental Floss": {
|
| 68 |
+
"bias": "Center",
|
| 69 |
+
"provenance": "manual_demo",
|
| 70 |
+
"source_url": null,
|
| 71 |
+
"article_count": null,
|
| 72 |
+
"label_counts": null,
|
| 73 |
+
"notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
|
| 74 |
+
},
|
| 75 |
+
"New Scientist": {
|
| 76 |
+
"bias": "Center",
|
| 77 |
+
"provenance": "manual_demo",
|
| 78 |
+
"source_url": null,
|
| 79 |
+
"article_count": null,
|
| 80 |
+
"label_counts": null,
|
| 81 |
+
"notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
|
| 82 |
+
},
|
| 83 |
+
"Nature.com": {
|
| 84 |
+
"bias": "Center",
|
| 85 |
+
"provenance": "manual_demo",
|
| 86 |
+
"source_url": null,
|
| 87 |
+
"article_count": null,
|
| 88 |
+
"label_counts": null,
|
| 89 |
+
"notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
|
| 90 |
+
},
|
| 91 |
+
"Futurity: Research News": {
|
| 92 |
+
"bias": "Center",
|
| 93 |
+
"provenance": "manual_demo",
|
| 94 |
+
"source_url": null,
|
| 95 |
+
"article_count": null,
|
| 96 |
+
"label_counts": null,
|
| 97 |
+
"notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
|
| 98 |
+
},
|
| 99 |
+
"Yahoo Entertainment": {
|
| 100 |
+
"bias": "Center",
|
| 101 |
+
"provenance": "manual_demo",
|
| 102 |
+
"source_url": null,
|
| 103 |
+
"article_count": null,
|
| 104 |
+
"label_counts": null,
|
| 105 |
+
"notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
|
| 106 |
+
},
|
| 107 |
+
"NPR": {
|
| 108 |
+
"bias": "Center-Left",
|
| 109 |
+
"provenance": "manual_demo",
|
| 110 |
+
"source_url": null,
|
| 111 |
+
"article_count": null,
|
| 112 |
+
"label_counts": null,
|
| 113 |
+
"notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
|
| 114 |
+
},
|
| 115 |
+
"The Guardian": {
|
| 116 |
+
"bias": "Center-Left",
|
| 117 |
+
"provenance": "manual_demo",
|
| 118 |
+
"source_url": null,
|
| 119 |
+
"article_count": null,
|
| 120 |
+
"label_counts": null,
|
| 121 |
+
"notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
|
| 122 |
+
},
|
| 123 |
+
"Techdirt": {
|
| 124 |
+
"bias": "Center-Left",
|
| 125 |
+
"provenance": "manual_demo",
|
| 126 |
+
"source_url": null,
|
| 127 |
+
"article_count": null,
|
| 128 |
+
"label_counts": null,
|
| 129 |
+
"notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
|
| 130 |
+
},
|
| 131 |
+
"Vox": {
|
| 132 |
+
"bias": "Center-Left",
|
| 133 |
+
"provenance": "manual_demo",
|
| 134 |
+
"source_url": null,
|
| 135 |
+
"article_count": null,
|
| 136 |
+
"label_counts": null,
|
| 137 |
+
"notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
|
| 138 |
+
},
|
| 139 |
+
"Wired": {
|
| 140 |
+
"bias": "Center-Left",
|
| 141 |
+
"provenance": "manual_demo",
|
| 142 |
+
"source_url": null,
|
| 143 |
+
"article_count": null,
|
| 144 |
+
"label_counts": null,
|
| 145 |
+
"notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
|
| 146 |
+
},
|
| 147 |
+
"Al Jazeera English": {
|
| 148 |
+
"bias": "Left",
|
| 149 |
+
"provenance": "manual_demo",
|
| 150 |
+
"source_url": null,
|
| 151 |
+
"article_count": null,
|
| 152 |
+
"label_counts": null,
|
| 153 |
+
"notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
|
| 154 |
+
},
|
| 155 |
+
"Jezebel": {
|
| 156 |
+
"bias": "Left",
|
| 157 |
+
"provenance": "manual_demo",
|
| 158 |
+
"source_url": null,
|
| 159 |
+
"article_count": null,
|
| 160 |
+
"label_counts": null,
|
| 161 |
+
"notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
|
| 162 |
+
},
|
| 163 |
+
"Gizmodo.com": {
|
| 164 |
+
"bias": "Left",
|
| 165 |
+
"provenance": "manual_demo",
|
| 166 |
+
"source_url": null,
|
| 167 |
+
"article_count": null,
|
| 168 |
+
"label_counts": null,
|
| 169 |
+
"notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
|
| 170 |
+
},
|
| 171 |
+
"Gothamist": {
|
| 172 |
+
"bias": "Left",
|
| 173 |
+
"provenance": "manual_demo",
|
| 174 |
+
"source_url": null,
|
| 175 |
+
"article_count": null,
|
| 176 |
+
"label_counts": null,
|
| 177 |
+
"notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
|
| 178 |
+
},
|
| 179 |
+
"The Intercept": {
|
| 180 |
+
"bias": "Left",
|
| 181 |
+
"provenance": "manual_demo",
|
| 182 |
+
"source_url": null,
|
| 183 |
+
"article_count": null,
|
| 184 |
+
"label_counts": null,
|
| 185 |
+
"notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
|
| 186 |
+
}
|
| 187 |
+
},
|
| 188 |
+
"aliases": {
|
| 189 |
+
"ap news": "Associated Press",
|
| 190 |
+
"associated press": "Associated Press",
|
| 191 |
+
"bbc": "BBC News",
|
| 192 |
+
"bbc news": "BBC News",
|
| 193 |
+
"fox": "Fox News",
|
| 194 |
+
"fox news": "Fox News",
|
| 195 |
+
"gizmodo": "Gizmodo.com",
|
| 196 |
+
"npr": "NPR",
|
| 197 |
+
"reuters": "Reuters",
|
| 198 |
+
"the guardian": "The Guardian",
|
| 199 |
+
"wired": "Wired",
|
| 200 |
+
"yahoo entertainment": "Yahoo Entertainment"
|
| 201 |
+
}
|
| 202 |
+
}
|
src/db/__init__.py
ADDED
|
File without changes
|
src/db/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (127 Bytes). View file
|
|
|
src/db/__pycache__/vector_store.cpython-313.pyc
ADDED
|
Binary file (5.81 kB). View file
|
|
|
src/db/vector_store.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import hashlib
|
| 2 |
+
import chromadb
|
| 3 |
+
from sentence_transformers import SentenceTransformer
|
| 4 |
+
from src.config import CHROMA_DB_PATH, HF_TOKEN
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
CHROMA_DB_PATH.mkdir(parents=True, exist_ok=True)
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class NewsVectorStore:
    """Persistent ChromaDB collection of news articles with sentence embeddings.

    The embedding model is loaded once and cached on the class, so every
    instance created in the same process reuses it.
    """

    # Process-wide cache for the embedding model; filled by the first __init__.
    _model = None

    def __init__(self, collection_name = "news_articles"):
        """Open (or create) the collection and ensure the embedding model is loaded.

        Args:
            collection_name: Name of the ChromaDB collection to use.
        """
        print(f"Initializing ChromaDB at {CHROMA_DB_PATH}...")
        self.client = chromadb.PersistentClient(path=str(CHROMA_DB_PATH))
        self.collection = self.client.get_or_create_collection(
            name=collection_name,
            metadata={"hnsw:space": "cosine"}
        )

        if NewsVectorStore._model is None:
            print("Loading embedding model (this takes a few seconds)...")
            NewsVectorStore._model = SentenceTransformer(
                'all-MiniLM-L6-v2',
                token=HF_TOKEN,
            )

        self.embedding_model = NewsVectorStore._model
        print("ChromaDB initialized and embedding model loaded.")

    def store_articles(self, articles_data):
        """Embed article dicts and upsert them into the collection.

        Args:
            articles_data: List of dictionaries from NewsAPI. The keys read
                are 'url', 'title', 'description', 'content', 'publishedAt'
                and 'source' (a dict with a 'name' entry).

        Articles without a URL, or whose combined text is 5 characters or
        shorter, are skipped. The document id is the MD5 hex digest of the
        URL, so storing the same URL again overwrites the earlier entry.
        Returns None.
        """
        if not articles_data:
            print("No articles to store.")
            return

        documents = []
        metadatas = []
        ids = []
        seen_ids = set()

        for article in articles_data:
            url = article.get('url')
            if not url:
                continue

            title = article.get('title') or ""
            desc = article.get('description') or ""
            content = article.get("content") or ""
            text_to_embed = f"{title}. {desc}. {content}"

            if len(text_to_embed.strip()) <= 5:
                continue

            doc_id = hashlib.md5(url.encode()).hexdigest()
            # The same URL can appear twice in one response; a single upsert
            # call must not carry duplicate ids, so keep the first occurrence.
            if doc_id in seen_ids:
                continue
            seen_ids.add(doc_id)

            # 'source' may be present but None, and its 'name' may be None as
            # well; fall back explicitly so metadata only holds strings.
            source = article.get('source') or {}

            documents.append(text_to_embed)
            # Store metadata so we can display it later in the UI
            metadatas.append({
                "source": source.get('name') or 'Unknown',
                "url": url,
                "publishedAt": article.get('publishedAt') or '',
                "title": title,
                "description": desc
            })
            ids.append(doc_id)

        if not documents:
            print("No valid documents to store.")
            return

        # Generate embeddings
        print(f"Generating embeddings for {len(documents)} articles...")
        embeddings = self.embedding_model.encode(documents,batch_size=32).tolist()

        # Insert into ChromaDB
        self.collection.upsert(
            embeddings=embeddings,
            documents=documents,
            metadatas=metadatas,
            ids=ids
        )
        print(f"Successfully stored {len(documents)} articles in ChromaDB!")

    def query(self, topic: str, top_k: int = 10) -> list[dict]:
        """Embed the query topic and retrieve the top-k most similar articles.

        Args:
            topic: Free-text query to embed.
            top_k: Number of results requested from the collection.

        Returns:
            One dict per hit with the keys 'text', 'source', 'url',
            'publishedAt', 'similarity_score', 'title' and 'description'.
            'similarity_score' is ``1 - distance`` rounded to 4 decimals; the
            collection is created with cosine space in ``__init__``.
        """
        print(f"querying chromaDB for the topic: '{topic}'")
        query_embedding = self.embedding_model.encode([topic]).tolist()
        results = self.collection.query(
            query_embeddings=query_embedding,
            n_results=top_k,
            include=["documents", "metadatas", "distances"]
        )

        articles = []
        # Results are nested per query embedding; only one query was sent.
        for doc, meta, dist in zip(
            results["documents"][0],
            results["metadatas"][0],
            results["distances"][0]
        ):
            # Tolerate entries that were stored without metadata.
            meta = meta or {}
            articles.append({
                "text": doc,
                "source": meta.get("source", "Unknown"),
                "url": meta.get("url", ""),
                "publishedAt": meta.get("publishedAt", ""),
                "similarity_score": round(1 - dist, 4),
                "title": meta.get("title", ""),
                "description": meta.get("description", ""),
            })

        print(f"Retrieved {len(articles)} articles.")
        return articles
|
| 113 |
+
|
| 114 |
+
if __name__ == "__main__":
    # Manual smoke check: report the collection size, then list every stored URL.
    store = NewsVectorStore()
    print(f"Total documents in collection: {store.collection.count()}")
    stored = store.collection.get()
    stored_urls = [metadata.get("url") for metadata in stored["metadatas"]]
    for stored_url in stored_urls:
        print(stored_url)
|
src/ingestion/__init__.py
ADDED
|
File without changes
|
src/ingestion/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (134 Bytes). View file
|
|
|
src/ingestion/__pycache__/newsapi_client.cpython-313.pyc
ADDED
|
Binary file (2.11 kB). View file
|
|
|
src/ingestion/newsapi_client.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from src.db.vector_store import NewsVectorStore
|
| 2 |
+
from newsapi import NewsApiClient
|
| 3 |
+
from src.config import NEWS_API_KEY
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def fetch_news(topic="AI regulation", lang="en", page_size=10):
    """Fetch articles about *topic* through the NewsAPI "everything" query.

    Args:
        topic: Search query, passed to the client as ``q``.
        lang: Language code, passed as ``language``.
        page_size: Number of articles requested, passed as ``page_size``.

    Returns:
        The list of article dicts from the response. An empty list is
        returned when nothing matched, when the response status is not
        'ok', or when the request raised.

    Raises:
        RuntimeError: If no API key is configured.
    """
    if not NEWS_API_KEY:
        raise RuntimeError("NEWSAPI_KEY is not configured. Add it to .env before using /ingest.")

    news_instance = NewsApiClient(api_key=NEWS_API_KEY)
    try:
        print("Fetching latest articles...")
        response = news_instance.get_everything(q=topic, language=lang, sort_by='relevancy', page_size=page_size)

        if response['status'] != 'ok':
            print(f"API Error: {response.get('message', 'Unknown error')}")
            return []

        articles = response['articles']
        if not articles:
            print("No articles found.")
            # Return an empty list rather than None so that every
            # "nothing to ingest" path yields the same type.
            return []

        print(f"Successfully fetched {len(articles)} articles.")
        print("-" * 40)

        return articles

    except Exception as e:
        # Deliberate best-effort boundary: report the failure and let the
        # caller continue with no articles.
        print(f"Pipeline failed: {str(e)}")
        return []
|
| 32 |
+
|
| 33 |
+
def run_pipeline():
    """Fetch articles for the default query and store them in the vector store.

    Progress is reported on stdout. Nothing is stored when the fetch yields
    no articles.
    """
    print("Fetching articles...")
    fetched = fetch_news()

    if fetched:
        print(f"Fetched {len(fetched)} articles.")

        # The store is only created once there is something to persist.
        NewsVectorStore().store_articles(fetched)

        print("Pipeline complete.")
    else:
        print("No articles found.")
|
| 47 |
+
|
| 48 |
+
if __name__ == "__main__":
|
| 49 |
+
run_pipeline()
|
src/models/__pycache__/dataset_prep.cpython-313.pyc
ADDED
|
Binary file (1.01 kB). View file
|
|
|
src/models/__pycache__/test_inference.cpython-313.pyc
ADDED
|
Binary file (5.48 kB). View file
|
|
|
src/models/__pycache__/train_model.cpython-313.pyc
ADDED
|
Binary file (4.4 kB). View file
|
|
|
src/models/dataset_prep.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from datasets import load_dataset
|
| 3 |
+
from collections import Counter
|
| 4 |
+
from src.config import HF_ENDPOINT, HF_TOKEN
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
if HF_ENDPOINT:
|
| 8 |
+
os.environ["HF_ENDPOINT"] = HF_ENDPOINT
|
| 9 |
+
|
| 10 |
+
def fetch_and_inspect_data():
    """Load the BABE dataset and print value counts for its two label columns.

    Any exception raised while loading or counting is reported on stdout
    instead of being propagated.
    """
    try:
        babe = load_dataset("mediabiasgroup/BABE", token=HF_TOKEN)
        # Same order as before: 'label' first, then 'label_opinion'.
        for column in ("label", "label_opinion"):
            print(Counter(babe["train"][column]))
    except Exception as e:
        print(f"Failed to load dataset: {e}")
|
| 17 |
+
|
| 18 |
+
if __name__ == "__main__":
|
| 19 |
+
fetch_and_inspect_data()
|
src/models/test_inference.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import torch
|
| 3 |
+
from transformers import RobertaTokenizer, RobertaForSequenceClassification
|
| 4 |
+
import torch.nn.functional as F
|
| 5 |
+
from src.config import BIAS_MODEL_PATH, HF_ENDPOINT, HF_TOKEN
|
| 6 |
+
|
| 7 |
+
if HF_ENDPOINT:
|
| 8 |
+
os.environ["HF_ENDPOINT"] = HF_ENDPOINT
|
| 9 |
+
|
| 10 |
+
class BiasPredictor:
    """Binary bias classifier ("Not Biased" / "Biased") built on a RoBERTa checkpoint.

    The tokenizer and model are loaded once in ``__init__`` and reused by
    ``predict`` and ``predict_batch``.
    """

    def __init__(self, model_dir=BIAS_MODEL_PATH, base_model_name="roberta-base"):
        """Load tokenizer and model from *model_dir* and switch to eval mode.

        Args:
            model_dir: Location passed to ``from_pretrained`` for both the
                tokenizer and the sequence-classification model.
            base_model_name: Not used by this class; kept so existing callers
                that pass it keep working.
        """
        print("Loading model and tokenizer once...")
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = RobertaTokenizer.from_pretrained(str(model_dir), token=HF_TOKEN)
        self.model = RobertaForSequenceClassification.from_pretrained(str(model_dir), token=HF_TOKEN)
        self.model.to(self.device)
        self.model.eval()

        # Diagnostic dump of the classifier-head parameters (name,
        # requires_grad, mean value) to eyeball what was loaded.
        print("\n--- CLASSIFIER PARAM CHECK ---")
        for name, param in self.model.named_parameters():
            if "classifier" in name:
                print(name, param.requires_grad, param.data.mean().item())
        print("--- END CHECK ---\n")

        # Class id -> human-readable label.
        self.label_map = {
            0: "Not Biased",
            1: "Biased"
        }

    def predict(self, text):
        """Classify a single text.

        Args:
            text: The string to classify.

        Returns:
            A dict with the keys 'text', 'class_id', 'label', 'confidence'
            and 'probabilities', identical in shape to one entry of
            ``predict_batch``.
        """
        # Delegate to the batch path so both entry points share one
        # implementation and always read the arg-max of the text's own row.
        return self.predict_batch([text])[0]

    def predict_batch(self, texts: list[str]) -> list[dict]:
        """Classify several texts in one forward pass.

        Args:
            texts: Strings to classify. Each is truncated to 128 tokens and
                the batch is padded to its longest member.

        Returns:
            One dict per input, in input order, with the keys 'text',
            'class_id', 'label', 'confidence' (probability of the predicted
            class) and 'probabilities' (softmax over all classes).
        """
        # Nothing to classify; skip the tokenizer and model for an empty batch.
        if not texts:
            return []

        inputs = self.tokenizer(
            texts,
            return_tensors="pt",
            truncation=True,
            max_length=128,
            padding=True
        ).to(self.device)

        with torch.no_grad():
            logits = self.model(**inputs).logits
            probs = F.softmax(logits, dim=-1)

        # Convert once instead of calling .item()/.tolist() per row.
        class_ids = probs.argmax(dim=-1).tolist()
        prob_rows = probs.tolist()

        return [
            {
                "text": text,
                "class_id": class_id,
                "label": self.label_map.get(class_id, "Unknown"),
                "confidence": row[class_id],
                "probabilities": row
            }
            for text, class_id, row in zip(texts, class_ids, prob_rows)
        ]
|
| 81 |
+
|
| 82 |
+
if __name__ == "__main__":
    # Manual demo: run the same four sentences through the batch path and
    # then through the single-text path so the outputs can be compared.
    predictor = BiasPredictor()

    texts = [
        "The government brutally crushed the peaceful protesters.",
        "The government deployed police officers to the protest site.",
        "Scientists warn of accelerating climate change impacts.",
        "Climate alarmists continue pushing their radical agenda."
    ]

    print("\n--- BATCH TEST ---")
    for batch_result in predictor.predict_batch(texts):
        print(f"[{batch_result['label']}] ({batch_result['confidence']:.4f}) {batch_result['text'][:60]}")

    print("\n ------- Single pass test for each text seprately ----------")
    for sample in texts:
        single_result = predictor.predict(sample)
        print(f"[{single_result['label']}] ({single_result['confidence']:.4f}) {single_result['text'][:60]}")
|
src/models/train_model.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from datasets import load_dataset
|
| 3 |
+
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments
|
| 4 |
+
from peft import LoraConfig, get_peft_model, TaskType
|
| 5 |
+
from sklearn.metrics import accuracy_score, f1_score
|
| 6 |
+
import torch
|
| 7 |
+
import numpy as np
|
| 8 |
+
from transformers import set_seed
|
| 9 |
+
from transformers import DataCollatorWithPadding
|
| 10 |
+
from datetime import datetime
|
| 11 |
+
from src.config import HF_ENDPOINT, HF_TOKEN, MODEL_DIR
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
if HF_ENDPOINT:
|
| 15 |
+
os.environ["HF_ENDPOINT"] = HF_ENDPOINT
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
set_seed(42)
|
| 19 |
+
np.random.seed(42)
|
| 20 |
+
torch.manual_seed(42)
|
| 21 |
+
|
| 22 |
+
output_dir=os.path.join(MODEL_DIR, "bias_checkpoints")
|
| 23 |
+
os.makedirs(MODEL_DIR, exist_ok=True)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def main():
    """Fine-tune ``roberta-base`` with LoRA on BABE and save the merged model.

    The ``mediabiasgroup/BABE`` dataset is loaded and 20% of its ``train``
    split is held out for evaluation. The ``text`` column is tokenized
    (truncated to 128 tokens), a two-label sequence classifier is trained for
    three epochs with LoRA adapters on the ``query``/``value`` modules, and the
    merged model plus tokenizer are written to
    ``MODEL_DIR/bias_lora_<timestamp>``.
    """
    dataset = load_dataset("mediabiasgroup/BABE", token=HF_TOKEN)
    dataset = dataset["train"].train_test_split(test_size=0.2, seed=42)
    model_name = "roberta-base"
    tokenizer = RobertaTokenizer.from_pretrained(model_name, token=HF_TOKEN)

    def tokenize_function(examples):
        """Tokenize a batch of rows, truncating each text to 128 tokens."""
        return tokenizer(examples["text"], truncation=True, max_length=128)

    # No padding is requested at tokenization time; the collator pads batches.
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
    tokenized_datasets = dataset.map(tokenize_function, batched=True)
    # Trainer expects the target column to be called "labels".
    tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
    tokenized_datasets.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

    model = RobertaForSequenceClassification.from_pretrained(
        model_name,
        num_labels=2,
        token=HF_TOKEN,
    )

    peft_config = LoraConfig(
        task_type=TaskType.SEQ_CLS,
        r=8,
        lora_alpha=32,
        lora_dropout=0.1,
        target_modules=["query", "value"],
    )

    model = get_peft_model(model, peft_config)
    model.print_trainable_parameters()

    training_args = TrainingArguments(
        output_dir=output_dir,
        learning_rate=2e-4,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=3,
        eval_strategy="epoch",
        save_strategy="epoch",
        logging_steps=10,
        report_to="none",
    )

    def compute_metrics(eval_pred):
        """Return accuracy and weighted F1 for one evaluation pass."""
        logits, labels = eval_pred
        preds = np.argmax(logits, axis=1)

        return {
            "accuracy": accuracy_score(labels, preds),
            "f1_weighted": f1_score(labels, preds, average="weighted"),
        }

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_datasets["train"],
        eval_dataset=tokenized_datasets["test"],
        compute_metrics=compute_metrics,
        data_collator=data_collator,
    )

    trainer.train()

    # Merge the LoRA adapters into the base model before saving (marked as a
    # critical fix by the original author), so the saved directory holds the
    # merged model rather than the PEFT wrapper.
    model = model.merge_and_unload()

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    save_path = os.path.join(MODEL_DIR, f"bias_lora_{timestamp}")

    model.save_pretrained(save_path)
    tokenizer.save_pretrained(save_path)


if __name__ == "__main__":
    main()
|
src/ui/__init__.py
ADDED
|
File without changes
|
src/ui/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (127 Bytes). View file
|
|
|
src/ui/__pycache__/app.cpython-313.pyc
ADDED
|
Binary file (20.5 kB). View file
|
|
|
src/ui/app.py
ADDED
|
@@ -0,0 +1,518 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from collections import defaultdict
|
| 2 |
+
from html import escape
|
| 3 |
+
|
| 4 |
+
import streamlit as st
|
| 5 |
+
|
| 6 |
+
from src.ui.components.article_card import inject_article_card_styles, render_article_card
|
| 7 |
+
from src.ui.components.charts import build_bias_distribution_chart, build_lean_bias_chart
|
| 8 |
+
from src.ui.services.api_client import NewsLensClient
|
| 9 |
+
from src.ui.services.api_client import DirectPipelineClient
|
| 10 |
+
|
| 11 |
+
# Hard-coded headline metrics of the bias classifier, shown in the "Model" tab.
# The eval_* values equal the epoch-3 row of the training table rendered there;
# "train_loss" is not read anywhere in the visible part of this file.
MODEL_EVAL = {
    "eval_accuracy": 0.8544,
    "eval_f1_weighted": 0.8546,
    "eval_loss": 0.3933,
    "train_loss": 0.3888,
    "epochs": 3,
}


# Wide page layout with the sidebar (which holds the search controls) open.
st.set_page_config(
    page_title="NewsLens",
    layout="wide",
    initial_sidebar_state="expanded",
)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def inject_styles() -> None:
    """Inject the dashboard-wide stylesheet into the page.

    The stylesheet declares the ``--nl-*`` color variables and styles the
    Streamlit container, sidebar, metric tiles and buttons, plus the ``nl-*``
    classes used by the HTML snippets this module renders. It is emitted
    through ``st.markdown`` with ``unsafe_allow_html=True``.
    """
    st.markdown(
        """
<style>
:root {
--nl-ink: #15202b;
--nl-muted: #64748b;
--nl-line: #d8dee9;
--nl-panel: #ffffff;
--nl-soft: #f6f8fb;
--nl-blue: #2457c5;
--nl-teal: #087f8c;
--nl-red: #c24138;
--nl-green: #247857;
}

.block-container {
padding-top: 1.4rem;
padding-bottom: 2rem;
max-width: 1240px;
}

[data-testid="stSidebar"] {
background: #f7f9fc;
border-right: 1px solid var(--nl-line);
}

[data-testid="stSidebar"] h1,
[data-testid="stSidebar"] h2,
[data-testid="stSidebar"] h3 {
color: var(--nl-ink);
}

h1, h2, h3 {
letter-spacing: 0;
}

.nl-topbar {
border-bottom: 1px solid var(--nl-line);
padding: 0 0 1rem 0;
margin-bottom: 1.2rem;
}

.nl-kicker {
color: var(--nl-teal);
font-size: 0.78rem;
font-weight: 800;
letter-spacing: 0.08em;
text-transform: uppercase;
margin-bottom: 0.25rem;
}

.nl-title {
color: var(--nl-ink);
font-size: 2.25rem;
font-weight: 800;
line-height: 1.1;
margin: 0;
}

.nl-subtitle {
color: var(--nl-muted);
max-width: 780px;
margin-top: 0.55rem;
font-size: 1rem;
line-height: 1.55;
}

.nl-empty {
background: linear-gradient(135deg, #f7f9fc 0%, #eef6f2 100%);
border: 1px solid var(--nl-line);
border-radius: 8px;
padding: 2.2rem;
margin-top: 1rem;
}

.nl-empty h3 {
color: var(--nl-ink);
margin: 0 0 0.5rem 0;
}

.nl-empty p {
color: var(--nl-muted);
margin: 0;
line-height: 1.6;
}

.nl-section-heading {
color: var(--nl-ink);
font-size: 1.05rem;
font-weight: 800;
margin: 1.1rem 0 0.45rem 0;
}

.nl-source-heading {
border-top: 1px solid var(--nl-line);
color: var(--nl-ink);
display: flex;
justify-content: space-between;
align-items: center;
gap: 1rem;
padding-top: 1rem;
margin: 1.1rem 0 0.5rem 0;
}

.nl-source-heading h3 {
font-size: 1.05rem;
margin: 0;
}

.nl-source-meta {
color: var(--nl-muted);
font-size: 0.85rem;
white-space: nowrap;
}

.nl-insight {
border-left: 4px solid var(--nl-teal);
background: #f5fbfa;
padding: 0.9rem 1rem;
color: var(--nl-ink);
margin: 0.25rem 0 0.9rem 0;
}

.nl-insight strong {
color: var(--nl-teal);
}

.nl-model-panel {
background: #f7f9fc;
border: 1px solid var(--nl-line);
border-radius: 8px;
padding: 1rem;
margin-top: 0.8rem;
}

.nl-model-panel h3 {
color: var(--nl-ink);
font-size: 1rem;
margin: 0 0 0.6rem 0;
}

.nl-model-grid {
display: grid;
gap: 0.65rem;
grid-template-columns: repeat(4, minmax(0, 1fr));
}

.nl-model-stat {
background: #ffffff;
border: 1px solid var(--nl-line);
border-radius: 8px;
padding: 0.75rem;
}

.nl-model-stat span {
color: var(--nl-muted);
display: block;
font-size: 0.72rem;
font-weight: 800;
letter-spacing: 0.04em;
text-transform: uppercase;
}

.nl-model-stat strong {
color: var(--nl-ink);
display: block;
font-size: 1.25rem;
margin-top: 0.2rem;
}

div[data-testid="stMetric"] {
background: var(--nl-panel);
border: 1px solid var(--nl-line);
border-radius: 8px;
padding: 0.85rem 1rem;
}

div[data-testid="stMetric"] label {
color: var(--nl-muted);
}

.stButton > button {
background: var(--nl-blue);
border: 1px solid var(--nl-blue);
color: #ffffff;
font-weight: 700;
min-height: 2.6rem;
width: 100%;
}

.stButton > button:hover {
background: #1f4dac;
border-color: #1f4dac;
color: #ffffff;
}

@media (max-width: 760px) {
.nl-title {
font-size: 1.75rem;
}

.nl-empty {
padding: 1.4rem;
}

.nl-source-heading {
align-items: flex-start;
flex-direction: column;
gap: 0.2rem;
}

.nl-model-grid {
grid-template-columns: repeat(2, minmax(0, 1fr));
}
}
</style>
""",
        unsafe_allow_html=True,
    )
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
def summarize_bias(summary: dict) -> tuple[int, int, float]:
    """Aggregate per-source counts into overall totals.

    Args:
        summary: Mapping of source name to a stats dict. Only the ``"total"``
            and ``"Biased"`` entries are read; a missing or ``None`` entry
            counts as 0.

    Returns:
        ``(total, biased, ratio)`` where ``ratio`` is ``biased / total``, or
        ``0.0`` when there are no articles.
    """
    total = sum(stats.get("total") or 0 for stats in summary.values())
    biased = sum(stats.get("Biased") or 0 for stats in summary.values())
    # Keep the ratio a float in the empty case too, as the annotation promises.
    ratio = biased / total if total else 0.0
    return total, biased, ratio
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
def insight_copy(ratio: float) -> str:
    """Build the one-line HTML insight shown under the metric tiles.

    Args:
        ratio: Share of biased articles, expected in ``[0, 1]``.

    Returns:
        An HTML fragment: the rounded percentage in ``<strong>`` followed by a
        sentence chosen by threshold (``>= 0.6`` biased-leaning, ``<= 0.4``
        mostly neutral, otherwise mixed).
    """
    if ratio >= 0.6:
        verdict = "The retrieved articles lean noticeably toward biased framing."
    elif ratio <= 0.4:
        verdict = "The article set is mostly neutral by the current model."
    else:
        verdict = "The result set is mixed and worth comparing source by source."

    headline = f"<strong>{int(round(ratio * 100))}% biased coverage.</strong>"
    return f"{headline} {verdict}"
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
def render_model_panel() -> None:
    """Render the "Model Snapshot" card from the ``MODEL_EVAL`` constants.

    Accuracy and weighted F1 are shown as one-decimal percentages, the eval
    loss with three decimals, and the epoch count as stored.
    """
    accuracy = f"{MODEL_EVAL['eval_accuracy']:.1%}"
    weighted_f1 = f"{MODEL_EVAL['eval_f1_weighted']:.1%}"
    eval_loss = f"{MODEL_EVAL['eval_loss']:.3f}"
    epochs = MODEL_EVAL["epochs"]

    panel_html = f"""
<div class="nl-model-panel">
<h3>Model Snapshot</h3>
<div class="nl-model-grid">
<div class="nl-model-stat">
<span>Eval Accuracy</span>
<strong>{accuracy}</strong>
</div>
<div class="nl-model-stat">
<span>Weighted F1</span>
<strong>{weighted_f1}</strong>
</div>
<div class="nl-model-stat">
<span>Eval Loss</span>
<strong>{eval_loss}</strong>
</div>
<div class="nl-model-stat">
<span>Epochs</span>
<strong>{epochs}</strong>
</div>
</div>
</div>
"""
    st.markdown(panel_html, unsafe_allow_html=True)
|
| 292 |
+
|
| 293 |
+
|
| 294 |
+
def render_empty_state() -> None:
    """Render the placeholder card shown while no analysis result is stored."""
    placeholder_html = """
<div class="nl-empty">
<h3>Run a topic analysis</h3>
<p>
Search a public issue, company, policy, or event to compare retrieved articles by source,
model label, and confidence. Results will appear as a dashboard with source-level evidence.
</p>
</div>
"""
    st.markdown(placeholder_html, unsafe_allow_html=True)
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
# Emit both stylesheets and create the pipeline client on every script run.
inject_styles()
inject_article_card_styles()
client = DirectPipelineClient()

# Default the two session-state slots that carry results across reruns.
if "analysis" not in st.session_state:
    st.session_state.analysis = None
if "last_ingest" not in st.session_state:
    st.session_state.last_ingest = None

# Sidebar: search controls, the two action buttons and the sample-topic picker.
with st.sidebar:
    st.title("NewsLens")
    st.caption("News bias analysis dashboard")

    topic = st.text_input("Topic", value="climate change", max_chars=120)
    top_k = st.slider("Articles to retrieve", min_value=1, max_value=20, value=10)
    page_size = st.slider("Articles to ingest", min_value=5, max_value=50, value=15, step=5)

    with st.expander("Advanced", expanded=False):
        debug = st.checkbox("Show model internals", value=False)

    ingest = st.button("Ingest latest articles")
    analyze = st.button("Analyze topic", type="primary")

    # Confirmation for the most recent ingest stored in session state.
    if st.session_state.last_ingest:
        st.success(
            f"Stored {st.session_state.last_ingest['articles_stored']} "
            f"article(s) for {st.session_state.last_ingest['topic']}."
        )

    st.divider()
    st.caption("Suggested searches")
    sample_topics = ["climate change", "electric vehicles", "AI regulation", "public health"]
    selected_sample = st.selectbox(
        "Sample topics",
        ["Use typed topic"] + sample_topics,
        label_visibility="collapsed",
    )

# Any sample other than the first option replaces the typed topic.
if selected_sample != "Use typed topic":
    topic = selected_sample

# Page header.
st.markdown(
    """
<div class="nl-topbar">
<div class="nl-kicker">Media Intelligence</div>
<h1 class="nl-title">NewsLens Bias Analyzer</h1>
<div class="nl-subtitle">
Compare how news sources frame a topic using retrieval, source metadata, and a text-bias classifier.
</div>
</div>
""",
    unsafe_allow_html=True,
)
|
| 362 |
+
|
| 363 |
+
# "Analyze topic": analyze the current topic and keep the result in session state.
if analyze:
    if not topic.strip():
        st.error("Topic cannot be empty.")
        st.stop()

    with st.spinner("Analyzing coverage..."):
        try:
            st.session_state.analysis = client.analyze(topic.strip(), top_k)
        except Exception as exc:
            # Show the client's error message and end this script run.
            st.error(str(exc))
            st.stop()

# "Ingest latest articles": ingest first, then analyze the same topic so the
# stored analysis is refreshed in the same run.
if ingest:
    if not topic.strip():
        st.error("Topic cannot be empty.")
        st.stop()

    with st.spinner("Fetching and indexing articles..."):
        try:
            st.session_state.last_ingest = client.ingest(topic.strip(), page_size)
            st.session_state.analysis = client.analyze(topic.strip(), top_k)
        except Exception as exc:
            # Show the client's error message and end this script run.
            st.error(str(exc))
            st.stop()
|
| 387 |
+
|
| 388 |
+
data = st.session_state.analysis

# Nothing analyzed yet: show the placeholder card and end this run.
if data is None:
    render_empty_state()
    st.stop()

# "summary" is iterated as source -> stats and "results" as a list of article
# dicts by the code below.
summary = data.get("summary", {})
results = data.get("results", [])
total, biased, bias_ratio = summarize_bias(summary)
# Clamp at zero in case the biased count exceeds the reported total.
neutral = max(total - biased, 0)
source_count = len(summary)

metric_cols = st.columns(4)
metric_cols[0].metric("Articles", total)
metric_cols[1].metric("Sources", source_count)
metric_cols[2].metric("Biased", biased)
metric_cols[3].metric("Not biased", neutral)

st.markdown(
    f"""<div class="nl-insight">{insight_copy(bias_ratio)}</div>""",
    unsafe_allow_html=True,
)

tab_overview, tab_articles, tab_model = st.tabs(["Overview", "Articles", "Model"])

# Overview tab: one chart per source and one per source lean; each chart
# builder may return a falsy value, in which case a warning is shown instead.
with tab_overview:
    st.markdown('<div class="nl-section-heading">Bias Distribution by Source</div>', unsafe_allow_html=True)
    chart = build_bias_distribution_chart(summary)
    if chart:
        st.plotly_chart(chart, use_container_width=True, config={"displayModeBar": False})
    else:
        st.warning("No chart data available.")

    st.markdown('<div class="nl-section-heading">Bias by Political Lean</div>', unsafe_allow_html=True)
    st.caption("Are left-leaning or right-leaning sources more biased on this topic?")
    lean_chart = build_lean_bias_chart(results)
    if lean_chart:
        st.plotly_chart(lean_chart, use_container_width=True, config={"displayModeBar": False})
    else:
        st.warning("Not enough source lean data.")
|
| 428 |
+
|
| 429 |
+
# Articles tab: filter and sort the retrieved articles, then list them grouped
# by source.
with tab_articles:
    st.markdown('<div class="nl-section-heading">Evidence Articles</div>', unsafe_allow_html=True)

    if not results:
        st.warning("No articles found.")
    else:
        # `or` (instead of a .get default) also covers keys that are present
        # but None, which would otherwise make sorted() raise TypeError and
        # show "None" in headings. render_article_card treats None the same way.
        labels = sorted({article.get("text_label") or "Unknown" for article in results})
        leans = sorted({article.get("source_bias") or "Unknown" for article in results})

        filter_cols = st.columns([1, 1, 1])
        selected_label = filter_cols[0].selectbox("Classification", ["All"] + labels)
        selected_lean = filter_cols[1].selectbox("Source lean", ["All"] + leans)
        sort_by = filter_cols[2].selectbox(
            "Sort by",
            ["Confidence", "Similarity", "Source"],
        )

        filtered_results = results
        if selected_label != "All":
            filtered_results = [
                article for article in filtered_results
                if (article.get("text_label") or "Unknown") == selected_label
            ]
        if selected_lean != "All":
            filtered_results = [
                article for article in filtered_results
                if (article.get("source_bias") or "Unknown") == selected_lean
            ]

        # Numeric orderings are descending; the source ordering is alphabetical.
        if sort_by == "Confidence":
            filtered_results = sorted(
                filtered_results,
                key=lambda article: article.get("confidence") or 0,
                reverse=True,
            )
        elif sort_by == "Similarity":
            filtered_results = sorted(
                filtered_results,
                key=lambda article: article.get("similarity_score") or 0,
                reverse=True,
            )
        else:
            filtered_results = sorted(
                filtered_results,
                key=lambda article: article.get("source") or "Unknown source",
            )

        st.caption(f"Showing {len(filtered_results)} of {len(results)} retrieved articles.")

        if not filtered_results:
            st.warning("No articles match the selected filters.")
        else:
            # Group while keeping the sorted order within each source.
            grouped = defaultdict(list)
            for article in filtered_results:
                grouped[article.get("source") or "Unknown source"].append(article)

            for source, articles in grouped.items():
                # The heading uses the lean reported by the group's first article.
                source_bias = articles[0].get("source_bias") or "Unknown"
                st.markdown(
                    f"""
<div class="nl-source-heading">
<h3>{escape(str(source))}</h3>
<div class="nl-source-meta">{escape(str(source_bias))} source bias | {len(articles)} article(s)</div>
</div>
""",
                    unsafe_allow_html=True,
                )
                for article in articles:
                    render_article_card(article, debug=debug)
|
| 498 |
+
|
| 499 |
+
# Model tab: snapshot card, a short description of the training run, and the
# per-epoch evaluation table.
with tab_model:
    render_model_panel()
    st.markdown('<div class="nl-section-heading">Training Run</div>', unsafe_allow_html=True)
    # The headline numbers come from MODEL_EVAL so this sentence cannot drift
    # from the snapshot card above it.
    st.write(
        "RoBERTa was fine-tuned for binary text-bias classification with LoRA. "
        f"The best supplied run finished at {MODEL_EVAL['eval_accuracy']:.2%} evaluation accuracy "
        f"and {MODEL_EVAL['eval_f1_weighted']:.2%} weighted F1."
    )
    st.dataframe(
        [
            {"Epoch": 1, "Eval loss": 0.3576, "Accuracy": 0.8432, "Weighted F1": 0.8434},
            {"Epoch": 2, "Eval loss": 0.3656, "Accuracy": 0.8512, "Weighted F1": 0.8512},
            {"Epoch": 3, "Eval loss": 0.3933, "Accuracy": 0.8544, "Weighted F1": 0.8546},
        ],
        hide_index=True,
        use_container_width=True,
    )
    st.info(
        "Use these labels as decision support, not ground truth. Bias classification is sensitive "
        "to dataset definitions, article excerpts, and source coverage."
    )
|
src/ui/components/__init__.py
ADDED
|
File without changes
|
src/ui/components/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (138 Bytes). View file
|
|
|
src/ui/components/__pycache__/article_card.cpython-313.pyc
ADDED
|
Binary file (6.52 kB). View file
|
|
|
src/ui/components/__pycache__/charts.cpython-313.pyc
ADDED
|
Binary file (4.32 kB). View file
|
|
|
src/ui/components/article_card.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from html import escape
|
| 2 |
+
import streamlit as st
|
| 3 |
+
|
| 4 |
+
ARTICLE_CARD_STYLES = """
|
| 5 |
+
<style>
|
| 6 |
+
.nl-article-card {
|
| 7 |
+
background: #ffffff;
|
| 8 |
+
border: 1px solid #d8dee9;
|
| 9 |
+
border-radius: 8px;
|
| 10 |
+
padding: 1rem;
|
| 11 |
+
margin: 0.65rem 0 0.9rem 0;
|
| 12 |
+
}
|
| 13 |
+
|
| 14 |
+
.nl-article-header {
|
| 15 |
+
display: flex;
|
| 16 |
+
align-items: flex-start;
|
| 17 |
+
justify-content: space-between;
|
| 18 |
+
gap: 1rem;
|
| 19 |
+
}
|
| 20 |
+
|
| 21 |
+
.nl-article-source {
|
| 22 |
+
color: #64748b;
|
| 23 |
+
font-size: 0.78rem;
|
| 24 |
+
font-weight: 700;
|
| 25 |
+
letter-spacing: 0.04em;
|
| 26 |
+
margin-bottom: 0.25rem;
|
| 27 |
+
text-transform: uppercase;
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
.nl-article-card h4 {
|
| 31 |
+
color: #15202b;
|
| 32 |
+
font-size: 1rem;
|
| 33 |
+
line-height: 1.35;
|
| 34 |
+
margin: 0;
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
.nl-article-card p {
|
| 38 |
+
color: #475569;
|
| 39 |
+
line-height: 1.55;
|
| 40 |
+
margin: 0.65rem 0 0.8rem 0;
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
.nl-label {
|
| 44 |
+
border: 1px solid;
|
| 45 |
+
border-radius: 999px;
|
| 46 |
+
font-size: 0.75rem;
|
| 47 |
+
font-weight: 800;
|
| 48 |
+
padding: 0.25rem 0.55rem;
|
| 49 |
+
white-space: nowrap;
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
.nl-confidence-row {
|
| 53 |
+
color: #64748b;
|
| 54 |
+
display: flex;
|
| 55 |
+
justify-content: space-between;
|
| 56 |
+
font-size: 0.82rem;
|
| 57 |
+
margin-bottom: 0.3rem;
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
.nl-confidence-row strong {
|
| 61 |
+
color: #15202b;
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
.nl-confidence-track {
|
| 65 |
+
background: #eef2f7;
|
| 66 |
+
border-radius: 999px;
|
| 67 |
+
height: 0.45rem;
|
| 68 |
+
overflow: hidden;
|
| 69 |
+
width: 100%;
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
.nl-confidence-track div {
|
| 73 |
+
height: 100%;
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
.nl-read-link {
|
| 77 |
+
color: #2457c5;
|
| 78 |
+
display: inline-block;
|
| 79 |
+
font-weight: 800;
|
| 80 |
+
margin-top: 0.75rem;
|
| 81 |
+
text-decoration: none;
|
| 82 |
+
}
|
| 83 |
+
|
| 84 |
+
.nl-read-link:hover {
|
| 85 |
+
color: #1f4dac;
|
| 86 |
+
text-decoration: underline;
|
| 87 |
+
}
|
| 88 |
+
|
| 89 |
+
@media (max-width: 760px) {
|
| 90 |
+
.nl-article-header {
|
| 91 |
+
flex-direction: column;
|
| 92 |
+
gap: 0.5rem;
|
| 93 |
+
}
|
| 94 |
+
}
|
| 95 |
+
</style>
|
| 96 |
+
"""
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def inject_article_card_styles() -> None:
    """Emit the article-card stylesheet into the current Streamlit page."""
    st.markdown(
        ARTICLE_CARD_STYLES,
        unsafe_allow_html=True,
    )
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def _safe_text(value: object, fallback: str = "") -> str:
|
| 104 |
+
if value is None:
|
| 105 |
+
return fallback
|
| 106 |
+
text = str(value).strip()
|
| 107 |
+
return text or fallback
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
def _label_style(label: str) -> tuple[str, str]:
|
| 111 |
+
if label.lower() == "biased":
|
| 112 |
+
return "#c24138", "#fff4f2"
|
| 113 |
+
return "#247857", "#effaf5"
|
| 114 |
+
|
| 115 |
+
def smart_truncate(text, limit=80):
    """Shorten ``text`` to about ``limit`` characters without splitting a word.

    Args:
        text: The string to shorten.
        limit: Maximum number of characters kept before the ellipsis.

    Returns:
        ``text`` unchanged when it already fits. Otherwise the first ``limit``
        characters, backed up to the previous space when the cut lands inside
        a word, with trailing whitespace removed and ``"..."`` appended.
    """
    if len(text) <= limit:
        return text

    cut = text[:limit]
    # Back up only when the cut splits a word; when the next character is
    # whitespace the last word is complete and is kept.
    if not text[limit:limit + 1].isspace():
        head, sep, _ = cut.rpartition(" ")
        if sep and head.strip():
            cut = head
    return cut.rstrip() + "..."
|
| 119 |
+
|
| 120 |
+
def render_article_card(article: dict, debug: bool = False) -> None:
    """Render one article as an HTML card, optionally with model internals.

    Args:
        article: Article dict. The keys read are ``text_label``,
            ``confidence``, ``source``, ``source_bias``,
            ``source_bias_provenance``, ``url``, ``description``, ``text``,
            ``title`` and, in debug mode, ``similarity_score`` and
            ``probabilities``. Missing, ``None`` or blank values fall back to
            placeholders.
        debug: When true, add an expander with the similarity score and the
            raw probabilities if the article carries them.
    """
    label = _safe_text(article.get("text_label"), "Unknown")
    try:
        confidence = float(article.get("confidence") or 0)
    except (TypeError, ValueError):
        # A malformed score must not take the whole page down.
        confidence = 0.0
    source = _safe_text(article.get("source"), "Unknown source")
    source_bias = _safe_text(article.get("source_bias"), "Unknown bias")
    source_bias_provenance = _safe_text(article.get("source_bias_provenance"))
    source_meta = f"{source} / {source_bias}"
    # The "manual_demo" provenance is deliberately left out of the source line.
    if source_bias_provenance and source_bias_provenance != "manual_demo":
        source_meta = f"{source_meta} / {source_bias_provenance}"

    url = _safe_text(article.get("url"), "#")
    # escape() prevents attribute break-out but not script-bearing schemes such
    # as "javascript:", so only plain web links are allowed into the href.
    if not url.lower().startswith(("http://", "https://")):
        url = "#"

    description = _safe_text(article.get("description"))
    fallback_text = _safe_text(article.get("text"))[:280]
    excerpt = description or fallback_text or "No article excerpt was returned by the API."
    # Without a title, the start of the excerpt is used as the headline.
    title = _safe_text(article.get("title")) or smart_truncate(excerpt, 80)

    accent, soft = _label_style(label)
    # Clamp to [0, 1] so the bar width stays within 0-100%.
    confidence_pct = max(0, min(confidence, 1)) * 100

    st.markdown(
        f"""
<div class="nl-article-card">
<div class="nl-article-header">
<div>
<div class="nl-article-source">{escape(source_meta)}</div>
<h4>{escape(title)}</h4>
</div>
<span class="nl-label" style="color:{accent}; background:{soft}; border-color:{accent};">
{escape(label)}
</span>
</div>
<p>{escape(excerpt)}</p>
<div class="nl-confidence-row">
<span>Confidence</span>
<strong>{confidence:.2f}</strong>
</div>
<div class="nl-confidence-track">
<div style="width:{confidence_pct:.0f}%; background:{accent};"></div>
</div>
<a class="nl-read-link" href="{escape(url)}" target="_blank" rel="noopener noreferrer">
Read article
</a>
</div>
""",
        unsafe_allow_html=True,
    )

    if debug:
        with st.expander("Model internals", expanded=False):
            similarity = article.get("similarity_score")
            # A present-but-None score would fail the float format below.
            if similarity is not None:
                st.caption(f"Similarity score: {similarity:.4f}")
            if "probabilities" in article:
                st.json(article["probabilities"])
|
src/ui/components/charts.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import plotly.express as px
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def build_bias_distribution_chart(summary: dict):
    """Build a grouped bar chart of biased vs. not-biased article counts per source.

    Args:
        summary: Mapping of source name to a stats mapping. Each stats mapping
            is read for ``"Biased"``, ``"Not Biased"`` (falling back to
            ``"Not_Biased"``) and ``"total"`` (falling back to the sum of the
            two counts).

    Returns:
        A Plotly figure with one bar group per source, ordered by descending
        total, or ``None`` when ``summary`` has no entries.
    """
    records = []
    for source_name, source_stats in summary.items():
        biased_count = source_stats.get("Biased", 0)
        # Both spellings of the "not biased" key are accepted.
        underscore_fallback = source_stats.get("Not_Biased", 0)
        unbiased_count = source_stats.get("Not Biased", underscore_fallback)
        computed_total = biased_count + unbiased_count
        records.append(
            {
                "Source": source_name,
                "Biased": biased_count,
                "Not biased": unbiased_count,
                "Total": source_stats.get("total", computed_total),
            }
        )

    # Nothing to plot; callers receive None instead of an empty figure.
    if not records:
        return None

    # Largest sources first, then reshape to one row per (source, classification).
    long_form = (
        pd.DataFrame(records)
        .sort_values("Total", ascending=False)
        .melt(
            id_vars=["Source", "Total"],
            value_vars=["Biased", "Not biased"],
            var_name="Classification",
            value_name="Articles",
        )
    )

    palette = {"Biased": "#c24138", "Not biased": "#247857"}
    chart = px.bar(
        long_form,
        x="Source",
        y="Articles",
        color="Classification",
        barmode="group",
        text="Articles",
        color_discrete_map=palette,
    )

    # Count labels sit above the bars and are allowed to overflow the plot area.
    chart.update_traces(textposition="outside", marker_line_width=0, cliponaxis=False)

    legend_style = dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1,
        title=None,
    )
    x_axis_style = dict(title=None, tickangle=-20, showgrid=False, linecolor="#d8dee9")
    y_axis_style = dict(title="Articles", gridcolor="#e8edf4", zeroline=False)

    chart.update_layout(
        height=430,
        margin=dict(l=12, r=12, t=24, b=12),
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        bargap=0.26,
        legend=legend_style,
        xaxis=x_axis_style,
        yaxis=y_axis_style,
        font=dict(color="#15202b", family="Arial, sans-serif"),
    )

    return chart
|
| 82 |
+
|
| 83 |
+
def build_lean_bias_chart(results: list) -> object:
    """Build a grouped bar chart of biased vs. not-biased counts per source lean.

    Args:
        results: Iterable of article mappings. Each is read for
            ``"source_bias"`` (the lean bucket) and ``"text_label"`` (the
            classification). Articles whose label is neither ``"Biased"`` nor
            ``"Not Biased"`` / ``"Not_Biased"`` are not counted.

    Returns:
        A Plotly figure with one bar group per lean, or ``None`` when no
        article carried a countable label.
    """
    from collections import defaultdict

    lean_counts = defaultdict(lambda: {"Biased": 0, "Not biased": 0})

    for article in results:
        # A missing, null or empty lean is bucketed as "Unknown"; a bare
        # .get() default would let an explicit None through and it would
        # later turn into a NaN category.
        lean = article.get("source_bias") or "Unknown"
        label = article.get("text_label", "Unknown")
        if label == "Biased":
            lean_counts[lean]["Biased"] += 1
        elif label in ("Not Biased", "Not_Biased"):
            # Accept the underscore spelling too, matching the per-source chart.
            lean_counts[lean]["Not biased"] += 1

    rows = [
        {
            "Lean": lean,
            "Biased": counts["Biased"],
            "Not biased": counts["Not biased"],
        }
        for lean, counts in lean_counts.items()
    ]

    df = pd.DataFrame(rows)
    if df.empty:
        return None

    # Canonical left-to-right ordering. Any lean outside this list is appended
    # (before "Unknown") rather than dropped: values missing from a
    # Categorical's categories become NaN and would lose their axis label.
    known_order = ["Left", "Center-Left", "Center", "Center-Right", "Right"]
    reserved = set(known_order) | {"Unknown"}
    extra_leans = sorted(
        (lean for lean in lean_counts if lean not in reserved),
        key=str,
    )
    lean_order = known_order + extra_leans + ["Unknown"]

    df["Lean"] = pd.Categorical(df["Lean"], categories=lean_order, ordered=True)
    df = df.sort_values("Lean")

    df_melted = df.melt(
        id_vars="Lean",
        value_vars=["Biased", "Not biased"],
        var_name="Classification",
        value_name="Articles",
    )

    fig = px.bar(
        df_melted,
        x="Lean",
        y="Articles",
        color="Classification",
        barmode="group",
        text="Articles",
        color_discrete_map={"Biased": "#c24138", "Not biased": "#247857"},
    )

    # Count labels sit above the bars and may overflow the plot area.
    fig.update_traces(textposition="outside", marker_line_width=0, cliponaxis=False)
    fig.update_layout(
        height=380,
        margin=dict(l=12, r=12, t=24, b=12),
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        bargap=0.3,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1, title=None),
        xaxis=dict(title=None, showgrid=False, linecolor="#d8dee9"),
        yaxis=dict(title="Articles", gridcolor="#e8edf4", zeroline=False),
        font=dict(color="#15202b", family="Arial, sans-serif"),
    )

    return fig
|
src/ui/services/__init__.py
ADDED
|
File without changes
|
src/ui/services/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (136 Bytes). View file
|
|
|
src/ui/services/__pycache__/api_client.cpython-313.pyc
ADDED
|
Binary file (3.41 kB). View file
|
|
|
src/ui/services/api_client.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
from src.config import API_BASE_URL
|
| 3 |
+
|
| 4 |
+
class NewsLensClient:
    """HTTP client for the NewsLens API's ``/analyze`` and ``/ingest`` endpoints."""

    def __init__(self, base_url: str = API_BASE_URL):
        """Remember the API root that every request is sent to.

        Args:
            base_url: Root URL of the API; endpoint paths are appended to it.
        """
        self.base_url = base_url

    def _post(self, endpoint: str, payload: dict, timeout: int) -> dict:
        """POST ``payload`` as JSON to ``endpoint`` and return the decoded body.

        Raises:
            RuntimeError: If the request fails or the server answers with an
                HTTP error status. The underlying ``requests`` exception is
                attached as ``__cause__``.
        """
        # Strip a trailing slash so "http://host/" does not become "http://host//analyze".
        url = f"{self.base_url.rstrip('/')}/{endpoint}"
        try:
            response = requests.post(url, json=payload, timeout=timeout)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            raise RuntimeError(f"API request failed: {e}") from e

    def analyze(self, topic: str, top_k: int = 10) -> dict:
        """Request an analysis of ``topic`` from the ``/analyze`` endpoint.

        Args:
            topic: Topic string sent as the ``"topic"`` field.
            top_k: Value sent as the ``"top_k"`` field.

        Returns:
            The decoded JSON response.

        Raises:
            RuntimeError: If the request fails (30-second timeout).
        """
        return self._post("analyze", {"topic": topic, "top_k": top_k}, timeout=30)

    def ingest(self, topic: str, page_size: int = 10) -> dict:
        """Request ingestion of articles for ``topic`` via the ``/ingest`` endpoint.

        Args:
            topic: Topic string sent as the ``"topic"`` field.
            page_size: Value sent as the ``"page_size"`` field.

        Returns:
            The decoded JSON response.

        Raises:
            RuntimeError: If the request fails (45-second timeout).
        """
        return self._post("ingest", {"topic": topic, "page_size": page_size}, timeout=45)
|
| 35 |
+
|
| 36 |
+
class DirectPipelineClient:
    """Client that calls the analysis pipeline in-process instead of over HTTP.

    Exposes the same ``analyze`` / ``ingest`` method names and parameters as
    ``NewsLensClient``.
    """

    def __init__(self):
        """Create the underlying ``NewsAnalysisPipeline``."""
        # Imported here rather than at module import time.
        from src.analysis.rag_pipeline import NewsAnalysisPipeline

        self.pipeline = NewsAnalysisPipeline()

    def analyze(self, topic: str, top_k: int = 10) -> dict:
        """Delegate to the pipeline's ``analyze`` and return its result unchanged."""
        outcome = self.pipeline.analyze(topic, top_k)
        return outcome

    def ingest(self, topic: str, page_size: int = 10) -> dict:
        """Fetch articles for ``topic`` and hand them to the pipeline's vector store.

        Args:
            topic: Topic passed to ``fetch_news``.
            page_size: Page size passed to ``fetch_news``.

        Returns:
            A summary dict with the topic, fetched and stored counts, and a
            ``"success"`` status.
        """
        from src.ingestion.newsapi_client import fetch_news

        fetched = fetch_news(topic=topic, page_size=page_size)
        self.pipeline.vector_store.store_articles(fetched)

        # The stored count mirrors the fetched count; the return value of
        # store_articles is not consulted.
        fetched_total = len(fetched)
        summary = {
            "topic": topic,
            "articles_fetched": fetched_total,
            "articles_stored": fetched_total,
            "status": "success",
        }
        return summary
|