Spaces:

Jitender20
/

newslens

Sleeping

App Files Community

Jitender20 committed 11 days ago

Commit

208266a

1 Parent(s): 9bb094d

Add NewsLens Streamlit app

Browse files

Files changed (50) hide show

Dockerfile +3 -9
requirements.txt +16 -2
src/__init__.py +0 -0
src/__pycache__/__init__.cpython-313.pyc +0 -0
src/__pycache__/config.cpython-313.pyc +0 -0
src/analysis/__init__.py +0 -0
src/analysis/__pycache__/__init__.cpython-313.pyc +0 -0
src/analysis/__pycache__/rag_pipeline.cpython-313.pyc +0 -0
src/analysis/__pycache__/source_bias.cpython-313.pyc +0 -0
src/analysis/rag_pipeline.py +74 -0
src/analysis/source_bias.py +47 -0
src/api/__init__.py +0 -0
src/api/__pycache__/__init__.cpython-313.pyc +0 -0
src/api/__pycache__/main.cpython-313.pyc +0 -0
src/api/__pycache__/models.cpython-313.pyc +0 -0
src/api/__pycache__/routes.cpython-313.pyc +0 -0
src/api/main.py +29 -0
src/api/models.py +45 -0
src/api/routes.py +68 -0
src/config.py +35 -0
src/data/source_bias.generated.json +1136 -0
src/data/source_bias.json +202 -0
src/db/__init__.py +0 -0
src/db/__pycache__/__init__.cpython-313.pyc +0 -0
src/db/__pycache__/vector_store.cpython-313.pyc +0 -0
src/db/vector_store.py +120 -0
src/ingestion/__init__.py +0 -0
src/ingestion/__pycache__/__init__.cpython-313.pyc +0 -0
src/ingestion/__pycache__/newsapi_client.cpython-313.pyc +0 -0
src/ingestion/newsapi_client.py +49 -0
src/models/__pycache__/dataset_prep.cpython-313.pyc +0 -0
src/models/__pycache__/test_inference.cpython-313.pyc +0 -0
src/models/__pycache__/train_model.cpython-313.pyc +0 -0
src/models/dataset_prep.py +19 -0
src/models/test_inference.py +105 -0
src/models/train_model.py +114 -0
src/ui/__init__.py +0 -0
src/ui/__pycache__/__init__.cpython-313.pyc +0 -0
src/ui/__pycache__/app.cpython-313.pyc +0 -0
src/ui/app.py +518 -0
src/ui/components/__init__.py +0 -0
src/ui/components/__pycache__/__init__.cpython-313.pyc +0 -0
src/ui/components/__pycache__/article_card.cpython-313.pyc +0 -0
src/ui/components/__pycache__/charts.cpython-313.pyc +0 -0
src/ui/components/article_card.py +172 -0
src/ui/components/charts.py +142 -0
src/ui/services/__init__.py +0 -0
src/ui/services/__pycache__/__init__.cpython-313.pyc +0 -0
src/ui/services/__pycache__/api_client.cpython-313.pyc +0 -0
src/ui/services/api_client.py +53 -0

Dockerfile CHANGED Viewed

@@ -1,20 +1,14 @@
-FROM python:3.13.5-slim
 WORKDIR /app
 RUN apt-get update && apt-get install -y \
     build-essential \
     curl \
     git \
     && rm -rf /var/lib/apt/lists/*
 COPY requirements.txt ./
 COPY src/ ./src/
 RUN pip3 install -r requirements.txt
 EXPOSE 8501
 HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
-ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]

+FROM python:3.13-slim
 WORKDIR /app
 RUN apt-get update && apt-get install -y \
     build-essential \
     curl \
     git \
     && rm -rf /var/lib/apt/lists/*
 COPY requirements.txt ./
 COPY src/ ./src/
 RUN pip3 install -r requirements.txt
 EXPOSE 8501
+ENV PYTHONPATH=/app
 HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
+ENTRYPOINT ["streamlit", "run", "src/ui/app.py", "--server.port=8501", "--server.address=0.0.0.0"]

requirements.txt CHANGED Viewed

@@ -1,3 +1,17 @@
-altair
 pandas
-streamlit

+accelerate
+chromadb
+datasets
+fastapi
+newsapi-python
+numpy
 pandas
+peft
+plotly
+python-dotenv
+requests
+scikit-learn
+sentence-transformers
+streamlit
+torch
+transformers
+uvicorn

src/__init__.py ADDED Viewed

File without changes

src/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (124 Bytes). View file

src/__pycache__/config.cpython-313.pyc ADDED Viewed

Binary file (2.22 kB). View file

src/analysis/__init__.py ADDED Viewed

File without changes

src/analysis/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (133 Bytes). View file

src/analysis/__pycache__/rag_pipeline.cpython-313.pyc ADDED Viewed

Binary file (3.97 kB). View file

src/analysis/__pycache__/source_bias.cpython-313.pyc ADDED Viewed

Binary file (2.29 kB). View file

src/analysis/rag_pipeline.py ADDED Viewed

	@@ -0,0 +1,74 @@

+from src.db.vector_store import NewsVectorStore
+from src.models.test_inference import BiasPredictor
+from src.analysis.source_bias import get_source_bias, get_source_record
+from collections import defaultdict
+class NewsAnalysisPipeline:
+    def __init__(self):
+        print("Initializing NewsLens pipeline...")
+        self.vector_store = NewsVectorStore()
+        self.bias_predictor = BiasPredictor()
+        print("Pipeline ready.")
+    def analyze(self, topic: str, top_k: int = 10) -> dict:
+        articles = self.vector_store.query(topic, top_k=top_k)
+        if not articles:
+            return {"topic": topic, "results": [], "summary": {}}
+        results = []
+        texts = [article["text"] for article in articles]
+        predictions = self.bias_predictor.predict_batch(texts)
+        for article, prediction in zip(articles, predictions):
+            source_record = get_source_record(article["source"])
+            results.append({
+                "source": article["source"],
+                "source_bias": source_record["bias"],
+                "source_bias_provenance": source_record["provenance"],
+                "url": article["url"],
+                "title": article.get("title", ""),
+                "description": article.get("description", ""),
+                "publishedAt": article.get("publishedAt", ""),
+                "text": article["text"],
+                "text_label": prediction["label"],
+                "confidence": prediction["confidence"],
+                "probabilities": {
+                    "Not Biased": round(prediction["probabilities"][0], 4),
+                    "Biased": round(prediction["probabilities"][1], 4),
+                },
+                "similarity_score": article["similarity_score"]
+            })
+        # Aggregate per source
+        summary = defaultdict(lambda: {
+            "source_bias": "Unknown",
+            "Biased": 0,
+            "Not Biased": 0,
+            "total": 0
+        })
+        for r in results:
+            source = r["source"]
+            summary[source]["source_bias"] = r["source_bias"]
+            summary[source][r["text_label"]] += 1
+            summary[source]["total"] += 1
+        return {
+            "topic": topic,
+            "results": results,
+            "summary": dict(summary)
+        }
+if __name__ == "__main__":
+    pipeline = NewsAnalysisPipeline()
+    output = pipeline.analyze("climate change", top_k=10)
+    print(f"\n=== Results for: '{output['topic']}' ===")
+    for r in output["results"]:
+        print(f"[{r['text_label']}] ({r['confidence']:.2f}) | Source lean: {r['source_bias']} — {r['source']}: {r['text'][:80]}...")
+    print("\n=== Source Summary ===")
+    for source, counts in output["summary"].items():
+        print(f"{source} ({counts['source_bias']}): Biased={counts['Biased']}, Not Biased={counts['Not Biased']}, Total={counts['total']}")

src/analysis/source_bias.py ADDED Viewed

	@@ -0,0 +1,47 @@

+import json
+from functools import lru_cache
+from pathlib import Path
+from typing import Any
+# Registry file location: the "data" directory that sits next to this module's package directory.
+REGISTRY_PATH = Path(__file__).resolve().parents[1] / "data" / "source_bias.json"
+@lru_cache(maxsize=1)
+def load_source_registry() -> dict[str, Any]:
+    with REGISTRY_PATH.open("r", encoding="utf-8") as f:
+        return json.load(f)
+def normalize_source_name(source: str) -> str:
+    return " ".join((source or "").strip().lower().split())
+def get_source_record(source: str) -> dict[str, Any]:
+    registry = load_source_registry()
+    sources = registry.get("sources", {})
+    aliases = registry.get("aliases", {})
+    normalized = normalize_source_name(source)
+    canonical = aliases.get(normalized, source)
+    record = sources.get(canonical)
+    if record is None:
+        return {
+            "name": source or "Unknown",
+            "bias": "Unknown",
+            "provenance": "unmatched",
+            "source_url": None,
+            "article_count": None,
+            "label_counts": None,
+            "notes": "No source-level registry match found.",
+        }
+    return {
+        "name": canonical,
+        **record,
+    }
+def get_source_bias(source: str) -> str:
+    return str(get_source_record(source).get("bias", "Unknown"))

src/api/__init__.py ADDED Viewed

File without changes

src/api/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (128 Bytes). View file

src/api/__pycache__/main.cpython-313.pyc ADDED Viewed

Binary file (1.2 kB). View file

src/api/__pycache__/models.cpython-313.pyc ADDED Viewed

Binary file (2.91 kB). View file

src/api/__pycache__/routes.cpython-313.pyc ADDED Viewed

Binary file (3.71 kB). View file

src/api/main.py ADDED Viewed

	@@ -0,0 +1,29 @@

+from contextlib import asynccontextmanager
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from src.analysis.rag_pipeline import NewsAnalysisPipeline
+from src.api import routes
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Lifespan hook: build the analysis pipeline once, before the app starts serving."""
+    print("Loading pipeline at startup...")
+    # Kept on app.state so request handlers can reach the shared instance.
+    app.state.pipeline = NewsAnalysisPipeline()
+    print("Pipeline ready.")
+    yield
+    # Everything after the yield runs when the application shuts down.
+    print("Shutting down.")
+# ASGI application object; `lifespan` attaches the pipeline to app.state at startup.
+app = FastAPI(
+    title="NewsLens API",
+    description="Bias analysis for news articles",
+    version="1.0.0",
+    lifespan=lifespan
+)
+# NOTE(review): wildcard origins/methods/headers let any site call this API —
+# confirm this is intended outside local development.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Mount the routes declared in src/api/routes.py.
+app.include_router(routes.router)

src/api/models.py ADDED Viewed

	@@ -0,0 +1,45 @@

+from typing import Optional
+from pydantic import BaseModel, ConfigDict, Field
+# Request body for an analysis call (comment rather than docstring so the generated schema is unchanged).
+class AnalyzeRequest(BaseModel):
+    # Search topic; validation requires 1-200 characters.
+    topic: str = Field(..., min_length=1, max_length=200)
+    # Number of articles to retrieve; validation requires 1-20, default 10.
+    top_k: int = Field(default=10, ge=1, le=20)
+# One analyzed article: retrieval metadata plus the text-level prediction.
+class ArticleResult(BaseModel):
+    source: str
+    # Source-level lean from the registry ("Unknown" when the source is unmatched).
+    source_bias: str
+    source_bias_provenance: Optional[str] = None
+    url: str
+    title: Optional[str] = None
+    description: Optional[str] = None
+    publishedAt: Optional[str] = None
+    text: str
+    # Label predicted for the article text itself.
+    text_label: str
+    confidence: float
+    similarity_score: float
+    # The pipeline fills this with "Not Biased" / "Biased" keys mapped to rounded probabilities.
+    probabilities: dict
+class SourceSummary(BaseModel):
+    source_bias: str
+    Biased: int
+    Not_Biased: int = Field(alias="Not Biased")
+    total: int
+    class Config:
+        populate_by_name = True
+# Response body of an analysis call.
+class AnalyzeResponse(BaseModel):
+    topic: str
+    total_articles: int
+    results: list[ArticleResult]
+    # Keyed by source name.
+    summary: dict[str, SourceSummary]
+# Request body for an ingestion call.
+class IngestRequest(BaseModel):
+    # Topic to fetch; validation requires 1-200 characters.
+    topic: str = Field(..., min_length=1, max_length=200)
+    # Number of articles to request; validation requires 1-50, default 10.
+    page_size: int = Field(default=10, ge=1, le=50)
+# Response body of an ingestion call.
+class IngestResponse(BaseModel):
+    topic: str
+    articles_fetched: int
+    articles_stored: int
+    status: str

src/api/routes.py ADDED Viewed

	@@ -0,0 +1,68 @@

+from fastapi import APIRouter, HTTPException, Request
+import time
+from src.ingestion.newsapi_client import fetch_news
+from src.api.models import AnalyzeRequest, AnalyzeResponse, IngestRequest, IngestResponse
+router = APIRouter()
+# In-process cache of analyze responses, keyed by (lower-cased stripped topic, top_k);
+# each value holds the response under "data" and its creation time under "timestamp".
+_cache: dict = {}
+# Cached responses older than this many seconds are recomputed.
+CACHE_TTL_SECONDS = 300
+@router.get("/health")
+def health():
+    return {"status": "ok"}
+@router.post("/analyze", response_model=AnalyzeResponse)
+def analyze(request: Request, payload: AnalyzeRequest):
+    if not payload.topic.strip():
+        raise HTTPException(status_code=400, detail="Topic cannot be empty.")
+    cache_key = (payload.topic.lower().strip(), payload.top_k)
+    now = time.time()
+    if cache_key in _cache:
+        cached = _cache[cache_key]
+        if now - cached["timestamp"] < CACHE_TTL_SECONDS:
+            print(f"Cache hit for: {payload.topic}")
+            return cached["data"]
+    pipeline = request.app.state.pipeline
+    if pipeline is None:
+        raise HTTPException(status_code=503, detail="Pipeline not initialized.")
+    try:
+        raw = pipeline.analyze(payload.topic, top_k=payload.top_k)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Pipeline error: {str(e)}")
+    sorted_results = sorted(raw["results"], key=lambda x: x["confidence"], reverse=True)
+    response = AnalyzeResponse(
+        topic=raw["topic"],
+        total_articles=len(sorted_results),
+        results=sorted_results,
+        summary=raw["summary"]
+    )
+    _cache[cache_key] = {"data": response, "timestamp": now}
+    return response
+@router.post("/ingest", response_model=IngestResponse)
+def ingest(request: Request, payload: IngestRequest):
+    try:
+        articles = fetch_news(topic=payload.topic, page_size=payload.page_size)
+    except RuntimeError as exc:
+        raise HTTPException(status_code=503, detail=str(exc))
+    if not articles:
+        raise HTTPException(status_code=404, detail=f"No articles found for topic: {payload.topic}")
+    vector_store = request.app.state.pipeline.vector_store
+    vector_store.store_articles(articles)
+    _cache.clear()
+    return IngestResponse(
+        topic=payload.topic,
+        articles_fetched=len(articles),
+        articles_stored=len(articles),
+        status="success"
+    )

src/config.py ADDED Viewed

	@@ -0,0 +1,35 @@

+# Central configuration: load .env values, then derive paths and settings from the environment.
+import os
+from pathlib import Path
+# python-dotenv is optional; without it a minimal built-in .env reader is used below.
+try:
+    from dotenv import load_dotenv
+except ImportError:
+    load_dotenv = None
+if load_dotenv is not None:
+    load_dotenv()
+else:
+    # Fallback reader: only looks at ".env" in the current working directory.
+    env_path = Path.cwd() / ".env"
+    if env_path.exists():
+        for line in env_path.read_text(encoding="utf-8").splitlines():
+            line = line.strip()
+            # Skip blank lines, comments, and lines without a KEY=VALUE shape.
+            if not line or line.startswith("#") or "=" not in line:
+                continue
+            key, value = line.split("=", 1)
+            # setdefault: variables already present in the environment are not overwritten.
+            os.environ.setdefault(key.strip(), value.strip().strip('"').strip("'"))
+_bias_model_env = os.getenv("NEWSLENS_BIAS_MODEL_PATH")
+# Two levels up from this file, i.e. the directory that contains the package directory.
+BASE_DIR = Path(__file__).resolve().parents[1]
+# NOTE(review): the default is <BASE_DIR>/data, which is not the package-level data
+# directory holding source_bias.json — confirm this is intended.
+DATA_DIR = Path(os.getenv("NEWSLENS_DATA_DIR", BASE_DIR / "data"))
+CHROMA_DB_PATH = Path(os.getenv("NEWSLENS_CHROMA_DB_PATH", DATA_DIR / "chromadb"))
+MODEL_DIR = Path(os.getenv("NEWSLENS_MODEL_DIR", DATA_DIR / "models"))
+# The override is kept as a plain string while the default is a Path, so consumers
+# must accept either type.
+if _bias_model_env:
+    BIAS_MODEL_PATH = _bias_model_env
+else:
+    BIAS_MODEL_PATH = Path(MODEL_DIR / "bias_lora_20260503_010859")
+# The settings below are None when their variable is unset, except API_BASE_URL.
+HF_ENDPOINT = os.getenv("NEWSLENS_HF_ENDPOINT")
+HF_TOKEN = os.getenv("HF_TOKEN")
+NEWS_API_KEY = os.getenv("NEWSAPI_KEY")
+API_BASE_URL = os.getenv("NEWSLENS_API_BASE_URL", "http://localhost:8000")

src/data/source_bias.generated.json ADDED Viewed

	@@ -0,0 +1,1136 @@

+{
+  "aliases": {
+    "al jazeera": "Al Jazeera",
+    "allysia finley (wall street journal)": "Allysia Finley (Wall Street Journal)",
+    "ann coulter": "Ann Coulter",
+    "ben shapiro": "Ben Shapiro",
+    "brent bozell": "Brent Bozell",
+    "business insider": "Business Insider",
+    "buzzfeed news": "BuzzFeed News",
+    "cbn": "CBN",
+    "cbs news": "CBS News",
+    "charles krauthammer": "Charles Krauthammer",
+    "chicago sun-times": "Chicago Sun-Times",
+    "christian science monitor": "Christian Science Monitor",
+    "cnn (web news)": "CNN (Web News)",
+    "cnn - editorial": "CNN - Editorial",
+    "daily beast": "Daily Beast",
+    "daily kos": "Daily Kos",
+    "daily mail": "Daily Mail",
+    "damon linker": "Damon Linker",
+    "democracy now": "Democracy Now",
+    "elizabeth warren": "Elizabeth Warren",
+    "ezra klein": "Ezra Klein",
+    "fox news": "Fox News",
+    "fox news (online)": "Fox News (Online)",
+    "fox news opinion": "Fox News Opinion",
+    "fox online news": "Fox Online News",
+    "george will": "George Will",
+    "guest writer": "Guest Writer",
+    "guest writer - center": "Guest Writer - Center",
+    "guest writer - left": "Guest Writer - Left",
+    "guest writer - right": "Guest Writer - Right",
+    "hotair": "HotAir",
+    "howard kurtz": "Howard Kurtz",
+    "international business times": "International Business Times",
+    "jacobin": "Jacobin",
+    "john fund": "John Fund",
+    "john stossel": "John Stossel",
+    "jon terbush": "Jon Terbush",
+    "jonah goldberg": "Jonah Goldberg",
+    "juan williams": "Juan Williams",
+    "julian zelizer": "Julian Zelizer",
+    "marketwatch": "MarketWatch",
+    "media matters": "Media Matters",
+    "media research center": "Media Research Center",
+    "michael barone": "Michael Barone",
+    "michael brendan dougherty": "Michael Brendan Dougherty",
+    "michael goodwin": "Michael Goodwin",
+    "michelle malkin": "Michelle Malkin",
+    "mother jones": "Mother Jones",
+    "national review": "National Review",
+    "nbc news (online)": "NBC News (Online)",
+    "nbcnews.com": "NBCNews.com",
+    "new york post": "New York Post",
+    "new york post (news)": "New York Post (News)",
+    "new york post (opinion)": "New York Post (Opinion)",
+    "newsbusters": "NewsBusters",
+    "newt gingrich": "Newt Gingrich",
+    "npr editorial": "NPR Editorial",
+    "npr online news": "NPR Online News",
+    "pew research center": "Pew Research Center",
+    "politico": "Politico",
+    "rand paul": "Rand Paul",
+    "rich lowry": "Rich Lowry",
+    "ryan cooper": "Ryan Cooper",
+    "s.e. cupp": "S.E. Cupp",
+    "scientific american": "Scientific American",
+    "slate": "Slate",
+    "the atlantic": "The Atlantic",
+    "the boston globe": "The Boston Globe",
+    "the daily wire": "The Daily Wire",
+    "the economist": "The Economist",
+    "the flip side": "The Flip Side",
+    "the hill": "The Hill",
+    "the intercept": "The Intercept",
+    "the marshall project": "The Marshall Project",
+    "the nation": "The Nation",
+    "the new yorker": "The New Yorker",
+    "the week - news": "The Week - News",
+    "the week - opinion": "The Week - Opinion",
+    "theblaze.com": "TheBlaze.com",
+    "thinkprogress": "ThinkProgress",
+    "thomas sowell": "Thomas Sowell",
+    "time magazine": "Time Magazine",
+    "townhall": "Townhall",
+    "usa today": "USA TODAY",
+    "vanity fair": "Vanity Fair",
+    "vice": "Vice",
+    "victor hanson": "Victor Hanson",
+    "vox": "Vox",
+    "wall street journal - editorial": "Wall Street Journal - Editorial",
+    "wall street journal - news": "Wall Street Journal - News",
+    "washington post": "Washington Post",
+    "washington times": "Washington Times",
+    "yahoo! news": "Yahoo! News",
+    "yahoo! the 360": "Yahoo! The 360"
+  },
+  "sources": {
+    "Al Jazeera": {
+      "article_count": 142,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 141,
+        "Right": 1
+      },
+      "majority_share": 0.993,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.aljazeera.com"
+    },
+    "Allysia Finley (Wall Street Journal)": {
+      "article_count": 4,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 4
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.cnn.com"
+    },
+    "Ann Coulter": {
+      "article_count": 6,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 6
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.townhall.com"
+    },
+    "Ben Shapiro": {
+      "article_count": 26,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 26
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.dailywire.com"
+    },
+    "Brent Bozell": {
+      "article_count": 5,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 5
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.foxnews.com"
+    },
+    "Business Insider": {
+      "article_count": 74,
+      "bias": "Right",
+      "label_counts": {
+        "Right": 74
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.businessinsider.com"
+    },
+    "BuzzFeed News": {
+      "article_count": 64,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 64
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.buzzfeednews.com"
+    },
+    "CBN": {
+      "article_count": 27,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 27
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.cbn.com"
+    },
+    "CBS News": {
+      "article_count": 163,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 163
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.cbsnews.com"
+    },
+    "CNN (Web News)": {
+      "article_count": 2485,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 2485
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.cnn.com"
+    },
+    "CNN - Editorial": {
+      "article_count": 87,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 87
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.cnn.com"
+    },
+    "Charles Krauthammer": {
+      "article_count": 9,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 9
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.nationalreview.com"
+    },
+    "Chicago Sun-Times": {
+      "article_count": 83,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 83
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.chicago.suntimes.com"
+    },
+    "Christian Science Monitor": {
+      "article_count": 1300,
+      "bias": "Right",
+      "label_counts": {
+        "Right": 1300
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.csmonitor.com"
+    },
+    "Daily Beast": {
+      "article_count": 240,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 240
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.thedailybeast.com"
+    },
+    "Daily Kos": {
+      "article_count": 127,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 127
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.dailykos.com"
+    },
+    "Daily Mail": {
+      "article_count": 46,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 46
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.dailymail.co.uk"
+    },
+    "Damon Linker": {
+      "article_count": 14,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 14
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.theweek.com"
+    },
+    "Democracy Now": {
+      "article_count": 75,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 75
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.democracynow.org"
+    },
+    "Elizabeth Warren": {
+      "article_count": 4,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 4
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.time.com"
+    },
+    "Ezra Klein": {
+      "article_count": 10,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 10
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.npr.org"
+    },
+    "Fox News": {
+      "article_count": 1353,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 1353
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.foxnews.com"
+    },
+    "Fox News (Online)": {
+      "article_count": 86,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 86
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.foxnews.com"
+    },
+    "Fox News Opinion": {
+      "article_count": 58,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 58
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.foxnews.com"
+    },
+    "Fox Online News": {
+      "article_count": 2035,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 2035
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.foxnews.com"
+    },
+    "George Will": {
+      "article_count": 14,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 14
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.nationalreview.com"
+    },
+    "Guest Writer": {
+      "article_count": 84,
+      "bias": "Right",
+      "label_counts": {
+        "Right": 84
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.washingtontimes.com"
+    },
+    "Guest Writer - Center": {
+      "article_count": 3,
+      "bias": "Right",
+      "label_counts": {
+        "Right": 3
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.theatlantic.com"
+    },
+    "Guest Writer - Left": {
+      "article_count": 109,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 109
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.cnn.com"
+    },
+    "Guest Writer - Right": {
+      "article_count": 385,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 385
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.nationalreview.com"
+    },
+    "HotAir": {
+      "article_count": 64,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 64
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.hotair.com"
+    },
+    "Howard Kurtz": {
+      "article_count": 14,
+      "bias": "Right",
+      "label_counts": {
+        "Right": 14
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.foxnews.com"
+    },
+    "International Business Times": {
+      "article_count": 48,
+      "bias": "Right",
+      "label_counts": {
+        "Right": 48
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.ibtimes.com"
+    },
+    "Jacobin": {
+      "article_count": 23,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 23
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.jacobinmag.com"
+    },
+    "John Fund": {
+      "article_count": 16,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 16
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.nationalreview.com"
+    },
+    "John Stossel": {
+      "article_count": 26,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 26
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.foxnews.com"
+    },
+    "Jon Terbush": {
+      "article_count": 3,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 3
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.theweek.com"
+    },
+    "Jonah Goldberg": {
+      "article_count": 8,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 8
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.nationalreview.com"
+    },
+    "Juan Williams": {
+      "article_count": 10,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 10
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.foxnews.com"
+    },
+    "Julian Zelizer": {
+      "article_count": 10,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 10
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.cnn.com"
+    },
+    "MarketWatch": {
+      "article_count": 106,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 106
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.marketwatch.com"
+    },
+    "Media Matters": {
+      "article_count": 107,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 107
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.mediamatters.org"
+    },
+    "Media Research Center": {
+      "article_count": 22,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 22
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.newsbusters.org"
+    },
+    "Michael Barone": {
+      "article_count": 4,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 4
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.townhall.com"
+    },
+    "Michael Brendan Dougherty": {
+      "article_count": 8,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 8
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.theweek.com"
+    },
+    "Michael Goodwin": {
+      "article_count": 4,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 4
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.nypost.com"
+    },
+    "Michelle Malkin": {
+      "article_count": 12,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 12
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.townhall.com"
+    },
+    "Mother Jones": {
+      "article_count": 114,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 114
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.motherjones.com"
+    },
+    "NBC News (Online)": {
+      "article_count": 38,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 38
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.nbcnews.com"
+    },
+    "NBCNews.com": {
+      "article_count": 14,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 14
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.nbcnews.com"
+    },
+    "NPR Editorial": {
+      "article_count": 8,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 8
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.npr.org"
+    },
+    "NPR Online News": {
+      "article_count": 2007,
+      "bias": "Right",
+      "label_counts": {
+        "Right": 2007
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.npr.org"
+    },
+    "National Review": {
+      "article_count": 1013,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 1013
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.nationalreview.com"
+    },
+    "New York Post": {
+      "article_count": 175,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 175
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.nypost.com"
+    },
+    "New York Post (News)": {
+      "article_count": 5,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 5
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.nypost.com"
+    },
+    "New York Post (Opinion)": {
+      "article_count": 5,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 5
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.nypost.com"
+    },
+    "NewsBusters": {
+      "article_count": 44,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 44
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.newsbusters.org"
+    },
+    "Newt Gingrich": {
+      "article_count": 14,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 14
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.washingtontimes.com"
+    },
+    "Pew Research Center": {
+      "article_count": 27,
+      "bias": "Right",
+      "label_counts": {
+        "Right": 27
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.pewresearch.org"
+    },
+    "Politico": {
+      "article_count": 2493,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 2493
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.politico.com"
+    },
+    "Rand Paul": {
+      "article_count": 8,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 8
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.washingtontimes.com"
+    },
+    "Rich Lowry": {
+      "article_count": 44,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 44
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.nationalreview.com"
+    },
+    "Ryan Cooper": {
+      "article_count": 6,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 6
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.theweek.com"
+    },
+    "S.E. Cupp": {
+      "article_count": 4,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 4
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.chicago.suntimes.com"
+    },
+    "Scientific American": {
+      "article_count": 35,
+      "bias": "Right",
+      "label_counts": {
+        "Left": 2,
+        "Right": 33
+      },
+      "majority_share": 0.9429,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.scientificamerican.com"
+    },
+    "Slate": {
+      "article_count": 158,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 158
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.slate.com"
+    },
+    "The Atlantic": {
+      "article_count": 172,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 172
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.theatlantic.com"
+    },
+    "The Boston Globe": {
+      "article_count": 24,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 24
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.bostonglobe.com"
+    },
+    "The Daily Wire": {
+      "article_count": 122,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 122
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.dailywire.com"
+    },
+    "The Economist": {
+      "article_count": 28,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 28
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.economist.com"
+    },
+    "The Flip Side": {
+      "article_count": 239,
+      "bias": "Right",
+      "label_counts": {
+        "Right": 239
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.theflipside.io"
+    },
+    "The Hill": {
+      "article_count": 1377,
+      "bias": "Right",
+      "label_counts": {
+        "Right": 1377
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.thehill.com"
+    },
+    "The Intercept": {
+      "article_count": 43,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 43
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.theintercept.com"
+    },
+    "The Marshall Project": {
+      "article_count": 27,
+      "bias": "Right",
+      "label_counts": {
+        "Right": 27
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.themarshallproject.org"
+    },
+    "The Nation": {
+      "article_count": 32,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 32
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.thenation.com"
+    },
+    "The New Yorker": {
+      "article_count": 21,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 21
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.newyorker.com"
+    },
+    "The Week - News": {
+      "article_count": 119,
+      "bias": "Right",
+      "label_counts": {
+        "Right": 119
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.theweek.com"
+    },
+    "The Week - Opinion": {
+      "article_count": 24,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 24
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.theweek.com"
+    },
+    "TheBlaze.com": {
+      "article_count": 219,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 219
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.theblaze.com"
+    },
+    "ThinkProgress": {
+      "article_count": 33,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 33
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.thinkprogress.org"
+    },
+    "Thomas Sowell": {
+      "article_count": 3,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 3
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.townhall.com"
+    },
+    "Time Magazine": {
+      "article_count": 70,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 70
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.time.com"
+    },
+    "Townhall": {
+      "article_count": 1273,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 1273
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.townhall.com"
+    },
+    "USA TODAY": {
+      "article_count": 1785,
+      "bias": "Right",
+      "label_counts": {
+        "Right": 1785
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.usatoday.com"
+    },
+    "Vanity Fair": {
+      "article_count": 157,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 157
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.vanityfair.com"
+    },
+    "Vice": {
+      "article_count": 67,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 67
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.vice.com"
+    },
+    "Victor Hanson": {
+      "article_count": 62,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 62
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.nationalreview.com"
+    },
+    "Vox": {
+      "article_count": 1460,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 1460
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.vox.com"
+    },
+    "Wall Street Journal - Editorial": {
+      "article_count": 7,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 7
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.wsj.com"
+    },
+    "Wall Street Journal - News": {
+      "article_count": 255,
+      "bias": "Right",
+      "label_counts": {
+        "Right": 255
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.wsj.com"
+    },
+    "Washington Post": {
+      "article_count": 108,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 108
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.washingtonpost.com"
+    },
+    "Washington Times": {
+      "article_count": 2883,
+      "bias": "Center",
+      "label_counts": {
+        "Center": 2883
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.washingtontimes.com"
+    },
+    "Yahoo! News": {
+      "article_count": 11,
+      "bias": "Left",
+      "label_counts": {
+        "Left": 11
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.news.yahoo.com"
+    },
+    "Yahoo! The 360": {
+      "article_count": 80,
+      "bias": "Right",
+      "label_counts": {
+        "Right": 80
+      },
+      "majority_share": 1.0,
+      "notes": "Generated by aggregating article-level political bias labels by source.",
+      "provenance": "siddharthmb/article-bias-prediction-media-splits",
+      "source_url": "www.news.yahoo.com"
+    }
+  }
+}

src/data/source_bias.json ADDED Viewed

	@@ -0,0 +1,202 @@

+{
+  "sources": {
+    "Fox News": {
+      "bias": "Right",
+      "provenance": "manual_demo",
+      "source_url": null,
+      "article_count": null,
+      "label_counts": null,
+      "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
+    },
+    "Breitbart": {
+      "bias": "Right",
+      "provenance": "manual_demo",
+      "source_url": null,
+      "article_count": null,
+      "label_counts": null,
+      "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
+    },
+    "The Daily Wire": {
+      "bias": "Right",
+      "provenance": "manual_demo",
+      "source_url": null,
+      "article_count": null,
+      "label_counts": null,
+      "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
+    },
+    "New York Post": {
+      "bias": "Right",
+      "provenance": "manual_demo",
+      "source_url": null,
+      "article_count": null,
+      "label_counts": null,
+      "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
+    },
+    "TechRadar": {
+      "bias": "Right",
+      "provenance": "manual_demo",
+      "source_url": null,
+      "article_count": null,
+      "label_counts": null,
+      "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
+    },
+    "BBC News": {
+      "bias": "Center",
+      "provenance": "manual_demo",
+      "source_url": null,
+      "article_count": null,
+      "label_counts": null,
+      "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
+    },
+    "Reuters": {
+      "bias": "Center",
+      "provenance": "manual_demo",
+      "source_url": null,
+      "article_count": null,
+      "label_counts": null,
+      "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
+    },
+    "Associated Press": {
+      "bias": "Center",
+      "provenance": "manual_demo",
+      "source_url": null,
+      "article_count": null,
+      "label_counts": null,
+      "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
+    },
+    "Mental Floss": {
+      "bias": "Center",
+      "provenance": "manual_demo",
+      "source_url": null,
+      "article_count": null,
+      "label_counts": null,
+      "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
+    },
+    "New Scientist": {
+      "bias": "Center",
+      "provenance": "manual_demo",
+      "source_url": null,
+      "article_count": null,
+      "label_counts": null,
+      "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
+    },
+    "Nature.com": {
+      "bias": "Center",
+      "provenance": "manual_demo",
+      "source_url": null,
+      "article_count": null,
+      "label_counts": null,
+      "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
+    },
+    "Futurity: Research News": {
+      "bias": "Center",
+      "provenance": "manual_demo",
+      "source_url": null,
+      "article_count": null,
+      "label_counts": null,
+      "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
+    },
+    "Yahoo Entertainment": {
+      "bias": "Center",
+      "provenance": "manual_demo",
+      "source_url": null,
+      "article_count": null,
+      "label_counts": null,
+      "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
+    },
+    "NPR": {
+      "bias": "Center-Left",
+      "provenance": "manual_demo",
+      "source_url": null,
+      "article_count": null,
+      "label_counts": null,
+      "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
+    },
+    "The Guardian": {
+      "bias": "Center-Left",
+      "provenance": "manual_demo",
+      "source_url": null,
+      "article_count": null,
+      "label_counts": null,
+      "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
+    },
+    "Techdirt": {
+      "bias": "Center-Left",
+      "provenance": "manual_demo",
+      "source_url": null,
+      "article_count": null,
+      "label_counts": null,
+      "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
+    },
+    "Vox": {
+      "bias": "Center-Left",
+      "provenance": "manual_demo",
+      "source_url": null,
+      "article_count": null,
+      "label_counts": null,
+      "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
+    },
+    "Wired": {
+      "bias": "Center-Left",
+      "provenance": "manual_demo",
+      "source_url": null,
+      "article_count": null,
+      "label_counts": null,
+      "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
+    },
+    "Al Jazeera English": {
+      "bias": "Left",
+      "provenance": "manual_demo",
+      "source_url": null,
+      "article_count": null,
+      "label_counts": null,
+      "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
+    },
+    "Jezebel": {
+      "bias": "Left",
+      "provenance": "manual_demo",
+      "source_url": null,
+      "article_count": null,
+      "label_counts": null,
+      "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
+    },
+    "Gizmodo.com": {
+      "bias": "Left",
+      "provenance": "manual_demo",
+      "source_url": null,
+      "article_count": null,
+      "label_counts": null,
+      "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
+    },
+    "Gothamist": {
+      "bias": "Left",
+      "provenance": "manual_demo",
+      "source_url": null,
+      "article_count": null,
+      "label_counts": null,
+      "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
+    },
+    "The Intercept": {
+      "bias": "Left",
+      "provenance": "manual_demo",
+      "source_url": null,
+      "article_count": null,
+      "label_counts": null,
+      "notes": "Demo registry entry. Replace or enrich with a cited source-level dataset."
+    }
+  },
+  "aliases": {
+    "ap news": "Associated Press",
+    "associated press": "Associated Press",
+    "bbc": "BBC News",
+    "bbc news": "BBC News",
+    "fox": "Fox News",
+    "fox news": "Fox News",
+    "gizmodo": "Gizmodo.com",
+    "npr": "NPR",
+    "reuters": "Reuters",
+    "the guardian": "The Guardian",
+    "wired": "Wired",
+    "yahoo entertainment": "Yahoo Entertainment"
+  }
+}

src/db/__init__.py ADDED Viewed

File without changes

src/db/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (127 Bytes). View file

src/db/__pycache__/vector_store.cpython-313.pyc ADDED Viewed

Binary file (5.81 kB). View file

src/db/vector_store.py ADDED Viewed

	@@ -0,0 +1,120 @@

+import hashlib
+import chromadb
+from sentence_transformers import SentenceTransformer
+from src.config import CHROMA_DB_PATH, HF_TOKEN
+CHROMA_DB_PATH.mkdir(parents=True, exist_ok=True)
class NewsVectorStore:
    """Persistent ChromaDB collection of news articles plus their embeddings.

    Articles are embedded with the ``all-MiniLM-L6-v2`` sentence-transformer
    and stored in a collection configured for cosine distance, so
    ``1 - distance`` can be reported as a similarity score by :meth:`query`.
    """

    # Embedding model cached on the class so it is loaded at most once per
    # process, no matter how many stores are instantiated.
    _model = None

    def __init__(self, collection_name: str = "news_articles"):
        """Open (or create) the collection and make sure the model is loaded.

        Args:
            collection_name: Name of the ChromaDB collection to use.
        """
        print(f"Initializing ChromaDB at {CHROMA_DB_PATH}...")
        self.client = chromadb.PersistentClient(path=str(CHROMA_DB_PATH))
        self.collection = self.client.get_or_create_collection(
            name=collection_name,
            metadata={"hnsw:space": "cosine"}
        )
        if NewsVectorStore._model is None:
            print("Loading embedding model (this takes a few seconds)...")
            NewsVectorStore._model = SentenceTransformer(
                'all-MiniLM-L6-v2',
                token=HF_TOKEN,
            )
        self.embedding_model = NewsVectorStore._model
        print("ChromaDB initialized and embedding model loaded.")

    def store_articles(self, articles_data) -> None:
        """Embed and upsert a batch of NewsAPI-style article dictionaries.

        Each article is keyed by the MD5 hex digest of its URL, so storing the
        same URL again overwrites the earlier record instead of duplicating it.
        Articles without a URL, or with effectively no text, are skipped.

        Args:
            articles_data: Iterable of dicts with the keys read below
                (``url``, ``title``, ``description``, ``content``,
                ``source`` -> ``name``, ``publishedAt``). Missing or ``None``
                values are tolerated.
        """
        if not articles_data:
            print("No articles to store.")
            return

        # Keyed by document id: a URL that appears twice in one batch must be
        # sent to the collection only once (the last occurrence wins, which is
        # what two consecutive upserts would have produced).
        batch = {}
        for article in articles_data:
            url = article.get('url')
            if not url:
                continue
            title = article.get('title') or ""
            desc = article.get('description') or ""
            content = article.get("content") or ""
            text_to_embed = f"{title}. {desc}. {content}"
            # With every field empty the template alone strips to ". ." (three
            # characters), so this threshold drops articles with no real text.
            if len(text_to_embed.strip()) <= 5:
                continue

            # "source" may be absent, None, or carry a None name; metadata
            # values must be real strings, so fall back explicitly.
            source_info = article.get('source')
            source_name = (
                source_info.get('name') if isinstance(source_info, dict) else None
            )

            # MD5 is only a stable identifier here, not a security measure.
            doc_id = hashlib.md5(url.encode(), usedforsecurity=False).hexdigest()
            # Store metadata so we can display it later in the UI
            batch[doc_id] = (text_to_embed, {
                "source": source_name or 'Unknown',
                "url": url,
                "publishedAt": article.get('publishedAt') or '',
                "title": title,
                "description": desc,
            })

        if not batch:
            print("No valid documents to store.")
            return

        ids = list(batch)
        documents = [text for text, _ in batch.values()]
        metadatas = [meta for _, meta in batch.values()]

        # Generate embeddings
        print(f"Generating embeddings for {len(documents)} articles...")
        embeddings = self.embedding_model.encode(documents, batch_size=32).tolist()
        # Insert into ChromaDB
        self.collection.upsert(
            embeddings=embeddings,
            documents=documents,
            metadatas=metadatas,
            ids=ids
        )
        print(f"Successfully stored {len(documents)} articles in ChromaDB!")

    def query(self, topic: str, top_k: int = 10) -> list[dict]:
        """Embed the query topic and retrieve the top-k most similar articles.

        Args:
            topic: Free-text topic to search for.
            top_k: Maximum number of articles to return. It is capped at the
                number of stored documents.

        Returns:
            A list of dicts with ``text``, ``source``, ``url``,
            ``publishedAt``, ``similarity_score`` (``1 - distance``, rounded
            to four decimals), ``title`` and ``description``. The list is
            empty when the collection is empty or ``top_k`` is below 1.
        """
        print(f"querying chromaDB for the topic: '{topic}'")
        # Never request more neighbours than exist, and skip the embedding
        # work entirely when there is nothing that could be returned.
        n_results = min(top_k, self.collection.count())
        if n_results < 1:
            print("Retrieved 0 articles.")
            return []

        query_embedding = self.embedding_model.encode([topic]).tolist()
        results = self.collection.query(
            query_embeddings=query_embedding,
            n_results=n_results,
            include=["documents", "metadatas", "distances"]
        )
        # Results are nested per query embedding; only one was submitted.
        articles = [
            {
                "text": doc,
                "source": meta.get("source", "Unknown"),
                "url": meta.get("url", ""),
                "publishedAt": meta.get("publishedAt", ""),
                "similarity_score": round(1 - dist, 4),
                "title": meta.get("title", ""),
                "description": meta.get("description", ""),
            }
            for doc, meta, dist in zip(
                results["documents"][0],
                results["metadatas"][0],
                results["distances"][0],
            )
        ]
        print(f"Retrieved {len(articles)} articles.")
        return articles
+if __name__ == "__main__":
+    db = NewsVectorStore()
+    print(f"Total documents in collection: {db.collection.count()}")
+    results = db.collection.get()
+    urls = [m.get("url") for m in results["metadatas"]]
+    for url in urls:
+        print(url)

src/ingestion/__init__.py ADDED Viewed

File without changes

src/ingestion/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (134 Bytes). View file

src/ingestion/__pycache__/newsapi_client.cpython-313.pyc ADDED Viewed

Binary file (2.11 kB). View file

src/ingestion/newsapi_client.py ADDED Viewed

	@@ -0,0 +1,49 @@

+from src.db.vector_store import NewsVectorStore
+from newsapi import NewsApiClient
+from src.config import NEWS_API_KEY
+def fetch_news(topic="AI regulation", lang="en", page_size=10):
+    if not NEWS_API_KEY:
+        raise RuntimeError("NEWSAPI_KEY is not configured. Add it to .env before using /ingest.")
+    news_instance = NewsApiClient(api_key=NEWS_API_KEY)
+    try:
+        print("Fetching latest articles...")
+        response = news_instance.get_everything(q=topic, language=lang, sort_by='relevancy', page_size=page_size)
+        if response['status'] == 'ok':
+            articles = response['articles']
+            if not articles:
+                print("No articles found.")
+                return
+            print(f"Successfully fetched {len(articles)} articles.")
+            print("-" * 40)
+            return articles
+        else:
+            print(f"API Error: {response.get('message', 'Unknown error')}")
+            return []
+    except Exception as e:
+        print(f"Pipeline failed: {str(e)}")
+        return []
+def run_pipeline():
+    print("Fetching articles...")
+    articles = fetch_news()
+    if not articles:
+        print("No articles found.")
+        return
+    print(f"Fetched {len(articles)} articles.")
+    db = NewsVectorStore()
+    db.store_articles(articles)
+    print("Pipeline complete.")
+# Run the ingestion pipeline when this module is executed as a script.
+if __name__ == "__main__":
+    run_pipeline()

src/models/__pycache__/dataset_prep.cpython-313.pyc ADDED Viewed

Binary file (1.01 kB). View file

src/models/__pycache__/test_inference.cpython-313.pyc ADDED Viewed

Binary file (5.48 kB). View file

src/models/__pycache__/train_model.cpython-313.pyc ADDED Viewed

Binary file (4.4 kB). View file

src/models/dataset_prep.py ADDED Viewed

	@@ -0,0 +1,19 @@

+import os
+from datasets import load_dataset
+from collections import Counter
+from src.config import HF_ENDPOINT, HF_TOKEN
+# Export the configured endpoint to the process environment when one is set.
+# NOTE(review): this runs after the imports above; a library that reads
+# HF_ENDPOINT at import time would not see the value - confirm.
+if HF_ENDPOINT:
+    os.environ["HF_ENDPOINT"] = HF_ENDPOINT
+def fetch_and_inspect_data():
+    try:
+        dataset = load_dataset("mediabiasgroup/BABE", token=HF_TOKEN)
+        print(Counter(dataset["train"]["label"]))
+        print(Counter(dataset["train"]["label_opinion"]))
+    except Exception as e:
+        print(f"Failed to load dataset: {e}")
+# Print the dataset label statistics when this module is executed as a script.
+if __name__ == "__main__":
+    fetch_and_inspect_data()

src/models/test_inference.py ADDED Viewed

	@@ -0,0 +1,105 @@

+import os
+import torch
+from transformers import RobertaTokenizer, RobertaForSequenceClassification
+import torch.nn.functional as F
+from src.config import BIAS_MODEL_PATH, HF_ENDPOINT, HF_TOKEN
+# Export the configured endpoint to the process environment when one is set.
+# NOTE(review): this runs after the imports above; a library that reads
+# HF_ENDPOINT at import time would not see the value - confirm.
+if HF_ENDPOINT:
+    os.environ["HF_ENDPOINT"] = HF_ENDPOINT
+class BiasPredictor:
+    def __init__(self, model_dir=BIAS_MODEL_PATH, base_model_name="roberta-base"):
+        print("Loading model and tokenizer once...")
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.tokenizer = RobertaTokenizer.from_pretrained(str(model_dir), token=HF_TOKEN)
+        self.model = RobertaForSequenceClassification.from_pretrained(str(model_dir), token=HF_TOKEN)
+        self.model.to(self.device)
+        self.model.eval()
+        print("\n--- CLASSIFIER PARAM CHECK ---")
+        for name, param in self.model.named_parameters():
+            if "classifier" in name:
+                print(name, param.requires_grad, param.data.mean().item())
+        print("--- END CHECK ---\n")
+        self.label_map = {
+            0: "Not Biased",
+            1: "Biased"
+        }
+    def predict(self, text):
+        inputs = self.tokenizer(
+            text,
+            return_tensors="pt",
+            truncation=True,
+            max_length=128,
+            padding=True
+        ).to(self.device)
+        with torch.no_grad():
+            outputs = self.model(**inputs)
+            logits = outputs.logits
+            probs = F.softmax(logits, dim=-1)
+            predicted_class_id = probs.argmax().item()
+            confidence = probs[0][predicted_class_id].item()
+        return {
+            "text": text,
+            "class_id": predicted_class_id,
+            "label": self.label_map.get(predicted_class_id, "Unknown"),
+            "confidence": confidence,
+            "probabilities": probs[0].tolist()
+        }
+    def predict_batch(self, texts: list[str]) -> list[dict]:
+        inputs = self.tokenizer(
+            texts,
+            return_tensors="pt",
+            truncation=True,
+            max_length=128,
+            padding=True
+        ).to(self.device)
+        with torch.no_grad():
+            outputs = self.model(**inputs)
+            logits = outputs.logits
+            probs = F.softmax(logits, dim=-1)
+        results = []
+        for i, text in enumerate(texts):
+            predicted_class_id = probs[i].argmax().item()
+            confidence = probs[i][predicted_class_id].item()
+            results.append({
+                "text": text,
+                "class_id": predicted_class_id,
+                "label": self.label_map.get(predicted_class_id, "Unknown"),
+                "confidence": confidence,
+                "probabilities": probs[i].tolist()
+            })
+        return results
+if __name__ == "__main__":
+    predictor = BiasPredictor()
+    texts = [
+        "The government brutally crushed the peaceful protesters.",
+        "The government deployed police officers to the protest site.",
+        "Scientists warn of accelerating climate change impacts.",
+        "Climate alarmists continue pushing their radical agenda."
+    ]
+    print("\n--- BATCH TEST ---")
+    results = predictor.predict_batch(texts)
+    for r in results:
+        print(f"[{r['label']}] ({r['confidence']:.4f}) {r['text'][:60]}")
+    print("\n ------- Single pass test for each text seprately ----------")
+    for text in [
+            "The government brutally crushed the peaceful protesters.",
+            "The government deployed police officers to the protest site.",
+            "Scientists warn of accelerating climate change impacts.",
+            "Climate alarmists continue pushing their radical agenda."
+        ]:
+            r = predictor.predict(text)
+            print(f"[{r['label']}] ({r['confidence']:.4f}) {r['text'][:60]}")

src/models/train_model.py ADDED Viewed

	@@ -0,0 +1,114 @@

+import os
+from datasets import load_dataset
+from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments
+from peft import LoraConfig, get_peft_model, TaskType
+from sklearn.metrics import accuracy_score, f1_score
+import torch
+import numpy as np
+from transformers import set_seed
+from transformers import DataCollatorWithPadding
+from datetime import datetime
+from src.config import HF_ENDPOINT, HF_TOKEN, MODEL_DIR
+# Export the configured endpoint to the process environment when one is set.
+# NOTE(review): this runs after the imports above; a library that reads
+# HF_ENDPOINT at import time would not see the value - confirm.
+if HF_ENDPOINT:
+    os.environ["HF_ENDPOINT"] = HF_ENDPOINT
+# Fix the random seeds so runs are repeatable.
+set_seed(42)
+np.random.seed(42)
+torch.manual_seed(42)
+# Directory passed to TrainingArguments as output_dir in main().
+output_dir=os.path.join(MODEL_DIR, "bias_checkpoints")
+# Make sure the parent model directory exists before training writes to it.
+os.makedirs(MODEL_DIR, exist_ok=True)
+def main():
+    dataset = load_dataset("mediabiasgroup/BABE", token=HF_TOKEN)
+    dataset = dataset["train"].train_test_split(test_size=0.2, seed=42)
+    model_name = "roberta-base"
+    tokenizer  = RobertaTokenizer.from_pretrained(model_name, token=HF_TOKEN)
+    def collapse_labels(example):
+        old = example["label"]
+        if old in [0, 1]:
+            example["label"] = 0   # Right
+        elif old == 2:
+            example["label"] = 1   # Center
+        else:
+            example["label"] = 2   # Left
+        return example
+    #dataset = dataset.map(collapse_labels)
+    def tokenize_function(examples):
+        return tokenizer(examples["text"], truncation=True, max_length=128)
+    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
+    tokenized_datasets = dataset.map(tokenize_function, batched = True)
+    tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
+    tokenized_datasets.set_format("torch", columns=["input_ids", "attention_mask", "labels"])
+    model = RobertaForSequenceClassification.from_pretrained(
+        model_name,
+        num_labels=2,
+        token=HF_TOKEN,
+    )
+    peft_config = LoraConfig(
+        task_type=TaskType.SEQ_CLS,
+        r=8,
+        lora_alpha=32,
+        lora_dropout=0.1,
+        target_modules=["query", "value"]
+    )
+    model = get_peft_model(model, peft_config)
+    model.print_trainable_parameters()
+    training_args = TrainingArguments(
+        output_dir=output_dir,
+        learning_rate=2e-4,
+        per_device_train_batch_size=8,
+        per_device_eval_batch_size=8,
+        num_train_epochs=3,
+        eval_strategy="epoch",
+        save_strategy="epoch",
+        logging_steps=10,
+        report_to="none"
+    )
+    def compute_metrics(eval_pred):
+        logits, labels = eval_pred
+        preds = np.argmax(logits, axis=1)
+        return {
+            "accuracy": accuracy_score(labels, preds),
+            "f1_weighted": f1_score(labels, preds, average="weighted")
+        }
+    trainer = Trainer(
+        model=model,
+        args=training_args,
+        train_dataset=tokenized_datasets["train"],
+        eval_dataset=tokenized_datasets["test"],
+        compute_metrics=compute_metrics,
+        data_collator=data_collator
+    )
+    trainer.train()
+    # CRITICAL FIX
+    model = model.merge_and_unload()
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    save_path = os.path.join(MODEL_DIR, f"bias_lora_{timestamp}")
+    model.save_pretrained(save_path)
+    tokenizer.save_pretrained(save_path)
+# Start training when this module is executed as a script.
+if __name__ == "__main__":
+    main()

src/ui/__init__.py ADDED Viewed

File without changes

src/ui/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (127 Bytes). View file

src/ui/__pycache__/app.cpython-313.pyc ADDED Viewed

Binary file (20.5 kB). View file

src/ui/app.py ADDED Viewed

	@@ -0,0 +1,518 @@

+from collections import defaultdict
+from html import escape
+import streamlit as st
+from src.ui.components.article_card import inject_article_card_styles, render_article_card
+from src.ui.components.charts import build_bias_distribution_chart, build_lean_bias_chart
+from src.ui.services.api_client import NewsLensClient
+from src.ui.services.api_client import DirectPipelineClient
+# Evaluation figures rendered by render_model_panel(); hard-coded here rather
+# than read from a training artifact.
+MODEL_EVAL = {
+    "eval_accuracy": 0.8544,
+    "eval_f1_weighted": 0.8546,
+    "eval_loss": 0.3933,
+    "train_loss": 0.3888,
+    "epochs": 3,
+}
+# Page-level Streamlit configuration: title, wide layout, sidebar open.
+st.set_page_config(
+    page_title="NewsLens",
+    layout="wide",
+    initial_sidebar_state="expanded",
+)
+def inject_styles() -> None:
+    """Inject the dashboard's global stylesheet into the page.
+
+    The CSS defines the ``--nl-*`` colour variables and the ``nl-*`` classes
+    used by the HTML snippets in this module, and restyles the sidebar,
+    metric tiles and buttons.
+    """
+    st.markdown(
+        """
+        <style>
+            :root {
+                --nl-ink: #15202b;
+                --nl-muted: #64748b;
+                --nl-line: #d8dee9;
+                --nl-panel: #ffffff;
+                --nl-soft: #f6f8fb;
+                --nl-blue: #2457c5;
+                --nl-teal: #087f8c;
+                --nl-red: #c24138;
+                --nl-green: #247857;
+            }
+            .block-container {
+                padding-top: 1.4rem;
+                padding-bottom: 2rem;
+                max-width: 1240px;
+            }
+            [data-testid="stSidebar"] {
+                background: #f7f9fc;
+                border-right: 1px solid var(--nl-line);
+            }
+            [data-testid="stSidebar"] h1,
+            [data-testid="stSidebar"] h2,
+            [data-testid="stSidebar"] h3 {
+                color: var(--nl-ink);
+            }
+            h1, h2, h3 {
+                letter-spacing: 0;
+            }
+            .nl-topbar {
+                border-bottom: 1px solid var(--nl-line);
+                padding: 0 0 1rem 0;
+                margin-bottom: 1.2rem;
+            }
+            .nl-kicker {
+                color: var(--nl-teal);
+                font-size: 0.78rem;
+                font-weight: 800;
+                letter-spacing: 0.08em;
+                text-transform: uppercase;
+                margin-bottom: 0.25rem;
+            }
+            .nl-title {
+                color: var(--nl-ink);
+                font-size: 2.25rem;
+                font-weight: 800;
+                line-height: 1.1;
+                margin: 0;
+            }
+            .nl-subtitle {
+                color: var(--nl-muted);
+                max-width: 780px;
+                margin-top: 0.55rem;
+                font-size: 1rem;
+                line-height: 1.55;
+            }
+            .nl-empty {
+                background: linear-gradient(135deg, #f7f9fc 0%, #eef6f2 100%);
+                border: 1px solid var(--nl-line);
+                border-radius: 8px;
+                padding: 2.2rem;
+                margin-top: 1rem;
+            }
+            .nl-empty h3 {
+                color: var(--nl-ink);
+                margin: 0 0 0.5rem 0;
+            }
+            .nl-empty p {
+                color: var(--nl-muted);
+                margin: 0;
+                line-height: 1.6;
+            }
+            .nl-section-heading {
+                color: var(--nl-ink);
+                font-size: 1.05rem;
+                font-weight: 800;
+                margin: 1.1rem 0 0.45rem 0;
+            }
+            .nl-source-heading {
+                border-top: 1px solid var(--nl-line);
+                color: var(--nl-ink);
+                display: flex;
+                justify-content: space-between;
+                align-items: center;
+                gap: 1rem;
+                padding-top: 1rem;
+                margin: 1.1rem 0 0.5rem 0;
+            }
+            .nl-source-heading h3 {
+                font-size: 1.05rem;
+                margin: 0;
+            }
+            .nl-source-meta {
+                color: var(--nl-muted);
+                font-size: 0.85rem;
+                white-space: nowrap;
+            }
+            .nl-insight {
+                border-left: 4px solid var(--nl-teal);
+                background: #f5fbfa;
+                padding: 0.9rem 1rem;
+                color: var(--nl-ink);
+                margin: 0.25rem 0 0.9rem 0;
+            }
+            .nl-insight strong {
+                color: var(--nl-teal);
+            }
+            .nl-model-panel {
+                background: #f7f9fc;
+                border: 1px solid var(--nl-line);
+                border-radius: 8px;
+                padding: 1rem;
+                margin-top: 0.8rem;
+            }
+            .nl-model-panel h3 {
+                color: var(--nl-ink);
+                font-size: 1rem;
+                margin: 0 0 0.6rem 0;
+            }
+            .nl-model-grid {
+                display: grid;
+                gap: 0.65rem;
+                grid-template-columns: repeat(4, minmax(0, 1fr));
+            }
+            .nl-model-stat {
+                background: #ffffff;
+                border: 1px solid var(--nl-line);
+                border-radius: 8px;
+                padding: 0.75rem;
+            }
+            .nl-model-stat span {
+                color: var(--nl-muted);
+                display: block;
+                font-size: 0.72rem;
+                font-weight: 800;
+                letter-spacing: 0.04em;
+                text-transform: uppercase;
+            }
+            .nl-model-stat strong {
+                color: var(--nl-ink);
+                display: block;
+                font-size: 1.25rem;
+                margin-top: 0.2rem;
+            }
+            div[data-testid="stMetric"] {
+                background: var(--nl-panel);
+                border: 1px solid var(--nl-line);
+                border-radius: 8px;
+                padding: 0.85rem 1rem;
+            }
+            div[data-testid="stMetric"] label {
+                color: var(--nl-muted);
+            }
+            .stButton > button {
+                background: var(--nl-blue);
+                border: 1px solid var(--nl-blue);
+                color: #ffffff;
+                font-weight: 700;
+                min-height: 2.6rem;
+                width: 100%;
+            }
+            .stButton > button:hover {
+                background: #1f4dac;
+                border-color: #1f4dac;
+                color: #ffffff;
+            }
+            @media (max-width: 760px) {
+                .nl-title {
+                    font-size: 1.75rem;
+                }
+                .nl-empty {
+                    padding: 1.4rem;
+                }
+                .nl-source-heading {
+                    align-items: flex-start;
+                    flex-direction: column;
+                    gap: 0.2rem;
+                }
+                .nl-model-grid {
+                    grid-template-columns: repeat(2, minmax(0, 1fr));
+                }
+            }
+        </style>
+        """,
+        unsafe_allow_html=True,
+    )
+def summarize_bias(summary: dict) -> tuple[int, int, float]:
+    total = sum(source.get("total", 0) for source in summary.values())
+    biased = sum(source.get("Biased", 0) for source in summary.values())
+    ratio = biased / total if total else 0
+    return total, biased, ratio
+def insight_copy(ratio: float) -> str:
+    percent = int(round(ratio * 100))
+    if ratio >= 0.6:
+        return f"<strong>{percent}% biased coverage.</strong> The retrieved articles lean noticeably toward biased framing."
+    if ratio <= 0.4:
+        return f"<strong>{percent}% biased coverage.</strong> The article set is mostly neutral by the current model."
+    return f"<strong>{percent}% biased coverage.</strong> The result set is mixed and worth comparing source by source."
+def render_model_panel() -> None:
+    """Render the "Model Snapshot" panel from the ``MODEL_EVAL`` constants.
+
+    Four tiles are shown: accuracy and weighted F1 as percentages with one
+    decimal, evaluation loss with three decimals, and the epoch count.
+    """
+    st.markdown(
+        f"""
+        <div class="nl-model-panel">
+            <h3>Model Snapshot</h3>
+            <div class="nl-model-grid">
+                <div class="nl-model-stat">
+                    <span>Eval Accuracy</span>
+                    <strong>{MODEL_EVAL["eval_accuracy"]:.1%}</strong>
+                </div>
+                <div class="nl-model-stat">
+                    <span>Weighted F1</span>
+                    <strong>{MODEL_EVAL["eval_f1_weighted"]:.1%}</strong>
+                </div>
+                <div class="nl-model-stat">
+                    <span>Eval Loss</span>
+                    <strong>{MODEL_EVAL["eval_loss"]:.3f}</strong>
+                </div>
+                <div class="nl-model-stat">
+                    <span>Epochs</span>
+                    <strong>{MODEL_EVAL["epochs"]}</strong>
+                </div>
+            </div>
+        </div>
+        """,
+        unsafe_allow_html=True,
+    )
+def render_empty_state() -> None:
+    """Render the placeholder panel shown while no analysis result is stored."""
+    st.markdown(
+        """
+        <div class="nl-empty">
+            <h3>Run a topic analysis</h3>
+            <p>
+                Search a public issue, company, policy, or event to compare retrieved articles by source,
+                model label, and confidence. Results will appear as a dashboard with source-level evidence.
+            </p>
+        </div>
+        """,
+        unsafe_allow_html=True,
+    )
+# Inject the page-wide and article-card CSS before any content is rendered.
+inject_styles()
+inject_article_card_styles()
+# Client used for both the ingest and analyze actions below.
+client = DirectPipelineClient()
+# Create the session-state slots on first run so later reads never fail.
+if "analysis" not in st.session_state:
+    st.session_state.analysis = None
+if "last_ingest" not in st.session_state:
+    st.session_state.last_ingest = None
+# Sidebar: search inputs, action buttons and sample topics.
+with st.sidebar:
+    st.title("NewsLens")
+    st.caption("News bias analysis dashboard")
+    topic = st.text_input("Topic", value="climate change", max_chars=120)
+    top_k = st.slider("Articles to retrieve", min_value=1, max_value=20, value=10)
+    page_size = st.slider("Articles to ingest", min_value=5, max_value=50, value=15, step=5)
+    with st.expander("Advanced", expanded=False):
+        debug = st.checkbox("Show model internals", value=False)
+    ingest = st.button("Ingest latest articles")
+    analyze = st.button("Analyze topic", type="primary")
+    # Shows the ingest result stored in session state. The ingest action itself
+    # runs further down the script, so this reflects state from before that call.
+    if st.session_state.last_ingest:
+        st.success(
+            f"Stored {st.session_state.last_ingest['articles_stored']} "
+            f"article(s) for {st.session_state.last_ingest['topic']}."
+        )
+    st.divider()
+    st.caption("Suggested searches")
+    sample_topics = ["climate change", "electric vehicles", "AI regulation", "public health"]
+    selected_sample = st.selectbox(
+        "Sample topics",
+        ["Use typed topic"] + sample_topics,
+        label_visibility="collapsed",
+    )
+    # A selected sample replaces whatever was typed into the text input.
+    if selected_sample != "Use typed topic":
+        topic = selected_sample
+# Static page header: kicker, title and subtitle.
+st.markdown(
+    """
+    <div class="nl-topbar">
+        <div class="nl-kicker">Media Intelligence</div>
+        <h1 class="nl-title">NewsLens Bias Analyzer</h1>
+        <div class="nl-subtitle">
+            Compare how news sources frame a topic using retrieval, source metadata, and a text-bias classifier.
+        </div>
+    </div>
+    """,
+    unsafe_allow_html=True,
+)
+# "Analyze topic": run the analysis and keep the result in session state.
+if analyze:
+    if not topic.strip():
+        st.error("Topic cannot be empty.")
+        st.stop()
+    with st.spinner("Analyzing coverage..."):
+        try:
+            st.session_state.analysis = client.analyze(topic.strip(), top_k)
+        except Exception as exc:
+            # Show the failure in the page and halt this script run.
+            st.error(str(exc))
+            st.stop()
+# "Ingest latest articles": ingest first, then analyse the same topic right away.
+if ingest:
+    if not topic.strip():
+        st.error("Topic cannot be empty.")
+        st.stop()
+    with st.spinner("Fetching and indexing articles..."):
+        try:
+            st.session_state.last_ingest = client.ingest(topic.strip(), page_size)
+            st.session_state.analysis = client.analyze(topic.strip(), top_k)
+        except Exception as exc:
+            st.error(str(exc))
+            st.stop()
+# Nothing analysed yet: show the placeholder and end this script run.
+data = st.session_state.analysis
+if data is None:
+    render_empty_state()
+    st.stop()
+# "summary" is keyed by source (see summarize_bias); "results" is the article list.
+summary = data.get("summary", {})
+results = data.get("results", [])
+total, biased, bias_ratio = summarize_bias(summary)
+# Clamp at zero in case the biased count exceeds the reported total.
+neutral = max(total - biased, 0)
+source_count = len(summary)
+# Headline metric tiles.
+metric_cols = st.columns(4)
+metric_cols[0].metric("Articles", total)
+metric_cols[1].metric("Sources", source_count)
+metric_cols[2].metric("Biased", biased)
+metric_cols[3].metric("Not biased", neutral)
+st.markdown(
+    f"""<div class="nl-insight">{insight_copy(bias_ratio)}</div>""",
+    unsafe_allow_html=True,
+)
+# Three result tabs; the overview tab holds the two charts.
+tab_overview, tab_articles, tab_model = st.tabs(["Overview", "Articles", "Model"])
+with tab_overview:
+    st.markdown('<div class="nl-section-heading">Bias Distribution by Source</div>', unsafe_allow_html=True)
+    chart = build_bias_distribution_chart(summary)
+    # A falsy chart means the builder had nothing to plot.
+    if chart:
+        st.plotly_chart(chart, use_container_width=True, config={"displayModeBar": False})
+    else:
+        st.warning("No chart data available.")
+    st.markdown('<div class="nl-section-heading">Bias by Political Lean</div>', unsafe_allow_html=True)
+    st.caption("Are left-leaning or right-leaning sources more biased on this topic?")
+    lean_chart = build_lean_bias_chart(results)
+    if lean_chart:
+        st.plotly_chart(lean_chart, use_container_width=True, config={"displayModeBar": False})
+    else:
+        st.warning("Not enough source lean data.")
+# Articles tab: filter, sort and group the retrieved articles by source.
+with tab_articles:
+    st.markdown('<div class="nl-section-heading">Evidence Articles</div>', unsafe_allow_html=True)
+    if not results:
+        st.warning("No articles found.")
+    else:
+        # Filter options are derived from the values present in the results.
+        # NOTE(review): a key that is present with value None would make
+        # sorted() compare None with str - confirm the API never returns that.
+        labels = sorted({article.get("text_label", "Unknown") for article in results})
+        leans = sorted({article.get("source_bias", "Unknown") for article in results})
+        filter_cols = st.columns([1, 1, 1])
+        selected_label = filter_cols[0].selectbox("Classification", ["All"] + labels)
+        selected_lean = filter_cols[1].selectbox("Source lean", ["All"] + leans)
+        sort_by = filter_cols[2].selectbox(
+            "Sort by",
+            ["Confidence", "Similarity", "Source"],
+        )
+        filtered_results = results
+        if selected_label != "All":
+            filtered_results = [
+                article for article in filtered_results
+                if article.get("text_label", "Unknown") == selected_label
+            ]
+        if selected_lean != "All":
+            filtered_results = [
+                article for article in filtered_results
+                if article.get("source_bias", "Unknown") == selected_lean
+            ]
+        # Confidence and similarity sort high-to-low; source sorts A-to-Z.
+        if sort_by == "Confidence":
+            filtered_results = sorted(
+                filtered_results,
+                key=lambda article: article.get("confidence", 0),
+                reverse=True,
+            )
+        elif sort_by == "Similarity":
+            filtered_results = sorted(
+                filtered_results,
+                key=lambda article: article.get("similarity_score", 0),
+                reverse=True,
+            )
+        else:
+            filtered_results = sorted(
+                filtered_results,
+                key=lambda article: article.get("source", "Unknown source"),
+            )
+        st.caption(f"Showing {len(filtered_results)} of {len(results)} retrieved articles.")
+        if not filtered_results:
+            st.warning("No articles match the selected filters.")
+        else:
+            # Group by source; insertion order keeps the sort order chosen above.
+            grouped = defaultdict(list)
+            for article in filtered_results:
+                grouped[article.get("source", "Unknown source")].append(article)
+            for source, articles in grouped.items():
+                # The first article's source_bias is used for the whole group.
+                source_bias = articles[0].get("source_bias", "Unknown")
+                st.markdown(
+                    f"""
+                    <div class="nl-source-heading">
+                        <h3>{escape(str(source))}</h3>
+                        <div class="nl-source-meta">{escape(str(source_bias))} source bias | {len(articles)} article(s)</div>
+                    </div>
+                    """,
+                    unsafe_allow_html=True,
+                )
+                for article in articles:
+                    render_article_card(article, debug=debug)
+# Model tab: snapshot panel, per-epoch table and a usage caveat.
+with tab_model:
+    render_model_panel()
+    st.markdown('<div class="nl-section-heading">Training Run</div>', unsafe_allow_html=True)
+    # The percentages in this sentence and the table rows are hard-coded; the
+    # epoch-3 figures repeat the values in MODEL_EVAL and must be kept in sync.
+    st.write(
+        "RoBERTa was fine-tuned for binary text-bias classification with LoRA. "
+        "The best supplied run finished at 85.44% evaluation accuracy and 85.46% weighted F1."
+    )
+    st.dataframe(
+        [
+            {"Epoch": 1, "Eval loss": 0.3576, "Accuracy": 0.8432, "Weighted F1": 0.8434},
+            {"Epoch": 2, "Eval loss": 0.3656, "Accuracy": 0.8512, "Weighted F1": 0.8512},
+            {"Epoch": 3, "Eval loss": 0.3933, "Accuracy": 0.8544, "Weighted F1": 0.8546},
+        ],
+        hide_index=True,
+        use_container_width=True,
+    )
+    st.info(
+        "Use these labels as decision support, not ground truth. Bias classification is sensitive "
+        "to dataset definitions, article excerpts, and source coverage."
+    )

src/ui/components/__init__.py ADDED Viewed

File without changes

src/ui/components/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (138 Bytes). View file

src/ui/components/__pycache__/article_card.cpython-313.pyc ADDED Viewed

Binary file (6.52 kB). View file

src/ui/components/__pycache__/charts.cpython-313.pyc ADDED Viewed

Binary file (4.32 kB). View file

src/ui/components/article_card.py ADDED Viewed

	@@ -0,0 +1,172 @@

+from html import escape
+import streamlit as st
+# Stylesheet for the article cards: defines the ``nl-article-*``, ``nl-label``,
+# ``nl-confidence-*`` and ``nl-read-link`` classes used by render_article_card().
+ARTICLE_CARD_STYLES = """
+<style>
+    .nl-article-card {
+        background: #ffffff;
+        border: 1px solid #d8dee9;
+        border-radius: 8px;
+        padding: 1rem;
+        margin: 0.65rem 0 0.9rem 0;
+    }
+    .nl-article-header {
+        display: flex;
+        align-items: flex-start;
+        justify-content: space-between;
+        gap: 1rem;
+    }
+    .nl-article-source {
+        color: #64748b;
+        font-size: 0.78rem;
+        font-weight: 700;
+        letter-spacing: 0.04em;
+        margin-bottom: 0.25rem;
+        text-transform: uppercase;
+    }
+    .nl-article-card h4 {
+        color: #15202b;
+        font-size: 1rem;
+        line-height: 1.35;
+        margin: 0;
+    }
+    .nl-article-card p {
+        color: #475569;
+        line-height: 1.55;
+        margin: 0.65rem 0 0.8rem 0;
+    }
+    .nl-label {
+        border: 1px solid;
+        border-radius: 999px;
+        font-size: 0.75rem;
+        font-weight: 800;
+        padding: 0.25rem 0.55rem;
+        white-space: nowrap;
+    }
+    .nl-confidence-row {
+        color: #64748b;
+        display: flex;
+        justify-content: space-between;
+        font-size: 0.82rem;
+        margin-bottom: 0.3rem;
+    }
+    .nl-confidence-row strong {
+        color: #15202b;
+    }
+    .nl-confidence-track {
+        background: #eef2f7;
+        border-radius: 999px;
+        height: 0.45rem;
+        overflow: hidden;
+        width: 100%;
+    }
+    .nl-confidence-track div {
+        height: 100%;
+    }
+    .nl-read-link {
+        color: #2457c5;
+        display: inline-block;
+        font-weight: 800;
+        margin-top: 0.75rem;
+        text-decoration: none;
+    }
+    .nl-read-link:hover {
+        color: #1f4dac;
+        text-decoration: underline;
+    }
+    @media (max-width: 760px) {
+        .nl-article-header {
+            flex-direction: column;
+            gap: 0.5rem;
+        }
+    }
+</style>
+"""
+def inject_article_card_styles() -> None:
+    """Inject ``ARTICLE_CARD_STYLES`` into the page as raw HTML."""
+    st.markdown(ARTICLE_CARD_STYLES, unsafe_allow_html=True)
+def _safe_text(value: object, fallback: str = "") -> str:
+    if value is None:
+        return fallback
+    text = str(value).strip()
+    return text or fallback
+def _label_style(label: str) -> tuple[str, str]:
+    if label.lower() == "biased":
+        return "#c24138", "#fff4f2"
+    return "#247857", "#effaf5"
+def smart_truncate(text, limit=80):
+    if len(text) <= limit:
+        return text
+    return text[:limit].rsplit(" ", 1)[0] + "..."
+def _safe_href(url: str) -> str:
+    """Return ``url`` only when it is an absolute http(s) link, otherwise ``"#"``.
+
+    The value ends up inside an ``href`` attribute of raw HTML. HTML-escaping
+    protects the attribute quoting but does not neutralise schemes such as
+    ``javascript:`` or ``data:``, so anything that is not http/https is dropped.
+    """
+    from urllib.parse import urlsplit
+    try:
+        parts = urlsplit(url)
+    except ValueError:
+        # urlsplit rejects some malformed inputs (e.g. a broken IPv6 netloc).
+        return "#"
+    if parts.scheme in ("http", "https") and parts.netloc:
+        return url
+    return "#"
+def render_article_card(article: dict, debug: bool = False) -> None:
+    """Render one analysed article as an HTML card.
+
+    Reads these keys from ``article`` (all optional): ``text_label``,
+    ``confidence``, ``source``, ``source_bias``, ``source_bias_provenance``,
+    ``url``, ``description``, ``text`` and ``title``. Every text value is
+    HTML-escaped before being interpolated into the markup.
+
+    Args:
+        article: Mapping describing the article.
+        debug: When true, an extra "Model internals" expander shows
+            ``similarity_score`` and ``probabilities`` if the article has them.
+    """
+    label = _safe_text(article.get("text_label"), "Unknown")
+    # A malformed confidence should not take down the whole card; show it as 0.
+    try:
+        confidence = float(article.get("confidence") or 0)
+    except (TypeError, ValueError):
+        confidence = 0.0
+    source = _safe_text(article.get("source"), "Unknown source")
+    source_bias = _safe_text(article.get("source_bias"), "Unknown bias")
+    source_bias_provenance = _safe_text(article.get("source_bias_provenance"))
+    source_meta = f"{source} / {source_bias}"
+    # The "manual_demo" provenance is deliberately left out of the header line.
+    if source_bias_provenance and source_bias_provenance != "manual_demo":
+        source_meta = f"{source_meta} / {source_bias_provenance}"
+    # Only absolute http(s) links are allowed into the href attribute.
+    url = _safe_href(_safe_text(article.get("url"), "#"))
+    description = _safe_text(article.get("description"))
+    fallback_text = _safe_text(article.get("text"))[:280]
+    excerpt = description or fallback_text or "No article excerpt was returned by the API."
+    # Without a title, a shortened excerpt stands in for the heading.
+    title = _safe_text(article.get("title")) or smart_truncate(excerpt, 80)
+    accent, soft = _label_style(label)
+    # The bar width is clamped to 0-100 %; the printed number is the raw value.
+    confidence_pct = max(0, min(confidence, 1)) * 100
+    st.markdown(
+        f"""
+        <div class="nl-article-card">
+            <div class="nl-article-header">
+                <div>
+                    <div class="nl-article-source">{escape(source_meta)}</div>
+                    <h4>{escape(title)}</h4>
+                </div>
+                <span class="nl-label" style="color:{accent}; background:{soft}; border-color:{accent};">
+                    {escape(label)}
+                </span>
+            </div>
+            <p>{escape(excerpt)}</p>
+            <div class="nl-confidence-row">
+                <span>Confidence</span>
+                <strong>{confidence:.2f}</strong>
+            </div>
+            <div class="nl-confidence-track">
+                <div style="width:{confidence_pct:.0f}%; background:{accent};"></div>
+            </div>
+            <a class="nl-read-link" href="{escape(url)}" target="_blank" rel="noopener noreferrer">
+                Read article
+            </a>
+        </div>
+        """,
+        unsafe_allow_html=True,
+    )
+    if debug:
+        with st.expander("Model internals", expanded=False):
+            if "similarity_score" in article:
+                st.caption(f"Similarity score: {article['similarity_score']:.4f}")
+            if "probabilities" in article:
+                st.json(article["probabilities"])

src/ui/components/charts.py ADDED Viewed

	@@ -0,0 +1,142 @@

+import pandas as pd
+import plotly.express as px
+def build_bias_distribution_chart(summary: dict):
+    """Build a grouped bar chart of biased vs. not-biased article counts per source.
+
+    Args:
+        summary: Mapping of source name to a stats mapping. Each stats mapping
+            is read for ``"Biased"``, ``"Not Biased"`` (falling back to
+            ``"Not_Biased"``) and ``"total"``; missing counts default to 0 and
+            a missing total defaults to the sum of the two counts.
+
+    Returns:
+        The Plotly figure, with sources ordered by descending total, or
+        ``None`` when ``summary`` produces no rows.
+    """
+    def _as_row(source, stats):
+        biased_count = stats.get("Biased", 0)
+        clean_count = stats.get("Not Biased", stats.get("Not_Biased", 0))
+        return {
+            "Source": source,
+            "Biased": biased_count,
+            "Not biased": clean_count,
+            "Total": stats.get("total", biased_count + clean_count),
+        }
+    frame = pd.DataFrame([_as_row(source, stats) for source, stats in summary.items()])
+    if frame.empty:
+        return None
+    # Reshape to one row per (source, classification) so the bars can be grouped.
+    long_frame = frame.sort_values("Total", ascending=False).melt(
+        id_vars=["Source", "Total"],
+        value_vars=["Biased", "Not biased"],
+        var_name="Classification",
+        value_name="Articles",
+    )
+    palette = {"Biased": "#c24138", "Not biased": "#247857"}
+    fig = px.bar(
+        long_frame,
+        x="Source",
+        y="Articles",
+        color="Classification",
+        barmode="group",
+        text="Articles",
+        color_discrete_map=palette,
+    )
+    fig.update_traces(textposition="outside", marker_line_width=0, cliponaxis=False)
+    layout = {
+        "height": 430,
+        "margin": {"l": 12, "r": 12, "t": 24, "b": 12},
+        "paper_bgcolor": "rgba(0,0,0,0)",
+        "plot_bgcolor": "rgba(0,0,0,0)",
+        "bargap": 0.26,
+        "legend": {
+            "orientation": "h",
+            "yanchor": "bottom",
+            "y": 1.02,
+            "xanchor": "right",
+            "x": 1,
+            "title": None,
+        },
+        "xaxis": {
+            "title": None,
+            "tickangle": -20,
+            "showgrid": False,
+            "linecolor": "#d8dee9",
+        },
+        "yaxis": {
+            "title": "Articles",
+            "gridcolor": "#e8edf4",
+            "zeroline": False,
+        },
+        "font": {"color": "#15202b", "family": "Arial, sans-serif"},
+    }
+    fig.update_layout(**layout)
+    return fig
+def build_lean_bias_chart(results: list) -> object:
+    """Build a grouped bar chart of biased vs. not-biased counts per source lean.
+
+    Each article mapping is read for ``"source_bias"`` (the lean) and
+    ``"text_label"`` (the classification). Articles whose label is neither
+    "Biased" nor "Not Biased"/"Not_Biased" are not counted.
+
+    Args:
+        results: List of article mappings.
+
+    Returns:
+        The Plotly figure, or ``None`` when no article contributed a count.
+    """
+    from collections import defaultdict
+    lean_counts = defaultdict(lambda: {"Biased": 0, "Not biased": 0})
+    for article in results:
+        # A missing, None or empty lean goes into the "Unknown" bucket instead
+        # of turning into a NaN category further down.
+        lean = str(article.get("source_bias") or "Unknown")
+        label = article.get("text_label", "Unknown")
+        if label == "Biased":
+            lean_counts[lean]["Biased"] += 1
+        elif label in ("Not Biased", "Not_Biased"):
+            lean_counts[lean]["Not biased"] += 1
+    rows = [
+        {"Lean": lean, "Biased": counts["Biased"], "Not biased": counts["Not biased"]}
+        for lean, counts in lean_counts.items()
+    ]
+    df = pd.DataFrame(rows)
+    if df.empty:
+        return None
+    known_order = ["Left", "Center-Left", "Center", "Center-Right", "Right"]
+    # Values outside a Categorical's categories become NaN and would vanish
+    # from the chart, so unexpected leans are appended (alphabetically) to
+    # the axis order, just before "Unknown".
+    extra_leans = sorted(
+        lean for lean in lean_counts if lean not in known_order and lean != "Unknown"
+    )
+    lean_order = [*known_order, *extra_leans, "Unknown"]
+    df["Lean"] = pd.Categorical(df["Lean"], categories=lean_order, ordered=True)
+    df = df.sort_values("Lean")
+    df_melted = df.melt(
+        id_vars="Lean",
+        value_vars=["Biased", "Not biased"],
+        var_name="Classification",
+        value_name="Articles",
+    )
+    fig = px.bar(
+        df_melted,
+        x="Lean",
+        y="Articles",
+        color="Classification",
+        barmode="group",
+        text="Articles",
+        color_discrete_map={"Biased": "#c24138", "Not biased": "#247857"},
+    )
+    fig.update_traces(textposition="outside", marker_line_width=0, cliponaxis=False)
+    fig.update_layout(
+        height=380,
+        margin=dict(l=12, r=12, t=24, b=12),
+        paper_bgcolor="rgba(0,0,0,0)",
+        plot_bgcolor="rgba(0,0,0,0)",
+        bargap=0.3,
+        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1, title=None),
+        xaxis=dict(title=None, showgrid=False, linecolor="#d8dee9"),
+        yaxis=dict(title="Articles", gridcolor="#e8edf4", zeroline=False),
+        font=dict(color="#15202b", family="Arial, sans-serif"),
+    )
+    return fig

src/ui/services/__init__.py ADDED Viewed

File without changes

src/ui/services/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (136 Bytes). View file

src/ui/services/__pycache__/api_client.cpython-313.pyc ADDED Viewed

Binary file (3.41 kB). View file

src/ui/services/api_client.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import requests
+from src.config import API_BASE_URL
+class NewsLensClient:
+    """HTTP client for the NewsLens API's ``/analyze`` and ``/ingest`` endpoints."""
+    def __init__(self, base_url: str = API_BASE_URL):
+        """Store the API root that endpoint paths are appended to."""
+        self.base_url = base_url
+    def _post(self, endpoint: str, payload: dict, timeout: float) -> dict:
+        """POST ``payload`` as JSON to ``endpoint`` and return the decoded body.
+
+        Raises:
+            RuntimeError: If the request fails or the server answers with an
+                HTTP error status; the underlying ``requests`` exception is
+                chained as the cause.
+        """
+        # Tolerate a base URL configured with a trailing slash.
+        url = f"{self.base_url.rstrip('/')}/{endpoint}"
+        try:
+            response = requests.post(url, json=payload, timeout=timeout)
+            response.raise_for_status()
+            return response.json()
+        except requests.exceptions.RequestException as e:
+            raise RuntimeError(f"API request failed: {e}") from e
+    def analyze(self, topic: str, top_k: int = 10) -> dict:
+        """Request an analysis of ``topic`` limited to ``top_k`` results (30 s timeout)."""
+        return self._post("analyze", {"topic": topic, "top_k": top_k}, timeout=30)
+    def ingest(self, topic: str, page_size: int = 10) -> dict:
+        """Request ingestion of ``page_size`` articles for ``topic`` (45 s timeout)."""
+        return self._post("ingest", {"topic": topic, "page_size": page_size}, timeout=45)
+class DirectPipelineClient:
+    """In-process client exposing the same ``analyze``/``ingest`` methods as the HTTP client."""
+    def __init__(self):
+        """Create the analysis pipeline; its module is imported only at this point."""
+        from src.analysis.rag_pipeline import NewsAnalysisPipeline
+        self.pipeline = NewsAnalysisPipeline()
+    def analyze(self, topic: str, top_k: int = 10) -> dict:
+        """Delegate to the pipeline's ``analyze`` and return its result."""
+        result = self.pipeline.analyze(topic, top_k)
+        return result
+    def ingest(self, topic: str, page_size: int = 10) -> dict:
+        """Fetch articles for ``topic``, store them in the pipeline's vector store and report counts.
+
+        Both reported counts are the number of fetched articles.
+        """
+        from src.ingestion.newsapi_client import fetch_news
+        fetched = fetch_news(topic=topic, page_size=page_size)
+        store = self.pipeline.vector_store
+        store.store_articles(fetched)
+        # Measured after storing, as the summary is built once the store call returns.
+        count = len(fetched)
+        summary = {
+            "topic": topic,
+            "articles_fetched": count,
+            "articles_stored": count,
+            "status": "success",
+        }
+        return summary