# newslens/src/ui/app.py
# Add NewsLens Streamlit app (commit 208266a, author: Jitender20)
from collections import defaultdict
from html import escape
import streamlit as st
from src.ui.components.article_card import inject_article_card_styles, render_article_card
from src.ui.components.charts import build_bias_distribution_chart, build_lean_bias_chart
from src.ui.services.api_client import NewsLensClient
from src.ui.services.api_client import DirectPipelineClient
# Hard-coded evaluation figures for the bias classifier. render_model_panel()
# reads eval_accuracy, eval_f1_weighted, eval_loss and epochs from this mapping.
MODEL_EVAL = dict(
    eval_accuracy=0.8544,
    eval_f1_weighted=0.8546,
    eval_loss=0.3933,
    train_loss=0.3888,
    epochs=3,
)
# Page-level Streamlit settings: browser-tab title, full-width layout, and the
# sidebar opened on first load.
st.set_page_config(page_title="NewsLens", layout="wide", initial_sidebar_state="expanded")
def inject_styles() -> None:
    """Inject the dashboard's global stylesheet into the page.

    The CSS defines the ``--nl-*`` colour variables and the ``nl-*`` classes
    used by the raw-HTML snippets in this module, and restyles the sidebar,
    metric tiles and buttons. It is emitted through ``st.markdown`` with
    ``unsafe_allow_html=True`` so the ``<style>`` tag is not escaped.
    """
    stylesheet = """
<style>
:root {
--nl-ink: #15202b;
--nl-muted: #64748b;
--nl-line: #d8dee9;
--nl-panel: #ffffff;
--nl-soft: #f6f8fb;
--nl-blue: #2457c5;
--nl-teal: #087f8c;
--nl-red: #c24138;
--nl-green: #247857;
}
.block-container {
padding-top: 1.4rem;
padding-bottom: 2rem;
max-width: 1240px;
}
[data-testid="stSidebar"] {
background: #f7f9fc;
border-right: 1px solid var(--nl-line);
}
[data-testid="stSidebar"] h1,
[data-testid="stSidebar"] h2,
[data-testid="stSidebar"] h3 {
color: var(--nl-ink);
}
h1, h2, h3 {
letter-spacing: 0;
}
.nl-topbar {
border-bottom: 1px solid var(--nl-line);
padding: 0 0 1rem 0;
margin-bottom: 1.2rem;
}
.nl-kicker {
color: var(--nl-teal);
font-size: 0.78rem;
font-weight: 800;
letter-spacing: 0.08em;
text-transform: uppercase;
margin-bottom: 0.25rem;
}
.nl-title {
color: var(--nl-ink);
font-size: 2.25rem;
font-weight: 800;
line-height: 1.1;
margin: 0;
}
.nl-subtitle {
color: var(--nl-muted);
max-width: 780px;
margin-top: 0.55rem;
font-size: 1rem;
line-height: 1.55;
}
.nl-empty {
background: linear-gradient(135deg, #f7f9fc 0%, #eef6f2 100%);
border: 1px solid var(--nl-line);
border-radius: 8px;
padding: 2.2rem;
margin-top: 1rem;
}
.nl-empty h3 {
color: var(--nl-ink);
margin: 0 0 0.5rem 0;
}
.nl-empty p {
color: var(--nl-muted);
margin: 0;
line-height: 1.6;
}
.nl-section-heading {
color: var(--nl-ink);
font-size: 1.05rem;
font-weight: 800;
margin: 1.1rem 0 0.45rem 0;
}
.nl-source-heading {
border-top: 1px solid var(--nl-line);
color: var(--nl-ink);
display: flex;
justify-content: space-between;
align-items: center;
gap: 1rem;
padding-top: 1rem;
margin: 1.1rem 0 0.5rem 0;
}
.nl-source-heading h3 {
font-size: 1.05rem;
margin: 0;
}
.nl-source-meta {
color: var(--nl-muted);
font-size: 0.85rem;
white-space: nowrap;
}
.nl-insight {
border-left: 4px solid var(--nl-teal);
background: #f5fbfa;
padding: 0.9rem 1rem;
color: var(--nl-ink);
margin: 0.25rem 0 0.9rem 0;
}
.nl-insight strong {
color: var(--nl-teal);
}
.nl-model-panel {
background: #f7f9fc;
border: 1px solid var(--nl-line);
border-radius: 8px;
padding: 1rem;
margin-top: 0.8rem;
}
.nl-model-panel h3 {
color: var(--nl-ink);
font-size: 1rem;
margin: 0 0 0.6rem 0;
}
.nl-model-grid {
display: grid;
gap: 0.65rem;
grid-template-columns: repeat(4, minmax(0, 1fr));
}
.nl-model-stat {
background: #ffffff;
border: 1px solid var(--nl-line);
border-radius: 8px;
padding: 0.75rem;
}
.nl-model-stat span {
color: var(--nl-muted);
display: block;
font-size: 0.72rem;
font-weight: 800;
letter-spacing: 0.04em;
text-transform: uppercase;
}
.nl-model-stat strong {
color: var(--nl-ink);
display: block;
font-size: 1.25rem;
margin-top: 0.2rem;
}
div[data-testid="stMetric"] {
background: var(--nl-panel);
border: 1px solid var(--nl-line);
border-radius: 8px;
padding: 0.85rem 1rem;
}
div[data-testid="stMetric"] label {
color: var(--nl-muted);
}
.stButton > button {
background: var(--nl-blue);
border: 1px solid var(--nl-blue);
color: #ffffff;
font-weight: 700;
min-height: 2.6rem;
width: 100%;
}
.stButton > button:hover {
background: #1f4dac;
border-color: #1f4dac;
color: #ffffff;
}
@media (max-width: 760px) {
.nl-title {
font-size: 1.75rem;
}
.nl-empty {
padding: 1.4rem;
}
.nl-source-heading {
align-items: flex-start;
flex-direction: column;
gap: 0.2rem;
}
.nl-model-grid {
grid-template-columns: repeat(2, minmax(0, 1fr));
}
}
</style>
"""
    st.markdown(stylesheet, unsafe_allow_html=True)
def summarize_bias(summary: dict) -> tuple[int, int, float]:
    """Aggregate per-source counts into overall totals.

    Args:
        summary: Mapping of source name to a per-source dict. Each per-source
            dict is read for its ``"total"`` and ``"Biased"`` counts; a
            missing or ``None`` count is treated as 0. ``None`` or an empty
            mapping is treated as "no data".

    Returns:
        ``(total, biased, ratio)`` where ``ratio`` is ``biased / total`` as a
        float, or ``0.0`` when there are no articles (avoids dividing by zero).
    """
    per_source = list((summary or {}).values())
    # `or 0` covers both an absent key and an explicit null from upstream.
    total = sum(source.get("total") or 0 for source in per_source)
    biased = sum(source.get("Biased") or 0 for source in per_source)
    # Always a float, so the return value matches the annotation.
    ratio = biased / total if total else 0.0
    return total, biased, ratio
def insight_copy(ratio: float) -> str:
    """Build the one-line HTML insight shown under the headline metrics.

    The verdict is chosen from the same rounded percentage that is displayed,
    so a given headline figure always carries the same verdict. Choosing it
    from the raw ratio made 0.596 read "60% ... mixed" while 0.60 read
    "60% ... lean noticeably toward biased framing".

    Args:
        ratio: Share of biased articles, as a fraction of the total.

    Returns:
        An HTML fragment: a ``<strong>`` headline with the rounded percentage,
        followed by a verdict sentence (>= 60% biased, <= 40% mostly neutral,
        otherwise mixed).
    """
    percent = int(round(ratio * 100))
    headline = f"<strong>{percent}% biased coverage.</strong>"
    if percent >= 60:
        verdict = "The retrieved articles lean noticeably toward biased framing."
    elif percent <= 40:
        verdict = "The article set is mostly neutral by the current model."
    else:
        verdict = "The result set is mixed and worth comparing source by source."
    return f"{headline} {verdict}"
def render_model_panel() -> None:
    """Render the "Model Snapshot" panel: a grid of four stat cards.

    Values come from the module-level ``MODEL_EVAL`` mapping. Accuracy and
    weighted F1 are shown as percentages with one decimal, eval loss with
    three decimals, and the epoch count as-is.
    """
    stats = (
        ("Eval Accuracy", f'{MODEL_EVAL["eval_accuracy"]:.1%}'),
        ("Weighted F1", f'{MODEL_EVAL["eval_f1_weighted"]:.1%}'),
        ("Eval Loss", f'{MODEL_EVAL["eval_loss"]:.3f}'),
        ("Epochs", f'{MODEL_EVAL["epochs"]}'),
    )
    # One card per stat; each card ends with a newline so the cards are
    # laid out one markup line after another.
    cards = "".join(
        f'<div class="nl-model-stat">\n<span>{label}</span>\n<strong>{value}</strong>\n</div>\n'
        for label, value in stats
    )
    panel_html = (
        '\n<div class="nl-model-panel">\n'
        "<h3>Model Snapshot</h3>\n"
        '<div class="nl-model-grid">\n'
        f"{cards}"
        "</div>\n"
        "</div>\n"
    )
    st.markdown(panel_html, unsafe_allow_html=True)
def render_empty_state() -> None:
    """Render the landing card shown while no analysis is stored in the session."""
    empty_card = """
<div class="nl-empty">
<h3>Run a topic analysis</h3>
<p>
Search a public issue, company, policy, or event to compare retrieved articles by source,
model label, and confidence. Results will appear as a dashboard with source-level evidence.
</p>
</div>
"""
    st.markdown(empty_card, unsafe_allow_html=True)
# --- App bootstrap, sidebar controls and button handlers -------------------


def _show_last_ingest(slot) -> None:
    """Write the "Stored N article(s)..." message for the last ingest into *slot*.

    *slot* is a Streamlit placeholder. Nothing is written when no ingest has
    been recorded in the session yet.
    """
    last = st.session_state.last_ingest
    if last:
        slot.success(
            f"Stored {last['articles_stored']} "
            f"article(s) for {last['topic']}."
        )


def _clean_topic_or_stop(raw_topic: str) -> str:
    """Return the stripped topic, or show an error and halt this script run if it is blank."""
    cleaned = raw_topic.strip()
    if not cleaned:
        st.error("Topic cannot be empty.")
        st.stop()
    return cleaned


inject_styles()
inject_article_card_styles()

# NOTE(review): this is constructed on every script rerun. If building the
# client is expensive, consider caching it (e.g. st.cache_resource) -- confirm
# against DirectPipelineClient's implementation.
client = DirectPipelineClient()

# Session defaults so later reads never hit a missing attribute.
for state_key in ("analysis", "last_ingest"):
    if state_key not in st.session_state:
        st.session_state[state_key] = None

with st.sidebar:
    st.title("NewsLens")
    st.caption("News bias analysis dashboard")
    topic = st.text_input("Topic", value="climate change", max_chars=120)
    top_k = st.slider("Articles to retrieve", min_value=1, max_value=20, value=10)
    page_size = st.slider("Articles to ingest", min_value=5, max_value=50, value=15, step=5)
    with st.expander("Advanced", expanded=False):
        debug = st.checkbox("Show model internals", value=False)
    ingest = st.button("Ingest latest articles")
    analyze = st.button("Analyze topic", type="primary")

    # The sidebar renders before the ingest handler below runs. Reserve a
    # placeholder here so the handler can overwrite the message in the same
    # run, instead of showing the previous ingest (or nothing) until the next
    # interaction.
    ingest_status = st.empty()
    _show_last_ingest(ingest_status)

    st.divider()
    st.caption("Suggested searches")
    sample_topics = ["climate change", "electric vehicles", "AI regulation", "public health"]
    selected_sample = st.selectbox(
        "Sample topics",
        ["Use typed topic"] + sample_topics,
        label_visibility="collapsed",
    )
    # A picked sample takes precedence over whatever is typed in the text box.
    if selected_sample != "Use typed topic":
        topic = selected_sample

st.markdown(
    """
<div class="nl-topbar">
<div class="nl-kicker">Media Intelligence</div>
<h1 class="nl-title">NewsLens Bias Analyzer</h1>
<div class="nl-subtitle">
Compare how news sources frame a topic using retrieval, source metadata, and a text-bias classifier.
</div>
</div>
""",
    unsafe_allow_html=True,
)

if analyze:
    query = _clean_topic_or_stop(topic)
    with st.spinner("Analyzing coverage..."):
        try:
            st.session_state.analysis = client.analyze(query, top_k)
        except Exception as exc:
            # UI boundary: surface any client failure to the user and halt this run.
            st.error(str(exc))
            st.stop()

if ingest:
    query = _clean_topic_or_stop(topic)
    with st.spinner("Fetching and indexing articles..."):
        try:
            st.session_state.last_ingest = client.ingest(query, page_size)
            # Refresh the sidebar message now, so it reflects this ingest even
            # if the follow-up analysis below fails.
            _show_last_ingest(ingest_status)
            st.session_state.analysis = client.analyze(query, top_k)
        except Exception as exc:
            # UI boundary: surface any client failure to the user and halt this run.
            st.error(str(exc))
            st.stop()
# --- Headline metrics -------------------------------------------------------
# With no analysis stored in the session, show the landing card and end the run.
data = st.session_state.analysis
if data is None:
    render_empty_state()
    st.stop()

summary = data.get("summary", {})
results = data.get("results", [])

total, biased, bias_ratio = summarize_bias(summary)
# Clamp at zero in case the biased count exceeds the reported total.
neutral = max(total - biased, 0)
source_count = len(summary)

metric_cols = st.columns(4)
headline_metrics = (
    ("Articles", total),
    ("Sources", source_count),
    ("Biased", biased),
    ("Not biased", neutral),
)
for metric_col, (metric_label, metric_value) in zip(metric_cols, headline_metrics):
    metric_col.metric(metric_label, metric_value)

insight_html = f"""<div class="nl-insight">{insight_copy(bias_ratio)}</div>"""
st.markdown(insight_html, unsafe_allow_html=True)
# --- Tabs and Overview tab --------------------------------------------------


def _plot_or_warn(figure, empty_message: str) -> None:
    """Draw *figure* full-width without the Plotly mode bar, or warn when it is falsy."""
    if not figure:
        st.warning(empty_message)
        return
    st.plotly_chart(figure, use_container_width=True, config={"displayModeBar": False})


tab_overview, tab_articles, tab_model = st.tabs(["Overview", "Articles", "Model"])

with tab_overview:
    # Per-source distribution, built from the summary mapping.
    st.markdown('<div class="nl-section-heading">Bias Distribution by Source</div>', unsafe_allow_html=True)
    chart = build_bias_distribution_chart(summary)
    _plot_or_warn(chart, "No chart data available.")

    # Breakdown by source lean, built from the individual results.
    st.markdown('<div class="nl-section-heading">Bias by Political Lean</div>', unsafe_allow_html=True)
    st.caption("Are left-leaning or right-leaning sources more biased on this topic?")
    lean_chart = build_lean_bias_chart(results)
    _plot_or_warn(lean_chart, "Not enough source lean data.")
# --- Articles tab -----------------------------------------------------------


def _article_field(article: dict, key: str, fallback):
    """Return ``article[key]``, using *fallback* when the key is missing or ``None``.

    ``dict.get(key, default)`` only covers a missing key. An explicit ``None``
    would otherwise reach ``sorted()`` and raise ``TypeError`` when compared
    with real strings or numbers.
    """
    value = article.get(key)
    return fallback if value is None else value


with tab_articles:
    st.markdown('<div class="nl-section-heading">Evidence Articles</div>', unsafe_allow_html=True)
    if not results:
        st.warning("No articles found.")
    else:
        # Filter options are the distinct values present in the result set.
        labels = sorted({_article_field(article, "text_label", "Unknown") for article in results})
        leans = sorted({_article_field(article, "source_bias", "Unknown") for article in results})

        filter_cols = st.columns([1, 1, 1])
        selected_label = filter_cols[0].selectbox("Classification", ["All"] + labels)
        selected_lean = filter_cols[1].selectbox("Source lean", ["All"] + leans)
        sort_by = filter_cols[2].selectbox(
            "Sort by",
            ["Confidence", "Similarity", "Source"],
        )

        filtered_results = results
        if selected_label != "All":
            filtered_results = [
                article for article in filtered_results
                if _article_field(article, "text_label", "Unknown") == selected_label
            ]
        if selected_lean != "All":
            filtered_results = [
                article for article in filtered_results
                if _article_field(article, "source_bias", "Unknown") == selected_lean
            ]

        # Sort option -> (article key, fallback value, descending?). Scores
        # sort high-to-low; sources sort alphabetically.
        sort_specs = {
            "Confidence": ("confidence", 0, True),
            "Similarity": ("similarity_score", 0, True),
            "Source": ("source", "Unknown source", False),
        }
        sort_key, sort_fallback, descending = sort_specs[sort_by]
        filtered_results = sorted(
            filtered_results,
            key=lambda article: _article_field(article, sort_key, sort_fallback),
            reverse=descending,
        )

        st.caption(f"Showing {len(filtered_results)} of {len(results)} retrieved articles.")
        if not filtered_results:
            st.warning("No articles match the selected filters.")
        else:
            # Group by source while keeping the sorted order: a source appears
            # where its first article falls in the sort.
            grouped = defaultdict(list)
            for article in filtered_results:
                grouped[_article_field(article, "source", "Unknown source")].append(article)

            for source, articles in grouped.items():
                # The group's lean is taken from its first article.
                source_bias = _article_field(articles[0], "source_bias", "Unknown")
                # Source names come from article data, so escape them before
                # embedding in raw HTML.
                st.markdown(
                    f"""
<div class="nl-source-heading">
<h3>{escape(str(source))}</h3>
<div class="nl-source-meta">{escape(str(source_bias))} source bias | {len(articles)} article(s)</div>
</div>
""",
                    unsafe_allow_html=True,
                )
                for article in articles:
                    render_article_card(article, debug=debug)
# --- Model tab --------------------------------------------------------------
with tab_model:
    render_model_panel()
    st.markdown('<div class="nl-section-heading">Training Run</div>', unsafe_allow_html=True)

    run_summary = (
        "RoBERTa was fine-tuned for binary text-bias classification with LoRA. "
        "The best supplied run finished at 85.44% evaluation accuracy and 85.46% weighted F1."
    )
    st.write(run_summary)

    # Per-epoch evaluation figures, one table row per epoch.
    history_columns = ("Epoch", "Eval loss", "Accuracy", "Weighted F1")
    history_values = (
        (1, 0.3576, 0.8432, 0.8434),
        (2, 0.3656, 0.8512, 0.8512),
        (3, 0.3933, 0.8544, 0.8546),
    )
    epoch_rows = [dict(zip(history_columns, epoch_values)) for epoch_values in history_values]
    st.dataframe(epoch_rows, hide_index=True, use_container_width=True)

    disclaimer = (
        "Use these labels as decision support, not ground truth. Bias classification is sensitive "
        "to dataset definitions, article excerpts, and source coverage."
    )
    st.info(disclaimer)