from collections import defaultdict
from html import escape
import streamlit as st
from src.ui.components.article_card import inject_article_card_styles, render_article_card
from src.ui.components.charts import build_bias_distribution_chart, build_lean_bias_chart
from src.ui.services.api_client import DirectPipelineClient, NewsLensClient
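# Evaluation metrics from the classifier's final fine-tuning run, surfaced in the model snapshot panel and the Model tab.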
MODEL_EVAL = {
"eval_accuracy": 0.8544,
"eval_f1_weighted": 0.8546,
"eval_loss": 0.3933,
"train_loss": 0.3888,
"epochs": 3,
}
st.set_page_config(
page_title="NewsLens",
layout="wide",
initial_sidebar_state="expanded",
)
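# Hook for injecting page-level CSS via st.markdown (unsafe_allow_html).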
def inject_styles() -> None:
st.markdown(
"""
""",
unsafe_allow_html=True,
)
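# Collapse the per-source summary into a total article count, biased count, and biased-coverage ratio.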
def summarize_bias(summary: dict) -> tuple[int, int, float]:
total = sum(source.get("total", 0) for source in summary.values())
biased = sum(source.get("Biased", 0) for source in summary.values())
ratio = biased / total if total else 0.0
return total, biased, ratio
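# Turn the biased-coverage ratio into the one-line takeaway shown above the tabs.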
def insight_copy(ratio: float) -> str:
percent = int(round(ratio * 100))
if ratio >= 0.6:
return f"{percent}% biased coverage. The retrieved articles lean noticeably toward biased framing."
if ratio <= 0.4:
return f"{percent}% biased coverage. The article set is mostly neutral by the current model."
return f"{percent}% biased coverage. The result set is mixed and worth comparing source by source."
def render_model_panel() -> None:
st.markdown(
f"""
Model Snapshot
Eval Accuracy
{MODEL_EVAL["eval_accuracy"]:.1%}
Weighted F1
{MODEL_EVAL["eval_f1_weighted"]:.1%}
Eval Loss
{MODEL_EVAL["eval_loss"]:.3f}
Epochs
{MODEL_EVAL["epochs"]}
""",
unsafe_allow_html=True,
)
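# Shown when no analysis has been run yet.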
def render_empty_state() -> None:
st.markdown(
"""
Run a topic analysis
Search a public issue, company, policy, or event to compare retrieved articles by source,
model label, and confidence. Results will appear as a dashboard with source-level evidence.
""",
unsafe_allow_html=True,
)
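# Page setup: inject styles, create the pipeline client, and initialize session state defaults.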
inject_styles()
inject_article_card_styles()
client = DirectPipelineClient()
if "analysis" not in st.session_state:
st.session_state.analysis = None
if "last_ingest" not in st.session_state:
st.session_state.last_ingest = None
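# Sidebar controls: topic input, retrieval and ingest settings, and suggested sample topics.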
with st.sidebar:
st.title("NewsLens")
st.caption("News bias analysis dashboard")
topic = st.text_input("Topic", value="climate change", max_chars=120)
top_k = st.slider("Articles to retrieve", min_value=1, max_value=20, value=10)
page_size = st.slider("Articles to ingest", min_value=5, max_value=50, value=15, step=5)
with st.expander("Advanced", expanded=False):
debug = st.checkbox("Show model internals", value=False)
ingest = st.button("Ingest latest articles")
analyze = st.button("Analyze topic", type="primary")
if st.session_state.last_ingest:
st.success(
f"Stored {st.session_state.last_ingest['articles_stored']} "
f"article(s) for {st.session_state.last_ingest['topic']}."
)
st.divider()
st.caption("Suggested searches")
sample_topics = ["climate change", "electric vehicles", "AI regulation", "public health"]
selected_sample = st.selectbox(
"Sample topics",
["Use typed topic"] + sample_topics,
label_visibility="collapsed",
)
if selected_sample != "Use typed topic":
topic = selected_sample
st.markdown(
"""
Media Intelligence
NewsLens Bias Analyzer
Compare how news sources frame a topic using retrieval, source metadata, and a text-bias classifier.
""",
unsafe_allow_html=True,
)
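# "Analyze topic": run the pipeline on the current topic and cache the result in session state.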
if analyze:
if not topic.strip():
st.error("Topic cannot be empty.")
st.stop()
with st.spinner("Analyzing coverage..."):
try:
st.session_state.analysis = client.analyze(topic.strip(), top_k)
except Exception as exc:
st.error(str(exc))
st.stop()
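# "Ingest latest articles": fetch and index fresh articles, then re-run the analysis.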
if ingest:
if not topic.strip():
st.error("Topic cannot be empty.")
st.stop()
with st.spinner("Fetching and indexing articles..."):
try:
st.session_state.last_ingest = client.ingest(topic.strip(), page_size)
st.session_state.analysis = client.analyze(topic.strip(), top_k)
except Exception as exc:
st.error(str(exc))
st.stop()
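# Everything below renders the dashboard; bail out early if no analysis is cached yet.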
data = st.session_state.analysis
if data is None:
render_empty_state()
st.stop()
summary = data.get("summary", {})
results = data.get("results", [])
total, biased, bias_ratio = summarize_bias(summary)
neutral = max(total - biased, 0)
source_count = len(summary)
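# Headline metrics across the retrieved article set.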
metric_cols = st.columns(4)
metric_cols[0].metric("Articles", total)
metric_cols[1].metric("Sources", source_count)
metric_cols[2].metric("Biased", biased)
metric_cols[3].metric("Not biased", neutral)
st.markdown(
f"""{insight_copy(bias_ratio)}
""",
unsafe_allow_html=True,
)
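# Tabbed layout: overview charts, filterable evidence articles, and model details.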
tab_overview, tab_articles, tab_model = st.tabs(["Overview", "Articles", "Model"])
with tab_overview:
st.markdown('#### Bias Distribution by Source', unsafe_allow_html=True)
chart = build_bias_distribution_chart(summary)
if chart:
st.plotly_chart(chart, use_container_width=True, config={"displayModeBar": False})
else:
st.warning("No chart data available.")
st.markdown('#### Bias by Political Lean', unsafe_allow_html=True)
st.caption("Are left-leaning or right-leaning sources more biased on this topic?")
lean_chart = build_lean_bias_chart(results)
if lean_chart:
st.plotly_chart(lean_chart, use_container_width=True, config={"displayModeBar": False})
else:
st.warning("Not enough source lean data.")
with tab_articles:
st.markdown('#### Evidence Articles', unsafe_allow_html=True)
if not results:
st.warning("No articles found.")
else:
labels = sorted({article.get("text_label", "Unknown") for article in results})
leans = sorted({article.get("source_bias", "Unknown") for article in results})
filter_cols = st.columns([1, 1, 1])
selected_label = filter_cols[0].selectbox("Classification", ["All"] + labels)
selected_lean = filter_cols[1].selectbox("Source lean", ["All"] + leans)
sort_by = filter_cols[2].selectbox(
"Sort by",
["Confidence", "Similarity", "Source"],
)
filtered_results = results
if selected_label != "All":
filtered_results = [
article for article in filtered_results
if article.get("text_label", "Unknown") == selected_label
]
if selected_lean != "All":
filtered_results = [
article for article in filtered_results
if article.get("source_bias", "Unknown") == selected_lean
]
if sort_by == "Confidence":
filtered_results = sorted(
filtered_results,
key=lambda article: article.get("confidence", 0),
reverse=True,
)
elif sort_by == "Similarity":
filtered_results = sorted(
filtered_results,
key=lambda article: article.get("similarity_score", 0),
reverse=True,
)
else:
filtered_results = sorted(
filtered_results,
key=lambda article: article.get("source", "Unknown source"),
)
st.caption(f"Showing {len(filtered_results)} of {len(results)} retrieved articles.")
if not filtered_results:
st.warning("No articles match the selected filters.")
else:
grouped = defaultdict(list)
for article in filtered_results:
grouped[article.get("source", "Unknown source")].append(article)
for source, articles in grouped.items():
source_bias = articles[0].get("source_bias", "Unknown")
st.markdown(
f"""
{escape(str(source))}
{escape(str(source_bias))} source bias | {len(articles)} article(s)
""",
unsafe_allow_html=True,
)
for article in articles:
render_article_card(article, debug=debug)
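# Model tab: evaluation snapshot, per-epoch training metrics, and interpretation caveats.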
with tab_model:
render_model_panel()
st.markdown('#### Training Run', unsafe_allow_html=True)
st.write(
"RoBERTa was fine-tuned for binary text-bias classification with LoRA. "
"The best supplied run finished at 85.44% evaluation accuracy and 85.46% weighted F1."
)
st.dataframe(
[
{"Epoch": 1, "Eval loss": 0.3576, "Accuracy": 0.8432, "Weighted F1": 0.8434},
{"Epoch": 2, "Eval loss": 0.3656, "Accuracy": 0.8512, "Weighted F1": 0.8512},
{"Epoch": 3, "Eval loss": 0.3933, "Accuracy": 0.8544, "Weighted F1": 0.8546},
],
hide_index=True,
use_container_width=True,
)
st.info(
"Use these labels as decision support, not ground truth. Bias classification is sensitive "
"to dataset definitions, article excerpts, and source coverage."
)