Spaces:

Jitender20
/

newslens

Sleeping

App Files Files Community

newslens / src /ui /app.py

Jitender20

Add NewsLens Streamlit app

208266a 9 days ago

raw

history blame contribute delete

16.6 kB

	from collections import defaultdict
	from html import escape

	import streamlit as st

	from src.ui.components.article_card import inject_article_card_styles, render_article_card
	from src.ui.components.charts import build_bias_distribution_chart, build_lean_bias_chart
	from src.ui.services.api_client import NewsLensClient
	from src.ui.services.api_client import DirectPipelineClient

	# Headline metrics for the classifier shown on the "Model" tab.
	# The values mirror the epoch-3 row of the training-run table rendered further
	# down in this file; keep the two in sync when a new run is supplied.
	MODEL_EVAL = {
	    # Evaluation-split metrics.
	    "eval_accuracy": 0.8544,
	    "eval_f1_weighted": 0.8546,
	    "eval_loss": 0.3933,
	    # Training-side figures; "train_loss" is stored here but is not read by
	    # render_model_panel.
	    "train_loss": 0.3888,
	    "epochs": 3,
	}


	# Page-level configuration: browser-tab title, wide layout, sidebar open by default.
	st.set_page_config(page_title="NewsLens", layout="wide", initial_sidebar_state="expanded")


	def inject_styles() -> None:
	    """Inject the dashboard's global stylesheet into the page.

	    The CSS defines the ``--nl-*`` colour variables and the ``.nl-*`` classes
	    used by the HTML snippets this module renders, and restyles the sidebar,
	    metric tiles and buttons. It is emitted through ``st.markdown`` with
	    ``unsafe_allow_html=True`` so the ``<style>`` element is passed through.
	    """
	    stylesheet = """
	<style>
	:root {
	--nl-ink: #15202b;
	--nl-muted: #64748b;
	--nl-line: #d8dee9;
	--nl-panel: #ffffff;
	--nl-soft: #f6f8fb;
	--nl-blue: #2457c5;
	--nl-teal: #087f8c;
	--nl-red: #c24138;
	--nl-green: #247857;
	}

	.block-container {
	padding-top: 1.4rem;
	padding-bottom: 2rem;
	max-width: 1240px;
	}

	[data-testid="stSidebar"] {
	background: #f7f9fc;
	border-right: 1px solid var(--nl-line);
	}

	[data-testid="stSidebar"] h1,
	[data-testid="stSidebar"] h2,
	[data-testid="stSidebar"] h3 {
	color: var(--nl-ink);
	}

	h1, h2, h3 {
	letter-spacing: 0;
	}

	.nl-topbar {
	border-bottom: 1px solid var(--nl-line);
	padding: 0 0 1rem 0;
	margin-bottom: 1.2rem;
	}

	.nl-kicker {
	color: var(--nl-teal);
	font-size: 0.78rem;
	font-weight: 800;
	letter-spacing: 0.08em;
	text-transform: uppercase;
	margin-bottom: 0.25rem;
	}

	.nl-title {
	color: var(--nl-ink);
	font-size: 2.25rem;
	font-weight: 800;
	line-height: 1.1;
	margin: 0;
	}

	.nl-subtitle {
	color: var(--nl-muted);
	max-width: 780px;
	margin-top: 0.55rem;
	font-size: 1rem;
	line-height: 1.55;
	}

	.nl-empty {
	background: linear-gradient(135deg, #f7f9fc 0%, #eef6f2 100%);
	border: 1px solid var(--nl-line);
	border-radius: 8px;
	padding: 2.2rem;
	margin-top: 1rem;
	}

	.nl-empty h3 {
	color: var(--nl-ink);
	margin: 0 0 0.5rem 0;
	}

	.nl-empty p {
	color: var(--nl-muted);
	margin: 0;
	line-height: 1.6;
	}

	.nl-section-heading {
	color: var(--nl-ink);
	font-size: 1.05rem;
	font-weight: 800;
	margin: 1.1rem 0 0.45rem 0;
	}

	.nl-source-heading {
	border-top: 1px solid var(--nl-line);
	color: var(--nl-ink);
	display: flex;
	justify-content: space-between;
	align-items: center;
	gap: 1rem;
	padding-top: 1rem;
	margin: 1.1rem 0 0.5rem 0;
	}

	.nl-source-heading h3 {
	font-size: 1.05rem;
	margin: 0;
	}

	.nl-source-meta {
	color: var(--nl-muted);
	font-size: 0.85rem;
	white-space: nowrap;
	}

	.nl-insight {
	border-left: 4px solid var(--nl-teal);
	background: #f5fbfa;
	padding: 0.9rem 1rem;
	color: var(--nl-ink);
	margin: 0.25rem 0 0.9rem 0;
	}

	.nl-insight strong {
	color: var(--nl-teal);
	}

	.nl-model-panel {
	background: #f7f9fc;
	border: 1px solid var(--nl-line);
	border-radius: 8px;
	padding: 1rem;
	margin-top: 0.8rem;
	}

	.nl-model-panel h3 {
	color: var(--nl-ink);
	font-size: 1rem;
	margin: 0 0 0.6rem 0;
	}

	.nl-model-grid {
	display: grid;
	gap: 0.65rem;
	grid-template-columns: repeat(4, minmax(0, 1fr));
	}

	.nl-model-stat {
	background: #ffffff;
	border: 1px solid var(--nl-line);
	border-radius: 8px;
	padding: 0.75rem;
	}

	.nl-model-stat span {
	color: var(--nl-muted);
	display: block;
	font-size: 0.72rem;
	font-weight: 800;
	letter-spacing: 0.04em;
	text-transform: uppercase;
	}

	.nl-model-stat strong {
	color: var(--nl-ink);
	display: block;
	font-size: 1.25rem;
	margin-top: 0.2rem;
	}

	div[data-testid="stMetric"] {
	background: var(--nl-panel);
	border: 1px solid var(--nl-line);
	border-radius: 8px;
	padding: 0.85rem 1rem;
	}

	div[data-testid="stMetric"] label {
	color: var(--nl-muted);
	}

	.stButton > button {
	background: var(--nl-blue);
	border: 1px solid var(--nl-blue);
	color: #ffffff;
	font-weight: 700;
	min-height: 2.6rem;
	width: 100%;
	}

	.stButton > button:hover {
	background: #1f4dac;
	border-color: #1f4dac;
	color: #ffffff;
	}

	@media (max-width: 760px) {
	.nl-title {
	font-size: 1.75rem;
	}

	.nl-empty {
	padding: 1.4rem;
	}

	.nl-source-heading {
	align-items: flex-start;
	flex-direction: column;
	gap: 0.2rem;
	}

	.nl-model-grid {
	grid-template-columns: repeat(2, minmax(0, 1fr));
	}
	}
	</style>
	"""
	    st.markdown(stylesheet, unsafe_allow_html=True)


	def summarize_bias(summary: dict) -> tuple[int, int, float]:
	    """Aggregate per-source counts into overall totals.

	    Args:
	        summary: Mapping whose values are dict-like per-source records. The
	            ``"total"`` and ``"Biased"`` entries are read; a missing entry
	            counts as 0.

	    Returns:
	        ``(total, biased, ratio)`` where ``ratio`` is ``biased / total``, or
	        ``0`` when ``total`` is zero so no division is attempted.
	    """
	    article_count = 0
	    biased_count = 0
	    for per_source in summary.values():
	        article_count += per_source.get("total", 0)
	        biased_count += per_source.get("Biased", 0)

	    # Guard the division: an empty (or all-zero) summary yields a ratio of 0.
	    if not article_count:
	        return article_count, biased_count, 0
	    return article_count, biased_count, biased_count / article_count


	def insight_copy(ratio: float) -> str:
	    """Build the HTML one-liner that summarises the overall bias ratio.

	    Args:
	        ratio: Share of biased articles, expressed as a fraction.

	    Returns:
	        An HTML fragment: a ``<strong>`` headline with the ratio rounded to a
	        whole percent, followed by one of three sentences chosen by the
	        unrounded ratio (``>= 0.6``, ``<= 0.4``, or anything in between).
	    """
	    percent = int(round(ratio * 100))
	    headline = f"<strong>{percent}% biased coverage.</strong>"

	    # The thresholds compare the raw ratio, not the rounded percentage.
	    if ratio >= 0.6:
	        verdict = "The retrieved articles lean noticeably toward biased framing."
	    elif ratio <= 0.4:
	        verdict = "The article set is mostly neutral by the current model."
	    else:
	        verdict = "The result set is mixed and worth comparing source by source."
	    return f"{headline} {verdict}"


	def render_model_panel() -> None:
	    """Render the "Model Snapshot" card from the ``MODEL_EVAL`` constants.

	    Accuracy and weighted F1 are shown as percentages with one decimal, the
	    evaluation loss with three decimals, and the epoch count as-is. The card
	    is emitted as raw HTML through ``st.markdown``.
	    """
	    # Format each figure up front so the template below is pure markup.
	    accuracy = f"{MODEL_EVAL['eval_accuracy']:.1%}"
	    weighted_f1 = f"{MODEL_EVAL['eval_f1_weighted']:.1%}"
	    eval_loss = f"{MODEL_EVAL['eval_loss']:.3f}"
	    epochs = MODEL_EVAL["epochs"]

	    panel_html = f"""
	<div class="nl-model-panel">
	<h3>Model Snapshot</h3>
	<div class="nl-model-grid">
	<div class="nl-model-stat">
	<span>Eval Accuracy</span>
	<strong>{accuracy}</strong>
	</div>
	<div class="nl-model-stat">
	<span>Weighted F1</span>
	<strong>{weighted_f1}</strong>
	</div>
	<div class="nl-model-stat">
	<span>Eval Loss</span>
	<strong>{eval_loss}</strong>
	</div>
	<div class="nl-model-stat">
	<span>Epochs</span>
	<strong>{epochs}</strong>
	</div>
	</div>
	</div>
	"""
	    st.markdown(panel_html, unsafe_allow_html=True)


	def render_empty_state() -> None:
	    """Render the placeholder card shown when no analysis is available yet."""
	    empty_html = """
	<div class="nl-empty">
	<h3>Run a topic analysis</h3>
	<p>
	Search a public issue, company, policy, or event to compare retrieved articles by source,
	model label, and confidence. Results will appear as a dashboard with source-level evidence.
	</p>
	</div>
	"""
	    st.markdown(empty_html, unsafe_allow_html=True)


	# ---------------------------------------------------------------------------
	# Page script. Streamlit re-executes this module on every widget interaction,
	# so anything that must survive a rerun lives in st.session_state.
	# ---------------------------------------------------------------------------
	inject_styles()
	inject_article_card_styles()
	client = DirectPipelineClient()

	# Seed the session keys once: the last analysis payload and the last ingest receipt.
	if "analysis" not in st.session_state:
	    st.session_state.analysis = None
	if "last_ingest" not in st.session_state:
	    st.session_state.last_ingest = None


	def _show_ingest_status(slot) -> None:
	    """Write the most recent ingest receipt into ``slot`` if one is stored."""
	    last_ingest = st.session_state.last_ingest
	    if last_ingest:
	        slot.success(
	            f"Stored {last_ingest['articles_stored']} "
	            f"article(s) for {last_ingest['topic']}."
	        )


	# NOTE(review): the nesting below (what sits inside the sidebar / expander) is
	# reconstructed from the blank-line grouping of the statements -- confirm
	# against the deployed layout.
	with st.sidebar:
	    st.title("NewsLens")
	    st.caption("News bias analysis dashboard")

	    topic = st.text_input("Topic", value="climate change", max_chars=120)
	    top_k = st.slider("Articles to retrieve", min_value=1, max_value=20, value=10)
	    page_size = st.slider("Articles to ingest", min_value=5, max_value=50, value=15, step=5)

	    with st.expander("Advanced", expanded=False):
	        debug = st.checkbox("Show model internals", value=False)

	    ingest = st.button("Ingest latest articles")
	    analyze = st.button("Analyze topic", type="primary")

	    # The ingest itself runs further down, after the sidebar has been drawn.
	    # A placeholder lets the receipt be refreshed in this same run instead of
	    # showing a stale (or no) message until the next interaction.
	    ingest_status = st.empty()
	    _show_ingest_status(ingest_status)

	    st.divider()
	    st.caption("Suggested searches")
	    sample_topics = ["climate change", "electric vehicles", "AI regulation", "public health"]
	    selected_sample = st.selectbox(
	        "Sample topics",
	        ["Use typed topic"] + sample_topics,
	        label_visibility="collapsed",
	    )

	    # A chosen sample overrides whatever is typed in the text box.
	    if selected_sample != "Use typed topic":
	        topic = selected_sample

	st.markdown(
	    """
	<div class="nl-topbar">
	<div class="nl-kicker">Media Intelligence</div>
	<h1 class="nl-title">NewsLens Bias Analyzer</h1>
	<div class="nl-subtitle">
	Compare how news sources frame a topic using retrieval, source metadata, and a text-bias classifier.
	</div>
	</div>
	""",
	    unsafe_allow_html=True,
	)

	# Both buttons share the same validation and error handling; "ingest" simply
	# stores fresh articles first and then runs the same analysis.
	if analyze or ingest:
	    query = topic.strip()
	    if not query:
	        st.error("Topic cannot be empty.")
	        st.stop()

	    spinner_text = "Fetching and indexing articles..." if ingest else "Analyzing coverage..."
	    with st.spinner(spinner_text):
	        try:
	            if ingest:
	                st.session_state.last_ingest = client.ingest(query, page_size)
	            st.session_state.analysis = client.analyze(query, top_k)
	        except Exception as exc:
	            # Top-level UI boundary: surface the failure to the user and halt this run.
	            st.error(str(exc))
	            st.stop()

	    if ingest:
	        _show_ingest_status(ingest_status)

	data = st.session_state.analysis

	if data is None:
	    render_empty_state()
	    st.stop()

	# The payload is read as a mapping with "summary" (per-source counts) and
	# "results" (a list of article records); both default to empty.
	summary = data.get("summary", {})
	results = data.get("results", [])
	total, biased, bias_ratio = summarize_bias(summary)
	neutral = max(total - biased, 0)
	source_count = len(summary)

	metric_cols = st.columns(4)
	metric_cols[0].metric("Articles", total)
	metric_cols[1].metric("Sources", source_count)
	metric_cols[2].metric("Biased", biased)
	metric_cols[3].metric("Not biased", neutral)

	st.markdown(
	    f"""<div class="nl-insight">{insight_copy(bias_ratio)}</div>""",
	    unsafe_allow_html=True,
	)

	tab_overview, tab_articles, tab_model = st.tabs(["Overview", "Articles", "Model"])

	with tab_overview:
	    st.markdown('<div class="nl-section-heading">Bias Distribution by Source</div>', unsafe_allow_html=True)
	    chart = build_bias_distribution_chart(summary)
	    if chart:
	        st.plotly_chart(chart, use_container_width=True, config={"displayModeBar": False})
	    else:
	        st.warning("No chart data available.")

	    st.markdown('<div class="nl-section-heading">Bias by Political Lean</div>', unsafe_allow_html=True)
	    st.caption("Are left-leaning or right-leaning sources more biased on this topic?")
	    lean_chart = build_lean_bias_chart(results)
	    if lean_chart:
	        st.plotly_chart(lean_chart, use_container_width=True, config={"displayModeBar": False})
	    else:
	        st.warning("Not enough source lean data.")

	with tab_articles:
	    st.markdown('<div class="nl-section-heading">Evidence Articles</div>', unsafe_allow_html=True)

	    if not results:
	        st.warning("No articles found.")
	    else:
	        # Filter options are derived from the values present in this result set.
	        labels = sorted({article.get("text_label", "Unknown") for article in results})
	        leans = sorted({article.get("source_bias", "Unknown") for article in results})

	        filter_cols = st.columns([1, 1, 1])
	        selected_label = filter_cols[0].selectbox("Classification", ["All"] + labels)
	        selected_lean = filter_cols[1].selectbox("Source lean", ["All"] + leans)
	        sort_by = filter_cols[2].selectbox(
	            "Sort by",
	            ["Confidence", "Similarity", "Source"],
	        )

	        filtered_results = results
	        if selected_label != "All":
	            filtered_results = [
	                article for article in filtered_results
	                if article.get("text_label", "Unknown") == selected_label
	            ]
	        if selected_lean != "All":
	            filtered_results = [
	                article for article in filtered_results
	                if article.get("source_bias", "Unknown") == selected_lean
	            ]

	        # Numeric orderings are highest-first; the source ordering is ascending.
	        if sort_by == "Confidence":
	            filtered_results = sorted(
	                filtered_results,
	                key=lambda article: article.get("confidence", 0),
	                reverse=True,
	            )
	        elif sort_by == "Similarity":
	            filtered_results = sorted(
	                filtered_results,
	                key=lambda article: article.get("similarity_score", 0),
	                reverse=True,
	            )
	        else:
	            filtered_results = sorted(
	                filtered_results,
	                key=lambda article: article.get("source", "Unknown source"),
	            )

	        st.caption(f"Showing {len(filtered_results)} of {len(results)} retrieved articles.")

	        if not filtered_results:
	            st.warning("No articles match the selected filters.")
	        else:
	            # Group by source while keeping the sorted order; sources appear in
	            # the order of their first article.
	            grouped = defaultdict(list)
	            for article in filtered_results:
	                grouped[article.get("source", "Unknown source")].append(article)

	            for source, articles in grouped.items():
	                # The first article's lean is shown as the lean of the whole group.
	                source_bias = articles[0].get("source_bias", "Unknown")
	                # Source name and lean come from article data, so they are
	                # HTML-escaped before being placed in raw markup.
	                st.markdown(
	                    f"""
	<div class="nl-source-heading">
	<h3>{escape(str(source))}</h3>
	<div class="nl-source-meta">{escape(str(source_bias))} source bias | {len(articles)} article(s)</div>
	</div>
	""",
	                    unsafe_allow_html=True,
	                )
	                for article in articles:
	                    render_article_card(article, debug=debug)

	with tab_model:
	    render_model_panel()
	    st.markdown('<div class="nl-section-heading">Training Run</div>', unsafe_allow_html=True)
	    st.write(
	        "RoBERTa was fine-tuned for binary text-bias classification with LoRA. "
	        "The best supplied run finished at 85.44% evaluation accuracy and 85.46% weighted F1."
	    )
	    st.dataframe(
	        [
	            {"Epoch": 1, "Eval loss": 0.3576, "Accuracy": 0.8432, "Weighted F1": 0.8434},
	            {"Epoch": 2, "Eval loss": 0.3656, "Accuracy": 0.8512, "Weighted F1": 0.8512},
	            {"Epoch": 3, "Eval loss": 0.3933, "Accuracy": 0.8544, "Weighted F1": 0.8546},
	        ],
	        hide_index=True,
	        use_container_width=True,
	    )
	    st.info(
	        "Use these labels as decision support, not ground truth. Bias classification is sensitive "
	        "to dataset definitions, article excerpts, and source coverage."
	    )