from collections import defaultdict
from html import escape

import streamlit as st

from src.ui.components.article_card import inject_article_card_styles, render_article_card
from src.ui.components.charts import build_bias_distribution_chart, build_lean_bias_chart
from src.ui.services.api_client import DirectPipelineClient, NewsLensClient

# Metrics from the fine-tuning run surfaced in the Model tab.
MODEL_EVAL = {
    "eval_accuracy": 0.8544,
    "eval_f1_weighted": 0.8546,
    "eval_loss": 0.3933,
    "train_loss": 0.3888,
    "epochs": 3,
}

st.set_page_config(
    page_title="NewsLens",
    layout="wide",
    initial_sidebar_state="expanded",
)


def inject_styles() -> None:
    st.markdown(
        """
        """,
        unsafe_allow_html=True,
    )


def summarize_bias(summary: dict) -> tuple[int, int, float]:
    """Return total articles, biased articles, and the biased ratio across all sources."""
    total = sum(source.get("total", 0) for source in summary.values())
    biased = sum(source.get("Biased", 0) for source in summary.values())
    ratio = biased / total if total else 0.0
    return total, biased, ratio


def insight_copy(ratio: float) -> str:
    """Build the one-line takeaway shown above the tabs."""
    percent = int(round(ratio * 100))
    if ratio >= 0.6:
        return f"{percent}% biased coverage. The retrieved articles lean noticeably toward biased framing."
    if ratio <= 0.4:
        return f"{percent}% biased coverage. The article set is mostly neutral by the current model."
    return f"{percent}% biased coverage. The result set is mixed and worth comparing source by source."


def render_model_panel() -> None:
    st.markdown(
        f"""

Model Snapshot

Eval Accuracy {MODEL_EVAL["eval_accuracy"]:.1%}
Weighted F1 {MODEL_EVAL["eval_f1_weighted"]:.1%}
Eval Loss {MODEL_EVAL["eval_loss"]:.3f}
Epochs {MODEL_EVAL["epochs"]}
""", unsafe_allow_html=True, ) def render_empty_state() -> None: st.markdown( """

Run a topic analysis

Search a public issue, company, policy, or event to compare retrieved articles by source, model label, and confidence. Results will appear as a dashboard with source-level evidence.

""", unsafe_allow_html=True, ) inject_styles() inject_article_card_styles() client = DirectPipelineClient() if "analysis" not in st.session_state: st.session_state.analysis = None if "last_ingest" not in st.session_state: st.session_state.last_ingest = None with st.sidebar: st.title("NewsLens") st.caption("News bias analysis dashboard") topic = st.text_input("Topic", value="climate change", max_chars=120) top_k = st.slider("Articles to retrieve", min_value=1, max_value=20, value=10) page_size = st.slider("Articles to ingest", min_value=5, max_value=50, value=15, step=5) with st.expander("Advanced", expanded=False): debug = st.checkbox("Show model internals", value=False) ingest = st.button("Ingest latest articles") analyze = st.button("Analyze topic", type="primary") if st.session_state.last_ingest: st.success( f"Stored {st.session_state.last_ingest['articles_stored']} " f"article(s) for {st.session_state.last_ingest['topic']}." ) st.divider() st.caption("Suggested searches") sample_topics = ["climate change", "electric vehicles", "AI regulation", "public health"] selected_sample = st.selectbox( "Sample topics", ["Use typed topic"] + sample_topics, label_visibility="collapsed", ) if selected_sample != "Use typed topic": topic = selected_sample st.markdown( """
Media Intelligence

NewsLens Bias Analyzer

Compare how news sources frame a topic using retrieval, source metadata, and a text-bias classifier.
""", unsafe_allow_html=True, ) if analyze: if not topic.strip(): st.error("Topic cannot be empty.") st.stop() with st.spinner("Analyzing coverage..."): try: st.session_state.analysis = client.analyze(topic.strip(), top_k) except Exception as exc: st.error(str(exc)) st.stop() if ingest: if not topic.strip(): st.error("Topic cannot be empty.") st.stop() with st.spinner("Fetching and indexing articles..."): try: st.session_state.last_ingest = client.ingest(topic.strip(), page_size) st.session_state.analysis = client.analyze(topic.strip(), top_k) except Exception as exc: st.error(str(exc)) st.stop() data = st.session_state.analysis if data is None: render_empty_state() st.stop() summary = data.get("summary", {}) results = data.get("results", []) total, biased, bias_ratio = summarize_bias(summary) neutral = max(total - biased, 0) source_count = len(summary) metric_cols = st.columns(4) metric_cols[0].metric("Articles", total) metric_cols[1].metric("Sources", source_count) metric_cols[2].metric("Biased", biased) metric_cols[3].metric("Not biased", neutral) st.markdown( f"""
{insight_copy(bias_ratio)}
""", unsafe_allow_html=True, ) tab_overview, tab_articles, tab_model = st.tabs(["Overview", "Articles", "Model"]) with tab_overview: st.markdown('
Bias Distribution by Source
', unsafe_allow_html=True) chart = build_bias_distribution_chart(summary) if chart: st.plotly_chart(chart, use_container_width=True, config={"displayModeBar": False}) else: st.warning("No chart data available.") st.markdown('
Bias by Political Lean
', unsafe_allow_html=True) st.caption("Are left-leaning or right-leaning sources more biased on this topic?") lean_chart = build_lean_bias_chart(results) if lean_chart: st.plotly_chart(lean_chart, use_container_width=True, config={"displayModeBar": False}) else: st.warning("Not enough source lean data.") with tab_articles: st.markdown('
Evidence Articles
', unsafe_allow_html=True) if not results: st.warning("No articles found.") else: labels = sorted({article.get("text_label", "Unknown") for article in results}) leans = sorted({article.get("source_bias", "Unknown") for article in results}) filter_cols = st.columns([1, 1, 1]) selected_label = filter_cols[0].selectbox("Classification", ["All"] + labels) selected_lean = filter_cols[1].selectbox("Source lean", ["All"] + leans) sort_by = filter_cols[2].selectbox( "Sort by", ["Confidence", "Similarity", "Source"], ) filtered_results = results if selected_label != "All": filtered_results = [ article for article in filtered_results if article.get("text_label", "Unknown") == selected_label ] if selected_lean != "All": filtered_results = [ article for article in filtered_results if article.get("source_bias", "Unknown") == selected_lean ] if sort_by == "Confidence": filtered_results = sorted( filtered_results, key=lambda article: article.get("confidence", 0), reverse=True, ) elif sort_by == "Similarity": filtered_results = sorted( filtered_results, key=lambda article: article.get("similarity_score", 0), reverse=True, ) else: filtered_results = sorted( filtered_results, key=lambda article: article.get("source", "Unknown source"), ) st.caption(f"Showing {len(filtered_results)} of {len(results)} retrieved articles.") if not filtered_results: st.warning("No articles match the selected filters.") else: grouped = defaultdict(list) for article in filtered_results: grouped[article.get("source", "Unknown source")].append(article) for source, articles in grouped.items(): source_bias = articles[0].get("source_bias", "Unknown") st.markdown( f"""

{escape(str(source))}

{escape(str(source_bias))} source bias | {len(articles)} article(s)
""", unsafe_allow_html=True, ) for article in articles: render_article_card(article, debug=debug) with tab_model: render_model_panel() st.markdown('
Training Run
', unsafe_allow_html=True) st.write( "RoBERTa was fine-tuned for binary text-bias classification with LoRA. " "The best supplied run finished at 85.44% evaluation accuracy and 85.46% weighted F1." ) st.dataframe( [ {"Epoch": 1, "Eval loss": 0.3576, "Accuracy": 0.8432, "Weighted F1": 0.8434}, {"Epoch": 2, "Eval loss": 0.3656, "Accuracy": 0.8512, "Weighted F1": 0.8512}, {"Epoch": 3, "Eval loss": 0.3933, "Accuracy": 0.8544, "Weighted F1": 0.8546}, ], hide_index=True, use_container_width=True, ) st.info( "Use these labels as decision support, not ground truth. Bias classification is sensitive " "to dataset definitions, article excerpts, and source coverage." )
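

# A minimal sketch (not from the client's own docs) of the payload shape this page expects
# from DirectPipelineClient.analyze(). The keys below are inferred from how the dashboard
# reads the response above; the source name and values are illustrative only, and the real
# client may return additional fields.
_EXAMPLE_ANALYSIS = {
    "summary": {
        # Per-source tallies consumed by summarize_bias() and the distribution chart.
        "Example Wire": {"total": 4, "Biased": 1},
    },
    "results": [
        {
            "source": "Example Wire",       # grouping key for the Evidence Articles tab
            "source_bias": "Center",        # political lean used by the lean chart and filters
            "text_label": "Not biased",     # model classification shown on each article card
            "confidence": 0.91,             # classifier confidence used for sorting
            "similarity_score": 0.78,       # retrieval similarity used for sorting
        }
    ],
}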