Spaces:
Sleeping
Sleeping
| from collections import defaultdict | |
| from html import escape | |
| import streamlit as st | |
| from src.ui.components.article_card import inject_article_card_styles, render_article_card | |
| from src.ui.components.charts import build_bias_distribution_chart, build_lean_bias_chart | |
| from src.ui.services.api_client import NewsLensClient | |
| from src.ui.services.api_client import DirectPipelineClient | |
# Evaluation figures of the fine-tuned classifier, displayed by the model panel.
MODEL_EVAL = dict(
    eval_accuracy=0.8544,
    eval_f1_weighted=0.8546,
    eval_loss=0.3933,
    train_loss=0.3888,
    epochs=3,
)
# Page-level Streamlit settings: browser title, wide layout, sidebar open on load.
_PAGE_SETTINGS = {
    "page_title": "NewsLens",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
def inject_styles() -> None:
    """Inject the dashboard's global stylesheet into the page.

    The CSS defines the ``--nl-*`` colour variables, the ``nl-*`` helper
    classes used by the hand-written HTML fragments in this file, and
    overrides for a few Streamlit widgets (sidebar, metrics, buttons).
    It is emitted through ``st.markdown`` with ``unsafe_allow_html=True``
    so the ``<style>`` element is passed to the browser as markup.
    """
    stylesheet = """
        <style>
        :root {
            --nl-ink: #15202b;
            --nl-muted: #64748b;
            --nl-line: #d8dee9;
            --nl-panel: #ffffff;
            --nl-soft: #f6f8fb;
            --nl-blue: #2457c5;
            --nl-teal: #087f8c;
            --nl-red: #c24138;
            --nl-green: #247857;
        }
        .block-container {
            padding-top: 1.4rem;
            padding-bottom: 2rem;
            max-width: 1240px;
        }
        [data-testid="stSidebar"] {
            background: #f7f9fc;
            border-right: 1px solid var(--nl-line);
        }
        [data-testid="stSidebar"] h1,
        [data-testid="stSidebar"] h2,
        [data-testid="stSidebar"] h3 {
            color: var(--nl-ink);
        }
        h1, h2, h3 {
            letter-spacing: 0;
        }
        .nl-topbar {
            border-bottom: 1px solid var(--nl-line);
            padding: 0 0 1rem 0;
            margin-bottom: 1.2rem;
        }
        .nl-kicker {
            color: var(--nl-teal);
            font-size: 0.78rem;
            font-weight: 800;
            letter-spacing: 0.08em;
            text-transform: uppercase;
            margin-bottom: 0.25rem;
        }
        .nl-title {
            color: var(--nl-ink);
            font-size: 2.25rem;
            font-weight: 800;
            line-height: 1.1;
            margin: 0;
        }
        .nl-subtitle {
            color: var(--nl-muted);
            max-width: 780px;
            margin-top: 0.55rem;
            font-size: 1rem;
            line-height: 1.55;
        }
        .nl-empty {
            background: linear-gradient(135deg, #f7f9fc 0%, #eef6f2 100%);
            border: 1px solid var(--nl-line);
            border-radius: 8px;
            padding: 2.2rem;
            margin-top: 1rem;
        }
        .nl-empty h3 {
            color: var(--nl-ink);
            margin: 0 0 0.5rem 0;
        }
        .nl-empty p {
            color: var(--nl-muted);
            margin: 0;
            line-height: 1.6;
        }
        .nl-section-heading {
            color: var(--nl-ink);
            font-size: 1.05rem;
            font-weight: 800;
            margin: 1.1rem 0 0.45rem 0;
        }
        .nl-source-heading {
            border-top: 1px solid var(--nl-line);
            color: var(--nl-ink);
            display: flex;
            justify-content: space-between;
            align-items: center;
            gap: 1rem;
            padding-top: 1rem;
            margin: 1.1rem 0 0.5rem 0;
        }
        .nl-source-heading h3 {
            font-size: 1.05rem;
            margin: 0;
        }
        .nl-source-meta {
            color: var(--nl-muted);
            font-size: 0.85rem;
            white-space: nowrap;
        }
        .nl-insight {
            border-left: 4px solid var(--nl-teal);
            background: #f5fbfa;
            padding: 0.9rem 1rem;
            color: var(--nl-ink);
            margin: 0.25rem 0 0.9rem 0;
        }
        .nl-insight strong {
            color: var(--nl-teal);
        }
        .nl-model-panel {
            background: #f7f9fc;
            border: 1px solid var(--nl-line);
            border-radius: 8px;
            padding: 1rem;
            margin-top: 0.8rem;
        }
        .nl-model-panel h3 {
            color: var(--nl-ink);
            font-size: 1rem;
            margin: 0 0 0.6rem 0;
        }
        .nl-model-grid {
            display: grid;
            gap: 0.65rem;
            grid-template-columns: repeat(4, minmax(0, 1fr));
        }
        .nl-model-stat {
            background: #ffffff;
            border: 1px solid var(--nl-line);
            border-radius: 8px;
            padding: 0.75rem;
        }
        .nl-model-stat span {
            color: var(--nl-muted);
            display: block;
            font-size: 0.72rem;
            font-weight: 800;
            letter-spacing: 0.04em;
            text-transform: uppercase;
        }
        .nl-model-stat strong {
            color: var(--nl-ink);
            display: block;
            font-size: 1.25rem;
            margin-top: 0.2rem;
        }
        div[data-testid="stMetric"] {
            background: var(--nl-panel);
            border: 1px solid var(--nl-line);
            border-radius: 8px;
            padding: 0.85rem 1rem;
        }
        div[data-testid="stMetric"] label {
            color: var(--nl-muted);
        }
        .stButton > button {
            background: var(--nl-blue);
            border: 1px solid var(--nl-blue);
            color: #ffffff;
            font-weight: 700;
            min-height: 2.6rem;
            width: 100%;
        }
        .stButton > button:hover {
            background: #1f4dac;
            border-color: #1f4dac;
            color: #ffffff;
        }
        @media (max-width: 760px) {
            .nl-title {
                font-size: 1.75rem;
            }
            .nl-empty {
                padding: 1.4rem;
            }
            .nl-source-heading {
                align-items: flex-start;
                flex-direction: column;
                gap: 0.2rem;
            }
            .nl-model-grid {
                grid-template-columns: repeat(2, minmax(0, 1fr));
            }
        }
        </style>
    """
    st.markdown(stylesheet, unsafe_allow_html=True)
def summarize_bias(summary: dict) -> tuple[int, int, float]:
    """Aggregate per-source counts into overall totals and a bias ratio.

    Args:
        summary: Mapping of source name to a dict of counts. Only the
            ``"total"`` and ``"Biased"`` entries of each value are read;
            a missing or ``None`` entry counts as zero.

    Returns:
        ``(total, biased, ratio)`` where ``ratio`` is ``biased / total`` and
        is ``0.0`` when there are no articles at all.
    """
    # `or 0` also covers an explicit None, which the .get() default would miss.
    total = sum(counts.get("total") or 0 for counts in summary.values())
    biased = sum(counts.get("Biased") or 0 for counts in summary.values())
    # Always hand back a float so the value matches the annotated return type.
    ratio = biased / total if total else 0.0
    return total, biased, ratio
def insight_copy(ratio: float) -> str:
    """Build the one-line HTML insight for a share of biased articles.

    Args:
        ratio: Fraction of articles labelled biased.

    Returns:
        An HTML snippet: the rounded percentage in ``<strong>`` followed by
        a sentence chosen by threshold — at least 0.6 reads as leaning
        biased, at most 0.4 as mostly neutral, anything between as mixed.
    """
    percent = int(round(ratio * 100))
    headline = f"<strong>{percent}% biased coverage.</strong>"

    # The thresholds compare the raw ratio, not the rounded percentage.
    if ratio >= 0.6:
        verdict = "The retrieved articles lean noticeably toward biased framing."
    elif ratio <= 0.4:
        verdict = "The article set is mostly neutral by the current model."
    else:
        verdict = "The result set is mixed and worth comparing source by source."

    return f"{headline} {verdict}"
def render_model_panel() -> None:
    """Render the "Model Snapshot" panel from the ``MODEL_EVAL`` figures.

    Shows four tiles: evaluation accuracy and weighted F1 as percentages
    with one decimal, evaluation loss with three decimals, and the epoch
    count as-is.
    """
    # Format each figure up front so the template below stays pure markup.
    accuracy = f'{MODEL_EVAL["eval_accuracy"]:.1%}'
    weighted_f1 = f'{MODEL_EVAL["eval_f1_weighted"]:.1%}'
    eval_loss = f'{MODEL_EVAL["eval_loss"]:.3f}'
    epochs = MODEL_EVAL["epochs"]

    panel_html = f"""
        <div class="nl-model-panel">
            <h3>Model Snapshot</h3>
            <div class="nl-model-grid">
                <div class="nl-model-stat">
                    <span>Eval Accuracy</span>
                    <strong>{accuracy}</strong>
                </div>
                <div class="nl-model-stat">
                    <span>Weighted F1</span>
                    <strong>{weighted_f1}</strong>
                </div>
                <div class="nl-model-stat">
                    <span>Eval Loss</span>
                    <strong>{eval_loss}</strong>
                </div>
                <div class="nl-model-stat">
                    <span>Epochs</span>
                    <strong>{epochs}</strong>
                </div>
            </div>
        </div>
    """
    st.markdown(panel_html, unsafe_allow_html=True)
def render_empty_state() -> None:
    """Render the placeholder card shown while no analysis is stored."""
    placeholder_html = """
        <div class="nl-empty">
            <h3>Run a topic analysis</h3>
            <p>
                Search a public issue, company, policy, or event to compare retrieved articles by source,
                model label, and confidence. Results will appear as a dashboard with source-level evidence.
            </p>
        </div>
    """
    st.markdown(placeholder_html, unsafe_allow_html=True)
# One-time page setup for every script run: styles first, then the pipeline client.
inject_styles()
inject_article_card_styles()

client = DirectPipelineClient()

# Seed the session slots the rest of the script reads, without clobbering
# values left by an earlier run.
for _state_key in ("analysis", "last_ingest"):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = None
# Sidebar: search inputs, action buttons, last-ingest confirmation and sample topics.
with st.sidebar:
    st.title("NewsLens")
    st.caption("News bias analysis dashboard")

    topic = st.text_input("Topic", value="climate change", max_chars=120)
    top_k = st.slider("Articles to retrieve", min_value=1, max_value=20, value=10)
    page_size = st.slider("Articles to ingest", min_value=5, max_value=50, value=15, step=5)

    with st.expander("Advanced", expanded=False):
        debug = st.checkbox("Show model internals", value=False)

    ingest = st.button("Ingest latest articles")
    analyze = st.button("Analyze topic", type="primary")

    # Confirmation for the ingest result currently stored in the session.
    last_ingest = st.session_state.last_ingest
    if last_ingest:
        st.success(
            f"Stored {last_ingest['articles_stored']} "
            f"article(s) for {last_ingest['topic']}."
        )

    st.divider()
    st.caption("Suggested searches")
    typed_topic_choice = "Use typed topic"
    sample_topics = ["climate change", "electric vehicles", "AI regulation", "public health"]
    selected_sample = st.selectbox(
        "Sample topics",
        [typed_topic_choice, *sample_topics],
        label_visibility="collapsed",
    )
    # Any sample selection takes precedence over whatever was typed above.
    if selected_sample != typed_topic_choice:
        topic = selected_sample
# Page header: kicker, title and one-line description of the tool.
topbar_html = """
    <div class="nl-topbar">
        <div class="nl-kicker">Media Intelligence</div>
        <h1 class="nl-title">NewsLens Bias Analyzer</h1>
        <div class="nl-subtitle">
            Compare how news sources frame a topic using retrieval, source metadata, and a text-bias classifier.
        </div>
    </div>
"""
st.markdown(topbar_html, unsafe_allow_html=True)
# Both actions work on the trimmed topic and refuse to run when it is blank.
query = topic.strip()

if analyze:
    if query:
        with st.spinner("Analyzing coverage..."):
            try:
                st.session_state.analysis = client.analyze(query, top_k)
            except Exception as exc:
                # Surface the failure in the page and end this script run.
                st.error(str(exc))
                st.stop()
    else:
        st.error("Topic cannot be empty.")
        st.stop()

if ingest:
    if query:
        with st.spinner("Fetching and indexing articles..."):
            try:
                # Ingest first, then re-analyze so the dashboard reflects the new articles.
                st.session_state.last_ingest = client.ingest(query, page_size)
                st.session_state.analysis = client.analyze(query, top_k)
            except Exception as exc:
                st.error(str(exc))
                st.stop()
    else:
        st.error("Topic cannot be empty.")
        st.stop()
# Nothing analysed yet in this session: show the placeholder and end the run.
data = st.session_state.analysis
if data is None:
    render_empty_state()
    st.stop()

# `or` fallbacks also cover a payload that carries an explicit null for either
# key, which the .get() default alone would pass through as None.
summary = data.get("summary") or {}
results = data.get("results") or []
total, biased, bias_ratio = summarize_bias(summary)
# Clamp at zero in case the biased count exceeds the reported total.
neutral = max(total - biased, 0)
source_count = len(summary)

# Headline metric tiles, left to right.
metric_cols = st.columns(4)
headline_metrics = (
    ("Articles", total),
    ("Sources", source_count),
    ("Biased", biased),
    ("Not biased", neutral),
)
for metric_col, (metric_label, metric_value) in zip(metric_cols, headline_metrics):
    metric_col.metric(metric_label, metric_value)

st.markdown(
    f"""<div class="nl-insight">{insight_copy(bias_ratio)}</div>""",
    unsafe_allow_html=True,
)
tab_overview, tab_articles, tab_model = st.tabs(["Overview", "Articles", "Model"])

# Overview tab: one chart per section, each with a warning fallback when the
# chart builder returns nothing usable.
with tab_overview:
    section_heading = '<div class="nl-section-heading">{}</div>'

    st.markdown(section_heading.format("Bias Distribution by Source"), unsafe_allow_html=True)
    chart = build_bias_distribution_chart(summary)
    if not chart:
        st.warning("No chart data available.")
    else:
        st.plotly_chart(chart, use_container_width=True, config={"displayModeBar": False})

    st.markdown(section_heading.format("Bias by Political Lean"), unsafe_allow_html=True)
    st.caption("Are left-leaning or right-leaning sources more biased on this topic?")
    lean_chart = build_lean_bias_chart(results)
    if not lean_chart:
        st.warning("Not enough source lean data.")
    else:
        st.plotly_chart(lean_chart, use_container_width=True, config={"displayModeBar": False})
def _article_field(article: dict, key: str, fallback):
    """Return ``article[key]``, or ``fallback`` when the key is absent or None.

    A plain ``article.get(key, fallback)`` hands back None when the key is
    present with a null value; mixing None with strings or numbers then makes
    ``sorted()`` raise TypeError.
    """
    value = article.get(key)
    return fallback if value is None else value


# "Sort by" option -> (key function, descending?). Insertion order is the
# order in which the options are offered in the select box.
_ARTICLE_SORTS = {
    "Confidence": (lambda article: _article_field(article, "confidence", 0), True),
    "Similarity": (lambda article: _article_field(article, "similarity_score", 0), True),
    "Source": (lambda article: _article_field(article, "source", "Unknown source"), False),
}

# Articles tab: filter and sort the retrieved articles, then list them by source.
with tab_articles:
    st.markdown('<div class="nl-section-heading">Evidence Articles</div>', unsafe_allow_html=True)
    if not results:
        st.warning("No articles found.")
    else:
        # Filter options are the distinct values present in this result set.
        labels = sorted({_article_field(article, "text_label", "Unknown") for article in results})
        leans = sorted({_article_field(article, "source_bias", "Unknown") for article in results})

        filter_cols = st.columns([1, 1, 1])
        selected_label = filter_cols[0].selectbox("Classification", ["All"] + labels)
        selected_lean = filter_cols[1].selectbox("Source lean", ["All"] + leans)
        sort_by = filter_cols[2].selectbox("Sort by", list(_ARTICLE_SORTS))

        filtered_results = results
        if selected_label != "All":
            filtered_results = [
                article for article in filtered_results
                if _article_field(article, "text_label", "Unknown") == selected_label
            ]
        if selected_lean != "All":
            filtered_results = [
                article for article in filtered_results
                if _article_field(article, "source_bias", "Unknown") == selected_lean
            ]

        # Any unrecognised option falls back to sorting by source name.
        sort_key, descending = _ARTICLE_SORTS.get(sort_by, _ARTICLE_SORTS["Source"])
        filtered_results = sorted(filtered_results, key=sort_key, reverse=descending)

        st.caption(f"Showing {len(filtered_results)} of {len(results)} retrieved articles.")
        if not filtered_results:
            st.warning("No articles match the selected filters.")
        else:
            # Group by source; groups appear in the order their first article
            # occurs in the sorted list.
            grouped = defaultdict(list)
            for article in filtered_results:
                grouped[_article_field(article, "source", "Unknown source")].append(article)

            for source, articles in grouped.items():
                # The group's lean is taken from its first article.
                source_bias = _article_field(articles[0], "source_bias", "Unknown")
                # Values come from fetched articles, so escape them before
                # embedding them in raw HTML.
                st.markdown(
                    f"""
                    <div class="nl-source-heading">
                        <h3>{escape(str(source))}</h3>
                        <div class="nl-source-meta">{escape(str(source_bias))} source bias | {len(articles)} article(s)</div>
                    </div>
                    """,
                    unsafe_allow_html=True,
                )
                for article in articles:
                    render_article_card(article, debug=debug)
# Model tab: snapshot panel, a short description of the training run, the
# per-epoch evaluation table and a usage caveat.
with tab_model:
    render_model_panel()
    st.markdown('<div class="nl-section-heading">Training Run</div>', unsafe_allow_html=True)
    # Quote the figures from MODEL_EVAL so this sentence cannot drift away
    # from the snapshot panel, which reads the same constant.
    st.write(
        "RoBERTa was fine-tuned for binary text-bias classification with LoRA. "
        f"The best supplied run finished at {MODEL_EVAL['eval_accuracy']:.2%} evaluation accuracy "
        f"and {MODEL_EVAL['eval_f1_weighted']:.2%} weighted F1."
    )
    # Per-epoch evaluation history; the last row carries the same figures as MODEL_EVAL.
    training_history = [
        {"Epoch": 1, "Eval loss": 0.3576, "Accuracy": 0.8432, "Weighted F1": 0.8434},
        {"Epoch": 2, "Eval loss": 0.3656, "Accuracy": 0.8512, "Weighted F1": 0.8512},
        {"Epoch": 3, "Eval loss": 0.3933, "Accuracy": 0.8544, "Weighted F1": 0.8546},
    ]
    st.dataframe(
        training_history,
        hide_index=True,
        use_container_width=True,
    )
    st.info(
        "Use these labels as decision support, not ground truth. Bias classification is sensitive "
        "to dataset definitions, article excerpts, and source coverage."
    )