Spaces:

Jitender20
/

newslens

Sleeping

File size: 16,625 Bytes

208266a

from collections import defaultdict
from html import escape

import streamlit as st

from src.ui.components.article_card import inject_article_card_styles, render_article_card
from src.ui.components.charts import build_bias_distribution_chart, build_lean_bias_chart
from src.ui.services.api_client import NewsLensClient
from src.ui.services.api_client import DirectPipelineClient

# Hard-coded evaluation metrics for the bias classifier. render_model_panel()
# reads eval_accuracy, eval_f1_weighted, eval_loss and epochs from this dict;
# train_loss is not read anywhere in this part of the file.
# The eval_* values equal the epoch-3 row of the training table shown in the
# "Model" tab further down, so update both places together.
MODEL_EVAL = {
    "eval_accuracy": 0.8544,
    "eval_f1_weighted": 0.8546,
    "eval_loss": 0.3933,
    "train_loss": 0.3888,
    "epochs": 3,
}


# Page-level settings: page title, wide layout, sidebar expanded on load.
# NOTE(review): Streamlit has traditionally required set_page_config() to run
# before any other st.* command — keep it above inject_styles(); confirm
# against the installed Streamlit version.
st.set_page_config(
    page_title="NewsLens",
    layout="wide",
    initial_sidebar_state="expanded",
)


def inject_styles() -> None:
    """Emit the dashboard's global stylesheet into the page.

    The CSS defines the ``--nl-*`` colour variables, the ``.nl-*`` helper
    classes used by the raw-HTML snippets in this file, and overrides for the
    sidebar, metric tiles and buttons. It is written through ``st.markdown``
    with ``unsafe_allow_html=True`` so the ``<style>`` tag is passed through
    as HTML.
    """
    stylesheet = """
        <style>
            :root {
                --nl-ink: #15202b;
                --nl-muted: #64748b;
                --nl-line: #d8dee9;
                --nl-panel: #ffffff;
                --nl-soft: #f6f8fb;
                --nl-blue: #2457c5;
                --nl-teal: #087f8c;
                --nl-red: #c24138;
                --nl-green: #247857;
            }

            .block-container {
                padding-top: 1.4rem;
                padding-bottom: 2rem;
                max-width: 1240px;
            }

            [data-testid="stSidebar"] {
                background: #f7f9fc;
                border-right: 1px solid var(--nl-line);
            }

            [data-testid="stSidebar"] h1,
            [data-testid="stSidebar"] h2,
            [data-testid="stSidebar"] h3 {
                color: var(--nl-ink);
            }

            h1, h2, h3 {
                letter-spacing: 0;
            }

            .nl-topbar {
                border-bottom: 1px solid var(--nl-line);
                padding: 0 0 1rem 0;
                margin-bottom: 1.2rem;
            }

            .nl-kicker {
                color: var(--nl-teal);
                font-size: 0.78rem;
                font-weight: 800;
                letter-spacing: 0.08em;
                text-transform: uppercase;
                margin-bottom: 0.25rem;
            }

            .nl-title {
                color: var(--nl-ink);
                font-size: 2.25rem;
                font-weight: 800;
                line-height: 1.1;
                margin: 0;
            }

            .nl-subtitle {
                color: var(--nl-muted);
                max-width: 780px;
                margin-top: 0.55rem;
                font-size: 1rem;
                line-height: 1.55;
            }

            .nl-empty {
                background: linear-gradient(135deg, #f7f9fc 0%, #eef6f2 100%);
                border: 1px solid var(--nl-line);
                border-radius: 8px;
                padding: 2.2rem;
                margin-top: 1rem;
            }

            .nl-empty h3 {
                color: var(--nl-ink);
                margin: 0 0 0.5rem 0;
            }

            .nl-empty p {
                color: var(--nl-muted);
                margin: 0;
                line-height: 1.6;
            }

            .nl-section-heading {
                color: var(--nl-ink);
                font-size: 1.05rem;
                font-weight: 800;
                margin: 1.1rem 0 0.45rem 0;
            }

            .nl-source-heading {
                border-top: 1px solid var(--nl-line);
                color: var(--nl-ink);
                display: flex;
                justify-content: space-between;
                align-items: center;
                gap: 1rem;
                padding-top: 1rem;
                margin: 1.1rem 0 0.5rem 0;
            }

            .nl-source-heading h3 {
                font-size: 1.05rem;
                margin: 0;
            }

            .nl-source-meta {
                color: var(--nl-muted);
                font-size: 0.85rem;
                white-space: nowrap;
            }

            .nl-insight {
                border-left: 4px solid var(--nl-teal);
                background: #f5fbfa;
                padding: 0.9rem 1rem;
                color: var(--nl-ink);
                margin: 0.25rem 0 0.9rem 0;
            }

            .nl-insight strong {
                color: var(--nl-teal);
            }

            .nl-model-panel {
                background: #f7f9fc;
                border: 1px solid var(--nl-line);
                border-radius: 8px;
                padding: 1rem;
                margin-top: 0.8rem;
            }

            .nl-model-panel h3 {
                color: var(--nl-ink);
                font-size: 1rem;
                margin: 0 0 0.6rem 0;
            }

            .nl-model-grid {
                display: grid;
                gap: 0.65rem;
                grid-template-columns: repeat(4, minmax(0, 1fr));
            }

            .nl-model-stat {
                background: #ffffff;
                border: 1px solid var(--nl-line);
                border-radius: 8px;
                padding: 0.75rem;
            }

            .nl-model-stat span {
                color: var(--nl-muted);
                display: block;
                font-size: 0.72rem;
                font-weight: 800;
                letter-spacing: 0.04em;
                text-transform: uppercase;
            }

            .nl-model-stat strong {
                color: var(--nl-ink);
                display: block;
                font-size: 1.25rem;
                margin-top: 0.2rem;
            }

            div[data-testid="stMetric"] {
                background: var(--nl-panel);
                border: 1px solid var(--nl-line);
                border-radius: 8px;
                padding: 0.85rem 1rem;
            }

            div[data-testid="stMetric"] label {
                color: var(--nl-muted);
            }

            .stButton > button {
                background: var(--nl-blue);
                border: 1px solid var(--nl-blue);
                color: #ffffff;
                font-weight: 700;
                min-height: 2.6rem;
                width: 100%;
            }

            .stButton > button:hover {
                background: #1f4dac;
                border-color: #1f4dac;
                color: #ffffff;
            }

            @media (max-width: 760px) {
                .nl-title {
                    font-size: 1.75rem;
                }

                .nl-empty {
                    padding: 1.4rem;
                }

                .nl-source-heading {
                    align-items: flex-start;
                    flex-direction: column;
                    gap: 0.2rem;
                }

                .nl-model-grid {
                    grid-template-columns: repeat(2, minmax(0, 1fr));
                }
            }
        </style>
        """
    st.markdown(stylesheet, unsafe_allow_html=True)


def summarize_bias(summary: dict) -> tuple[int, int, float]:
    """Collapse per-source counts into overall totals.

    Args:
        summary: Mapping whose values are per-source dicts. Only the
            ``"total"`` and ``"Biased"`` entries are read; each counts as 0
            when absent.

    Returns:
        ``(total, biased, ratio)`` where ``ratio`` is ``biased / total``, or
        ``0`` when ``total`` is zero.
    """
    total = 0
    biased = 0
    for counts in summary.values():
        total += counts.get("total", 0)
        biased += counts.get("Biased", 0)

    # Guard the division: an empty summary (or all-zero totals) has no ratio.
    if not total:
        return total, biased, 0
    return total, biased, biased / total


def insight_copy(ratio: float) -> str:
    """Build the one-line HTML insight shown above the result tabs.

    Args:
        ratio: Share of biased articles as a fraction (``0.5`` means 50%).

    Returns:
        An HTML fragment: a ``<strong>`` headline with the rounded percentage,
        followed by a sentence chosen by threshold — ``>= 0.6`` reads as
        biased, ``<= 0.4`` as mostly neutral, anything in between as mixed.
    """
    headline = f"<strong>{int(round(ratio * 100))}% biased coverage.</strong>"

    # Thresholds are applied to the raw ratio, not the rounded percentage.
    if ratio >= 0.6:
        verdict = "The retrieved articles lean noticeably toward biased framing."
    elif ratio <= 0.4:
        verdict = "The article set is mostly neutral by the current model."
    else:
        verdict = "The result set is mixed and worth comparing source by source."

    return f"{headline} {verdict}"


def render_model_panel() -> None:
    """Render the "Model Snapshot" panel from the module-level ``MODEL_EVAL``.

    Shows four tiles: evaluation accuracy and weighted F1 as percentages with
    one decimal, evaluation loss with three decimals, and the epoch count.
    The markup relies on the ``.nl-model-*`` classes from ``inject_styles``.
    """
    # Format each metric up front so the template below stays plain markup.
    accuracy = f'{MODEL_EVAL["eval_accuracy"]:.1%}'
    weighted_f1 = f'{MODEL_EVAL["eval_f1_weighted"]:.1%}'
    eval_loss = f'{MODEL_EVAL["eval_loss"]:.3f}'
    epochs = MODEL_EVAL["epochs"]

    panel_html = f"""
        <div class="nl-model-panel">
            <h3>Model Snapshot</h3>
            <div class="nl-model-grid">
                <div class="nl-model-stat">
                    <span>Eval Accuracy</span>
                    <strong>{accuracy}</strong>
                </div>
                <div class="nl-model-stat">
                    <span>Weighted F1</span>
                    <strong>{weighted_f1}</strong>
                </div>
                <div class="nl-model-stat">
                    <span>Eval Loss</span>
                    <strong>{eval_loss}</strong>
                </div>
                <div class="nl-model-stat">
                    <span>Epochs</span>
                    <strong>{epochs}</strong>
                </div>
            </div>
        </div>
        """
    st.markdown(panel_html, unsafe_allow_html=True)


def render_empty_state() -> None:
    """Render the static placeholder card shown when no analysis is stored.

    The card is fixed markup styled by the ``.nl-empty`` rules from
    ``inject_styles``; it takes no input and returns nothing.
    """
    placeholder_html = """
        <div class="nl-empty">
            <h3>Run a topic analysis</h3>
            <p>
                Search a public issue, company, policy, or event to compare retrieved articles by source,
                model label, and confidence. Results will appear as a dashboard with source-level evidence.
            </p>
        </div>
        """
    st.markdown(placeholder_html, unsafe_allow_html=True)


# --- Page bootstrap ---------------------------------------------------------
inject_styles()
inject_article_card_styles()
# NOTE(review): Streamlit re-executes this script on every interaction, so the
# client is constructed again on each rerun. If construction is expensive,
# consider wrapping it in st.cache_resource — confirm against
# DirectPipelineClient before changing.
client = DirectPipelineClient()

# Session state persists across reruns: the latest analysis payload and the
# result of the latest ingest.
if "analysis" not in st.session_state:
    st.session_state.analysis = None
if "last_ingest" not in st.session_state:
    st.session_state.last_ingest = None

# --- Sidebar controls -------------------------------------------------------
with st.sidebar:
    st.title("NewsLens")
    st.caption("News bias analysis dashboard")

    topic = st.text_input("Topic", value="climate change", max_chars=120)
    top_k = st.slider("Articles to retrieve", min_value=1, max_value=20, value=10)
    page_size = st.slider("Articles to ingest", min_value=5, max_value=50, value=15, step=5)

    with st.expander("Advanced", expanded=False):
        debug = st.checkbox("Show model internals", value=False)

    ingest = st.button("Ingest latest articles")
    analyze = st.button("Analyze topic", type="primary")

    # Reserve the confirmation's position here but fill it only after the
    # button handlers below have run. Rendering it immediately would show the
    # previous ingest (or nothing) during the rerun that performs a new one.
    ingest_status = st.empty()

    st.divider()
    st.caption("Suggested searches")
    sample_topics = ["climate change", "electric vehicles", "AI regulation", "public health"]
    selected_sample = st.selectbox(
        "Sample topics",
        ["Use typed topic"] + sample_topics,
        label_visibility="collapsed",
    )

    # A chosen sample takes precedence over whatever is typed in the text box.
    if selected_sample != "Use typed topic":
        topic = selected_sample

# --- Page header ------------------------------------------------------------
st.markdown(
    """
    <div class="nl-topbar">
        <div class="nl-kicker">Media Intelligence</div>
        <h1 class="nl-title">NewsLens Bias Analyzer</h1>
        <div class="nl-subtitle">
            Compare how news sources frame a topic using retrieval, source metadata, and a text-bias classifier.
        </div>
    </div>
    """,
    unsafe_allow_html=True,
)

# --- Button handlers --------------------------------------------------------
# Failures are collected instead of stopping immediately so the sidebar
# confirmation can still be rendered before the script halts.
action_error = None
if analyze or ingest:
    query = topic.strip()
    if not query:
        action_error = "Topic cannot be empty."
    else:
        spinner_text = "Fetching and indexing articles..." if ingest else "Analyzing coverage..."
        with st.spinner(spinner_text):
            try:
                if ingest:
                    st.session_state.last_ingest = client.ingest(query, page_size)
                # Both actions finish by refreshing the stored analysis.
                st.session_state.analysis = client.analyze(query, top_k)
            except Exception as exc:  # UI boundary: show any backend failure to the user
                action_error = str(exc)

# Fill the sidebar placeholder now that last_ingest reflects this rerun.
if st.session_state.last_ingest:
    ingest_status.success(
        f"Stored {st.session_state.last_ingest['articles_stored']} "
        f"article(s) for {st.session_state.last_ingest['topic']}."
    )

if action_error is not None:
    st.error(action_error)
    st.stop()

# Nothing analysed yet in this session: show the placeholder and end the run.
data = st.session_state.analysis
if data is None:
    render_empty_state()
    st.stop()

summary = data.get("summary", {})
results = data.get("results", [])
total, biased, bias_ratio = summarize_bias(summary)
source_count = len(summary)
# Clamped at zero so the tile never shows a negative count.
neutral = max(total - biased, 0)

# One metric tile per (label, value) pair, laid out left to right.
headline_metrics = (
    ("Articles", total),
    ("Sources", source_count),
    ("Biased", biased),
    ("Not biased", neutral),
)
metric_cols = st.columns(4)
for metric_col, (metric_label, metric_value) in zip(metric_cols, headline_metrics):
    metric_col.metric(metric_label, metric_value)

insight_html = f"""<div class="nl-insight">{insight_copy(bias_ratio)}</div>"""
st.markdown(insight_html, unsafe_allow_html=True)

tab_overview, tab_articles, tab_model = st.tabs(["Overview", "Articles", "Model"])


def _plot_or_warn(figure, fallback: str) -> None:
    """Render ``figure`` full-width without the mode bar, or warn with ``fallback`` when it is falsy."""
    if not figure:
        st.warning(fallback)
        return
    st.plotly_chart(figure, use_container_width=True, config={"displayModeBar": False})


with tab_overview:
    # Section 1: per-source distribution built from the summary mapping.
    st.markdown('<div class="nl-section-heading">Bias Distribution by Source</div>', unsafe_allow_html=True)
    chart = build_bias_distribution_chart(summary)
    _plot_or_warn(chart, "No chart data available.")

    # Section 2: lean breakdown built from the individual article results.
    st.markdown('<div class="nl-section-heading">Bias by Political Lean</div>', unsafe_allow_html=True)
    st.caption("Are left-leaning or right-leaning sources more biased on this topic?")
    lean_chart = build_lean_bias_chart(results)
    _plot_or_warn(lean_chart, "Not enough source lean data.")

def _article_text(article: dict, key: str, default: str) -> str:
    """Return ``article[key]`` as a string, or ``default`` when it is missing, ``None`` or empty.

    ``dict.get(key, default)`` alone is not enough: a key that is present with
    a ``None`` value would leak ``None`` into ``sorted()`` and raise TypeError.
    """
    value = article.get(key)
    if value is None or value == "":
        return default
    return str(value)


def _article_score(article: dict, key: str) -> float:
    """Return ``article[key]`` as a float for sorting; missing, ``None`` or non-numeric values count as 0."""
    try:
        return float(article.get(key) or 0)
    except (TypeError, ValueError):
        return 0.0


with tab_articles:
    st.markdown('<div class="nl-section-heading">Evidence Articles</div>', unsafe_allow_html=True)

    if not results:
        st.warning("No articles found.")
    else:
        # Filter options come from the normalised values actually present.
        labels = sorted({_article_text(article, "text_label", "Unknown") for article in results})
        leans = sorted({_article_text(article, "source_bias", "Unknown") for article in results})

        filter_cols = st.columns([1, 1, 1])
        selected_label = filter_cols[0].selectbox("Classification", ["All"] + labels)
        selected_lean = filter_cols[1].selectbox("Source lean", ["All"] + leans)
        sort_by = filter_cols[2].selectbox(
            "Sort by",
            ["Confidence", "Similarity", "Source"],
        )

        # "All" disables a filter. Building a new list keeps `results` intact
        # for the other tabs and makes the in-place sort below safe.
        filtered_results = [
            article
            for article in results
            if selected_label in ("All", _article_text(article, "text_label", "Unknown"))
            and selected_lean in ("All", _article_text(article, "source_bias", "Unknown"))
        ]

        if sort_by == "Confidence":
            filtered_results.sort(
                key=lambda article: _article_score(article, "confidence"),
                reverse=True,
            )
        elif sort_by == "Similarity":
            filtered_results.sort(
                key=lambda article: _article_score(article, "similarity_score"),
                reverse=True,
            )
        else:
            # Case-insensitive so that e.g. "bbc" and "BBC" sort together.
            filtered_results.sort(
                key=lambda article: _article_text(article, "source", "Unknown source").casefold(),
            )

        st.caption(f"Showing {len(filtered_results)} of {len(results)} retrieved articles.")

        if not filtered_results:
            st.warning("No articles match the selected filters.")
        else:
            # Group by source; dict insertion order keeps the chosen sort order,
            # so each source appears where its first article landed.
            grouped = defaultdict(list)
            for article in filtered_results:
                grouped[_article_text(article, "source", "Unknown source")].append(article)

            for source, articles in grouped.items():
                source_bias = _article_text(articles[0], "source_bias", "Unknown")
                # Source names and lean labels come from article data, so they
                # are escaped before being placed into raw HTML.
                st.markdown(
                    f"""
                    <div class="nl-source-heading">
                        <h3>{escape(source)}</h3>
                        <div class="nl-source-meta">{escape(source_bias)} source bias | {len(articles)} article(s)</div>
                    </div>
                    """,
                    unsafe_allow_html=True,
                )
                for article in articles:
                    render_article_card(article, debug=debug)

with tab_model:
    render_model_panel()
    st.markdown('<div class="nl-section-heading">Training Run</div>', unsafe_allow_html=True)
    # The headline figures are read from MODEL_EVAL rather than repeated as
    # literals, so this sentence cannot drift from the snapshot panel above.
    st.write(
        "RoBERTa was fine-tuned for binary text-bias classification with LoRA. "
        f"The best supplied run finished at {MODEL_EVAL['eval_accuracy']:.2%} evaluation accuracy "
        f"and {MODEL_EVAL['eval_f1_weighted']:.2%} weighted F1."
    )
    # Per-epoch history of the training run. The final row repeats the values
    # held in MODEL_EVAL; keep the two in step when a new run is recorded.
    st.dataframe(
        [
            {"Epoch": 1, "Eval loss": 0.3576, "Accuracy": 0.8432, "Weighted F1": 0.8434},
            {"Epoch": 2, "Eval loss": 0.3656, "Accuracy": 0.8512, "Weighted F1": 0.8512},
            {"Epoch": 3, "Eval loss": 0.3933, "Accuracy": 0.8544, "Weighted F1": 0.8546},
        ],
        hide_index=True,
        use_container_width=True,
    )
    st.info(
        "Use these labels as decision support, not ground truth. Bias classification is sensitive "
        "to dataset definitions, article excerpts, and source coverage."
    )