Spaces:

Human-AI-ETH
/

FrameVis

Sleeping

App Files Files Community

NKessler committed 12 days ago

Commit

dce5185

verified ·

1 Parent(s): 86c9b5e

Update app.py

Browse files

Files changed (1) hide show

app.py +110 -63

app.py CHANGED Viewed

@@ -1,11 +1,14 @@
 import re
 import typing
 import plotly.graph_objects as go
 import streamlit as st
 from transformers import pipeline
-import yake
 MAX_TEXT_LENGTH = 1500
 CANDIDATE_TONES = ["alarmist", "objective", "defensive", "optimistic", "critical"]
@@ -20,6 +23,16 @@ A sweeping new climate agreement signed today is drawing fierce criticism from i
 @st.cache_resource
 def _load_nlp_models() -> typing.Dict[str, typing.Any]:
     sentiment_analyzer = pipeline(
         "sentiment-analysis",
         model="distilbert-base-uncased-finetuned-sst-2-english",
@@ -28,9 +41,8 @@ def _load_nlp_models() -> typing.Dict[str, typing.Any]:
         "zero-shot-classification",
         model="typeform/distilbert-base-uncased-mnli",
     )
-    keyword_extractor = yake.KeywordExtractor(
-        lan="en", n=1, dedupLim=0.9, top=5, features=None
-    )
     return {
         "sentiment": sentiment_analyzer,
@@ -40,27 +52,44 @@ def _load_nlp_models() -> typing.Dict[str, typing.Any]:
 def analyze_article(text: str) -> dict:
     models = _load_nlp_models()
     safe_text = text[:MAX_TEXT_LENGTH]
     sentiment_result = models["sentiment"](safe_text)[0]
-    tone_result = models["tone"](safe_text, CANDIDATE_TONES)
-    keyword_results = models["keyword"].extract_keywords(safe_text)
     is_positive = sentiment_result["label"] == "POSITIVE"
     sentiment_score = (
         sentiment_result["score"] if is_positive else -sentiment_result["score"]
     )
     tone_scores = {
         label: score
         for label, score in zip(tone_result["labels"], tone_result["scores"])
     }
     extracted_keywords = [kw[0] for kw in keyword_results]
     return {
         "sentiment_score": sentiment_score,
         "primary_tone": tone_result["labels"][0],
         "tone_scores": tone_scores,
         "keywords": extracted_keywords,
@@ -68,118 +97,136 @@ def analyze_article(text: str) -> dict:
 def _create_sentiment_gauge(score: float, title: str) -> go.Figure:
     fig = go.Figure(
         go.Indicator(
             mode="gauge+number",
             value=score,
             domain={"x": [0, 1], "y": [0, 1]},
-            title={"text": title, "font": {"size": 18}},
             gauge={
                 "axis": {"range": [-1, 1], "tickwidth": 1},
-                "bar": {"color": "black"},
                 "steps": [
-                    {"range": [-1, -0.3], "color": "lightpink"},
-                    {"range": [-0.3, 0.3], "color": "lightgray"},
-                    {"range": [0.3, 1], "color": "lightgreen"},
                 ],
             },
         )
     )
-    fig.update_layout(height=250, margin=dict(l=20, r=20, t=40, b=20))
     return fig
 def _create_tone_bar_chart(tone_scores: typing.Dict[str, float]) -> go.Figure:
     labels = list(tone_scores.keys())
     values = list(tone_scores.values())
-    fig = go.Figure(go.Bar(x=values, y=labels, orientation="h", marker_color="royalblue"))
     fig.update_layout(
-        title="Emotional Tone Distribution",
-        xaxis_title="Confidence",
-        yaxis_title="Tone",
-        height=250,
-        margin=dict(l=20, r=20, t=40, b=20),
         yaxis={"categoryorder": "total ascending"},
     )
     return fig
 def _highlight_keywords(text: str, keywords: typing.List[str]) -> str:
     highlighted_text = text
     for kw in keywords:
         pattern = re.compile(rf"\b({re.escape(kw)})\b", re.IGNORECASE)
         highlighted_text = pattern.sub(
-            r"<span style='background-color: #ffcc00; padding: 2px; border-radius: 3px;'>\1</span>",
             highlighted_text,
         )
     return highlighted_text
-st.set_page_config(page_title="FrameVis MVP", layout="wide")
-st.title("FrameVis: Media Framing Analyzer")
-st.markdown("Compare how different news sources frame the same event using NLP.")
-with st.spinner("Waking up NLP models..."):
-    _load_nlp_models()
-st.markdown("### Input Articles")
-st.markdown("Paste custom articles below or use the default samples to see the analysis.")
 col1, col2 = st.columns(2)
 with col1:
-    st.subheader("Source A")
     user_article_a = st.text_area(
-        "Paste Article A Text:", value=ARTICLE_A.strip(), height=200
     )
-    should_analyze_a = st.button("Analyze Source A", use_container_width=True)
 with col2:
-    st.subheader("Source B")
     user_article_b = st.text_area(
-        "Paste Article B Text:", value=ARTICLE_B.strip(), height=200
     )
-    should_analyze_b = st.button("Analyze Source B", use_container_width=True)
-st.divider()
 if should_analyze_a or should_analyze_b:
-    st.markdown("### Visual Analytics Results")
     res_col1, res_col2 = st.columns(2)
     if should_analyze_a and user_article_a:
-        with st.spinner("Analyzing Source A..."):
             results_a = analyze_article(user_article_a)
             with res_col1:
-                st.plotly_chart(
-                    _create_sentiment_gauge(results_a["sentiment_score"], "Sentiment"),
-                    use_container_width=True,
-                )
-                st.plotly_chart(
-                    _create_tone_bar_chart(results_a["tone_scores"]),
-                    use_container_width=True,
-                )
-                st.markdown("**Highlighted Text (Loaded Keywords):**")
-                annotated_text = _highlight_keywords(
-                    user_article_a, results_a["keywords"]
-                )
-                st.markdown(f"> {annotated_text}", unsafe_allow_html=True)
     if should_analyze_b and user_article_b:
-        with st.spinner("Analyzing Source B..."):
             results_b = analyze_article(user_article_b)
             with res_col2:
-                st.plotly_chart(
-                    _create_sentiment_gauge(results_b["sentiment_score"], "Sentiment"),
-                    use_container_width=True,
-                )
-                st.plotly_chart(
-                    _create_tone_bar_chart(results_b["tone_scores"]),
-                    use_container_width=True,
-                )
-                st.markdown("**Highlighted Text (Loaded Keywords):**")
-                annotated_text = _highlight_keywords(
-                    user_article_b, results_b["keywords"]
-                )
-                st.markdown(f"> {annotated_text}", unsafe_allow_html=True)

+# imports
 import re
 import typing
 import plotly.graph_objects as go
 import streamlit as st
+from keybert import KeyBERT
+from textblob import TextBlob
 from transformers import pipeline
+# constants
 MAX_TEXT_LENGTH = 1500
 CANDIDATE_TONES = ["alarmist", "objective", "defensive", "optimistic", "critical"]
 @st.cache_resource
 def _load_nlp_models() -> typing.Dict[str, typing.Any]:
+    """
+    Loads NLP model into memory and caches it.
+    Upgraded to include KeyBERT for semantic keyword extraction, which hopefully
+    outperforms statistical models on short news text.
+    Returns:
+        A dictionary containing the initialized Hugging Face pipelines
+        and the KeyBERT model.
+    """
     sentiment_analyzer = pipeline(
         "sentiment-analysis",
         model="distilbert-base-uncased-finetuned-sst-2-english",
         "zero-shot-classification",
         model="typeform/distilbert-base-uncased-mnli",
     )
+    # KeyBERT uses a tiny, fast transformer to find contextual keywords
+    keyword_extractor = KeyBERT(model="all-MiniLM-L6-v2")
     return {
         "sentiment": sentiment_analyzer,
 def analyze_article(text: str) -> dict:
+    """
+    Analyzes framing using semantic keyphrases, sentiment, tone, and subjectivity.
+    Args:
+        text: The article text to analyze.
+    Returns:
+        A dictionary containing all calculated framing metrics.
+    """
     models = _load_nlp_models()
     safe_text = text[:MAX_TEXT_LENGTH]
+    # Sentiment Analysis
     sentiment_result = models["sentiment"](safe_text)[0]
     is_positive = sentiment_result["label"] == "POSITIVE"
     sentiment_score = (
         sentiment_result["score"] if is_positive else -sentiment_result["score"]
     )
+    # Tone Classification
+    tone_result = models["tone"](safe_text, CANDIDATE_TONES)
     tone_scores = {
         label: score
         for label, score in zip(tone_result["labels"], tone_result["scores"])
     }
+    # Semantic Keyword Extraction
+    keyword_results = models["keyword"].extract_keywords(
+        safe_text, keyphrase_ngram_range=(1, 2), stop_words="english", top_n=4
+    )
     extracted_keywords = [kw[0] for kw in keyword_results]
+    # Subjectivity Analysis
+    subjectivity_score = TextBlob(safe_text).sentiment.subjectivity
     return {
         "sentiment_score": sentiment_score,
+        "subjectivity_score": subjectivity_score,
         "primary_tone": tone_result["labels"][0],
         "tone_scores": tone_scores,
         "keywords": extracted_keywords,
 def _create_sentiment_gauge(score: float, title: str) -> go.Figure:
     """Build a Plotly gauge indicator for a sentiment score.

     Args:
         score: Value shown on the gauge; the dial axis spans -1 to 1.
         title: Caption rendered with the gauge.

     Returns:
         The configured figure.
     """
     # Background bands on the dial, from the negative end to the positive end.
     bands = [
         {"range": [-1, -0.2], "color": "#ffb3b3"},  # Red
         {"range": [-0.2, 0.2], "color": "#f2f2f2"},  # Gray
         {"range": [0.2, 1], "color": "#b3ffb3"},  # Green
     ]
     dial = {
         "axis": {"range": [-1, 1], "tickwidth": 1},
         "bar": {"color": "darkblue"},
         "steps": bands,
     }
     indicator = go.Indicator(
         mode="gauge+number",
         value=score,
         domain={"x": [0, 1], "y": [0, 1]},
         title={"text": title, "font": {"size": 16}},
         gauge=dial,
     )
     gauge_fig = go.Figure(indicator)
     gauge_fig.update_layout(
         height=280,
         margin={"l": 20, "r": 20, "t": 60, "b": 20},
     )
     return gauge_fig
 def _create_tone_bar_chart(tone_scores: typing.Dict[str, float]) -> go.Figure:
     """Generate a horizontal bar chart with one bar per tone label.

     Args:
         tone_scores: Mapping of tone label to its score.

     Returns:
         A figure whose bars are ordered by value
         (``categoryorder="total ascending"``).
     """
     labels = list(tone_scores.keys())
     values = list(tone_scores.values())
     fig = go.Figure(
         go.Bar(
             x=values,
             y=labels,
             orientation="h",
             marker={
                 "color": "#4f46e5",  # Indigo
                 # A bar's outline is configured under marker.line; go.Bar has
                 # no top-level ``bordercolor`` property and plotly raises on
                 # unknown properties.
                 "line": {"color": "white", "width": 1},
             },
         )
     )
     fig.update_layout(
         title={"text": "Emotional Tone Confidence", "font": {"size": 16}},
         xaxis_title="Confidence Matrix",
         height=280,
         margin=dict(l=20, r=20, t=60, b=20),
         yaxis={"categoryorder": "total ascending"},
         plot_bgcolor="rgba(0,0,0,0)",
     )
     return fig
 def _highlight_keywords(text: str, keywords: typing.List[str]) -> str:
+    """Wraps keywords in HTML tags for visual highlighting."""
     highlighted_text = text
     for kw in keywords:
         pattern = re.compile(rf"\b({re.escape(kw)})\b", re.IGNORECASE)
         highlighted_text = pattern.sub(
+            r"<span style='background-color: #fef08a; font-weight: 600; padding: 0.1rem 0.2rem; border-radius: 4px;'>\1</span>",
             highlighted_text,
         )
     return highlighted_text
+st.set_page_config(page_title="FrameVis | Media Framing", layout="wide")
+st.title("FrameVis")
+st.markdown("##### Media bias and framing effects across global news sources.")
+st.divider()
+with st.spinner("Starting NLP model..."):
+    _load_nlp_models()
+# Source Inputs
 col1, col2 = st.columns(2)
 with col1:
     user_article_a = st.text_area(
+        "Source A",
+        value=ARTICLE_A.strip(),
+        height=220,
+        help="Paste the raw text of the first article you wish to analyze."
     )
+    should_analyze_a = st.button("Process Source A", use_container_width=True)
 with col2:
     user_article_b = st.text_area(
+        "Source B",
+        value=ARTICLE_B.strip(),
+        height=220,
+        help="Paste the raw text of the second article for comparison."
     )
+    should_analyze_b = st.button("Process Source B", use_container_width=True)
+st.write("") # Spacer
+# Analysis Display
 if should_analyze_a or should_analyze_b:
+    st.markdown("### Framing Comparison")
     res_col1, res_col2 = st.columns(2)
     if should_analyze_a and user_article_a:
+        with st.spinner("Processing Source A..."):
             results_a = analyze_article(user_article_a)
             with res_col1:
+                # Top Metrics
+                m1, m2 = st.columns(2)
+                m1.metric("Subjectivity", f"{results_a['subjectivity_score']:.2f}", help="0.0 is entirely factual/objective. 1.0 is highly opinionated.")
+                m2.metric("Primary Tone", results_a['primary_tone'].title())
+                # Charts
+                st.plotly_chart(_create_sentiment_gauge(results_a["sentiment_score"], "Sentiment Bias"), use_container_width=True)
+                st.plotly_chart(_create_tone_bar_chart(results_a["tone_scores"]), use_container_width=True)
+                # Context Highlighting
+                st.markdown("**Semantic Fingerprint (Keyphrases):**")
+                annotated_text = _highlight_keywords(user_article_a, results_a["keywords"])
+                st.markdown(f"<div style='background-color: #f8fafc; padding: 1rem; border-radius: 8px; border: 1px solid #e2e8f0;'>{annotated_text}</div>", unsafe_allow_html=True)
     if should_analyze_b and user_article_b:
+        with st.spinner("Processing Source B..."):
             results_b = analyze_article(user_article_b)
             with res_col2:
+                # Top Metrics
+                m1, m2 = st.columns(2)
+                m1.metric("Subjectivity", f"{results_b['subjectivity_score']:.2f}", help="0.0 is entirely factual/objective. 1.0 is highly opinionated.")
+                m2.metric("Primary Tone", results_b['primary_tone'].title())
+                # Charts
+                st.plotly_chart(_create_sentiment_gauge(results_b["sentiment_score"], "Sentiment Bias"), use_container_width=True)
+                st.plotly_chart(_create_tone_bar_chart(results_b["tone_scores"]), use_container_width=True)
+                # Context Highlighting
+                st.markdown("**Semantic Fingerprint (Keyphrases):**")
+                annotated_text = _highlight_keywords(user_article_b, results_b["keywords"])
+                st.markdown(f"<div style='background-color: #f8fafc; padding: 1rem; border-radius: 8px; border: 1px solid #e2e8f0;'>{annotated_text}</div>", unsafe_allow_html=True)