Spaces:

ai-sentiment-group
/

BootcampFinalProject

Sleeping

App Files Files Community

Jonas Neves committed on Aug 25, 2025

Commit

2bd9c76

2 Parent(s): 2696124 fead880

Merge branch 'main' of https://github.com/alexoh2bd/BootcampFinalProject

Browse files

Files changed (5) hide show

.gitignore +2 -0
config.json +4 -0
requirements.txt +0 -0
src/api_handler.py +29 -35
src/streamlit_app.py +103 -98

.gitignore CHANGED Viewed

@@ -10,6 +10,8 @@ __pycache__/
 # Virtual environment
 .venv/
 venv/
 # IDE
 .vscode/

 # Virtual environment
 .venv/
 venv/
+.env/
+env/
 # IDE
 .vscode/

config.json CHANGED Viewed

@@ -28,5 +28,9 @@
     "AI breakthrough promises to revolutionize healthcare",
     "Concerns grow over AI job displacement",
     "New machine learning model shows mixed results"
   ]
 }

     "AI breakthrough promises to revolutionize healthcare",
     "Concerns grow over AI job displacement",
     "New machine learning model shows mixed results"
+  ],
+  "model_options":[
+    "Vader",
+    "TextBlob"
   ]
 }

requirements.txt CHANGED Viewed

Binary files a/requirements.txt and b/requirements.txt differ

src/api_handler.py CHANGED Viewed

@@ -10,6 +10,8 @@ import json
 from dotenv import load_dotenv
 from textblob import TextBlob
 from typing import List, Dict, Optional
 # Load environment variables
 load_dotenv()
@@ -77,7 +79,7 @@ class AINewsAnalyzer:
             print(f"Request failed: {e}")
             return []
-    def analyze_sentiment(self, text: str) -> Dict:
         """
         Analyze sentiment of given text using TextBlob
@@ -94,30 +96,39 @@ class AINewsAnalyzer:
                 'label': 'neutral',
                 'confidence': 0.0
             }
         blob = TextBlob(text)
-        polarity = blob.sentiment.polarity
         subjectivity = blob.sentiment.subjectivity
-        # Determine sentiment label
-        if polarity > 0.1:
             label = 'positive'
-        elif polarity < -0.1:
             label = 'negative'
         else:
             label = 'neutral'
         # Calculate confidence (distance from neutral)
         confidence = abs(polarity)
-        return {
             'polarity': polarity,
             'subjectivity': subjectivity,
             'label': label,
             'confidence': confidence
         }
-    def process_news_articles(self, articles: List[Dict]) -> pd.DataFrame:
         """
         Process news articles and add sentiment analysis
@@ -135,15 +146,14 @@ class AINewsAnalyzer:
                 continue
             # Analyze sentiment of title and description
-            title_sentiment = self.analyze_sentiment(article['title'])
-            description_sentiment = self.analyze_sentiment(article.get('description', ''))
             # Combine title and description sentiment (weighted toward title)
             combined_polarity = (title_sentiment['polarity'] * 0.7 +
                                description_sentiment['polarity'] * 0.3)
             combined_subjectivity = (title_sentiment['subjectivity'] * 0.7 +
                                    description_sentiment['subjectivity'] * 0.3)
             # Determine overall sentiment
             if combined_polarity > 0.1:
                 overall_sentiment = 'positive'
@@ -183,7 +193,8 @@ class AINewsAnalyzer:
     def get_ai_news_with_sentiment(self,
                                    query: str = "artificial intelligence",
                                    days: int = 7,
-                                   sources: Optional[str] = None) -> pd.DataFrame:
         """
         Complete pipeline: fetch news and analyze sentiment
@@ -207,26 +218,10 @@ class AINewsAnalyzer:
         print(f"Found {len(articles)} articles. Analyzing sentiment...")
         # Process and analyze
-        df = self.process_news_articles(articles)
-        print(f"Processed {len(df)} articles with sentiment analysis.")
         return df
-def fetch_ai_news(query="artificial intelligence", days=7, sources=None):
-    """Standalone function to fetch AI news"""
-    analyzer = AINewsAnalyzer()
-    return analyzer.fetch_ai_news(query, days, sources=sources)
-def analyze_sentiment(text):
-    """Standalone function to analyze sentiment"""
-    analyzer = AINewsAnalyzer()
-    return analyzer.analyze_sentiment(text)
-def get_ai_news_with_sentiment(query="artificial intelligence", days=7, sources=None):
-    """Standalone function for complete pipeline"""
-    analyzer = AINewsAnalyzer()
-    return analyzer.get_ai_news_with_sentiment(query, days, sources)
 def load_config():
     """Load configuration from config.json"""
     with open('config.json', 'r') as f:
@@ -247,8 +242,7 @@ if __name__ == "__main__":
     for text in test_texts:
         sentiment = analyzer.analyze_sentiment(text)
         print(f"Text: {text}")
-        print(f"Sentiment: {sentiment['label']} (polarity: {sentiment['polarity']:.2f})")
-        print()
     # Test news fetching
     print("Fetching recent AI news...")

 from dotenv import load_dotenv
 from textblob import TextBlob
 from typing import List, Dict, Optional
+from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer as SIA
 # Load environment variables
 load_dotenv()
             print(f"Request failed: {e}")
             return []
+    def analyze_sentiment(self, text: str, model: str) -> Dict:
         """
         Analyze sentiment of given text using TextBlob
                 'label': 'neutral',
                 'confidence': 0.0
             }
         blob = TextBlob(text)
         subjectivity = blob.sentiment.subjectivity
+        # implement Vader Analysis for polarity scores
+        if model == "Vader":
+            vader = SIA()
+            fullpolarity = vader.polarity_scores(text)
+            polarity=fullpolarity['compound']
+            polarity_thresh = 0.05
+        # otherwise
+        else:
+            polarity = blob.sentiment.polarity
+            polarity_thresh = 0.1
+        # Determine sentiment label through polarity threshold
+        if polarity > polarity_thresh:
             label = 'positive'
+        elif polarity < -polarity_thresh:
             label = 'negative'
         else:
             label = 'neutral'
         # Calculate confidence (distance from neutral)
         confidence = abs(polarity)
+        res = {
             'polarity': polarity,
             'subjectivity': subjectivity,
             'label': label,
             'confidence': confidence
         }
+        return res
+    def process_news_articles(self, articles: List[Dict], model: str) -> pd.DataFrame:
         """
         Process news articles and add sentiment analysis
                 continue
             # Analyze sentiment of title and description
+            title_sentiment = self.analyze_sentiment(article['title'], model=model)
+            description_sentiment = self.analyze_sentiment(article['description'], model=model)
             # Combine title and description sentiment (weighted toward title)
             combined_polarity = (title_sentiment['polarity'] * 0.7 +
                                description_sentiment['polarity'] * 0.3)
             combined_subjectivity = (title_sentiment['subjectivity'] * 0.7 +
                                    description_sentiment['subjectivity'] * 0.3)
             # Determine overall sentiment
             if combined_polarity > 0.1:
                 overall_sentiment = 'positive'
     def get_ai_news_with_sentiment(self,
                                    query: str = "artificial intelligence",
                                    days: int = 7,
+                                   sources: Optional[str] = None,
+                                   model: str = "Textblob") -> pd.DataFrame:
         """
         Complete pipeline: fetch news and analyze sentiment
         print(f"Found {len(articles)} articles. Analyzing sentiment...")
         # Process and analyze
+        df = self.process_news_articles(articles, model=model)
+        print(f"Processed {len(df)} articles with sentiment analysis. \nUsed {model} for polarity analysis and Textblob for sentiment analysis.")
         return df
 def load_config():
     """Load configuration from config.json"""
     with open('config.json', 'r') as f:
     for text in test_texts:
         sentiment = analyzer.analyze_sentiment(text)
         print(f"Text: {text}")
+        print(f"Sentiment: {sentiment['label']} (polarity: {sentiment['polarity']:.2f}\n")
     # Test news fetching
     print("Fetching recent AI news...")

src/streamlit_app.py CHANGED Viewed

@@ -21,7 +21,7 @@ st.set_page_config(
 # Custom CSS for better styling
 st.markdown("""
-<style>
     .main-header {
         font-size: 2.5rem;
         font-weight: bold;
@@ -48,11 +48,11 @@ def load_config():
         return json.load(f)
 @st.cache_data(ttl=1800)  # Cache for 30 minutes
-def load_news_data(query, days, sources=None):
     """Load and cache news data"""
     try:
         analyzer = AINewsAnalyzer()
-        df = analyzer.get_ai_news_with_sentiment(query=query, days=days, sources=sources)
         return df, None
     except Exception as e:
         return pd.DataFrame(), str(e)
@@ -107,7 +107,7 @@ def create_source_analysis(df):
     return fig
-def create_polarity_distribution(df):
     """Create sentiment polarity distribution"""
     if df.empty:
         return None
@@ -121,10 +121,9 @@ def create_polarity_distribution(df):
     )
     # Add vertical lines for sentiment boundaries
-    fig.add_vline(x=0.1, line_dash="dash", line_color="green", annotation_text="Positive Threshold")
-    fig.add_vline(x=-0.1, line_dash="dash", line_color="red", annotation_text="Negative Threshold")
-    fig.add_vline(x=0, line_dash="dash", line_color="gray", annotation_text="Neutral")
     return fig
@@ -152,6 +151,12 @@ def main():
         "Or enter custom search:",
         placeholder="e.g., 'generative AI'"
     )
     # Use custom query if provided
     final_query = custom_query if custom_query else selected_query
@@ -188,7 +193,7 @@ def main():
     # Load data
     if st.sidebar.button("🚀 Analyze News", type="primary"):
         with st.spinner(f"Fetching and analyzing news about '{final_query}'..."):
-            df, error = load_news_data(final_query, days, sources)
             if error:
                 st.error(f"Error loading data: {error}")
@@ -203,105 +208,105 @@ def main():
             st.session_state.query = final_query
             st.session_state.days = days
-# ===== Display results if data is available =====
-if 'df' in st.session_state and not st.session_state.df.empty:
-    df = st.session_state.df
-    # ===== Summary Metrics =====
-    st.markdown("### 📊 Analysis Summary")
-    col1, col2, col3, col4 = st.columns(4)
-    with col1:
-        st.metric("📰 Total Articles", len(df))
-    with col2:
-        avg_polarity = df['sentiment_polarity'].mean()
-        delta_polarity = f"{avg_polarity:+.3f}"
-        st.metric("🎭 Avg Sentiment", f"{avg_polarity:.3f}", delta_polarity)
-    with col3:
-        positive_pct = (len(df[df['sentiment_label'] == 'positive']) / len(df) * 100)
-        st.metric("😊 Positive %", f"{positive_pct:.1f}%")
-    with col4:
-        unique_sources = df['source'].nunique()
-        st.metric("📺 News Sources", unique_sources)
-    # ===== Charts =====
-    st.markdown("### 📈 Visual Analysis")
-    col1, col2 = st.columns(2)
-    # Sentiment Distribution
-    dist_fig = create_sentiment_distribution(df)
-    if dist_fig:
-        st.plotly_chart(dist_fig, use_container_width=True, key="dist_fig")
-        # Export buttons
-        buf = io.BytesIO()
-        dist_fig.update_layout(template="plotly_white")
-        dist_fig.update_layout(plot_bgcolor='white', paper_bgcolor='white')  # 设置白底
-        dist_fig.write_image(buf, format="png", engine="kaleido")
-        st.download_button("📷 Download Distribution Chart as PNG", buf.getvalue(),
-                        "distribution_chart.png", mime="image/png")
-        st.download_button("🌐 Download Distribution Chart as HTML",
-                        dist_fig.to_html().encode("utf-8"), "distribution_chart.html",
-                        mime="text/html")
-    # Source Analysis
-    source_fig = create_source_analysis(df)
-    if source_fig:
-        st.plotly_chart(source_fig, use_container_width=True, key="source_fig")
-        buf = io.BytesIO()
-        source_fig.update_layout(template="plotly_white")
-        source_fig.update_layout(plot_bgcolor='white', paper_bgcolor='white')  # 白底
-        source_fig.write_image(buf, format="png", engine="kaleido")
-        st.download_button("📷 Download Source Chart as PNG", buf.getvalue(),
-                        "source_chart.png", mime="image/png")
-        st.download_button("🌐 Download Source Chart as HTML",
-                        source_fig.to_html().encode("utf-8"), "source_chart.html",
-                        mime="text/html")
-    # Polarity Distribution
-    polarity_fig = create_polarity_distribution(df)
-    if polarity_fig:
-        st.plotly_chart(polarity_fig, use_container_width=True, key="polarity_fig")
-        buf = io.BytesIO()
-        polarity_fig.update_layout(template="plotly_white")
-        polarity_fig.update_layout(plot_bgcolor='white', paper_bgcolor='white')  # 白底
-        polarity_fig.write_image(buf, format="png", engine="kaleido")
-        st.download_button("📷 Download Polarity Chart as PNG", buf.getvalue(),
-                        "polarity_chart.png", mime="image/png")
-        st.download_button("🌐 Download Polarity Chart as HTML",
-                        polarity_fig.to_html().encode("utf-8"), "polarity_chart.html",
-                        mime="text/html")
-    # ===== Export CSV button =====
-    csv_data = df.to_csv(index=False).encode('utf-8')
-    st.download_button(
-        label="💾 Export Analysis as CSV",
-        data=csv_data,
-        file_name=f"ai_news_analysis_{st.session_state.query.replace(' ', '_')}.csv",
-        mime='text/csv'
-    )
-else:
-    # Welcome message
-    st.info("👋 Welcome! Configure your analysis settings in the sidebar and click 'Analyze News' to get started.")
-    # Sample visualization or instructions
-    st.markdown("""
-    ### 🚀 How to Use:
-    1. **Choose a topic** from the dropdown or enter your own search term
-    2. **Select time range** (1-30 days) to analyze recent news
-    3. **Pick news sources** or leave as 'All Sources' for comprehensive coverage
-    4. **Click 'Analyze News'** to fetch and analyze articles
-    ### 📊 What You'll Get:
-    - **Sentiment Analysis** of headlines and descriptions
-    - **Interactive Charts** showing trends over time
-    - **Source Breakdown** to see which outlets cover your topic
-    """)

 # Custom CSS for better styling
 st.markdown("""
+<style>
     .main-header {
         font-size: 2.5rem;
         font-weight: bold;
         return json.load(f)
 @st.cache_data(ttl=1800)  # Cache for 30 minutes
+def load_news_data(query, days, sources=None, model="TextBlob"):
     """Load and cache news data"""
     try:
         analyzer = AINewsAnalyzer()
+        df = analyzer.get_ai_news_with_sentiment(query=query, days=days, sources=sources, model=model)
         return df, None
     except Exception as e:
         return pd.DataFrame(), str(e)
     return fig
+def create_polarity_distribution(df, thresh: float):
     """Create sentiment polarity distribution"""
     if df.empty:
         return None
     )
     # Add vertical lines for sentiment boundaries
+    fig.add_vline(x=thresh, line_dash="dash", line_color="green", annotation_text="Positive Threshold", annotation_position="top right")
+    fig.add_vline(x=-thresh, line_dash="dash", line_color="red", annotation_text="Negative Threshold", annotation_position="top left")
+    fig.add_vline(x=0, line_dash="dash", line_color="gray", annotation_text="Neutral", annotation_position="top")
     return fig
         "Or enter custom search:",
         placeholder="e.g., 'generative AI'"
     )
+    model_query = st.sidebar.selectbox(
+        "📝 Search a Sentiment Model:",
+        options=config["model_options"],
+        index=0
+    )
     # Use custom query if provided
     final_query = custom_query if custom_query else selected_query
     # Load data
     if st.sidebar.button("🚀 Analyze News", type="primary"):
         with st.spinner(f"Fetching and analyzing news about '{final_query}'..."):
+            df, error = load_news_data(final_query, days, sources=sources, model=model_query)
             if error:
                 st.error(f"Error loading data: {error}")
             st.session_state.query = final_query
             st.session_state.days = days
+    # ===== Display results if data is available =====
+    if 'df' in st.session_state and not st.session_state.df.empty:
+        df = st.session_state.df
+        # ===== Summary Metrics =====
+        st.markdown("### 📊 Analysis Summary")
+        col1, col2, col3, col4 = st.columns(4)
+        with col1:
+            st.metric("📰 Total Articles", len(df))
+        with col2:
+            avg_polarity = df['sentiment_polarity'].mean()
+            delta_polarity = f"{avg_polarity:+.3f}"
+            st.metric("🎭 Avg Sentiment", f"{avg_polarity:.3f}", delta_polarity)
+        with col3:
+            positive_pct = (len(df[df['sentiment_label'] == 'positive']) / len(df) * 100)
+            st.metric("😊 Positive %", f"{positive_pct:.1f}%")
+        with col4:
+            unique_sources = df['source'].nunique()
+            st.metric("📺 News Sources", unique_sources)
+        # ===== Charts =====
+        st.markdown("### 📈 Visual Analysis")
+        col1, col2 = st.columns(2)
+        # Sentiment Distribution
+        dist_fig = create_sentiment_distribution(df)
+        if dist_fig:
+            st.plotly_chart(dist_fig, use_container_width=True, key="dist_fig")
+            # Export buttons
+            buf = io.BytesIO()
+            dist_fig.update_layout(template="plotly_white")
+            dist_fig.update_layout(plot_bgcolor='white', paper_bgcolor='white')  # 设置白底
+            dist_fig.write_image(buf, format="png", engine="kaleido")
+            st.download_button("📷 Download Distribution Chart as PNG", buf.getvalue(),
+                            "distribution_chart.png", mime="image/png")
+            st.download_button("🌐 Download Distribution Chart as HTML",
+                            dist_fig.to_html().encode("utf-8"), "distribution_chart.html",
+                            mime="text/html")
+        # Source Analysis
+        source_fig = create_source_analysis(df)
+        if source_fig:
+            st.plotly_chart(source_fig, use_container_width=True, key="source_fig")
+            buf = io.BytesIO()
+            source_fig.update_layout(template="plotly_white")
+            source_fig.update_layout(plot_bgcolor='white', paper_bgcolor='white')  # 白底
+            source_fig.write_image(buf, format="png", engine="kaleido")
+            st.download_button("📷 Download Source Chart as PNG", buf.getvalue(),
+                            "source_chart.png", mime="image/png")
+            st.download_button("🌐 Download Source Chart as HTML",
+                            source_fig.to_html().encode("utf-8"), "source_chart.html",
+                            mime="text/html")
+        # Polarity Distribution
+        polarity_fig = create_polarity_distribution(df)
+        if polarity_fig:
+            st.plotly_chart(polarity_fig, use_container_width=True, key="polarity_fig")
+            buf = io.BytesIO()
+            polarity_fig.update_layout(template="plotly_white")
+            polarity_fig.update_layout(plot_bgcolor='white', paper_bgcolor='white')  # 白底
+            polarity_fig.write_image(buf, format="png", engine="kaleido")
+            st.download_button("📷 Download Polarity Chart as PNG", buf.getvalue(),
+                            "polarity_chart.png", mime="image/png")
+            st.download_button("🌐 Download Polarity Chart as HTML",
+                            polarity_fig.to_html().encode("utf-8"), "polarity_chart.html",
+                            mime="text/html")
+        # ===== Export CSV button =====
+        csv_data = df.to_csv(index=False).encode('utf-8')
+        st.download_button(
+            label="💾 Export Analysis as CSV",
+            data=csv_data,
+            file_name=f"ai_news_analysis_{st.session_state.query.replace(' ', '_')}.csv",
+            mime='text/csv'
+        )
+    else:
+        # Welcome message
+        st.info("👋 Welcome! Configure your analysis settings in the sidebar and click 'Analyze News' to get started.")
+        # Sample visualization or instructions
+        st.markdown("""
+        ### 🚀 How to Use:
+        1. **Choose a topic** from the dropdown or enter your own search term
+        2. **Select time range** (1-30 days) to analyze recent news
+        3. **Pick news sources** or leave as 'All Sources' for comprehensive coverage
+        4. **Click 'Analyze News'** to fetch and analyze articles
+        ### 📊 What You'll Get:
+        - **Sentiment Analysis** of headlines and descriptions
+        - **Interactive Charts** showing trends over time
+        - **Source Breakdown** to see which outlets cover your topic
+        """)
+    pass