alexoh2020 committed on
Commit
0f2373c
·
1 Parent(s): 097fe34

added VADER sentiment analysis

Browse files
Files changed (5) hide show
  1. .gitignore +2 -0
  2. config.json +4 -0
  3. requirements.txt +2 -1
  4. src/api_handler.py +29 -35
  5. src/streamlit_app.py +16 -10
.gitignore CHANGED
@@ -10,6 +10,8 @@ __pycache__/
10
  # Virtual environment
11
  .venv/
12
  venv/
 
 
13
 
14
  # IDE
15
  .vscode/
 
10
  # Virtual environment
11
  .venv/
12
  venv/
13
+ .env/
14
+ env/
15
 
16
  # IDE
17
  .vscode/
config.json CHANGED
@@ -28,5 +28,9 @@
28
  "AI breakthrough promises to revolutionize healthcare",
29
  "Concerns grow over AI job displacement",
30
  "New machine learning model shows mixed results"
 
 
 
 
31
  ]
32
  }
 
28
  "AI breakthrough promises to revolutionize healthcare",
29
  "Concerns grow over AI job displacement",
30
  "New machine learning model shows mixed results"
31
+ ],
32
+ "model_options":[
33
+ "Vader",
34
+ "TextBlob"
35
  ]
36
  }
requirements.txt CHANGED
@@ -4,4 +4,5 @@ requests>=2.31.0
4
  python-dotenv>=1.0.0
5
  textblob>=0.17.1
6
  plotly>=5.15.0
7
- numpy>=1.24.0
 
 
4
  python-dotenv>=1.0.0
5
  textblob>=0.17.1
6
  plotly>=5.15.0
7
+ numpy>=1.24.0
8
+ vaderSentiment>=3.3.2
src/api_handler.py CHANGED
@@ -10,6 +10,8 @@ import json
10
  from dotenv import load_dotenv
11
  from textblob import TextBlob
12
  from typing import List, Dict, Optional
 
 
13
 
14
  # Load environment variables
15
  load_dotenv()
@@ -77,7 +79,7 @@ class AINewsAnalyzer:
77
  print(f"Request failed: {e}")
78
  return []
79
 
80
- def analyze_sentiment(self, text: str) -> Dict:
81
  """
82
  Analyze sentiment of given text using TextBlob
83
 
@@ -94,30 +96,39 @@ class AINewsAnalyzer:
94
  'label': 'neutral',
95
  'confidence': 0.0
96
  }
97
-
98
  blob = TextBlob(text)
99
- polarity = blob.sentiment.polarity
100
  subjectivity = blob.sentiment.subjectivity
101
-
102
- # Determine sentiment label
103
- if polarity > 0.1:
 
 
 
 
 
 
 
 
 
 
 
104
  label = 'positive'
105
- elif polarity < -0.1:
106
  label = 'negative'
107
  else:
108
  label = 'neutral'
109
 
 
110
  # Calculate confidence (distance from neutral)
111
  confidence = abs(polarity)
112
-
113
- return {
114
  'polarity': polarity,
115
  'subjectivity': subjectivity,
116
  'label': label,
117
  'confidence': confidence
118
  }
119
-
120
- def process_news_articles(self, articles: List[Dict]) -> pd.DataFrame:
121
  """
122
  Process news articles and add sentiment analysis
123
 
@@ -135,15 +146,14 @@ class AINewsAnalyzer:
135
  continue
136
 
137
  # Analyze sentiment of title and description
138
- title_sentiment = self.analyze_sentiment(article['title'])
139
- description_sentiment = self.analyze_sentiment(article.get('description', ''))
140
 
141
  # Combine title and description sentiment (weighted toward title)
142
  combined_polarity = (title_sentiment['polarity'] * 0.7 +
143
  description_sentiment['polarity'] * 0.3)
144
  combined_subjectivity = (title_sentiment['subjectivity'] * 0.7 +
145
  description_sentiment['subjectivity'] * 0.3)
146
-
147
  # Determine overall sentiment
148
  if combined_polarity > 0.1:
149
  overall_sentiment = 'positive'
@@ -183,7 +193,8 @@ class AINewsAnalyzer:
183
  def get_ai_news_with_sentiment(self,
184
  query: str = "artificial intelligence",
185
  days: int = 7,
186
- sources: Optional[str] = None) -> pd.DataFrame:
 
187
  """
188
  Complete pipeline: fetch news and analyze sentiment
189
 
@@ -207,26 +218,10 @@ class AINewsAnalyzer:
207
  print(f"Found {len(articles)} articles. Analyzing sentiment...")
208
 
209
  # Process and analyze
210
- df = self.process_news_articles(articles)
211
 
212
- print(f"Processed {len(df)} articles with sentiment analysis.")
213
  return df
214
-
215
- def fetch_ai_news(query="artificial intelligence", days=7, sources=None):
216
- """Standalone function to fetch AI news"""
217
- analyzer = AINewsAnalyzer()
218
- return analyzer.fetch_ai_news(query, days, sources=sources)
219
-
220
- def analyze_sentiment(text):
221
- """Standalone function to analyze sentiment"""
222
- analyzer = AINewsAnalyzer()
223
- return analyzer.analyze_sentiment(text)
224
-
225
- def get_ai_news_with_sentiment(query="artificial intelligence", days=7, sources=None):
226
- """Standalone function for complete pipeline"""
227
- analyzer = AINewsAnalyzer()
228
- return analyzer.get_ai_news_with_sentiment(query, days, sources)
229
-
230
  def load_config():
231
  """Load configuration from config.json"""
232
  with open('config.json', 'r') as f:
@@ -247,8 +242,7 @@ if __name__ == "__main__":
247
  for text in test_texts:
248
  sentiment = analyzer.analyze_sentiment(text)
249
  print(f"Text: {text}")
250
- print(f"Sentiment: {sentiment['label']} (polarity: {sentiment['polarity']:.2f})")
251
- print()
252
 
253
  # Test news fetching
254
  print("Fetching recent AI news...")
 
10
  from dotenv import load_dotenv
11
  from textblob import TextBlob
12
  from typing import List, Dict, Optional
13
+ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer as SIA
14
+
15
 
16
  # Load environment variables
17
  load_dotenv()
 
79
  print(f"Request failed: {e}")
80
  return []
81
 
82
+ def analyze_sentiment(self, text: str, model: str) -> Dict:
83
  """
84
  Analyze sentiment of given text (polarity via VADER or TextBlob, subjectivity via TextBlob)
85
 
 
96
  'label': 'neutral',
97
  'confidence': 0.0
98
  }
 
99
  blob = TextBlob(text)
 
100
  subjectivity = blob.sentiment.subjectivity
101
+
102
+ # implement Vader Analysis for polarity scores
103
+ if model == "Vader":
104
+ vader = SIA()
105
+ fullpolarity = vader.polarity_scores(text)
106
+ polarity=fullpolarity['compound']
107
+ polarity_thresh = 0.05
108
+ # otherwise fall back to TextBlob polarity (threshold 0.1)
109
+ else:
110
+ polarity = blob.sentiment.polarity
111
+ polarity_thresh = 0.1
112
+
113
+ # Determine sentiment label through polarity threshold
114
+ if polarity > polarity_thresh:
115
  label = 'positive'
116
+ elif polarity < -polarity_thresh:
117
  label = 'negative'
118
  else:
119
  label = 'neutral'
120
 
121
+
122
  # Calculate confidence (distance from neutral)
123
  confidence = abs(polarity)
124
+ res = {
 
125
  'polarity': polarity,
126
  'subjectivity': subjectivity,
127
  'label': label,
128
  'confidence': confidence
129
  }
130
+ return res
131
+ def process_news_articles(self, articles: List[Dict], model: str) -> pd.DataFrame:
132
  """
133
  Process news articles and add sentiment analysis
134
 
 
146
  continue
147
 
148
  # Analyze sentiment of title and description
149
+ title_sentiment = self.analyze_sentiment(article['title'], model=model)
150
+ description_sentiment = self.analyze_sentiment(article['description'], model=model)
151
 
152
  # Combine title and description sentiment (weighted toward title)
153
  combined_polarity = (title_sentiment['polarity'] * 0.7 +
154
  description_sentiment['polarity'] * 0.3)
155
  combined_subjectivity = (title_sentiment['subjectivity'] * 0.7 +
156
  description_sentiment['subjectivity'] * 0.3)
 
157
  # Determine overall sentiment
158
  if combined_polarity > 0.1:
159
  overall_sentiment = 'positive'
 
193
  def get_ai_news_with_sentiment(self,
194
  query: str = "artificial intelligence",
195
  days: int = 7,
196
+ sources: Optional[str] = None,
197
+ model: str = "Textblob") -> pd.DataFrame:
198
  """
199
  Complete pipeline: fetch news and analyze sentiment
200
 
 
218
  print(f"Found {len(articles)} articles. Analyzing sentiment...")
219
 
220
  # Process and analyze
221
+ df = self.process_news_articles(articles, model=model)
222
 
223
+ print(f"Processed {len(df)} articles with sentiment analysis. \nUsed {model} for polarity analysis and Textblob for sentiment analysis.")
224
  return df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  def load_config():
226
  """Load configuration from config.json"""
227
  with open('config.json', 'r') as f:
 
242
  for text in test_texts:
243
  sentiment = analyzer.analyze_sentiment(text)
244
  print(f"Text: {text}")
245
+ print(f"Sentiment: {sentiment['label']} (polarity: {sentiment['polarity']:.2f}\n")
 
246
 
247
  # Test news fetching
248
  print("Fetching recent AI news...")
src/streamlit_app.py CHANGED
@@ -19,7 +19,7 @@ st.set_page_config(
19
 
20
  # Custom CSS for better styling
21
  st.markdown("""
22
- <style>
23
  .main-header {
24
  font-size: 2.5rem;
25
  font-weight: bold;
@@ -46,11 +46,11 @@ def load_config():
46
  return json.load(f)
47
 
48
  @st.cache_data(ttl=1800) # Cache for 30 minutes
49
- def load_news_data(query, days, sources=None):
50
  """Load and cache news data"""
51
  try:
52
  analyzer = AINewsAnalyzer()
53
- df = analyzer.get_ai_news_with_sentiment(query=query, days=days, sources=sources)
54
  return df, None
55
  except Exception as e:
56
  return pd.DataFrame(), str(e)
@@ -105,7 +105,7 @@ def create_source_analysis(df):
105
 
106
  return fig
107
 
108
- def create_polarity_distribution(df):
109
  """Create sentiment polarity distribution"""
110
  if df.empty:
111
  return None
@@ -119,10 +119,9 @@ def create_polarity_distribution(df):
119
  )
120
 
121
  # Add vertical lines for sentiment boundaries
122
- fig.add_vline(x=0.1, line_dash="dash", line_color="green", annotation_text="Positive Threshold")
123
- fig.add_vline(x=-0.1, line_dash="dash", line_color="red", annotation_text="Negative Threshold")
124
- fig.add_vline(x=0, line_dash="dash", line_color="gray", annotation_text="Neutral")
125
-
126
  return fig
127
 
128
 
@@ -150,6 +149,12 @@ def main():
150
  "Or enter custom search:",
151
  placeholder="e.g., 'generative AI'"
152
  )
 
 
 
 
 
 
153
 
154
  # Use custom query if provided
155
  final_query = custom_query if custom_query else selected_query
@@ -186,7 +191,7 @@ def main():
186
  # Load data
187
  if st.sidebar.button("🚀 Analyze News", type="primary"):
188
  with st.spinner(f"Fetching and analyzing news about '{final_query}'..."):
189
- df, error = load_news_data(final_query, days, sources)
190
 
191
  if error:
192
  st.error(f"Error loading data: {error}")
@@ -242,7 +247,8 @@ def main():
242
  st.plotly_chart(source_fig, use_container_width=True)
243
 
244
  # Row 2: Polarity distribution (full width)
245
- polarity_fig = create_polarity_distribution(df)
 
246
  if polarity_fig:
247
  st.plotly_chart(polarity_fig, use_container_width=True)
248
 
 
19
 
20
  # Custom CSS for better styling
21
  st.markdown("""
22
+ <style>
23
  .main-header {
24
  font-size: 2.5rem;
25
  font-weight: bold;
 
46
  return json.load(f)
47
 
48
  @st.cache_data(ttl=1800) # Cache for 30 minutes
49
+ def load_news_data(query, days, sources=None, model="TextBlob"):
50
  """Load and cache news data"""
51
  try:
52
  analyzer = AINewsAnalyzer()
53
+ df = analyzer.get_ai_news_with_sentiment(query=query, days=days, sources=sources, model=model)
54
  return df, None
55
  except Exception as e:
56
  return pd.DataFrame(), str(e)
 
105
 
106
  return fig
107
 
108
+ def create_polarity_distribution(df, thresh: float):
109
  """Create sentiment polarity distribution"""
110
  if df.empty:
111
  return None
 
119
  )
120
 
121
  # Add vertical lines for sentiment boundaries
122
+ fig.add_vline(x=thresh, line_dash="dash", line_color="green", annotation_text="Positive Threshold", annotation_position="top right")
123
+ fig.add_vline(x=-thresh, line_dash="dash", line_color="red", annotation_text="Negative Threshold", annotation_position="top left")
124
+ fig.add_vline(x=0, line_dash="dash", line_color="gray", annotation_text="Neutral", annotation_position="top")
 
125
  return fig
126
 
127
 
 
149
  "Or enter custom search:",
150
  placeholder="e.g., 'generative AI'"
151
  )
152
+
153
+ model_query = st.sidebar.selectbox(
154
+ "📝 Search a Sentiment Model:",
155
+ options=config["model_options"],
156
+ index=0
157
+ )
158
 
159
  # Use custom query if provided
160
  final_query = custom_query if custom_query else selected_query
 
191
  # Load data
192
  if st.sidebar.button("🚀 Analyze News", type="primary"):
193
  with st.spinner(f"Fetching and analyzing news about '{final_query}'..."):
194
+ df, error = load_news_data(final_query, days, sources=sources, model=model_query)
195
 
196
  if error:
197
  st.error(f"Error loading data: {error}")
 
247
  st.plotly_chart(source_fig, use_container_width=True)
248
 
249
  # Row 2: Polarity distribution (full width)
250
+ thresh = 0.05 if model_query == "Vader" else 0.1
251
+ polarity_fig = create_polarity_distribution(df, thresh=thresh)
252
  if polarity_fig:
253
  st.plotly_chart(polarity_fig, use_container_width=True)
254