Jonas Neves committed on
Commit
2bd9c76
·
2 Parent(s): 2696124 fead880

Merge branch 'main' of https://github.com/alexoh2bd/BootcampFinalProject

Browse files
Files changed (5) hide show
  1. .gitignore +2 -0
  2. config.json +4 -0
  3. requirements.txt +0 -0
  4. src/api_handler.py +29 -35
  5. src/streamlit_app.py +103 -98
.gitignore CHANGED
@@ -10,6 +10,8 @@ __pycache__/
10
  # Virtual environment
11
  .venv/
12
  venv/
 
 
13
 
14
  # IDE
15
  .vscode/
 
10
  # Virtual environment
11
  .venv/
12
  venv/
13
+ .env/
14
+ env/
15
 
16
  # IDE
17
  .vscode/
config.json CHANGED
@@ -28,5 +28,9 @@
28
  "AI breakthrough promises to revolutionize healthcare",
29
  "Concerns grow over AI job displacement",
30
  "New machine learning model shows mixed results"
 
 
 
 
31
  ]
32
  }
 
28
  "AI breakthrough promises to revolutionize healthcare",
29
  "Concerns grow over AI job displacement",
30
  "New machine learning model shows mixed results"
31
+ ],
32
+ "model_options":[
33
+ "Vader",
34
+ "TextBlob"
35
  ]
36
  }
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
src/api_handler.py CHANGED
@@ -10,6 +10,8 @@ import json
10
  from dotenv import load_dotenv
11
  from textblob import TextBlob
12
  from typing import List, Dict, Optional
 
 
13
 
14
  # Load environment variables
15
  load_dotenv()
@@ -77,7 +79,7 @@ class AINewsAnalyzer:
77
  print(f"Request failed: {e}")
78
  return []
79
 
80
- def analyze_sentiment(self, text: str) -> Dict:
81
  """
82
  Analyze sentiment of given text using TextBlob
83
 
@@ -94,30 +96,39 @@ class AINewsAnalyzer:
94
  'label': 'neutral',
95
  'confidence': 0.0
96
  }
97
-
98
  blob = TextBlob(text)
99
- polarity = blob.sentiment.polarity
100
  subjectivity = blob.sentiment.subjectivity
101
-
102
- # Determine sentiment label
103
- if polarity > 0.1:
 
 
 
 
 
 
 
 
 
 
 
104
  label = 'positive'
105
- elif polarity < -0.1:
106
  label = 'negative'
107
  else:
108
  label = 'neutral'
109
 
 
110
  # Calculate confidence (distance from neutral)
111
  confidence = abs(polarity)
112
-
113
- return {
114
  'polarity': polarity,
115
  'subjectivity': subjectivity,
116
  'label': label,
117
  'confidence': confidence
118
  }
119
-
120
- def process_news_articles(self, articles: List[Dict]) -> pd.DataFrame:
121
  """
122
  Process news articles and add sentiment analysis
123
 
@@ -135,15 +146,14 @@ class AINewsAnalyzer:
135
  continue
136
 
137
  # Analyze sentiment of title and description
138
- title_sentiment = self.analyze_sentiment(article['title'])
139
- description_sentiment = self.analyze_sentiment(article.get('description', ''))
140
 
141
  # Combine title and description sentiment (weighted toward title)
142
  combined_polarity = (title_sentiment['polarity'] * 0.7 +
143
  description_sentiment['polarity'] * 0.3)
144
  combined_subjectivity = (title_sentiment['subjectivity'] * 0.7 +
145
  description_sentiment['subjectivity'] * 0.3)
146
-
147
  # Determine overall sentiment
148
  if combined_polarity > 0.1:
149
  overall_sentiment = 'positive'
@@ -183,7 +193,8 @@ class AINewsAnalyzer:
183
  def get_ai_news_with_sentiment(self,
184
  query: str = "artificial intelligence",
185
  days: int = 7,
186
- sources: Optional[str] = None) -> pd.DataFrame:
 
187
  """
188
  Complete pipeline: fetch news and analyze sentiment
189
 
@@ -207,26 +218,10 @@ class AINewsAnalyzer:
207
  print(f"Found {len(articles)} articles. Analyzing sentiment...")
208
 
209
  # Process and analyze
210
- df = self.process_news_articles(articles)
211
 
212
- print(f"Processed {len(df)} articles with sentiment analysis.")
213
  return df
214
-
215
- def fetch_ai_news(query="artificial intelligence", days=7, sources=None):
216
- """Standalone function to fetch AI news"""
217
- analyzer = AINewsAnalyzer()
218
- return analyzer.fetch_ai_news(query, days, sources=sources)
219
-
220
- def analyze_sentiment(text):
221
- """Standalone function to analyze sentiment"""
222
- analyzer = AINewsAnalyzer()
223
- return analyzer.analyze_sentiment(text)
224
-
225
- def get_ai_news_with_sentiment(query="artificial intelligence", days=7, sources=None):
226
- """Standalone function for complete pipeline"""
227
- analyzer = AINewsAnalyzer()
228
- return analyzer.get_ai_news_with_sentiment(query, days, sources)
229
-
230
  def load_config():
231
  """Load configuration from config.json"""
232
  with open('config.json', 'r') as f:
@@ -247,8 +242,7 @@ if __name__ == "__main__":
247
  for text in test_texts:
248
  sentiment = analyzer.analyze_sentiment(text)
249
  print(f"Text: {text}")
250
- print(f"Sentiment: {sentiment['label']} (polarity: {sentiment['polarity']:.2f})")
251
- print()
252
 
253
  # Test news fetching
254
  print("Fetching recent AI news...")
 
10
  from dotenv import load_dotenv
11
  from textblob import TextBlob
12
  from typing import List, Dict, Optional
13
+ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer as SIA
14
+
15
 
16
  # Load environment variables
17
  load_dotenv()
 
79
  print(f"Request failed: {e}")
80
  return []
81
 
82
+ def analyze_sentiment(self, text: str, model: str) -> Dict:
83
  """
84
  Analyze sentiment of given text using TextBlob
85
 
 
96
  'label': 'neutral',
97
  'confidence': 0.0
98
  }
 
99
  blob = TextBlob(text)
 
100
  subjectivity = blob.sentiment.subjectivity
101
+
102
+ # implement Vader Analysis for polarity scores
103
+ if model == "Vader":
104
+ vader = SIA()
105
+ fullpolarity = vader.polarity_scores(text)
106
+ polarity=fullpolarity['compound']
107
+ polarity_thresh = 0.05
108
+ # otherwise
109
+ else:
110
+ polarity = blob.sentiment.polarity
111
+ polarity_thresh = 0.1
112
+
113
+ # Determine sentiment label through polarity threshold
114
+ if polarity > polarity_thresh:
115
  label = 'positive'
116
+ elif polarity < -polarity_thresh:
117
  label = 'negative'
118
  else:
119
  label = 'neutral'
120
 
121
+
122
  # Calculate confidence (distance from neutral)
123
  confidence = abs(polarity)
124
+ res = {
 
125
  'polarity': polarity,
126
  'subjectivity': subjectivity,
127
  'label': label,
128
  'confidence': confidence
129
  }
130
+ return res
131
+ def process_news_articles(self, articles: List[Dict], model: str) -> pd.DataFrame:
132
  """
133
  Process news articles and add sentiment analysis
134
 
 
146
  continue
147
 
148
  # Analyze sentiment of title and description
149
+ title_sentiment = self.analyze_sentiment(article['title'], model=model)
150
+ description_sentiment = self.analyze_sentiment(article['description'], model=model)
151
 
152
  # Combine title and description sentiment (weighted toward title)
153
  combined_polarity = (title_sentiment['polarity'] * 0.7 +
154
  description_sentiment['polarity'] * 0.3)
155
  combined_subjectivity = (title_sentiment['subjectivity'] * 0.7 +
156
  description_sentiment['subjectivity'] * 0.3)
 
157
  # Determine overall sentiment
158
  if combined_polarity > 0.1:
159
  overall_sentiment = 'positive'
 
193
  def get_ai_news_with_sentiment(self,
194
  query: str = "artificial intelligence",
195
  days: int = 7,
196
+ sources: Optional[str] = None,
197
+ model: str = "Textblob") -> pd.DataFrame:
198
  """
199
  Complete pipeline: fetch news and analyze sentiment
200
 
 
218
  print(f"Found {len(articles)} articles. Analyzing sentiment...")
219
 
220
  # Process and analyze
221
+ df = self.process_news_articles(articles, model=model)
222
 
223
+ print(f"Processed {len(df)} articles with sentiment analysis. \nUsed {model} for polarity analysis and Textblob for sentiment analysis.")
224
  return df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  def load_config():
226
  """Load configuration from config.json"""
227
  with open('config.json', 'r') as f:
 
242
  for text in test_texts:
243
  sentiment = analyzer.analyze_sentiment(text)
244
  print(f"Text: {text}")
245
+ print(f"Sentiment: {sentiment['label']} (polarity: {sentiment['polarity']:.2f}\n")
 
246
 
247
  # Test news fetching
248
  print("Fetching recent AI news...")
src/streamlit_app.py CHANGED
@@ -21,7 +21,7 @@ st.set_page_config(
21
 
22
  # Custom CSS for better styling
23
  st.markdown("""
24
- <style>
25
  .main-header {
26
  font-size: 2.5rem;
27
  font-weight: bold;
@@ -48,11 +48,11 @@ def load_config():
48
  return json.load(f)
49
 
50
  @st.cache_data(ttl=1800) # Cache for 30 minutes
51
- def load_news_data(query, days, sources=None):
52
  """Load and cache news data"""
53
  try:
54
  analyzer = AINewsAnalyzer()
55
- df = analyzer.get_ai_news_with_sentiment(query=query, days=days, sources=sources)
56
  return df, None
57
  except Exception as e:
58
  return pd.DataFrame(), str(e)
@@ -107,7 +107,7 @@ def create_source_analysis(df):
107
 
108
  return fig
109
 
110
- def create_polarity_distribution(df):
111
  """Create sentiment polarity distribution"""
112
  if df.empty:
113
  return None
@@ -121,10 +121,9 @@ def create_polarity_distribution(df):
121
  )
122
 
123
  # Add vertical lines for sentiment boundaries
124
- fig.add_vline(x=0.1, line_dash="dash", line_color="green", annotation_text="Positive Threshold")
125
- fig.add_vline(x=-0.1, line_dash="dash", line_color="red", annotation_text="Negative Threshold")
126
- fig.add_vline(x=0, line_dash="dash", line_color="gray", annotation_text="Neutral")
127
-
128
  return fig
129
 
130
 
@@ -152,6 +151,12 @@ def main():
152
  "Or enter custom search:",
153
  placeholder="e.g., 'generative AI'"
154
  )
 
 
 
 
 
 
155
 
156
  # Use custom query if provided
157
  final_query = custom_query if custom_query else selected_query
@@ -188,7 +193,7 @@ def main():
188
  # Load data
189
  if st.sidebar.button("๐Ÿš€ Analyze News", type="primary"):
190
  with st.spinner(f"Fetching and analyzing news about '{final_query}'..."):
191
- df, error = load_news_data(final_query, days, sources)
192
 
193
  if error:
194
  st.error(f"Error loading data: {error}")
@@ -203,105 +208,105 @@ def main():
203
  st.session_state.query = final_query
204
  st.session_state.days = days
205
 
206
- # ===== Display results if data is available =====
207
- if 'df' in st.session_state and not st.session_state.df.empty:
208
- df = st.session_state.df
209
 
210
- # ===== Summary Metrics =====
211
- st.markdown("### ๐Ÿ“Š Analysis Summary")
212
- col1, col2, col3, col4 = st.columns(4)
213
 
214
- with col1:
215
- st.metric("๐Ÿ“ฐ Total Articles", len(df))
216
- with col2:
217
- avg_polarity = df['sentiment_polarity'].mean()
218
- delta_polarity = f"{avg_polarity:+.3f}"
219
- st.metric("๐ŸŽญ Avg Sentiment", f"{avg_polarity:.3f}", delta_polarity)
220
- with col3:
221
- positive_pct = (len(df[df['sentiment_label'] == 'positive']) / len(df) * 100)
222
- st.metric("๐Ÿ˜Š Positive %", f"{positive_pct:.1f}%")
223
- with col4:
224
- unique_sources = df['source'].nunique()
225
- st.metric("๐Ÿ“บ News Sources", unique_sources)
226
 
227
 
228
- # ===== Charts =====
229
- st.markdown("### ๐Ÿ“ˆ Visual Analysis")
230
- col1, col2 = st.columns(2)
231
 
232
- # Sentiment Distribution
233
- dist_fig = create_sentiment_distribution(df)
234
- if dist_fig:
235
- st.plotly_chart(dist_fig, use_container_width=True, key="dist_fig")
236
- # Export buttons
237
- buf = io.BytesIO()
238
- dist_fig.update_layout(template="plotly_white")
239
- dist_fig.update_layout(plot_bgcolor='white', paper_bgcolor='white') # ่ฎพ็ฝฎ็™ฝๅบ•
240
- dist_fig.write_image(buf, format="png", engine="kaleido")
241
- st.download_button("๐Ÿ“ท Download Distribution Chart as PNG", buf.getvalue(),
242
- "distribution_chart.png", mime="image/png")
243
- st.download_button("๐ŸŒ Download Distribution Chart as HTML",
244
- dist_fig.to_html().encode("utf-8"), "distribution_chart.html",
245
- mime="text/html")
246
 
247
- # Source Analysis
248
- source_fig = create_source_analysis(df)
249
- if source_fig:
250
- st.plotly_chart(source_fig, use_container_width=True, key="source_fig")
251
- buf = io.BytesIO()
252
- source_fig.update_layout(template="plotly_white")
253
- source_fig.update_layout(plot_bgcolor='white', paper_bgcolor='white') # ็™ฝๅบ•
254
- source_fig.write_image(buf, format="png", engine="kaleido")
255
- st.download_button("๐Ÿ“ท Download Source Chart as PNG", buf.getvalue(),
256
- "source_chart.png", mime="image/png")
257
- st.download_button("๐ŸŒ Download Source Chart as HTML",
258
- source_fig.to_html().encode("utf-8"), "source_chart.html",
259
- mime="text/html")
260
 
261
- # Polarity Distribution
262
- polarity_fig = create_polarity_distribution(df)
263
- if polarity_fig:
264
- st.plotly_chart(polarity_fig, use_container_width=True, key="polarity_fig")
265
- buf = io.BytesIO()
266
- polarity_fig.update_layout(template="plotly_white")
267
- polarity_fig.update_layout(plot_bgcolor='white', paper_bgcolor='white') # ็™ฝๅบ•
268
- polarity_fig.write_image(buf, format="png", engine="kaleido")
269
- st.download_button("๐Ÿ“ท Download Polarity Chart as PNG", buf.getvalue(),
270
- "polarity_chart.png", mime="image/png")
271
- st.download_button("๐ŸŒ Download Polarity Chart as HTML",
272
- polarity_fig.to_html().encode("utf-8"), "polarity_chart.html",
273
- mime="text/html")
274
 
275
-
276
- # ===== Export CSV button =====
277
- csv_data = df.to_csv(index=False).encode('utf-8')
278
- st.download_button(
279
- label="๐Ÿ’พ Export Analysis as CSV",
280
- data=csv_data,
281
- file_name=f"ai_news_analysis_{st.session_state.query.replace(' ', '_')}.csv",
282
- mime='text/csv'
283
- )
284
-
285
 
286
- else:
287
- # Welcome message
288
- st.info("๐Ÿ‘‹ Welcome! Configure your analysis settings in the sidebar and click 'Analyze News' to get started.")
 
 
 
 
 
289
 
290
- # Sample visualization or instructions
291
- st.markdown("""
292
- ### ๐Ÿš€ How to Use:
293
-
294
- 1. **Choose a topic** from the dropdown or enter your own search term
295
- 2. **Select time range** (1-30 days) to analyze recent news
296
- 3. **Pick news sources** or leave as 'All Sources' for comprehensive coverage
297
- 4. **Click 'Analyze News'** to fetch and analyze articles
298
-
299
- ### ๐Ÿ“Š What You'll Get:
300
-
301
- - **Sentiment Analysis** of headlines and descriptions
302
- - **Interactive Charts** showing trends over time
303
- - **Source Breakdown** to see which outlets cover your topic
304
- """)
 
 
 
 
 
305
 
306
 
307
 
 
21
 
22
  # Custom CSS for better styling
23
  st.markdown("""
24
+ <style>
25
  .main-header {
26
  font-size: 2.5rem;
27
  font-weight: bold;
 
48
  return json.load(f)
49
 
50
  @st.cache_data(ttl=1800) # Cache for 30 minutes
51
+ def load_news_data(query, days, sources=None, model="TextBlob"):
52
  """Load and cache news data"""
53
  try:
54
  analyzer = AINewsAnalyzer()
55
+ df = analyzer.get_ai_news_with_sentiment(query=query, days=days, sources=sources, model=model)
56
  return df, None
57
  except Exception as e:
58
  return pd.DataFrame(), str(e)
 
107
 
108
  return fig
109
 
110
+ def create_polarity_distribution(df, thresh: float):
111
  """Create sentiment polarity distribution"""
112
  if df.empty:
113
  return None
 
121
  )
122
 
123
  # Add vertical lines for sentiment boundaries
124
+ fig.add_vline(x=thresh, line_dash="dash", line_color="green", annotation_text="Positive Threshold", annotation_position="top right")
125
+ fig.add_vline(x=-thresh, line_dash="dash", line_color="red", annotation_text="Negative Threshold", annotation_position="top left")
126
+ fig.add_vline(x=0, line_dash="dash", line_color="gray", annotation_text="Neutral", annotation_position="top")
 
127
  return fig
128
 
129
 
 
151
  "Or enter custom search:",
152
  placeholder="e.g., 'generative AI'"
153
  )
154
+
155
+ model_query = st.sidebar.selectbox(
156
+ "๐Ÿ“ Search a Sentiment Model:",
157
+ options=config["model_options"],
158
+ index=0
159
+ )
160
 
161
  # Use custom query if provided
162
  final_query = custom_query if custom_query else selected_query
 
193
  # Load data
194
  if st.sidebar.button("๐Ÿš€ Analyze News", type="primary"):
195
  with st.spinner(f"Fetching and analyzing news about '{final_query}'..."):
196
+ df, error = load_news_data(final_query, days, sources=sources, model=model_query)
197
 
198
  if error:
199
  st.error(f"Error loading data: {error}")
 
208
  st.session_state.query = final_query
209
  st.session_state.days = days
210
 
211
+ # ===== Display results if data is available =====
212
+ if 'df' in st.session_state and not st.session_state.df.empty:
213
+ df = st.session_state.df
214
 
215
+ # ===== Summary Metrics =====
216
+ st.markdown("### ๐Ÿ“Š Analysis Summary")
217
+ col1, col2, col3, col4 = st.columns(4)
218
 
219
+ with col1:
220
+ st.metric("๐Ÿ“ฐ Total Articles", len(df))
221
+ with col2:
222
+ avg_polarity = df['sentiment_polarity'].mean()
223
+ delta_polarity = f"{avg_polarity:+.3f}"
224
+ st.metric("๐ŸŽญ Avg Sentiment", f"{avg_polarity:.3f}", delta_polarity)
225
+ with col3:
226
+ positive_pct = (len(df[df['sentiment_label'] == 'positive']) / len(df) * 100)
227
+ st.metric("๐Ÿ˜Š Positive %", f"{positive_pct:.1f}%")
228
+ with col4:
229
+ unique_sources = df['source'].nunique()
230
+ st.metric("๐Ÿ“บ News Sources", unique_sources)
231
 
232
 
233
+ # ===== Charts =====
234
+ st.markdown("### ๐Ÿ“ˆ Visual Analysis")
235
+ col1, col2 = st.columns(2)
236
 
237
+ # Sentiment Distribution
238
+ dist_fig = create_sentiment_distribution(df)
239
+ if dist_fig:
240
+ st.plotly_chart(dist_fig, use_container_width=True, key="dist_fig")
241
+ # Export buttons
242
+ buf = io.BytesIO()
243
+ dist_fig.update_layout(template="plotly_white")
244
+ dist_fig.update_layout(plot_bgcolor='white', paper_bgcolor='white') # ่ฎพ็ฝฎ็™ฝๅบ•
245
+ dist_fig.write_image(buf, format="png", engine="kaleido")
246
+ st.download_button("๐Ÿ“ท Download Distribution Chart as PNG", buf.getvalue(),
247
+ "distribution_chart.png", mime="image/png")
248
+ st.download_button("๐ŸŒ Download Distribution Chart as HTML",
249
+ dist_fig.to_html().encode("utf-8"), "distribution_chart.html",
250
+ mime="text/html")
251
 
252
+ # Source Analysis
253
+ source_fig = create_source_analysis(df)
254
+ if source_fig:
255
+ st.plotly_chart(source_fig, use_container_width=True, key="source_fig")
256
+ buf = io.BytesIO()
257
+ source_fig.update_layout(template="plotly_white")
258
+ source_fig.update_layout(plot_bgcolor='white', paper_bgcolor='white') # ็™ฝๅบ•
259
+ source_fig.write_image(buf, format="png", engine="kaleido")
260
+ st.download_button("๐Ÿ“ท Download Source Chart as PNG", buf.getvalue(),
261
+ "source_chart.png", mime="image/png")
262
+ st.download_button("๐ŸŒ Download Source Chart as HTML",
263
+ source_fig.to_html().encode("utf-8"), "source_chart.html",
264
+ mime="text/html")
265
 
266
+ # Polarity Distribution
267
+ polarity_fig = create_polarity_distribution(df)
268
+ if polarity_fig:
269
+ st.plotly_chart(polarity_fig, use_container_width=True, key="polarity_fig")
270
+ buf = io.BytesIO()
271
+ polarity_fig.update_layout(template="plotly_white")
272
+ polarity_fig.update_layout(plot_bgcolor='white', paper_bgcolor='white') # ็™ฝๅบ•
273
+ polarity_fig.write_image(buf, format="png", engine="kaleido")
274
+ st.download_button("๐Ÿ“ท Download Polarity Chart as PNG", buf.getvalue(),
275
+ "polarity_chart.png", mime="image/png")
276
+ st.download_button("๐ŸŒ Download Polarity Chart as HTML",
277
+ polarity_fig.to_html().encode("utf-8"), "polarity_chart.html",
278
+ mime="text/html")
279
 
 
 
 
 
 
 
 
 
 
 
280
 
281
+ # ===== Export CSV button =====
282
+ csv_data = df.to_csv(index=False).encode('utf-8')
283
+ st.download_button(
284
+ label="๐Ÿ’พ Export Analysis as CSV",
285
+ data=csv_data,
286
+ file_name=f"ai_news_analysis_{st.session_state.query.replace(' ', '_')}.csv",
287
+ mime='text/csv'
288
+ )
289
 
290
+ else:
291
+ # Welcome message
292
+ st.info("๐Ÿ‘‹ Welcome! Configure your analysis settings in the sidebar and click 'Analyze News' to get started.")
293
+
294
+ # Sample visualization or instructions
295
+ st.markdown("""
296
+ ### ๐Ÿš€ How to Use:
297
+
298
+ 1. **Choose a topic** from the dropdown or enter your own search term
299
+ 2. **Select time range** (1-30 days) to analyze recent news
300
+ 3. **Pick news sources** or leave as 'All Sources' for comprehensive coverage
301
+ 4. **Click 'Analyze News'** to fetch and analyze articles
302
+
303
+ ### ๐Ÿ“Š What You'll Get:
304
+
305
+ - **Sentiment Analysis** of headlines and descriptions
306
+ - **Interactive Charts** showing trends over time
307
+ - **Source Breakdown** to see which outlets cover your topic
308
+ """)
309
+ pass
310
 
311
 
312