Jonas Neves committed on
Commit
f5b7e31
Β·
1 Parent(s): d4f4ff7

Create initial project structure

Browse files
Files changed (9) hide show
  1. .env.example +1 -0
  2. .gitattributes +35 -0
  3. .gitignore +23 -0
  4. Dockerfile +20 -0
  5. README.md +183 -1
  6. requirements.txt +7 -0
  7. src/api_handler.py +269 -0
  8. src/cli_demo.py +181 -0
  9. src/streamlit_app.py +277 -0
.env.example ADDED
@@ -0,0 +1 @@
 
 
1
+ NEWSAPI_KEY=your_newsapi_key_here
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Environment variables
2
+ .env
3
+
4
+ # Python cache
5
+ __pycache__/
6
+ *.pyc
7
+ *.pyo
8
+ *.pyd
9
+
10
+ # Virtual environment
11
+ .venv/
12
+ venv/
13
+
14
+ # IDE
15
+ .vscode/
16
+ .idea/
17
+
18
+ # OS
19
+ .DS_Store
20
+ Thumbs.db
21
+
22
+ # Streamlit
23
+ .streamlit/
Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.13.5-slim
2
+
3
+ WORKDIR /app
4
+
5
+ RUN apt-get update && apt-get install -y \
6
+ build-essential \
7
+ curl \
8
+ git \
9
+ && rm -rf /var/lib/apt/lists/*
10
+
11
+ COPY requirements.txt ./
12
+ COPY src/ ./src/
13
+
14
+ RUN pip3 install -r requirements.txt
15
+
16
+ EXPOSE 8501
17
+
18
+ HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
19
+
20
+ ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
README.md CHANGED
@@ -1 +1,183 @@
1
- # BootcampFinalProject
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: AI News Sentiment Analyzer
3
+ emoji: πŸ€–
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: streamlit
7
+ sdk_version: "1.28.0"
8
+ app_file: src/streamlit_app.py
9
+ pinned: false
10
+ ---
11
+
12
+ # πŸ€– AI News Sentiment Analyzer
13
+
14
+ An interactive web application that fetches the latest AI-related news and analyzes the sentiment of headlines and articles. Built with Python, Streamlit, and powered by NewsAPI.
15
+
16
+ ## πŸ› οΈ Installation
17
+
18
+ ### Prerequisites
19
+ - Python 3.9+
20
+ - NewsAPI key (get free at [newsapi.org](https://newsapi.org))
21
+
22
+ ### Setup Instructions
23
+
24
+ 1. **Clone the repository**
25
+ ```bash
26
+ git clone https://github.com/alexoh2bd/BootcampFinalProject
27
+ cd BootcampFinalProject
28
+ ```
29
+
30
+ 2. **Create virtual environment**
31
+ ```bash
32
+ # macOS/Linux
33
+ python3 -m venv .venv
34
+ source .venv/bin/activate
35
+ ```
36
+
37
+ 3. **Install dependencies**
38
+ ```bash
39
+ pip install -r requirements.txt
40
+ ```
41
+
42
+ 4. **Set up environment variables**
43
+
44
+ Create a `.env` file in the project root:
45
+ ```bash
46
+ NEWSAPI_KEY=your_newsapi_key_here
47
+ ```
48
+
49
+ ## 🎯 Usage
50
+
51
+ ### Web Application
52
+
53
+ Run the Streamlit app:
54
+ ```bash
55
+ streamlit run streamlit_app.py
56
+ ```
57
+
58
+ Then open your browser to `http://localhost:8501`
59
+
60
+ ### Command Line Interface
61
+
62
+ For quick sentiment analysis:
63
+
64
+ ```bash
65
+ # Basic usage
66
+ python cli_demo.py
67
+
68
+ # Custom search query
69
+ python cli_demo.py --query "ChatGPT" --days 3
70
+
71
+ # Filter to specific sources
72
+ python cli_demo.py --sources "techcrunch,wired" --max-articles 5
73
+
74
+ # Show only positive articles
75
+ python cli_demo.py --positive-only
76
+
77
+ # Show detailed sentiment analysis
78
+ python cli_demo.py --sentiment-only
79
+ ```
80
+
81
+ #### CLI Options
82
+ - `--query, -q`: Search query (default: "artificial intelligence")
83
+ - `--days, -d`: Days to look back (default: 7)
84
+ - `--sources, -s`: Comma-separated news sources
85
+ - `--max-articles, -m`: Maximum articles to display (default: 10)
86
+ - `--positive-only`: Show only positive sentiment articles
87
+ - `--negative-only`: Show only negative sentiment articles
88
+ - `--sentiment-only`: Show only sentiment analysis summary
89
+
90
+ ## πŸ”§ Technical Architecture
91
+
92
+ ```mermaid
93
+ flowchart TB
94
+ subgraph Frontend["🎨 Frontend Layer"]
95
+ A["🌐 Streamlit UI"]
96
+ B["πŸ’» CLI Interface"]
97
+ end
98
+
99
+ subgraph Application["βš™οΈ Application Layer"]
100
+ C["api_handler.py<br/>πŸ”§ Core Logic"]
101
+ D["streamlit_app.py<br/>πŸ“Š Web Framework"]
102
+ E["cli_demo.py<br/>⌨️ Command Line"]
103
+ end
104
+
105
+ subgraph Processing["🧠 Data Processing"]
106
+ F["TextBlob<br/>Sentiment Engine"]
107
+ G["Plotly<br/>Visualizations"]
108
+ H["Pandas<br/>Data Processing"]
109
+ end
110
+
111
+ subgraph External["🌐 External Services"]
112
+ I["πŸ“‘ NewsAPI<br/>TechCrunch, Wired, etc."]
113
+ J["πŸ” Environment<br/>API Keys"]
114
+ end
115
+
116
+ A --> D
117
+ B --> E
118
+ D --> C
119
+ E --> C
120
+ C --> F
121
+ C --> H
122
+ D --> G
123
+ C --> I
124
+ C --> J
125
+
126
+ classDef frontend fill:#e3f2fd,stroke:#1976d2,stroke-width:2px
127
+ classDef application fill:#fff3e0,stroke:#f57c00,stroke-width:2px
128
+ classDef processing fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
129
+ classDef external fill:#fce4ec,stroke:#c2185b,stroke-width:2px
130
+
131
+ class A,B frontend
132
+ class C,D,E application
133
+ class F,G,H processing
134
+ class I,J external
135
+ ```
136
+
137
+ ## πŸ“ˆ Example Output
138
+
139
+ ### CLI Example
140
+ ```bash
141
+ πŸ€– AI News Sentiment Analyzer
142
+ ==================================================
143
+
144
+ πŸ” Searching for: "artificial intelligence"
145
+ πŸ“… Looking back: 7 days
146
+
147
+ πŸ“° Found 43 articles
148
+
149
+ Sentiment Distribution:
150
+ 😊 Positive: 18 articles (41.9%)
151
+ 😐 Neutral: 15 articles (34.9%)
152
+ 😞 Negative: 10 articles (23.3%)
153
+
154
+ πŸ“„ Top 10 Articles:
155
+ --------------------------------------------------------------------------------
156
+ 1. 😊 [TechCrunch] 2024-01-20 14:30
157
+ AI startup raises $50M for breakthrough in healthcare diagnosis
158
+ Sentiment: Positive (Score: 0.45)
159
+ πŸ“ Revolutionary AI technology promises to transform medical diagnosis...
160
+ πŸ”— https://techcrunch.com/...
161
+
162
+ 2. 😞 [Reuters] 2024-01-20 12:15
163
+ Concerns grow over AI job displacement in manufacturing
164
+ Sentiment: Negative (Score: -0.32)
165
+ πŸ“ Labor unions express worry about automation replacing workers...
166
+ πŸ”— https://reuters.com/...
167
+ ```
168
+
169
+ ## 🀝 Contributing
170
+
171
+ This project was built as part of the Duke AIPI 503 Bootcamp.
172
+
173
+ ### Development Setup
174
+
175
+ 1. Fork the repository
176
+ 2. Create a feature branch: `git checkout -b feature/some-feature`
177
+ 3. Make your changes and commit: `git commit -m 'Add some feature'`
178
+ 4. Push to the branch: `git push origin feature/some-feature`
179
+ 5. Open a Pull Request
180
+
181
+ ## πŸ“ License
182
+
183
+ This project is licensed under the MIT License - see the LICENSE file for details.
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ streamlit>=1.28.0
2
+ pandas>=2.0.0
3
+ requests>=2.31.0
4
+ python-dotenv>=1.0.0
5
+ textblob>=0.17.1
6
+ plotly>=5.15.0
7
+ numpy>=1.24.0
src/api_handler.py ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ AI News API Handler
3
+ Fetches AI-related news from NewsAPI and performs sentiment analysis
4
+ """
5
+ import requests
6
+ import pandas as pd
7
+ from datetime import datetime, timedelta
8
+ import os
9
+ from dotenv import load_dotenv
10
+ from textblob import TextBlob
11
+ from typing import List, Dict, Optional
12
+
13
+ # Load environment variables
14
+ load_dotenv()
15
+
16
+ class AINewsAnalyzer:
17
+ def __init__(self):
18
+ self.api_key = os.getenv('NEWSAPI_KEY')
19
+ self.base_url = "https://newsapi.org/v2/everything"
20
+
21
+ if not self.api_key:
22
+ raise ValueError("NewsAPI key not found. Please set NEWSAPI_KEY in your .env file")
23
+
24
+ def fetch_ai_news(self,
25
+ query: str = "artificial intelligence",
26
+ days: int = 7,
27
+ language: str = "en",
28
+ sources: Optional[str] = None,
29
+ page_size: int = 100) -> List[Dict]:
30
+ """
31
+ Fetch AI-related news from NewsAPI
32
+
33
+ Args:
34
+ query: Search query for news articles
35
+ days: Number of days to look back
36
+ language: Language code (default: "en")
37
+ sources: Comma-separated string of news sources
38
+ page_size: Number of articles to fetch (max 100)
39
+
40
+ Returns:
41
+ List of news articles with metadata
42
+ """
43
+ # Calculate date range
44
+ to_date = datetime.now()
45
+ from_date = to_date - timedelta(days=days)
46
+
47
+ # Prepare API parameters
48
+ params = {
49
+ 'q': query,
50
+ 'from': from_date.strftime('%Y-%m-%d'),
51
+ 'to': to_date.strftime('%Y-%m-%d'),
52
+ 'language': language,
53
+ 'sortBy': 'publishedAt',
54
+ 'pageSize': page_size,
55
+ 'apiKey': self.api_key
56
+ }
57
+
58
+ # Add sources if specified
59
+ if sources:
60
+ params['sources'] = sources
61
+
62
+ try:
63
+ # Make API request
64
+ response = requests.get(self.base_url, params=params)
65
+ response.raise_for_status()
66
+
67
+ data = response.json()
68
+
69
+ if data['status'] == 'ok':
70
+ return data['articles']
71
+ else:
72
+ print(f"API Error: {data.get('message', 'Unknown error')}")
73
+ return []
74
+
75
+ except requests.exceptions.RequestException as e:
76
+ print(f"Request failed: {e}")
77
+ return []
78
+
79
+ def analyze_sentiment(self, text: str) -> Dict:
80
+ """
81
+ Analyze sentiment of given text using TextBlob
82
+
83
+ Args:
84
+ text: Text to analyze
85
+
86
+ Returns:
87
+ Dictionary with sentiment metrics
88
+ """
89
+ if not text:
90
+ return {
91
+ 'polarity': 0.0,
92
+ 'subjectivity': 0.0,
93
+ 'label': 'neutral',
94
+ 'confidence': 0.0
95
+ }
96
+
97
+ blob = TextBlob(text)
98
+ polarity = blob.sentiment.polarity
99
+ subjectivity = blob.sentiment.subjectivity
100
+
101
+ # Determine sentiment label
102
+ if polarity > 0.1:
103
+ label = 'positive'
104
+ elif polarity < -0.1:
105
+ label = 'negative'
106
+ else:
107
+ label = 'neutral'
108
+
109
+ # Calculate confidence (distance from neutral)
110
+ confidence = abs(polarity)
111
+
112
+ return {
113
+ 'polarity': polarity,
114
+ 'subjectivity': subjectivity,
115
+ 'label': label,
116
+ 'confidence': confidence
117
+ }
118
+
119
+ def process_news_articles(self, articles: List[Dict]) -> pd.DataFrame:
120
+ """
121
+ Process news articles and add sentiment analysis
122
+
123
+ Args:
124
+ articles: List of news articles from API
125
+
126
+ Returns:
127
+ DataFrame with processed articles and sentiment data
128
+ """
129
+ processed_articles = []
130
+
131
+ for article in articles:
132
+ # Skip articles with missing essential data
133
+ if not article.get('title') or not article.get('publishedAt'):
134
+ continue
135
+
136
+ # Analyze sentiment of title and description
137
+ title_sentiment = self.analyze_sentiment(article['title'])
138
+ description_sentiment = self.analyze_sentiment(article.get('description', ''))
139
+
140
+ # Combine title and description sentiment (weighted toward title)
141
+ combined_polarity = (title_sentiment['polarity'] * 0.7 +
142
+ description_sentiment['polarity'] * 0.3)
143
+ combined_subjectivity = (title_sentiment['subjectivity'] * 0.7 +
144
+ description_sentiment['subjectivity'] * 0.3)
145
+
146
+ # Determine overall sentiment
147
+ if combined_polarity > 0.1:
148
+ overall_sentiment = 'positive'
149
+ elif combined_polarity < -0.1:
150
+ overall_sentiment = 'negative'
151
+ else:
152
+ overall_sentiment = 'neutral'
153
+
154
+ processed_article = {
155
+ 'title': article['title'],
156
+ 'description': article.get('description', ''),
157
+ 'url': article['url'],
158
+ 'source': article['source']['name'],
159
+ 'published_at': article['publishedAt'],
160
+ 'author': article.get('author', 'Unknown'),
161
+ 'sentiment_label': overall_sentiment,
162
+ 'sentiment_polarity': combined_polarity,
163
+ 'sentiment_subjectivity': combined_subjectivity,
164
+ 'title_sentiment': title_sentiment['label'],
165
+ 'title_polarity': title_sentiment['polarity'],
166
+ 'description_sentiment': description_sentiment['label'],
167
+ 'description_polarity': description_sentiment['polarity']
168
+ }
169
+
170
+ processed_articles.append(processed_article)
171
+
172
+ # Convert to DataFrame
173
+ df = pd.DataFrame(processed_articles)
174
+
175
+ # Convert published_at to datetime
176
+ if not df.empty:
177
+ df['published_at'] = pd.to_datetime(df['published_at'])
178
+ df = df.sort_values('published_at', ascending=False)
179
+
180
+ return df
181
+
182
+ def get_ai_news_with_sentiment(self,
183
+ query: str = "artificial intelligence",
184
+ days: int = 7,
185
+ sources: Optional[str] = None) -> pd.DataFrame:
186
+ """
187
+ Complete pipeline: fetch news and analyze sentiment
188
+
189
+ Args:
190
+ query: Search query for news articles
191
+ days: Number of days to look back
192
+ sources: Comma-separated string of news sources
193
+
194
+ Returns:
195
+ DataFrame with news articles and sentiment analysis
196
+ """
197
+ print(f"Fetching {query} news from the last {days} days...")
198
+
199
+ # Fetch articles
200
+ articles = self.fetch_ai_news(query=query, days=days, sources=sources)
201
+
202
+ if not articles:
203
+ print("No articles found.")
204
+ return pd.DataFrame()
205
+
206
+ print(f"Found {len(articles)} articles. Analyzing sentiment...")
207
+
208
+ # Process and analyze
209
+ df = self.process_news_articles(articles)
210
+
211
+ print(f"Processed {len(df)} articles with sentiment analysis.")
212
+ return df
213
+
214
+ def fetch_ai_news(query="artificial intelligence", days=7, sources=None):
215
+ """Standalone function to fetch AI news"""
216
+ analyzer = AINewsAnalyzer()
217
+ return analyzer.fetch_ai_news(query, days, sources=sources)
218
+
219
+ def analyze_sentiment(text):
220
+ """Standalone function to analyze sentiment"""
221
+ analyzer = AINewsAnalyzer()
222
+ return analyzer.analyze_sentiment(text)
223
+
224
+ def get_ai_news_with_sentiment(query="artificial intelligence", days=7, sources=None):
225
+ """Standalone function for complete pipeline"""
226
+ analyzer = AINewsAnalyzer()
227
+ return analyzer.get_ai_news_with_sentiment(query, days, sources)
228
+
229
+ if __name__ == "__main__":
230
+ # Test the API when run directly
231
+ analyzer = AINewsAnalyzer()
232
+
233
+ print("Testing AI News Sentiment Analyzer...")
234
+ print("=" * 50)
235
+
236
+ # Test sentiment analysis
237
+ test_texts = [
238
+ "AI breakthrough promises to revolutionize healthcare",
239
+ "Concerns grow over AI job displacement",
240
+ "New machine learning model shows mixed results"
241
+ ]
242
+
243
+ print("\nSentiment Analysis Examples:")
244
+ for text in test_texts:
245
+ sentiment = analyzer.analyze_sentiment(text)
246
+ print(f"Text: {text}")
247
+ print(f"Sentiment: {sentiment['label']} (polarity: {sentiment['polarity']:.2f})")
248
+ print()
249
+
250
+ # Test news fetching
251
+ print("Fetching recent AI news...")
252
+ df = analyzer.get_ai_news_with_sentiment(days=3)
253
+
254
+ if not df.empty:
255
+ print(f"\nFound {len(df)} articles")
256
+ print("\nSentiment Distribution:")
257
+ print(df['sentiment_label'].value_counts())
258
+
259
+ print("\nTop 3 Most Positive Headlines:")
260
+ positive_articles = df[df['sentiment_label'] == 'positive'].nlargest(3, 'sentiment_polarity')
261
+ for _, article in positive_articles.iterrows():
262
+ print(f"πŸ“ˆ {article['title']} (Score: {article['sentiment_polarity']:.2f})")
263
+
264
+ print("\nTop 3 Most Negative Headlines:")
265
+ negative_articles = df[df['sentiment_label'] == 'negative'].nsmallest(3, 'sentiment_polarity')
266
+ for _, article in negative_articles.iterrows():
267
+ print(f"πŸ“‰ {article['title']} (Score: {article['sentiment_polarity']:.2f})")
268
+ else:
269
+ print("No articles found. Check your API key and internet connection.")
src/cli_demo.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ CLI Demo for AI News Sentiment Analyzer
4
+ Demonstrates the functionality via command line interface
5
+ """
6
+
7
+ import argparse
8
+ import sys
9
+ from datetime import datetime
10
+ from api_handler import AINewsAnalyzer
11
+
12
+ def print_header():
13
+ """Print a nice header for the CLI"""
14
+ print("πŸ€– AI News Sentiment Analyzer")
15
+ print("=" * 50)
16
+ print()
17
+
18
+ def print_sentiment_emoji(sentiment):
19
+ """Return emoji based on sentiment"""
20
+ emoji_map = {
21
+ 'positive': '😊',
22
+ 'negative': '😞',
23
+ 'neutral': '😐'
24
+ }
25
+ return emoji_map.get(sentiment, '🀷')
26
+
27
+ def display_articles(df, max_articles=10):
28
+ """Display articles in a formatted way"""
29
+ if df.empty:
30
+ print("❌ No articles found.")
31
+ return
32
+
33
+ print(f"πŸ“° Found {len(df)} articles")
34
+ print("\nSentiment Distribution:")
35
+ sentiment_counts = df['sentiment_label'].value_counts()
36
+ for sentiment, count in sentiment_counts.items():
37
+ emoji = print_sentiment_emoji(sentiment)
38
+ percentage = (count / len(df)) * 100
39
+ print(f" {emoji} {sentiment.title()}: {count} articles ({percentage:.1f}%)")
40
+
41
+ print(f"\nπŸ“„ Top {min(max_articles, len(df))} Articles:")
42
+ print("-" * 80)
43
+
44
+ for idx, (_, article) in enumerate(df.head(max_articles).iterrows(), 1):
45
+ sentiment_emoji = print_sentiment_emoji(article['sentiment_label'])
46
+ score = article['sentiment_polarity']
47
+ published = article['published_at'].strftime('%Y-%m-%d %H:%M')
48
+
49
+ print(f"{idx:2}. {sentiment_emoji} [{article['source']}] {published}")
50
+ print(f" {article['title']}")
51
+ print(f" Sentiment: {article['sentiment_label'].title()} (Score: {score:.2f})")
52
+ if article['description'] and len(article['description']) > 100:
53
+ description = article['description'][:100] + "..."
54
+ else:
55
+ description = article['description'] or "No description available"
56
+ print(f" πŸ“ {description}")
57
+ print(f" πŸ”— {article['url']}")
58
+ print()
59
+
60
+ def display_sentiment_analysis(df):
61
+ """Display detailed sentiment analysis"""
62
+ if df.empty:
63
+ return
64
+
65
+ print("\nπŸ“Š Sentiment Analysis Summary:")
66
+ print("-" * 40)
67
+
68
+ # Overall statistics
69
+ avg_polarity = df['sentiment_polarity'].mean()
70
+ avg_subjectivity = df['sentiment_subjectivity'].mean()
71
+
72
+ print(f"Average Polarity: {avg_polarity:.3f} (Range: -1.0 to +1.0)")
73
+ print(f"Average Subjectivity: {avg_subjectivity:.3f} (Range: 0.0 to 1.0)")
74
+
75
+ if avg_polarity > 0.1:
76
+ overall_mood = "πŸ“ˆ Generally Positive"
77
+ elif avg_polarity < -0.1:
78
+ overall_mood = "πŸ“‰ Generally Negative"
79
+ else:
80
+ overall_mood = "➑️ Generally Neutral"
81
+
82
+ print(f"Overall Mood: {overall_mood}")
83
+
84
+ # Most positive and negative articles
85
+ if len(df[df['sentiment_label'] == 'positive']) > 0:
86
+ most_positive = df.loc[df['sentiment_polarity'].idxmax()]
87
+ print(f"\n😊 Most Positive: \"{most_positive['title']}\" ({most_positive['sentiment_polarity']:.2f})")
88
+
89
+ if len(df[df['sentiment_label'] == 'negative']) > 0:
90
+ most_negative = df.loc[df['sentiment_polarity'].idxmin()]
91
+ print(f"😞 Most Negative: \"{most_negative['title']}\" ({most_negative['sentiment_polarity']:.2f})")
92
+
93
+ def display_sources(df):
94
+ """Display source breakdown"""
95
+ if df.empty:
96
+ return
97
+
98
+ print("\nπŸ“Ί News Sources:")
99
+ print("-" * 30)
100
+ source_counts = df['source'].value_counts()
101
+ for source, count in source_counts.head(10).items():
102
+ print(f" πŸ“° {source}: {count} articles")
103
+
104
+ def main():
105
+ parser = argparse.ArgumentParser(description='AI News Sentiment Analyzer CLI Demo')
106
+ parser.add_argument('--query', '-q',
107
+ default='artificial intelligence',
108
+ help='Search query for news articles (default: "artificial intelligence")')
109
+ parser.add_argument('--days', '-d',
110
+ type=int,
111
+ default=7,
112
+ help='Number of days to look back (default: 7)')
113
+ parser.add_argument('--sources', '-s',
114
+ help='Comma-separated list of news sources (e.g., "techcrunch,wired")')
115
+ parser.add_argument('--max-articles', '-m',
116
+ type=int,
117
+ default=10,
118
+ help='Maximum number of articles to display (default: 10)')
119
+ parser.add_argument('--sentiment-only',
120
+ action='store_true',
121
+ help='Show only sentiment analysis summary')
122
+ parser.add_argument('--positive-only',
123
+ action='store_true',
124
+ help='Show only positive articles')
125
+ parser.add_argument('--negative-only',
126
+ action='store_true',
127
+ help='Show only negative articles')
128
+
129
+ args = parser.parse_args()
130
+
131
+ print_header()
132
+
133
+ try:
134
+ # Initialize analyzer
135
+ analyzer = AINewsAnalyzer()
136
+
137
+ print(f"πŸ” Searching for: \"{args.query}\"")
138
+ print(f"πŸ“… Looking back: {args.days} days")
139
+ if args.sources:
140
+ print(f"πŸ“° Sources: {args.sources}")
141
+ print()
142
+
143
+ # Fetch and analyze news
144
+ df = analyzer.get_ai_news_with_sentiment(
145
+ query=args.query,
146
+ days=args.days,
147
+ sources=args.sources
148
+ )
149
+
150
+ if df.empty:
151
+ print("❌ No articles found. Try adjusting your search parameters.")
152
+ return
153
+
154
+ # Filter by sentiment if requested
155
+ if args.positive_only:
156
+ df = df[df['sentiment_label'] == 'positive']
157
+ print("πŸ”½ Filtered to show only POSITIVE articles")
158
+ elif args.negative_only:
159
+ df = df[df['sentiment_label'] == 'negative']
160
+ print("πŸ”½ Filtered to show only NEGATIVE articles")
161
+
162
+ # Display results based on options
163
+ if args.sentiment_only:
164
+ display_sentiment_analysis(df)
165
+ else:
166
+ display_articles(df, args.max_articles)
167
+ display_sentiment_analysis(df)
168
+ display_sources(df)
169
+
170
+ print(f"\nβœ… Analysis complete! Processed {len(df)} articles.")
171
+
172
+ except KeyboardInterrupt:
173
+ print("\nπŸ‘‹ Analysis interrupted by user.")
174
+ sys.exit(0)
175
+ except Exception as e:
176
+ print(f"❌ Error occurred: {e}")
177
+ print("Please check your API key and internet connection.")
178
+ sys.exit(1)
179
+
180
+ if __name__ == "__main__":
181
+ main()
src/streamlit_app.py ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ AI News Sentiment Analyzer - Streamlit Web Application
3
+ Interactive dashboard for analyzing sentiment of AI-related news
4
+ """
5
+
6
+ import streamlit as st
7
+ import pandas as pd
8
+ import plotly.express as px
9
+ from api_handler import AINewsAnalyzer
10
+
11
+ # Page configuration
12
+ st.set_page_config(
13
+ page_title="AI News Sentiment Analyzer",
14
+ page_icon="πŸ€–",
15
+ layout="wide",
16
+ initial_sidebar_state="expanded"
17
+ )
18
+
19
+ # Custom CSS for better styling
20
+ st.markdown("""
21
+ <style>
22
+ .main-header {
23
+ font-size: 2.5rem;
24
+ font-weight: bold;
25
+ color: #1f77b4;
26
+ text-align: center;
27
+ margin-bottom: 2rem;
28
+ }
29
+ .metric-card {
30
+ background-color: #f0f2f6;
31
+ padding: 1rem;
32
+ border-radius: 0.5rem;
33
+ border-left: 5px solid #1f77b4;
34
+ }
35
+ .positive { color: #28a745; }
36
+ .negative { color: #dc3545; }
37
+ .neutral { color: #6c757d; }
38
+ </style>
39
+ """, unsafe_allow_html=True)
40
+
41
+ @st.cache_data(ttl=1800) # Cache for 30 minutes
42
+ def load_news_data(query, days, sources=None):
43
+ """Load and cache news data"""
44
+ try:
45
+ analyzer = AINewsAnalyzer()
46
+ df = analyzer.get_ai_news_with_sentiment(query=query, days=days, sources=sources)
47
+ return df, None
48
+ except Exception as e:
49
+ return pd.DataFrame(), str(e)
50
+
51
+
52
+ def create_sentiment_distribution(df):
53
+ """Create sentiment distribution pie chart"""
54
+ if df.empty:
55
+ return None
56
+
57
+ sentiment_counts = df['sentiment_label'].value_counts()
58
+
59
+ fig = px.pie(
60
+ values=sentiment_counts.values,
61
+ names=sentiment_counts.index,
62
+ title="🎯 Sentiment Distribution",
63
+ color_discrete_map={
64
+ 'positive': '#28a745',
65
+ 'negative': '#dc3545',
66
+ 'neutral': '#6c757d'
67
+ }
68
+ )
69
+
70
+ fig.update_traces(textposition='inside', textinfo='percent+label')
71
+ return fig
72
+
73
+ def create_source_analysis(df):
74
+ """Create source analysis chart"""
75
+ if df.empty:
76
+ return None
77
+
78
+ source_sentiment = df.groupby(['source', 'sentiment_label']).size().unstack(fill_value=0)
79
+ source_sentiment = source_sentiment.loc[source_sentiment.sum(axis=1).nlargest(10).index]
80
+
81
+ fig = px.bar(
82
+ source_sentiment.reset_index(),
83
+ x='source',
84
+ y=['positive', 'negative', 'neutral'],
85
+ title="πŸ“° Sentiment by News Source (Top 10)",
86
+ color_discrete_map={
87
+ 'positive': '#28a745',
88
+ 'negative': '#dc3545',
89
+ 'neutral': '#6c757d'
90
+ }
91
+ )
92
+
93
+ fig.update_layout(
94
+ xaxis_title="News Source",
95
+ yaxis_title="Number of Articles",
96
+ xaxis_tickangle=-45
97
+ )
98
+
99
+ return fig
100
+
101
+ def create_polarity_distribution(df):
102
+ """Create sentiment polarity distribution"""
103
+ if df.empty:
104
+ return None
105
+
106
+ fig = px.histogram(
107
+ df,
108
+ x='sentiment_polarity',
109
+ nbins=30,
110
+ title="πŸ“Š Sentiment Polarity Distribution",
111
+ labels={'sentiment_polarity': 'Sentiment Polarity', 'count': 'Number of Articles'}
112
+ )
113
+
114
+ # Add vertical lines for sentiment boundaries
115
+ fig.add_vline(x=0.1, line_dash="dash", line_color="green", annotation_text="Positive Threshold")
116
+ fig.add_vline(x=-0.1, line_dash="dash", line_color="red", annotation_text="Negative Threshold")
117
+ fig.add_vline(x=0, line_dash="dash", line_color="gray", annotation_text="Neutral")
118
+
119
+ return fig
120
+
121
+
122
+ def main():
123
+ # Header
124
+ st.markdown("<h1 class='main-header'>πŸ€– AI News Sentiment Analyzer</h1>", unsafe_allow_html=True)
125
+ st.markdown("### Discover the sentiment trends in AI-related news from around the world")
126
+
127
+ # Sidebar controls
128
+ st.sidebar.header("πŸ”§ Analysis Settings")
129
+
130
+ # Query input
131
+ query_options = [
132
+ "artificial intelligence",
133
+ "machine learning",
134
+ "ChatGPT",
135
+ "OpenAI",
136
+ "deep learning",
137
+ "neural networks",
138
+ "AI ethics",
139
+ "robotics",
140
+ "computer vision",
141
+ "natural language processing"
142
+ ]
143
+
144
+ selected_query = st.sidebar.selectbox(
145
+ "πŸ“ Search Topic:",
146
+ options=query_options,
147
+ index=0
148
+ )
149
+
150
+ custom_query = st.sidebar.text_input(
151
+ "Or enter custom search:",
152
+ placeholder="e.g., 'generative AI'"
153
+ )
154
+
155
+ # Use custom query if provided
156
+ final_query = custom_query if custom_query else selected_query
157
+
158
+ # Time range
159
+ days = st.sidebar.slider(
160
+ "πŸ“… Days to analyze:",
161
+ min_value=1,
162
+ max_value=30,
163
+ value=7,
164
+ help="How many days back to search for news"
165
+ )
166
+
167
+ # News sources (confirmed available in NewsAPI)
168
+ popular_sources = [
169
+ "techcrunch,wired,ars-technica,the-verge,engadget",
170
+ "reuters,associated-press,bbc-news",
171
+ "cnn,fox-news,abc-news",
172
+ "financial-times,wall-street-journal,bloomberg"
173
+ ]
174
+
175
+ source_option = st.sidebar.selectbox(
176
+ "πŸ“° Source Category:",
177
+ options=["All Sources", "Tech Media", "General News", "US News", "Financial News"],
178
+ index=0
179
+ )
180
+
181
+ if source_option == "Tech Media":
182
+ sources = popular_sources[0]
183
+ elif source_option == "General News":
184
+ sources = popular_sources[1]
185
+ elif source_option == "US News":
186
+ sources = popular_sources[2]
187
+ elif source_option == "Financial News":
188
+ sources = popular_sources[3]
189
+ else:
190
+ sources = None
191
+
192
+ # Load data
193
+ if st.sidebar.button("πŸš€ Analyze News", type="primary"):
194
+ with st.spinner(f"Fetching and analyzing news about '{final_query}'..."):
195
+ df, error = load_news_data(final_query, days, sources)
196
+
197
+ if error:
198
+ st.error(f"Error loading data: {error}")
199
+ st.stop()
200
+
201
+ if df.empty:
202
+ st.warning("No articles found. Try adjusting your search parameters.")
203
+ st.stop()
204
+
205
+ # Store results in session state
206
+ st.session_state.df = df
207
+ st.session_state.query = final_query
208
+ st.session_state.days = days
209
+
210
+ # Display results if data is available
211
+ if 'df' in st.session_state:
212
+ df = st.session_state.df
213
+
214
+ # Summary metrics
215
+ st.markdown("### πŸ“Š Analysis Summary")
216
+ col1, col2, col3, col4 = st.columns(4)
217
+
218
+ with col1:
219
+ st.metric("πŸ“° Total Articles", len(df))
220
+
221
+ with col2:
222
+ avg_polarity = df['sentiment_polarity'].mean()
223
+ delta_polarity = f"{avg_polarity:+.3f}"
224
+ st.metric("🎭 Avg Sentiment", f"{avg_polarity:.3f}", delta_polarity)
225
+
226
+ with col3:
227
+ positive_pct = (len(df[df['sentiment_label'] == 'positive']) / len(df) * 100)
228
+ st.metric("😊 Positive %", f"{positive_pct:.1f}%")
229
+
230
+ with col4:
231
+ unique_sources = df['source'].nunique()
232
+ st.metric("πŸ“Ί News Sources", unique_sources)
233
+
234
+ # Charts
235
+ st.markdown("### πŸ“ˆ Visual Analysis")
236
+
237
+ # Row 1: Distribution and source analysis
238
+ col1, col2 = st.columns(2)
239
+
240
+ with col1:
241
+ dist_fig = create_sentiment_distribution(df)
242
+ if dist_fig:
243
+ st.plotly_chart(dist_fig, use_container_width=True)
244
+
245
+ with col2:
246
+ source_fig = create_source_analysis(df)
247
+ if source_fig:
248
+ st.plotly_chart(source_fig, use_container_width=True)
249
+
250
+ # Row 2: Polarity distribution (full width)
251
+ polarity_fig = create_polarity_distribution(df)
252
+ if polarity_fig:
253
+ st.plotly_chart(polarity_fig, use_container_width=True)
254
+
255
+
256
+ else:
257
+ # Welcome message
258
+ st.info("πŸ‘‹ Welcome! Configure your analysis settings in the sidebar and click 'Analyze News' to get started.")
259
+
260
+ # Sample visualization or instructions
261
+ st.markdown("""
262
+ ### πŸš€ How to Use:
263
+
264
+ 1. **Choose a topic** from the dropdown or enter your own search term
265
+ 2. **Select time range** (1-30 days) to analyze recent news
266
+ 3. **Pick news sources** or leave as 'All Sources' for comprehensive coverage
267
+ 4. **Click 'Analyze News'** to fetch and analyze articles
268
+
269
+ ### πŸ“Š What You'll Get:
270
+
271
+ - **Sentiment Analysis** of headlines and descriptions
272
+ - **Interactive Charts** showing trends over time
273
+ - **Source Breakdown** to see which outlets cover your topic
274
+ """)
275
+
276
+ if __name__ == "__main__":
277
+ main()