Commit
·
0f2373c
1
Parent(s):
097fe34
added VADER sentiment analysis
Browse files
- .gitignore +2 -0
- config.json +4 -0
- requirements.txt +2 -1
- src/api_handler.py +29 -35
- src/streamlit_app.py +16 -10
.gitignore
CHANGED
|
@@ -10,6 +10,8 @@ __pycache__/
|
|
| 10 |
# Virtual environment
|
| 11 |
.venv/
|
| 12 |
venv/
|
|
|
|
|
|
|
| 13 |
|
| 14 |
# IDE
|
| 15 |
.vscode/
|
|
|
|
| 10 |
# Virtual environment
|
| 11 |
.venv/
|
| 12 |
venv/
|
| 13 |
+
.env/
|
| 14 |
+
env/
|
| 15 |
|
| 16 |
# IDE
|
| 17 |
.vscode/
|
config.json
CHANGED
|
@@ -28,5 +28,9 @@
|
|
| 28 |
"AI breakthrough promises to revolutionize healthcare",
|
| 29 |
"Concerns grow over AI job displacement",
|
| 30 |
"New machine learning model shows mixed results"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
]
|
| 32 |
}
|
|
|
|
| 28 |
"AI breakthrough promises to revolutionize healthcare",
|
| 29 |
"Concerns grow over AI job displacement",
|
| 30 |
"New machine learning model shows mixed results"
|
| 31 |
+
],
|
| 32 |
+
"model_options":[
|
| 33 |
+
"Vader",
|
| 34 |
+
"TextBlob"
|
| 35 |
]
|
| 36 |
}
|
requirements.txt
CHANGED
|
@@ -4,4 +4,5 @@ requests>=2.31.0
|
|
| 4 |
python-dotenv>=1.0.0
|
| 5 |
textblob>=0.17.1
|
| 6 |
plotly>=5.15.0
|
| 7 |
-
numpy>=1.24.0
|
|
|
|
|
|
| 4 |
python-dotenv>=1.0.0
|
| 5 |
textblob>=0.17.1
|
| 6 |
plotly>=5.15.0
|
| 7 |
+
numpy>=1.24.0
|
| 8 |
+
vaderSentiment>=3.3.2
|
src/api_handler.py
CHANGED
|
@@ -10,6 +10,8 @@ import json
|
|
| 10 |
from dotenv import load_dotenv
|
| 11 |
from textblob import TextBlob
|
| 12 |
from typing import List, Dict, Optional
|
|
|
|
|
|
|
| 13 |
|
| 14 |
# Load environment variables
|
| 15 |
load_dotenv()
|
|
@@ -77,7 +79,7 @@ class AINewsAnalyzer:
|
|
| 77 |
print(f"Request failed: {e}")
|
| 78 |
return []
|
| 79 |
|
| 80 |
-
def analyze_sentiment(self, text: str) -> Dict:
|
| 81 |
"""
|
| 82 |
Analyze sentiment of given text using TextBlob
|
| 83 |
|
|
@@ -94,30 +96,39 @@ class AINewsAnalyzer:
|
|
| 94 |
'label': 'neutral',
|
| 95 |
'confidence': 0.0
|
| 96 |
}
|
| 97 |
-
|
| 98 |
blob = TextBlob(text)
|
| 99 |
-
polarity = blob.sentiment.polarity
|
| 100 |
subjectivity = blob.sentiment.subjectivity
|
| 101 |
-
|
| 102 |
-
#
|
| 103 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
label = 'positive'
|
| 105 |
-
elif polarity < -
|
| 106 |
label = 'negative'
|
| 107 |
else:
|
| 108 |
label = 'neutral'
|
| 109 |
|
|
|
|
| 110 |
# Calculate confidence (distance from neutral)
|
| 111 |
confidence = abs(polarity)
|
| 112 |
-
|
| 113 |
-
return {
|
| 114 |
'polarity': polarity,
|
| 115 |
'subjectivity': subjectivity,
|
| 116 |
'label': label,
|
| 117 |
'confidence': confidence
|
| 118 |
}
|
| 119 |
-
|
| 120 |
-
def process_news_articles(self, articles: List[Dict]) -> pd.DataFrame:
|
| 121 |
"""
|
| 122 |
Process news articles and add sentiment analysis
|
| 123 |
|
|
@@ -135,15 +146,14 @@ class AINewsAnalyzer:
|
|
| 135 |
continue
|
| 136 |
|
| 137 |
# Analyze sentiment of title and description
|
| 138 |
-
title_sentiment = self.analyze_sentiment(article['title'])
|
| 139 |
-
description_sentiment = self.analyze_sentiment(article
|
| 140 |
|
| 141 |
# Combine title and description sentiment (weighted toward title)
|
| 142 |
combined_polarity = (title_sentiment['polarity'] * 0.7 +
|
| 143 |
description_sentiment['polarity'] * 0.3)
|
| 144 |
combined_subjectivity = (title_sentiment['subjectivity'] * 0.7 +
|
| 145 |
description_sentiment['subjectivity'] * 0.3)
|
| 146 |
-
|
| 147 |
# Determine overall sentiment
|
| 148 |
if combined_polarity > 0.1:
|
| 149 |
overall_sentiment = 'positive'
|
|
@@ -183,7 +193,8 @@ class AINewsAnalyzer:
|
|
| 183 |
def get_ai_news_with_sentiment(self,
|
| 184 |
query: str = "artificial intelligence",
|
| 185 |
days: int = 7,
|
| 186 |
-
sources: Optional[str] = None
|
|
|
|
| 187 |
"""
|
| 188 |
Complete pipeline: fetch news and analyze sentiment
|
| 189 |
|
|
@@ -207,26 +218,10 @@ class AINewsAnalyzer:
|
|
| 207 |
print(f"Found {len(articles)} articles. Analyzing sentiment...")
|
| 208 |
|
| 209 |
# Process and analyze
|
| 210 |
-
df = self.process_news_articles(articles)
|
| 211 |
|
| 212 |
-
print(f"Processed {len(df)} articles with sentiment analysis.")
|
| 213 |
return df
|
| 214 |
-
|
| 215 |
-
def fetch_ai_news(query="artificial intelligence", days=7, sources=None):
|
| 216 |
-
"""Standalone function to fetch AI news"""
|
| 217 |
-
analyzer = AINewsAnalyzer()
|
| 218 |
-
return analyzer.fetch_ai_news(query, days, sources=sources)
|
| 219 |
-
|
| 220 |
-
def analyze_sentiment(text):
|
| 221 |
-
"""Standalone function to analyze sentiment"""
|
| 222 |
-
analyzer = AINewsAnalyzer()
|
| 223 |
-
return analyzer.analyze_sentiment(text)
|
| 224 |
-
|
| 225 |
-
def get_ai_news_with_sentiment(query="artificial intelligence", days=7, sources=None):
|
| 226 |
-
"""Standalone function for complete pipeline"""
|
| 227 |
-
analyzer = AINewsAnalyzer()
|
| 228 |
-
return analyzer.get_ai_news_with_sentiment(query, days, sources)
|
| 229 |
-
|
| 230 |
def load_config():
|
| 231 |
"""Load configuration from config.json"""
|
| 232 |
with open('config.json', 'r') as f:
|
|
@@ -247,8 +242,7 @@ if __name__ == "__main__":
|
|
| 247 |
for text in test_texts:
|
| 248 |
sentiment = analyzer.analyze_sentiment(text)
|
| 249 |
print(f"Text: {text}")
|
| 250 |
-
print(f"Sentiment: {sentiment['label']} (polarity: {sentiment['polarity']:.2f}
|
| 251 |
-
print()
|
| 252 |
|
| 253 |
# Test news fetching
|
| 254 |
print("Fetching recent AI news...")
|
|
|
|
| 10 |
from dotenv import load_dotenv
|
| 11 |
from textblob import TextBlob
|
| 12 |
from typing import List, Dict, Optional
|
| 13 |
+
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer as SIA
|
| 14 |
+
|
| 15 |
|
| 16 |
# Load environment variables
|
| 17 |
load_dotenv()
|
|
|
|
| 79 |
print(f"Request failed: {e}")
|
| 80 |
return []
|
| 81 |
|
| 82 |
+
def analyze_sentiment(self, text: str, model: str) -> Dict:
|
| 83 |
"""
|
| 84 |
Analyze sentiment of given text using TextBlob
|
| 85 |
|
|
|
|
| 96 |
'label': 'neutral',
|
| 97 |
'confidence': 0.0
|
| 98 |
}
|
|
|
|
| 99 |
blob = TextBlob(text)
|
|
|
|
| 100 |
subjectivity = blob.sentiment.subjectivity
|
| 101 |
+
|
| 102 |
+
# Use VADER's compound score as the polarity, with a 0.05 labeling threshold
|
| 103 |
+
if model == "Vader":
|
| 104 |
+
vader = SIA()
|
| 105 |
+
fullpolarity = vader.polarity_scores(text)
|
| 106 |
+
polarity=fullpolarity['compound']
|
| 107 |
+
polarity_thresh = 0.05
|
| 108 |
+
# Any other model value falls back to TextBlob polarity with a 0.1 threshold
|
| 109 |
+
else:
|
| 110 |
+
polarity = blob.sentiment.polarity
|
| 111 |
+
polarity_thresh = 0.1
|
| 112 |
+
|
| 113 |
+
# Determine sentiment label through polarity threshold
|
| 114 |
+
if polarity > polarity_thresh:
|
| 115 |
label = 'positive'
|
| 116 |
+
elif polarity < -polarity_thresh:
|
| 117 |
label = 'negative'
|
| 118 |
else:
|
| 119 |
label = 'neutral'
|
| 120 |
|
| 121 |
+
|
| 122 |
# Calculate confidence (distance from neutral)
|
| 123 |
confidence = abs(polarity)
|
| 124 |
+
res = {
|
|
|
|
| 125 |
'polarity': polarity,
|
| 126 |
'subjectivity': subjectivity,
|
| 127 |
'label': label,
|
| 128 |
'confidence': confidence
|
| 129 |
}
|
| 130 |
+
return res
|
| 131 |
+
def process_news_articles(self, articles: List[Dict], model: str) -> pd.DataFrame:
|
| 132 |
"""
|
| 133 |
Process news articles and add sentiment analysis
|
| 134 |
|
|
|
|
| 146 |
continue
|
| 147 |
|
| 148 |
# Analyze sentiment of title and description
|
| 149 |
+
title_sentiment = self.analyze_sentiment(article['title'], model=model)
|
| 150 |
+
description_sentiment = self.analyze_sentiment(article['description'], model=model)
|
| 151 |
|
| 152 |
# Combine title and description sentiment (weighted toward title)
|
| 153 |
combined_polarity = (title_sentiment['polarity'] * 0.7 +
|
| 154 |
description_sentiment['polarity'] * 0.3)
|
| 155 |
combined_subjectivity = (title_sentiment['subjectivity'] * 0.7 +
|
| 156 |
description_sentiment['subjectivity'] * 0.3)
|
|
|
|
| 157 |
# Determine overall sentiment
|
| 158 |
if combined_polarity > 0.1:
|
| 159 |
overall_sentiment = 'positive'
|
|
|
|
| 193 |
def get_ai_news_with_sentiment(self,
|
| 194 |
query: str = "artificial intelligence",
|
| 195 |
days: int = 7,
|
| 196 |
+
sources: Optional[str] = None,
|
| 197 |
+
model: str = "Textblob") -> pd.DataFrame:
|
| 198 |
"""
|
| 199 |
Complete pipeline: fetch news and analyze sentiment
|
| 200 |
|
|
|
|
| 218 |
print(f"Found {len(articles)} articles. Analyzing sentiment...")
|
| 219 |
|
| 220 |
# Process and analyze
|
| 221 |
+
df = self.process_news_articles(articles, model=model)
|
| 222 |
|
| 223 |
+
print(f"Processed {len(df)} articles with sentiment analysis. \nUsed {model} for polarity analysis and Textblob for sentiment analysis.")
|
| 224 |
return df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
def load_config():
|
| 226 |
"""Load configuration from config.json"""
|
| 227 |
with open('config.json', 'r') as f:
|
|
|
|
| 242 |
for text in test_texts:
|
| 243 |
sentiment = analyzer.analyze_sentiment(text)
|
| 244 |
print(f"Text: {text}")
|
| 245 |
+
print(f"Sentiment: {sentiment['label']} (polarity: {sentiment['polarity']:.2f}\n")
|
|
|
|
| 246 |
|
| 247 |
# Test news fetching
|
| 248 |
print("Fetching recent AI news...")
|
src/streamlit_app.py
CHANGED
|
@@ -19,7 +19,7 @@ st.set_page_config(
|
|
| 19 |
|
| 20 |
# Custom CSS for better styling
|
| 21 |
st.markdown("""
|
| 22 |
-
<style>
|
| 23 |
.main-header {
|
| 24 |
font-size: 2.5rem;
|
| 25 |
font-weight: bold;
|
|
@@ -46,11 +46,11 @@ def load_config():
|
|
| 46 |
return json.load(f)
|
| 47 |
|
| 48 |
@st.cache_data(ttl=1800) # Cache for 30 minutes
|
| 49 |
-
def load_news_data(query, days, sources=None):
|
| 50 |
"""Load and cache news data"""
|
| 51 |
try:
|
| 52 |
analyzer = AINewsAnalyzer()
|
| 53 |
-
df = analyzer.get_ai_news_with_sentiment(query=query, days=days, sources=sources)
|
| 54 |
return df, None
|
| 55 |
except Exception as e:
|
| 56 |
return pd.DataFrame(), str(e)
|
|
@@ -105,7 +105,7 @@ def create_source_analysis(df):
|
|
| 105 |
|
| 106 |
return fig
|
| 107 |
|
| 108 |
-
def create_polarity_distribution(df):
|
| 109 |
"""Create sentiment polarity distribution"""
|
| 110 |
if df.empty:
|
| 111 |
return None
|
|
@@ -119,10 +119,9 @@ def create_polarity_distribution(df):
|
|
| 119 |
)
|
| 120 |
|
| 121 |
# Add vertical lines for sentiment boundaries
|
| 122 |
-
fig.add_vline(x=
|
| 123 |
-
fig.add_vline(x=-
|
| 124 |
-
fig.add_vline(x=0, line_dash="dash", line_color="gray", annotation_text="Neutral")
|
| 125 |
-
|
| 126 |
return fig
|
| 127 |
|
| 128 |
|
|
@@ -150,6 +149,12 @@ def main():
|
|
| 150 |
"Or enter custom search:",
|
| 151 |
placeholder="e.g., 'generative AI'"
|
| 152 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
|
| 154 |
# Use custom query if provided
|
| 155 |
final_query = custom_query if custom_query else selected_query
|
|
@@ -186,7 +191,7 @@ def main():
|
|
| 186 |
# Load data
|
| 187 |
if st.sidebar.button("🚀 Analyze News", type="primary"):
|
| 188 |
with st.spinner(f"Fetching and analyzing news about '{final_query}'..."):
|
| 189 |
-
df, error = load_news_data(final_query, days, sources)
|
| 190 |
|
| 191 |
if error:
|
| 192 |
st.error(f"Error loading data: {error}")
|
|
@@ -242,7 +247,8 @@ def main():
|
|
| 242 |
st.plotly_chart(source_fig, use_container_width=True)
|
| 243 |
|
| 244 |
# Row 2: Polarity distribution (full width)
|
| 245 |
-
|
|
|
|
| 246 |
if polarity_fig:
|
| 247 |
st.plotly_chart(polarity_fig, use_container_width=True)
|
| 248 |
|
|
|
|
| 19 |
|
| 20 |
# Custom CSS for better styling
|
| 21 |
st.markdown("""
|
| 22 |
+
<style>
|
| 23 |
.main-header {
|
| 24 |
font-size: 2.5rem;
|
| 25 |
font-weight: bold;
|
|
|
|
| 46 |
return json.load(f)
|
| 47 |
|
| 48 |
@st.cache_data(ttl=1800) # Cache for 30 minutes
|
| 49 |
+
def load_news_data(query, days, sources=None, model="TextBlob"):
|
| 50 |
"""Load and cache news data"""
|
| 51 |
try:
|
| 52 |
analyzer = AINewsAnalyzer()
|
| 53 |
+
df = analyzer.get_ai_news_with_sentiment(query=query, days=days, sources=sources, model=model)
|
| 54 |
return df, None
|
| 55 |
except Exception as e:
|
| 56 |
return pd.DataFrame(), str(e)
|
|
|
|
| 105 |
|
| 106 |
return fig
|
| 107 |
|
| 108 |
+
def create_polarity_distribution(df, thresh: float):
|
| 109 |
"""Create sentiment polarity distribution"""
|
| 110 |
if df.empty:
|
| 111 |
return None
|
|
|
|
| 119 |
)
|
| 120 |
|
| 121 |
# Add vertical lines for sentiment boundaries
|
| 122 |
+
fig.add_vline(x=thresh, line_dash="dash", line_color="green", annotation_text="Positive Threshold", annotation_position="top right")
|
| 123 |
+
fig.add_vline(x=-thresh, line_dash="dash", line_color="red", annotation_text="Negative Threshold", annotation_position="top left")
|
| 124 |
+
fig.add_vline(x=0, line_dash="dash", line_color="gray", annotation_text="Neutral", annotation_position="top")
|
|
|
|
| 125 |
return fig
|
| 126 |
|
| 127 |
|
|
|
|
| 149 |
"Or enter custom search:",
|
| 150 |
placeholder="e.g., 'generative AI'"
|
| 151 |
)
|
| 152 |
+
|
| 153 |
+
model_query = st.sidebar.selectbox(
|
| 154 |
+
"📝 Search a Sentiment Model:",
|
| 155 |
+
options=config["model_options"],
|
| 156 |
+
index=0
|
| 157 |
+
)
|
| 158 |
|
| 159 |
# Use custom query if provided
|
| 160 |
final_query = custom_query if custom_query else selected_query
|
|
|
|
| 191 |
# Load data
|
| 192 |
if st.sidebar.button("🚀 Analyze News", type="primary"):
|
| 193 |
with st.spinner(f"Fetching and analyzing news about '{final_query}'..."):
|
| 194 |
+
df, error = load_news_data(final_query, days, sources=sources, model=model_query)
|
| 195 |
|
| 196 |
if error:
|
| 197 |
st.error(f"Error loading data: {error}")
|
|
|
|
| 247 |
st.plotly_chart(source_fig, use_container_width=True)
|
| 248 |
|
| 249 |
# Row 2: Polarity distribution (full width)
|
| 250 |
+
thresh = 0.05 if model_query == "Vader" else 0.1
|
| 251 |
+
polarity_fig = create_polarity_distribution(df, thresh=thresh)
|
| 252 |
if polarity_fig:
|
| 253 |
st.plotly_chart(polarity_fig, use_container_width=True)
|
| 254 |
|