|
|
""" |
|
|
AI News API Handler |
|
|
Fetches AI-related news from NewsAPI and performs sentiment analysis |
|
|
""" |
|
|
import requests |
|
|
import pandas as pd |
|
|
from datetime import datetime, timedelta |
|
|
import os |
|
|
import json |
|
|
from dotenv import load_dotenv |
|
|
from textblob import TextBlob |
|
|
from typing import List, Dict, Optional |
|
|
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer as SIA |
|
|
|
|
|
|
|
|
|
|
|
# Load environment variables (e.g. NEWSAPI_KEY) from a local .env file at import time.
load_dotenv()
|
|
|
|
|
class AINewsAnalyzer:
    """Fetch AI-related news from NewsAPI and attach sentiment scores.

    Polarity can be computed with either TextBlob or VADER (chosen per
    call via the ``model`` argument); subjectivity always comes from
    TextBlob.
    """

    def __init__(self):
        """Read the NewsAPI key from the environment.

        Raises:
            ValueError: If NEWSAPI_KEY is not set (e.g. missing .env entry).
        """
        self.api_key = os.getenv('NEWSAPI_KEY')
        self.base_url = "https://newsapi.org/v2/everything"

        if not self.api_key:
            raise ValueError("NewsAPI key not found. Please set NEWSAPI_KEY in your .env file")

        # Build the VADER analyzer once; the original constructed a fresh
        # instance on every analyze_sentiment() call.
        self._vader = SIA()

    def fetch_ai_news(self,
                      query: str = "artificial intelligence",
                      days: tuple = (7, 14),
                      language: str = "en",
                      sources: Optional[str] = None,
                      page_size: int = 100) -> List[Dict]:
        """
        Fetch AI-related news from NewsAPI.

        Args:
            query: Search query for news articles
            days: Look-back window. Either a single int (the last N days)
                or a pair of day-offsets from today, in any order.
            language: Language code (default: "en")
            sources: Comma-separated string of news sources
            page_size: Number of articles to fetch (max 100)

        Returns:
            List of news articles with metadata; empty list on any error.
        """
        today = datetime.now()
        if isinstance(days, int):
            # Generalization: allow days=3 to mean "the last 3 days".
            days = (0, days)

        # BUG FIX: the original used days[0] for 'from' and days[1] for 'to',
        # so the default (7, 14) produced a 'from' date AFTER the 'to' date.
        # NewsAPI requires from <= to; the larger offset is the older date.
        from_date = today - timedelta(days=max(days))
        to_date = today - timedelta(days=min(days))

        params = {
            'q': query,
            'from': from_date.strftime('%Y-%m-%d'),
            'to': to_date.strftime('%Y-%m-%d'),
            'language': language,
            'sortBy': 'publishedAt',
            'pageSize': page_size,
            'apiKey': self.api_key
        }

        if sources:
            params['sources'] = sources

        try:
            # timeout added: requests without one can block indefinitely.
            response = requests.get(self.base_url, params=params, timeout=30)
            response.raise_for_status()

            data = response.json()

            if data['status'] == 'ok':
                return data['articles']

            print(f"API Error: {data.get('message', 'Unknown error')}")
            return []

        except requests.exceptions.RequestException as e:
            print(f"Request failed: {e}")
            return []

    def analyze_sentiment(self, text: str, model: str = "Textblob") -> Dict:
        """
        Analyze sentiment of the given text.

        Args:
            text: Text to analyze; None/empty yields a neutral result.
            model: "Vader" to score polarity with VADER's compound score;
                any other value uses TextBlob polarity. A default is
                provided so callers may omit it (backward compatible).

        Returns:
            Dict with 'polarity', 'subjectivity', 'label' and 'confidence'.
        """
        if not text:
            return {
                'polarity': 0.0,
                'subjectivity': 0.0,
                'label': 'neutral',
                'confidence': 0.0
            }

        blob = TextBlob(text)
        # Subjectivity always comes from TextBlob, regardless of model.
        subjectivity = blob.sentiment.subjectivity

        if model == "Vader":
            # VADER's compound score is in [-1, 1]; +/-0.05 is the
            # threshold conventionally recommended for VADER.
            polarity = self._vader.polarity_scores(text)['compound']
            polarity_thresh = 0.05
        else:
            polarity = blob.sentiment.polarity
            polarity_thresh = 0.1

        if polarity > polarity_thresh:
            label = 'positive'
        elif polarity < -polarity_thresh:
            label = 'negative'
        else:
            label = 'neutral'

        # Distance from neutral doubles as a crude confidence measure.
        confidence = abs(polarity)
        return {
            'polarity': polarity,
            'subjectivity': subjectivity,
            'label': label,
            'confidence': confidence
        }

    def process_news_articles(self, articles: List[Dict], model: str = "Textblob") -> pd.DataFrame:
        """
        Process news articles and add sentiment analysis.

        Title sentiment is weighted 0.7 and description sentiment 0.3 when
        combining them into the overall article score.

        Args:
            articles: List of news articles from the API
            model: Polarity model forwarded to analyze_sentiment
                (defaulted for backward compatibility)

        Returns:
            DataFrame with processed articles and sentiment data, sorted
            newest-first; empty DataFrame if no usable articles.
        """
        processed_articles = []

        for article in articles:
            # Skip articles missing the fields we key on downstream.
            if not article.get('title') or not article.get('publishedAt'):
                continue

            title_sentiment = self.analyze_sentiment(article['title'], model=model)
            # .get(): 'description' may be absent or None; analyze_sentiment
            # treats falsy text as neutral. (Original indexed it directly,
            # which raised KeyError when the key was missing.)
            description_sentiment = self.analyze_sentiment(article.get('description'), model=model)

            combined_polarity = (title_sentiment['polarity'] * 0.7 +
                                 description_sentiment['polarity'] * 0.3)
            combined_subjectivity = (title_sentiment['subjectivity'] * 0.7 +
                                     description_sentiment['subjectivity'] * 0.3)

            if combined_polarity > 0.1:
                overall_sentiment = 'positive'
            elif combined_polarity < -0.1:
                overall_sentiment = 'negative'
            else:
                overall_sentiment = 'neutral'

            processed_articles.append({
                'title': article['title'],
                'description': article.get('description', ''),
                'url': article['url'],
                'source': article['source']['name'],
                'published_at': article['publishedAt'],
                'author': article.get('author', 'Unknown'),
                'sentiment_label': overall_sentiment,
                'sentiment_polarity': combined_polarity,
                'sentiment_subjectivity': combined_subjectivity,
                'title_sentiment': title_sentiment['label'],
                'title_polarity': title_sentiment['polarity'],
                'description_sentiment': description_sentiment['label'],
                'description_polarity': description_sentiment['polarity']
            })

        df = pd.DataFrame(processed_articles)

        if not df.empty:
            df['published_at'] = pd.to_datetime(df['published_at'])
            df = df.sort_values('published_at', ascending=False)

        return df

    def get_ai_news_with_sentiment(self,
                                   query: str = "artificial intelligence",
                                   days: tuple = (7, 14),
                                   sources: Optional[str] = None,
                                   model: str = "Textblob") -> pd.DataFrame:
        """
        Complete pipeline: fetch news and analyze sentiment.

        Args:
            query: Search query for news articles
            days: Look-back window (int or pair of day-offsets), forwarded
                to fetch_ai_news
            sources: Comma-separated string of news sources
            model: Polarity model ("Vader" or TextBlob default)

        Returns:
            DataFrame with news articles and sentiment analysis; empty
            DataFrame when nothing was fetched.
        """
        print(f"Fetching {query} news from the last {days} days...")

        articles = self.fetch_ai_news(query=query, days=days, sources=sources)

        if not articles:
            print("No articles found.")
            return pd.DataFrame()

        print(f"Found {len(articles)} articles. Analyzing sentiment...")

        df = self.process_news_articles(articles, model=model)

        print(f"Processed {len(df)} articles with sentiment analysis. \nUsed {model} for polarity analysis and Textblob for sentiment analysis.")
        return df
|
|
def load_config(path: str = 'config.json') -> dict:
    """Load configuration from a JSON file.

    Args:
        path: Path to the config file. Defaults to 'config.json' in the
            working directory, preserving the original behavior.

    Returns:
        The parsed configuration object.

    Raises:
        FileNotFoundError: If the file does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Explicit encoding so the result does not depend on the platform default.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)
|
|
|
|
|
if __name__ == "__main__":

    # Smoke-test the analyzer: a few canned texts, then a live fetch.
    analyzer = AINewsAnalyzer()

    config = load_config()

    print("Testing AI News Sentiment Analyzer...")

    print("=" * 50)

    test_texts = config["test_texts"]

    print("\nSentiment Analysis Examples:")

    for text in test_texts:
        # BUG FIX: analyze_sentiment requires a model argument; the original
        # call omitted it and raised TypeError. Use the TextBlob default.
        sentiment = analyzer.analyze_sentiment(text, model="Textblob")
        print(f"Text: {text}")
        # BUG FIX: the printed "(polarity: ...)" was missing its closing paren.
        print(f"Sentiment: {sentiment['label']} (polarity: {sentiment['polarity']:.2f})\n")

    print("Fetching recent AI news...")

    # BUG FIX: fetch_ai_news subscripts days[0]/days[1], so the original
    # days=3 raised TypeError. (3, 0) means "from 3 days ago up to today".
    df = analyzer.get_ai_news_with_sentiment(days=(3, 0))

    if not df.empty:
        print(f"\nFound {len(df)} articles")
        print("\nSentiment Distribution:")
        print(df['sentiment_label'].value_counts())

        print("\nTop 3 Most Positive Headlines:")
        positive_articles = df[df['sentiment_label'] == 'positive'].nlargest(3, 'sentiment_polarity')
        for _, article in positive_articles.iterrows():
            print(f"📈 {article['title']} (Score: {article['sentiment_polarity']:.2f})")

        print("\nTop 3 Most Negative Headlines:")
        negative_articles = df[df['sentiment_label'] == 'negative'].nsmallest(3, 'sentiment_polarity')
        for _, article in negative_articles.iterrows():
            print(f"📉 {article['title']} (Score: {article['sentiment_polarity']:.2f})")
    else:
        print("No articles found. Check your API key and internet connection.")