""" AI News API Handler Fetches AI-related news from NewsAPI and performs sentiment analysis """ import requests import pandas as pd from datetime import datetime, timedelta import os import json from dotenv import load_dotenv from textblob import TextBlob from typing import List, Dict, Optional from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer as SIA # Load environment variables load_dotenv() class AINewsAnalyzer: def __init__(self): self.api_key = os.getenv('NEWSAPI_KEY') self.base_url = "https://newsapi.org/v2/everything" if not self.api_key: raise ValueError("NewsAPI key not found. Please set NEWSAPI_KEY in your .env file") def fetch_ai_news(self, query: str = "artificial intelligence", days: tuple[int] = (7,14), language: str = "en", sources: Optional[str] = None, page_size: int = 100) -> List[Dict]: """ Fetch AI-related news from NewsAPI Args: query: Search query for news articles days: Number of days to look back language: Language code (default: "en") sources: Comma-separated string of news sources page_size: Number of articles to fetch (max 100) Returns: List of news articles with metadata """ # Calculate date range today = datetime.now() from_date = today - timedelta(days=days[0]) # 7 to_date = today - timedelta(days=days[1]) # 14 print(from_date, to_date) # Prepare API parameters params = { 'q': query, 'from': from_date.strftime('%Y-%m-%d'), 'to': to_date.strftime('%Y-%m-%d'), 'language': language, 'sortBy': 'publishedAt', 'pageSize': page_size, 'apiKey': self.api_key } # Add sources if specified if sources: params['sources'] = sources try: # Make API request response = requests.get(self.base_url, params=params) response.raise_for_status() data = response.json() if data['status'] == 'ok': return data['articles'] else: print(f"API Error: {data.get('message', 'Unknown error')}") return [] except requests.exceptions.RequestException as e: print(f"Request failed: {e}") return [] def analyze_sentiment(self, text: str, model: str) -> Dict: """ Analyze sentiment of given text using TextBlob Args: text: Text to analyze Returns: Dictionary with sentiment metrics """ if not text: return { 'polarity': 0.0, 'subjectivity': 0.0, 'label': 'neutral', 'confidence': 0.0 } blob = TextBlob(text) subjectivity = blob.sentiment.subjectivity # implement Vader Analysis for polarity scores if model == "Vader": vader = SIA() fullpolarity = vader.polarity_scores(text) polarity=fullpolarity['compound'] polarity_thresh = 0.05 # otherwise else: polarity = blob.sentiment.polarity polarity_thresh = 0.1 # Determine sentiment label through polarity threshold if polarity > polarity_thresh: label = 'positive' elif polarity < -polarity_thresh: label = 'negative' else: label = 'neutral' # Calculate confidence (distance from neutral) confidence = abs(polarity) res = { 'polarity': polarity, 'subjectivity': subjectivity, 'label': label, 'confidence': confidence } return res def process_news_articles(self, articles: List[Dict], model: str) -> pd.DataFrame: """ Process news articles and add sentiment analysis Args: articles: List of news articles from API Returns: DataFrame with processed articles and sentiment data """ processed_articles = [] for article in articles: # Skip articles with missing essential data if not article.get('title') or not article.get('publishedAt'): continue # Analyze sentiment of title and description title_sentiment = self.analyze_sentiment(article['title'], model=model) description_sentiment = self.analyze_sentiment(article['description'], model=model) # Combine title and description sentiment (weighted toward title) combined_polarity = (title_sentiment['polarity'] * 0.7 + description_sentiment['polarity'] * 0.3) combined_subjectivity = (title_sentiment['subjectivity'] * 0.7 + description_sentiment['subjectivity'] * 0.3) # Determine overall sentiment if combined_polarity > 0.1: overall_sentiment = 'positive' elif combined_polarity < -0.1: overall_sentiment = 'negative' else: overall_sentiment = 'neutral' processed_article = { 'title': article['title'], 'description': article.get('description', ''), 'url': article['url'], 'source': article['source']['name'], 'published_at': article['publishedAt'], 'author': article.get('author', 'Unknown'), 'sentiment_label': overall_sentiment, 'sentiment_polarity': combined_polarity, 'sentiment_subjectivity': combined_subjectivity, 'title_sentiment': title_sentiment['label'], 'title_polarity': title_sentiment['polarity'], 'description_sentiment': description_sentiment['label'], 'description_polarity': description_sentiment['polarity'] } processed_articles.append(processed_article) # Convert to DataFrame df = pd.DataFrame(processed_articles) # Convert published_at to datetime if not df.empty: df['published_at'] = pd.to_datetime(df['published_at']) df = df.sort_values('published_at', ascending=False) return df def get_ai_news_with_sentiment(self, query: str = "artificial intelligence", days: tuple[int] = (7,14), sources: Optional[str] = None, model: str = "Textblob") -> pd.DataFrame: """ Complete pipeline: fetch news and analyze sentiment Args: query: Search query for news articles days: Number of days to look back sources: Comma-separated string of news sources Returns: DataFrame with news articles and sentiment analysis """ print(f"Fetching {query} news from the last {days} days...") # Fetch articles articles = self.fetch_ai_news(query=query, days=days, sources=sources) if not articles: print("No articles found.") return pd.DataFrame() print(f"Found {len(articles)} articles. Analyzing sentiment...") # Process and analyze df = self.process_news_articles(articles, model=model) print(f"Processed {len(df)} articles with sentiment analysis. \nUsed {model} for polarity analysis and Textblob for sentiment analysis.") return df def load_config(): """Load configuration from config.json""" with open('config.json', 'r') as f: return json.load(f) if __name__ == "__main__": # Test the API when run directly analyzer = AINewsAnalyzer() config = load_config() print("Testing AI News Sentiment Analyzer...") print("=" * 50) # Test sentiment analysis test_texts = config["test_texts"] print("\nSentiment Analysis Examples:") for text in test_texts: sentiment = analyzer.analyze_sentiment(text) print(f"Text: {text}") print(f"Sentiment: {sentiment['label']} (polarity: {sentiment['polarity']:.2f}\n") # Test news fetching print("Fetching recent AI news...") df = analyzer.get_ai_news_with_sentiment(days=3) if not df.empty: print(f"\nFound {len(df)} articles") print("\nSentiment Distribution:") print(df['sentiment_label'].value_counts()) print("\nTop 3 Most Positive Headlines:") positive_articles = df[df['sentiment_label'] == 'positive'].nlargest(3, 'sentiment_polarity') for _, article in positive_articles.iterrows(): print(f"📈 {article['title']} (Score: {article['sentiment_polarity']:.2f})") print("\nTop 3 Most Negative Headlines:") negative_articles = df[df['sentiment_label'] == 'negative'].nsmallest(3, 'sentiment_polarity') for _, article in negative_articles.iterrows(): print(f"📉 {article['title']} (Score: {article['sentiment_polarity']:.2f})") else: print("No articles found. Check your API key and internet connection.")