Spaces:

ai-sentiment-group
/

BootcampFinalProject

Sleeping

File size: 9,811 Bytes

"""
AI News API Handler
Fetches AI-related news from NewsAPI and performs sentiment analysis
"""
import json
import os
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Union

import pandas as pd
import requests
from dotenv import load_dotenv
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer as SIA


# Load environment variables from a .env file at import time so that
# os.getenv('NEWSAPI_KEY') in AINewsAnalyzer.__init__ can find the API key.
load_dotenv()

class AINewsAnalyzer:
    """Fetch news articles from NewsAPI and score their sentiment.

    The API key is read from the ``NEWSAPI_KEY`` environment variable.
    Polarity can be computed with either TextBlob or VADER; subjectivity
    always comes from TextBlob.
    """

    # Seconds to wait for NewsAPI before giving up; without a timeout a
    # stalled connection would block the caller indefinitely.
    REQUEST_TIMEOUT = 10

    # NewsAPI rejects page sizes above this value.
    MAX_PAGE_SIZE = 100

    def __init__(self):
        self.api_key = os.getenv('NEWSAPI_KEY')
        self.base_url = "https://newsapi.org/v2/everything"
        # VADER analyzer, created lazily on first use and then reused
        # (constructing it re-reads its lexicon, which is wasteful per call).
        self._vader = None

        if not self.api_key:
            raise ValueError("NewsAPI key not found. Please set NEWSAPI_KEY in your .env file")

    @staticmethod
    def _resolve_date_window(days: Union[int, tuple[int, int]],
                             now: datetime) -> tuple[datetime, datetime]:
        """Turn *days* into a chronological ``(from_date, to_date)`` pair.

        Args:
            days: Either a single number N (meaning "the last N days") or a
                pair of day offsets counted back from *now*. The pair may be
                given in either order.
            now: Reference point the offsets are subtracted from.

        Returns:
            ``(from_date, to_date)`` with ``from_date <= to_date``.

        Raises:
            ValueError: If *days* is a sequence that does not hold exactly
                two offsets.
        """
        if isinstance(days, (int, float)):
            offsets = (0, days)
        else:
            offsets = tuple(days)
            if len(offsets) != 2:
                raise ValueError(
                    f"days must be a number or a pair of day offsets, got {days!r}"
                )

        # The larger offset reaches further into the past, so it is the start
        # of the window; NewsAPI expects `from` to be the older bound.
        oldest, newest = max(offsets), min(offsets)
        return now - timedelta(days=oldest), now - timedelta(days=newest)

    def fetch_ai_news(self,
                      query: str = "artificial intelligence",
                      days: Union[int, tuple[int, int]] = (7, 14),
                      language: str = "en",
                      sources: Optional[str] = None,
                      page_size: int = 100) -> List[Dict]:
        """
        Fetch AI-related news from NewsAPI

        Args:
            query: Search query for news articles
            days: Look-back window. A single number N means the last N days;
                a pair of offsets (e.g. ``(7, 14)``) means the window between
                those many days ago, in either order.
            language: Language code (default: "en")
            sources: Comma-separated string of news sources
            page_size: Number of articles to fetch (clamped to 1..100)

        Returns:
            List of news articles with metadata; an empty list when the
            request fails or the API reports an error.
        """
        # Calculate date range (always oldest -> newest)
        from_date, to_date = self._resolve_date_window(days, datetime.now())

        print(from_date, to_date)
        # Prepare API parameters
        params = {
            'q': query,
            'from': from_date.strftime('%Y-%m-%d'),
            'to': to_date.strftime('%Y-%m-%d'),
            'language': language,
            'sortBy': 'publishedAt',
            'pageSize': max(1, min(int(page_size), self.MAX_PAGE_SIZE)),
            'apiKey': self.api_key
        }

        # Add sources if specified
        if sources:
            params['sources'] = sources

        try:
            response = requests.get(self.base_url, params=params,
                                    timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON on requests
            # versions where the decode error is not a RequestException.
            print(f"Request failed: {e}")
            return []

        if not isinstance(data, dict):
            print("API Error: Unknown error")
            return []

        if data.get('status') == 'ok':
            return data.get('articles') or []

        print(f"API Error: {data.get('message', 'Unknown error')}")
        return []

    def analyze_sentiment(self, text: str, model: str = "Textblob") -> Dict:
        """
        Analyze sentiment of the given text.

        Args:
            text: Text to analyze. Empty or ``None`` yields a neutral result.
            model: ``"Vader"`` (case-insensitive) to take polarity from
                VADER's compound score; any other value uses TextBlob's
                polarity. Subjectivity always comes from TextBlob.

        Returns:
            Dictionary with ``polarity``, ``subjectivity``, ``label``
            (``'positive'``/``'negative'``/``'neutral'``) and ``confidence``
            (absolute polarity).
        """
        if not text:
            return {
                'polarity': 0.0,
                'subjectivity': 0.0,
                'label': 'neutral',
                'confidence': 0.0
            }

        blob_sentiment = TextBlob(text).sentiment
        subjectivity = blob_sentiment.subjectivity

        use_vader = isinstance(model, str) and model.strip().lower() == "vader"
        if use_vader:
            # getattr tolerates instances whose __init__ was bypassed
            vader = getattr(self, '_vader', None)
            if vader is None:
                vader = SIA()
                self._vader = vader
            polarity = vader.polarity_scores(text)['compound']
            polarity_thresh = 0.05
        else:
            polarity = blob_sentiment.polarity
            polarity_thresh = 0.1

        # Determine sentiment label through polarity threshold
        if polarity > polarity_thresh:
            label = 'positive'
        elif polarity < -polarity_thresh:
            label = 'negative'
        else:
            label = 'neutral'

        return {
            'polarity': polarity,
            'subjectivity': subjectivity,
            'label': label,
            # Confidence is the distance from neutral
            'confidence': abs(polarity)
        }

    def process_news_articles(self, articles: List[Dict], model: str = "Textblob") -> pd.DataFrame:
        """
        Process news articles and add sentiment analysis

        Args:
            articles: List of news articles from API
            model: Polarity model forwarded to ``analyze_sentiment``

        Returns:
            DataFrame with processed articles and sentiment data, sorted by
            publication time (newest first). Articles lacking a title or a
            publication timestamp are skipped.
        """
        processed_articles = []

        for article in articles or []:
            title = article.get('title')
            published_at = article.get('publishedAt')

            # Skip articles with missing essential data
            if not title or not published_at:
                continue

            # The API may send an explicit null for optional fields, so
            # fall back with `or` rather than relying on .get defaults.
            description = article.get('description') or ''
            source = article.get('source') or {}

            # Analyze sentiment of title and description
            title_sentiment = self.analyze_sentiment(title, model=model)
            description_sentiment = self.analyze_sentiment(description, model=model)

            # Combine title and description sentiment (weighted toward title)
            combined_polarity = (title_sentiment['polarity'] * 0.7 +
                                 description_sentiment['polarity'] * 0.3)
            combined_subjectivity = (title_sentiment['subjectivity'] * 0.7 +
                                     description_sentiment['subjectivity'] * 0.3)

            # Determine overall sentiment.
            # NOTE: this threshold is 0.1 for every model, unlike the
            # per-model thresholds used in analyze_sentiment.
            if combined_polarity > 0.1:
                overall_sentiment = 'positive'
            elif combined_polarity < -0.1:
                overall_sentiment = 'negative'
            else:
                overall_sentiment = 'neutral'

            processed_articles.append({
                'title': title,
                'description': description,
                'url': article.get('url') or '',
                'source': source.get('name') or 'Unknown',
                'published_at': published_at,
                'author': article.get('author') or 'Unknown',
                'sentiment_label': overall_sentiment,
                'sentiment_polarity': combined_polarity,
                'sentiment_subjectivity': combined_subjectivity,
                'title_sentiment': title_sentiment['label'],
                'title_polarity': title_sentiment['polarity'],
                'description_sentiment': description_sentiment['label'],
                'description_polarity': description_sentiment['polarity']
            })

        # Convert to DataFrame
        df = pd.DataFrame(processed_articles)

        # Convert published_at to datetime
        if not df.empty:
            df['published_at'] = pd.to_datetime(df['published_at'])
            df = df.sort_values('published_at', ascending=False)

        return df

    def get_ai_news_with_sentiment(self,
                                   query: str = "artificial intelligence",
                                   days: Union[int, tuple[int, int]] = (7, 14),
                                   sources: Optional[str] = None,
                                   model: str = "Textblob") -> pd.DataFrame:
        """
        Complete pipeline: fetch news and analyze sentiment

        Args:
            query: Search query for news articles
            days: Look-back window; a single number N (last N days) or a
                pair of day offsets, as accepted by ``fetch_ai_news``
            sources: Comma-separated string of news sources
            model: Polarity model, ``"Vader"`` or ``"Textblob"``

        Returns:
            DataFrame with news articles and sentiment analysis; empty when
            no articles were fetched.
        """
        print(f"Fetching {query} news from the last {days} days...")

        # Fetch articles
        articles = self.fetch_ai_news(query=query, days=days, sources=sources)

        if not articles:
            print("No articles found.")
            return pd.DataFrame()

        print(f"Found {len(articles)} articles. Analyzing sentiment...")

        # Process and analyze
        df = self.process_news_articles(articles, model=model)

        print(f"Processed {len(df)} articles with sentiment analysis. \nUsed {model} for polarity analysis and Textblob for sentiment analysis.")
        return df
def load_config(path: str = 'config.json'):
    """Load configuration from a JSON file.

    Args:
        path: Location of the configuration file. Defaults to
            ``config.json`` relative to the current working directory.

    Returns:
        The parsed JSON content.

    Raises:
        FileNotFoundError: If *path* does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Read as UTF-8 explicitly so non-ASCII text in the config does not
    # depend on the platform's default encoding.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)

if __name__ == "__main__":
    # Manual smoke test: exercises sentiment scoring on the sample texts
    # from config.json, then runs the full fetch-and-analyze pipeline.
    analyzer = AINewsAnalyzer()
    config = load_config()

    print("Testing AI News Sentiment Analyzer...")
    print("=" * 50)

    # Test sentiment analysis; tolerate a config without sample texts
    test_texts = config.get("test_texts", [])

    print("\nSentiment Analysis Examples:")
    for text in test_texts:
        # The model is passed explicitly so the call is valid regardless of
        # whether analyze_sentiment declares a default for it.
        sentiment = analyzer.analyze_sentiment(text, model="Textblob")
        print(f"Text: {text}")
        print(f"Sentiment: {sentiment['label']} (polarity: {sentiment['polarity']:.2f})\n")

    # Test news fetching
    print("Fetching recent AI news...")
    # `days` is a pair of day offsets from today: start 3 days back, end today.
    df = analyzer.get_ai_news_with_sentiment(days=(3, 0))

    if not df.empty:
        print(f"\nFound {len(df)} articles")
        print("\nSentiment Distribution:")
        print(df['sentiment_label'].value_counts())

        print("\nTop 3 Most Positive Headlines:")
        positive_articles = df[df['sentiment_label'] == 'positive'].nlargest(3, 'sentiment_polarity')
        for _, article in positive_articles.iterrows():
            print(f"📈 {article['title']} (Score: {article['sentiment_polarity']:.2f})")

        print("\nTop 3 Most Negative Headlines:")
        negative_articles = df[df['sentiment_label'] == 'negative'].nsmallest(3, 'sentiment_polarity')
        for _, article in negative_articles.iterrows():
            print(f"📉 {article['title']} (Score: {article['sentiment_polarity']:.2f})")
    else:
        print("No articles found. Check your API key and internet connection.")