from smolagents import tool
import os
from newsapi import NewsApiClient
import datetime

@tool
def get_news_headlines(topic: str, count: int = 5) -> str:
    """Get latest news headlines for a specific topic
    Args:
        topic: Topic to search news for
        count: Number of headlines to return (default: 5)
    """
    API_KEY = os.getenv("NEWSAPI_KEY")
    if not API_KEY:
        return "Error: NewsAPI key not found in environment variables"
        
    newsapi = NewsApiClient(api_key=API_KEY)
    
    try:
        # Define search strategies with different parameters
        search_strategies = [
            {
                'query': f'"{topic}"',  # Exact phrase match
                'relevance': 'high'
            },
            {
                'query': topic,  # Normal search
                'relevance': 'high'
            },
            {
                'query': f"{topic} latest",  # Latest news
                'relevance': 'medium'
            }
        ]
        
        relevant_articles = []  # Store only relevant articles
        seen_titles = set()
        required_keywords = set(topic.lower().split())
        
        # Function to check article relevance
        def is_relevant(article, required_words, relevance_level):
            title = article['title'].lower()
            description = (article.get('description') or '').lower()
            content = (article.get('content') or '').lower()
            
            # Count how many required words appear in the article
            title_matches = sum(1 for word in required_words if word in title)
            desc_matches = sum(1 for word in required_words if word in description)
            content_matches = sum(1 for word in required_words if word in content)
            
            # Calculate relevance score
            total_score = (title_matches * 3) + (desc_matches * 2) + content_matches
            
            # For exact phrase matching
            if relevance_level == 'high':
                # Check if the exact topic phrase appears
                if topic.lower() in title or topic.lower() in description:
                    return True
                return total_score >= len(required_words) * 2
            else:
                return total_score >= len(required_words)

        for strategy in search_strategies:
            if len(relevant_articles) >= count:
                break
                
            # Calculate how many more articles we need
            remaining_count = count - len(relevant_articles)
            
            try:
                news = newsapi.get_everything(
                    q=strategy['query'],
                    language='en',
                    sort_by='relevancy',  # Changed to relevancy sort
                    page_size=min(50, remaining_count * 5)  # Request more articles to filter through
                )
                
                if news['articles']:
                    for article in news['articles']:
                        # Skip if we've seen this title or have enough articles
                        if article['title'] in seen_titles:
                            continue
                        
                        # Check if article is relevant enough
                        if is_relevant(article, required_keywords, strategy['relevance']):
                            seen_titles.add(article['title'])
                            pub_date = datetime.datetime.strptime(article['publishedAt'], '%Y-%m-%dT%H:%M:%SZ')
                            relevant_articles.append({
                                'title': article['title'],
                                'source': article['source']['name'],
                                'date': pub_date,
                                'url': article['url'],
                                'relevance': strategy['relevance']
                            })
                            
                            # Break if we have enough relevant articles
                            if len(relevant_articles) >= count:
                                break
            except Exception as e:
                continue  # If one strategy fails, try the next one
        
        # Sort by date (newest first)
        relevant_articles.sort(key=lambda x: x['date'], reverse=True)
        
        if relevant_articles:
            headlines = []
            for idx, article in enumerate(relevant_articles, 1):
                date_str = article['date'].strftime('%Y-%m-%d %H:%M UTC')
                relevance_indicator = "🎯" if article['relevance'] == 'high' else "✓"
                headlines.append(f"{idx}. {relevance_indicator} [{date_str}] {article['title']} ({article['source']})")
            
            # Add a summary of how many relevant articles were found
            found_count = len(relevant_articles)
            summary = f"Found {found_count} relevant {'article' if found_count == 1 else 'articles'} out of {count} requested.\n\n"
            return summary + "\n".join(headlines)
            
        return f"No relevant news found for topic: {topic}"
    except Exception as e:
        return f"Error fetching news: {str(e)}"