File size: 5,212 Bytes
49cd06a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
from smolagents import tool
import os
from newsapi import NewsApiClient
import datetime

@tool
def get_news_headlines(topic: str, count: int = 5) -> str:
    """Get latest news headlines for a specific topic
    Args:
        topic: Topic to search news for
        count: Number of headlines to return (default: 5)
    """
    API_KEY = os.getenv("NEWSAPI_KEY")
    if not API_KEY:
        return "Error: NewsAPI key not found in environment variables"

    newsapi = NewsApiClient(api_key=API_KEY)

    try:
        # Progressively looser search strategies: exact phrase first, then a
        # plain keyword search, then a "latest" variant as a last resort.
        search_strategies = [
            {
                'query': f'"{topic}"',  # Exact phrase match
                'relevance': 'high'
            },
            {
                'query': topic,  # Normal search
                'relevance': 'high'
            },
            {
                'query': f"{topic} latest",  # Latest news
                'relevance': 'medium'
            }
        ]

        relevant_articles = []  # Store only relevant articles
        seen_titles = set()     # De-duplicate across strategies by title
        required_keywords = set(topic.lower().split())

        def _parse_pub_date(raw):
            """Parse NewsAPI's ISO-8601 'publishedAt' stamp; return None if
            missing or malformed. Accepts both whole-second and
            fractional-second variants that the API emits."""
            if not raw:
                return None
            for fmt in ('%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S.%fZ'):
                try:
                    return datetime.datetime.strptime(raw, fmt)
                except ValueError:
                    continue
            return None

        # Function to check article relevance
        def is_relevant(article, required_words, relevance_level):
            """Score an article against the topic words.
            Title hits weigh 3x, description 2x, body content 1x."""
            title = article['title'].lower()
            description = (article.get('description') or '').lower()
            content = (article.get('content') or '').lower()

            # Count how many required words appear in each field
            title_matches = sum(1 for word in required_words if word in title)
            desc_matches = sum(1 for word in required_words if word in description)
            content_matches = sum(1 for word in required_words if word in content)

            # Weighted relevance score
            total_score = (title_matches * 3) + (desc_matches * 2) + content_matches

            if relevance_level == 'high':
                # An exact topic phrase in title/description is an immediate pass
                if topic.lower() in title or topic.lower() in description:
                    return True
                return total_score >= len(required_words) * 2
            else:
                return total_score >= len(required_words)

        for strategy in search_strategies:
            if len(relevant_articles) >= count:
                break

            # How many more articles we still need
            remaining_count = count - len(relevant_articles)

            try:
                news = newsapi.get_everything(
                    q=strategy['query'],
                    language='en',
                    sort_by='relevancy',
                    # Over-fetch so the relevance filter has material to work with
                    page_size=min(50, remaining_count * 5)
                )

                for article in news.get('articles') or []:
                    title = article.get('title')
                    # NewsAPI returns placeholder "[Removed]" entries whose
                    # fields are null; skip those (and duplicates) rather than
                    # letting a None title abort the whole strategy.
                    if not title or title == '[Removed]' or title in seen_titles:
                        continue

                    pub_date = _parse_pub_date(article.get('publishedAt'))
                    if pub_date is None:
                        continue  # Cannot sort or display without a valid date

                    # Check if article is relevant enough
                    if is_relevant(article, required_keywords, strategy['relevance']):
                        seen_titles.add(title)
                        relevant_articles.append({
                            'title': title,
                            'source': article['source']['name'],
                            'date': pub_date,
                            'url': article['url'],
                            'relevance': strategy['relevance']
                        })

                        # Stop once we have enough relevant articles
                        if len(relevant_articles) >= count:
                            break
            except Exception:
                continue  # Best effort: if one strategy fails, try the next

        # Sort by date (newest first), regardless of which strategy found each
        relevant_articles.sort(key=lambda x: x['date'], reverse=True)

        if relevant_articles:
            headlines = []
            for idx, article in enumerate(relevant_articles, 1):
                date_str = article['date'].strftime('%Y-%m-%d %H:%M UTC')
                relevance_indicator = "🎯" if article['relevance'] == 'high' else "✓"
                headlines.append(f"{idx}. {relevance_indicator} [{date_str}] {article['title']} ({article['source']})")

            # Add a summary of how many relevant articles were found
            found_count = len(relevant_articles)
            summary = f"Found {found_count} relevant {'article' if found_count == 1 else 'articles'} out of {count} requested.\n\n"
            return summary + "\n".join(headlines)

        return f"No relevant news found for topic: {topic}"
    except Exception as e:
        return f"Error fetching news: {str(e)}"