EmincanY committed on
Commit
200f1cf
·
verified ·
1 Parent(s): 81bd92e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -22
app.py CHANGED
@@ -118,39 +118,87 @@ def get_news_headlines(topic: str, count: int = 5) -> str:
118
  newsapi = NewsApiClient(api_key=API_KEY)
119
 
120
  try:
121
- # Try different search queries to get more relevant results
122
- queries = [
123
- topic, # Original topic
124
- f"{topic} latest", # Latest news
125
- f"{topic} important", # Important news
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  ]
127
 
128
  all_articles = []
129
- seen_titles = set() # To avoid duplicates
 
130
 
131
- for query in queries:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  news = newsapi.get_everything(
133
- q=query,
134
  language='en',
135
  sort_by='publishedAt',
136
- page_size=count
 
137
  )
138
 
139
  if news['articles']:
140
  for article in news['articles']:
141
- # Skip if we've seen this title before
142
- if article['title'] in seen_titles:
143
  continue
144
 
145
- seen_titles.add(article['title'])
146
- # Convert UTC timestamp to datetime
147
- pub_date = datetime.datetime.strptime(article['publishedAt'], '%Y-%m-%dT%H:%M:%SZ')
148
- all_articles.append({
149
- 'title': article['title'],
150
- 'source': article['source']['name'],
151
- 'date': pub_date,
152
- 'url': article['url']
153
- })
 
 
154
 
155
  # Sort by date (newest first) and take the top 'count' articles
156
  all_articles.sort(key=lambda x: x['date'], reverse=True)
@@ -160,9 +208,10 @@ def get_news_headlines(topic: str, count: int = 5) -> str:
160
  headlines = []
161
  for idx, article in enumerate(all_articles, 1):
162
  date_str = article['date'].strftime('%Y-%m-%d %H:%M UTC')
163
- headlines.append(f"{idx}. [{date_str}] {article['title']} ({article['source']})")
 
164
  return "\n".join(headlines)
165
- return f"No news found for topic: {topic}"
166
  except Exception as e:
167
  return f"Error fetching news: {str(e)}"
168
 
 
118
  newsapi = NewsApiClient(api_key=API_KEY)
119
 
120
  try:
121
+ # Define search strategies with different parameters
122
+ search_strategies = [
123
+ {
124
+ 'query': topic,
125
+ 'days_back': 1,
126
+ 'relevance': 'high'
127
+ },
128
+ {
129
+ 'query': f'"{topic}"', # Exact match
130
+ 'days_back': 7,
131
+ 'relevance': 'high'
132
+ },
133
+ {
134
+ 'query': f"{topic} latest news",
135
+ 'days_back': 30,
136
+ 'relevance': 'medium'
137
+ },
138
+ {
139
+ 'query': f"{topic} announcement",
140
+ 'days_back': 30,
141
+ 'relevance': 'medium'
142
+ }
143
  ]
144
 
145
  all_articles = []
146
+ seen_titles = set()
147
+ required_keywords = set(topic.lower().split())
148
 
149
+ # Function to check article relevance
150
+ def is_relevant(article, required_words, relevance_level):
151
+ title = article['title'].lower()
152
+ description = (article.get('description') or '').lower()
153
+ content = (article.get('content') or '').lower()
154
+
155
+ # Count how many required words appear in the article
156
+ title_matches = sum(1 for word in required_words if word in title)
157
+ desc_matches = sum(1 for word in required_words if word in description)
158
+ content_matches = sum(1 for word in required_words if word in content)
159
+
160
+ # Calculate relevance score
161
+ total_score = (title_matches * 3) + (desc_matches * 2) + content_matches
162
+
163
+ if relevance_level == 'high':
164
+ return total_score >= len(required_words) * 2
165
+ elif relevance_level == 'medium':
166
+ return total_score >= len(required_words)
167
+ else:
168
+ return total_score > 0
169
+
170
+ for strategy in search_strategies:
171
+ if len(all_articles) >= count:
172
+ break
173
+
174
+ # Calculate date range
175
+ from_date = (datetime.datetime.now() - datetime.timedelta(days=strategy['days_back'])).strftime('%Y-%m-%d')
176
+
177
  news = newsapi.get_everything(
178
+ q=strategy['query'],
179
  language='en',
180
  sort_by='publishedAt',
181
+ from_param=from_date,
182
+ page_size=30 # Get more articles to filter through
183
  )
184
 
185
  if news['articles']:
186
  for article in news['articles']:
187
+ # Skip if we've seen this title or have enough articles
188
+ if article['title'] in seen_titles or len(all_articles) >= count:
189
  continue
190
 
191
+ # Check if article is relevant enough
192
+ if is_relevant(article, required_keywords, strategy['relevance']):
193
+ seen_titles.add(article['title'])
194
+ pub_date = datetime.datetime.strptime(article['publishedAt'], '%Y-%m-%dT%H:%M:%SZ')
195
+ all_articles.append({
196
+ 'title': article['title'],
197
+ 'source': article['source']['name'],
198
+ 'date': pub_date,
199
+ 'url': article['url'],
200
+ 'relevance': strategy['relevance']
201
+ })
202
 
203
  # Sort by date (newest first) and take the top 'count' articles
204
  all_articles.sort(key=lambda x: x['date'], reverse=True)
 
208
  headlines = []
209
  for idx, article in enumerate(all_articles, 1):
210
  date_str = article['date'].strftime('%Y-%m-%d %H:%M UTC')
211
+ relevance_indicator = "🎯" if article['relevance'] == 'high' else "✓"
212
+ headlines.append(f"{idx}. {relevance_indicator} [{date_str}] {article['title']} ({article['source']})")
213
  return "\n".join(headlines)
214
+ return f"No relevant news found for topic: {topic}"
215
  except Exception as e:
216
  return f"Error fetching news: {str(e)}"
217