EmincanY committed on
Commit
190d464
·
verified ·
1 Parent(s): 200f1cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -49
app.py CHANGED
@@ -121,28 +121,20 @@ def get_news_headlines(topic: str, count: int = 5) -> str:
121
  # Define search strategies with different parameters
122
  search_strategies = [
123
  {
124
- 'query': topic,
125
- 'days_back': 1,
126
  'relevance': 'high'
127
  },
128
  {
129
- 'query': f'"{topic}"', # Exact match
130
- 'days_back': 7,
131
  'relevance': 'high'
132
  },
133
  {
134
- 'query': f"{topic} latest news",
135
- 'days_back': 30,
136
- 'relevance': 'medium'
137
- },
138
- {
139
- 'query': f"{topic} announcement",
140
- 'days_back': 30,
141
  'relevance': 'medium'
142
  }
143
  ]
144
 
145
- all_articles = []
146
  seen_titles = set()
147
  required_keywords = set(topic.lower().split())
148
 
@@ -160,57 +152,69 @@ def get_news_headlines(topic: str, count: int = 5) -> str:
160
  # Calculate relevance score
161
  total_score = (title_matches * 3) + (desc_matches * 2) + content_matches
162
 
 
163
  if relevance_level == 'high':
 
 
 
164
  return total_score >= len(required_words) * 2
165
- elif relevance_level == 'medium':
166
- return total_score >= len(required_words)
167
  else:
168
- return total_score > 0
169
 
170
  for strategy in search_strategies:
171
- if len(all_articles) >= count:
172
  break
173
 
174
- # Calculate date range
175
- from_date = (datetime.datetime.now() - datetime.timedelta(days=strategy['days_back'])).strftime('%Y-%m-%d')
176
 
177
- news = newsapi.get_everything(
178
- q=strategy['query'],
179
- language='en',
180
- sort_by='publishedAt',
181
- from_param=from_date,
182
- page_size=30 # Get more articles to filter through
183
- )
184
-
185
- if news['articles']:
186
- for article in news['articles']:
187
- # Skip if we've seen this title or have enough articles
188
- if article['title'] in seen_titles or len(all_articles) >= count:
189
- continue
190
-
191
- # Check if article is relevant enough
192
- if is_relevant(article, required_keywords, strategy['relevance']):
193
- seen_titles.add(article['title'])
194
- pub_date = datetime.datetime.strptime(article['publishedAt'], '%Y-%m-%dT%H:%M:%SZ')
195
- all_articles.append({
196
- 'title': article['title'],
197
- 'source': article['source']['name'],
198
- 'date': pub_date,
199
- 'url': article['url'],
200
- 'relevance': strategy['relevance']
201
- })
 
 
 
 
 
 
202
 
203
- # Sort by date (newest first) and take the top 'count' articles
204
- all_articles.sort(key=lambda x: x['date'], reverse=True)
205
- all_articles = all_articles[:count]
206
 
207
- if all_articles:
208
  headlines = []
209
- for idx, article in enumerate(all_articles, 1):
210
  date_str = article['date'].strftime('%Y-%m-%d %H:%M UTC')
211
  relevance_indicator = "🎯" if article['relevance'] == 'high' else "✓"
212
  headlines.append(f"{idx}. {relevance_indicator} [{date_str}] {article['title']} ({article['source']})")
213
- return "\n".join(headlines)
 
 
 
 
 
214
  return f"No relevant news found for topic: {topic}"
215
  except Exception as e:
216
  return f"Error fetching news: {str(e)}"
 
121
  # Define search strategies with different parameters
122
  search_strategies = [
123
  {
124
+ 'query': f'"{topic}"', # Exact phrase match
 
125
  'relevance': 'high'
126
  },
127
  {
128
+ 'query': topic, # Normal search
 
129
  'relevance': 'high'
130
  },
131
  {
132
+ 'query': f"{topic} latest", # Latest news
 
 
 
 
 
 
133
  'relevance': 'medium'
134
  }
135
  ]
136
 
137
+ relevant_articles = [] # Store only relevant articles
138
  seen_titles = set()
139
  required_keywords = set(topic.lower().split())
140
 
 
152
  # Calculate relevance score
153
  total_score = (title_matches * 3) + (desc_matches * 2) + content_matches
154
 
155
+ # For exact phrase matching
156
  if relevance_level == 'high':
157
+ # Check if the exact topic phrase appears
158
+ if topic.lower() in title or topic.lower() in description:
159
+ return True
160
  return total_score >= len(required_words) * 2
 
 
161
  else:
162
+ return total_score >= len(required_words)
163
 
164
  for strategy in search_strategies:
165
+ if len(relevant_articles) >= count:
166
  break
167
 
168
+ # Calculate how many more articles we need
169
+ remaining_count = count - len(relevant_articles)
170
 
171
+ try:
172
+ news = newsapi.get_everything(
173
+ q=strategy['query'],
174
+ language='en',
175
+ sort_by='relevancy', # Changed to relevancy sort
176
+ page_size=min(50, remaining_count * 5) # Request more articles to filter through
177
+ )
178
+
179
+ if news['articles']:
180
+ for article in news['articles']:
181
+ # Skip if we've seen this title or have enough articles
182
+ if article['title'] in seen_titles:
183
+ continue
184
+
185
+ # Check if article is relevant enough
186
+ if is_relevant(article, required_keywords, strategy['relevance']):
187
+ seen_titles.add(article['title'])
188
+ pub_date = datetime.datetime.strptime(article['publishedAt'], '%Y-%m-%dT%H:%M:%SZ')
189
+ relevant_articles.append({
190
+ 'title': article['title'],
191
+ 'source': article['source']['name'],
192
+ 'date': pub_date,
193
+ 'url': article['url'],
194
+ 'relevance': strategy['relevance']
195
+ })
196
+
197
+ # Break if we have enough relevant articles
198
+ if len(relevant_articles) >= count:
199
+ break
200
+ except Exception as e:
201
+ continue # If one strategy fails, try the next one
202
 
203
+ # Sort by date (newest first)
204
+ relevant_articles.sort(key=lambda x: x['date'], reverse=True)
 
205
 
206
+ if relevant_articles:
207
  headlines = []
208
+ for idx, article in enumerate(relevant_articles, 1):
209
  date_str = article['date'].strftime('%Y-%m-%d %H:%M UTC')
210
  relevance_indicator = "🎯" if article['relevance'] == 'high' else "✓"
211
  headlines.append(f"{idx}. {relevance_indicator} [{date_str}] {article['title']} ({article['source']})")
212
+
213
+ # Add a summary of how many relevant articles were found
214
+ found_count = len(relevant_articles)
215
+ summary = f"Found {found_count} relevant {'article' if found_count == 1 else 'articles'} out of {count} requested.\n\n"
216
+ return summary + "\n".join(headlines)
217
+
218
  return f"No relevant news found for topic: {topic}"
219
  except Exception as e:
220
  return f"Error fetching news: {str(e)}"