Gabandino committed on
Commit
231c862
·
verified ·
1 Parent(s): 32eae92

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -6
app.py CHANGED
@@ -30,7 +30,7 @@ def search_news_headlines(source: str) -> str:
30
  Formatted news results or error message
31
  """
32
  domain_map = {
33
- "BBC News": "bbc.com",
34
  "CNN": "cnn.com",
35
  "Reuters": "reuters.com",
36
  "AP": "apnews.com",
@@ -45,20 +45,29 @@ def search_news_headlines(source: str) -> str:
45
  if not domain:
46
  return f"Error: {source} is not a supported news source"
47
 
48
- # Create search query with site filter and time filter
49
- search_query = f"site:{domain} latest news after:2023-01-01"
50
 
51
  # Use existing DuckDuckGoSearchTool implementation
52
  ddg_tool = DuckDuckGoSearchTool(max_results=5)
53
  raw_results = ddg_tool.forward(search_query)
54
 
55
  # Parse the raw results to extract titles and links
56
- # DuckDuckGo results come in markdown format: [title](link)
57
  import re
58
  headlines = re.findall(r'\[(.*?)\]\((.*?)\)', raw_results)
59
 
 
 
 
 
 
 
60
  # Format the results
61
- formatted_headlines = [f"{i+1}. {title} {link}" for i, (title, link) in enumerate(headlines[:5])]
 
 
 
 
62
  return f"Here are the latest news from {source}:\n\n" + "\n".join(formatted_headlines)
63
 
64
  except Exception as e:
@@ -82,7 +91,7 @@ with open("prompts.yaml", 'r') as stream:
82
  agent = CodeAgent(
83
  model=model,
84
  tools=[final_answer, search_news_headlines], ## add your tools here (don't remove final answer)
85
- max_steps=6,
86
  verbosity_level=1,
87
  grammar=None,
88
  planning_interval=None,
 
30
  Formatted news results or error message
31
  """
32
  domain_map = {
33
+ "BBC News": "bbc.com/news",
34
  "CNN": "cnn.com",
35
  "Reuters": "reuters.com",
36
  "AP": "apnews.com",
 
45
  if not domain:
46
  return f"Error: {source} is not a supported news source"
47
 
48
+ # Modified search query to better target actual news articles
49
+ search_query = f"site:{domain} -inurl:video -inurl:about -inurl:contact -inurl:search news article when:7d"
50
 
51
  # Use existing DuckDuckGoSearchTool implementation
52
  ddg_tool = DuckDuckGoSearchTool(max_results=5)
53
  raw_results = ddg_tool.forward(search_query)
54
 
55
  # Parse the raw results to extract titles and links
 
56
  import re
57
  headlines = re.findall(r'\[(.*?)\]\((.*?)\)', raw_results)
58
 
59
+ # Filter out navigation/section pages
60
+ filtered_headlines = []
61
+ for title, link in headlines:
62
+ if not any(x in title.lower() for x in ['breaking news updates', 'latest news headlines', 'news video', 'video clips', 'news article', 'breaking news', 'news']):
63
+ filtered_headlines.append((title, link))
64
+
65
  # Format the results
66
+ formatted_headlines = [f"{i+1}. {title} {link}" for i, (title, link) in enumerate(filtered_headlines[:5])]
67
+
68
+ if not formatted_headlines:
69
+ return f"No recent news articles found from {source}. Please try again."
70
+
71
  return f"Here are the latest news from {source}:\n\n" + "\n".join(formatted_headlines)
72
 
73
  except Exception as e:
 
91
  agent = CodeAgent(
92
  model=model,
93
  tools=[final_answer, search_news_headlines], ## add your tools here (don't remove final answer)
94
+ max_steps=10,
95
  verbosity_level=1,
96
  grammar=None,
97
  planning_interval=None,