Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -30,7 +30,7 @@ def search_news_headlines(source: str) -> str:
|
|
| 30 |
Formatted news results or error message
|
| 31 |
"""
|
| 32 |
domain_map = {
|
| 33 |
-
"BBC News": "bbc.com",
|
| 34 |
"CNN": "cnn.com",
|
| 35 |
"Reuters": "reuters.com",
|
| 36 |
"AP": "apnews.com",
|
|
@@ -45,20 +45,29 @@ def search_news_headlines(source: str) -> str:
|
|
| 45 |
if not domain:
|
| 46 |
return f"Error: {source} is not a supported news source"
|
| 47 |
|
| 48 |
-
#
|
| 49 |
-
search_query = f"site:{domain}
|
| 50 |
|
| 51 |
# Use existing DuckDuckGoSearchTool implementation
|
| 52 |
ddg_tool = DuckDuckGoSearchTool(max_results=5)
|
| 53 |
raw_results = ddg_tool.forward(search_query)
|
| 54 |
|
| 55 |
# Parse the raw results to extract titles and links
|
| 56 |
-
# DuckDuckGo results come in markdown format: [title](link)
|
| 57 |
import re
|
| 58 |
headlines = re.findall(r'\[(.*?)\]\((.*?)\)', raw_results)
|
| 59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
# Format the results
|
| 61 |
-
formatted_headlines = [f"{i+1}. {title} {link}" for i, (title, link) in enumerate(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
return f"Here are the latest news from {source}:\n\n" + "\n".join(formatted_headlines)
|
| 63 |
|
| 64 |
except Exception as e:
|
|
@@ -82,7 +91,7 @@ with open("prompts.yaml", 'r') as stream:
|
|
| 82 |
agent = CodeAgent(
|
| 83 |
model=model,
|
| 84 |
tools=[final_answer, search_news_headlines], ## add your tools here (don't remove final answer)
|
| 85 |
-
max_steps=
|
| 86 |
verbosity_level=1,
|
| 87 |
grammar=None,
|
| 88 |
planning_interval=None,
|
|
|
|
| 30 |
Formatted news results or error message
|
| 31 |
"""
|
| 32 |
domain_map = {
|
| 33 |
+
"BBC News": "bbc.com/news",
|
| 34 |
"CNN": "cnn.com",
|
| 35 |
"Reuters": "reuters.com",
|
| 36 |
"AP": "apnews.com",
|
|
|
|
| 45 |
if not domain:
|
| 46 |
return f"Error: {source} is not a supported news source"
|
| 47 |
|
| 48 |
+
# Modified search query to better target actual news articles
|
| 49 |
+
search_query = f"site:{domain} -inurl:video -inurl:about -inurl:contact -inurl:search news article when:7d"
|
| 50 |
|
| 51 |
# Use existing DuckDuckGoSearchTool implementation
|
| 52 |
ddg_tool = DuckDuckGoSearchTool(max_results=5)
|
| 53 |
raw_results = ddg_tool.forward(search_query)
|
| 54 |
|
| 55 |
# Parse the raw results to extract titles and links
|
|
|
|
| 56 |
import re
|
| 57 |
headlines = re.findall(r'\[(.*?)\]\((.*?)\)', raw_results)
|
| 58 |
|
| 59 |
+
# Filter out navigation/section pages
|
| 60 |
+
filtered_headlines = []
|
| 61 |
+
for title, link in headlines:
|
| 62 |
+
if not any(x in title.lower() for x in ['breaking news updates', 'latest news headlines', 'news video', 'video clips', 'news article', 'breaking news', 'news']):
|
| 63 |
+
filtered_headlines.append((title, link))
|
| 64 |
+
|
| 65 |
# Format the results
|
| 66 |
+
formatted_headlines = [f"{i+1}. {title} {link}" for i, (title, link) in enumerate(filtered_headlines[:5])]
|
| 67 |
+
|
| 68 |
+
if not formatted_headlines:
|
| 69 |
+
return f"No recent news articles found from {source}. Please try again."
|
| 70 |
+
|
| 71 |
return f"Here are the latest news from {source}:\n\n" + "\n".join(formatted_headlines)
|
| 72 |
|
| 73 |
except Exception as e:
|
|
|
|
| 91 |
agent = CodeAgent(
|
| 92 |
model=model,
|
| 93 |
tools=[final_answer, search_news_headlines], ## add your tools here (don't remove final answer)
|
| 94 |
+
max_steps=10,
|
| 95 |
verbosity_level=1,
|
| 96 |
grammar=None,
|
| 97 |
planning_interval=None,
|