Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1196,68 +1196,64 @@ tavily_tool = TavilySearchResults(
|
|
| 1196 |
search_depth="advanced",
|
| 1197 |
days=1,
|
| 1198 |
include_answer=True,
|
|
|
|
| 1199 |
include_raw_content=True,
|
| 1200 |
exclude_domains=['example.com'],
|
| 1201 |
)
|
| 1202 |
|
| 1203 |
-
def validate_tavily_results(query):
|
| 1204 |
-
|
| 1205 |
-
|
| 1206 |
-
|
| 1207 |
-
|
| 1208 |
-
|
| 1209 |
-
|
| 1210 |
-
|
| 1211 |
-
# Check if results are in expected format (list of dicts). Adjust if necessary.
|
| 1212 |
-
if isinstance(results, str):
|
| 1213 |
-
# If results are returned as a string, parse them into a structured format
|
| 1214 |
-
# For example, if results are JSON-formatted strings
|
| 1215 |
-
import json
|
| 1216 |
-
try:
|
| 1217 |
-
results = json.loads(results)
|
| 1218 |
-
except json.JSONDecodeError:
|
| 1219 |
-
# If parsing fails, split by newlines assuming URLs are separated by lines
|
| 1220 |
-
results = [{"title": f"Result {i+1}", "url": url.strip()} for i, url in enumerate(results.splitlines())]
|
| 1221 |
|
| 1222 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1223 |
|
| 1224 |
-
|
| 1225 |
-
url = result.get("url")
|
| 1226 |
-
if not url:
|
| 1227 |
-
continue
|
| 1228 |
|
| 1229 |
-
|
| 1230 |
-
|
| 1231 |
-
|
| 1232 |
-
|
| 1233 |
-
continue
|
| 1234 |
|
| 1235 |
-
|
| 1236 |
-
|
| 1237 |
-
|
|
|
|
|
|
|
| 1238 |
|
| 1239 |
-
|
| 1240 |
-
|
| 1241 |
-
|
| 1242 |
|
| 1243 |
-
|
| 1244 |
-
|
|
|
|
| 1245 |
|
| 1246 |
-
|
| 1247 |
-
|
| 1248 |
|
| 1249 |
-
|
|
|
|
| 1250 |
|
| 1251 |
-
#
|
| 1252 |
-
def tavily_tool_with_validation(query):
|
| 1253 |
-
valid_results = validate_tavily_results(query)
|
| 1254 |
-
return "\n".join([f"{res.get('title')}: {res.get('url')}" for res in valid_results])
|
| 1255 |
|
| 1256 |
|
| 1257 |
#Compile all tool functions into a list
|
| 1258 |
tools = [
|
| 1259 |
knowledge_base_tool, # Tool for querying the knowledge base and retrieving responses
|
| 1260 |
-
|
| 1261 |
#google_search_tool, # Tool for performing a Google search and retrieving search result snippets
|
| 1262 |
]
|
| 1263 |
|
|
|
|
| 1196 |
search_depth="advanced",
|
| 1197 |
days=1,
|
| 1198 |
include_answer=True,
|
| 1199 |
+
include_images=True,
|
| 1200 |
include_raw_content=True,
|
| 1201 |
exclude_domains=['example.com'],
|
| 1202 |
)
|
| 1203 |
|
| 1204 |
+
# def validate_tavily_results(query):
|
| 1205 |
+
# """
|
| 1206 |
+
# Fetch and validate results from TavilySearchResults.
|
| 1207 |
+
# Ensures that results do not lead to 'Page Not Found' pages.
|
| 1208 |
+
# """
|
| 1209 |
+
# # Use the Tavily tool to fetch results
|
| 1210 |
+
# results = tavily_tool.run(query) # Pass the query as tool_input to the Tavily tool
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1211 |
|
| 1212 |
+
# # Check if results are in expected format (list of dicts). Adjust if necessary.
|
| 1213 |
+
# if isinstance(results, str):
|
| 1214 |
+
# # If results are returned as a string, parse them into a structured format
|
| 1215 |
+
# # For example, if results are JSON-formatted strings
|
| 1216 |
+
# import json
|
| 1217 |
+
# try:
|
| 1218 |
+
# results = json.loads(results)
|
| 1219 |
+
# except json.JSONDecodeError:
|
| 1220 |
+
# # If parsing fails, split by newlines assuming URLs are separated by lines
|
| 1221 |
+
# results = [{"title": f"Result {i+1}", "url": url.strip()} for i, url in enumerate(results.splitlines())]
|
| 1222 |
|
| 1223 |
+
# valid_results = []
|
|
|
|
|
|
|
|
|
|
| 1224 |
|
| 1225 |
+
# for result in results:
|
| 1226 |
+
# url = result.get("url")
|
| 1227 |
+
# if not url:
|
| 1228 |
+
# continue
|
|
|
|
| 1229 |
|
| 1230 |
+
# try:
|
| 1231 |
+
# # Fetch page content
|
| 1232 |
+
# response = requests.get(url, timeout=10)
|
| 1233 |
+
# if response.status_code != 200:
|
| 1234 |
+
# continue
|
| 1235 |
|
| 1236 |
+
# # Parse page content to check for "Page Not Found"
|
| 1237 |
+
# soup = BeautifulSoup(response.content, "html.parser")
|
| 1238 |
+
# page_text = soup.get_text(separator=' ', strip=True)
|
| 1239 |
|
| 1240 |
+
# # Common markers of "Page Not Found"
|
| 1241 |
+
# if any(marker.lower() in page_text.lower() for marker in ["Page Not Found", "404", "Error", "Not Available"]):
|
| 1242 |
+
# continue
|
| 1243 |
|
| 1244 |
+
# # Add valid result
|
| 1245 |
+
# valid_results.append(result)
|
| 1246 |
|
| 1247 |
+
# except requests.RequestException:
|
| 1248 |
+
# continue
|
| 1249 |
|
| 1250 |
+
# return valid_results
|
|
|
|
|
|
|
|
|
|
| 1251 |
|
| 1252 |
|
| 1253 |
#Compile all tool functions into a list
|
| 1254 |
tools = [
|
| 1255 |
knowledge_base_tool, # Tool for querying the knowledge base and retrieving responses
|
| 1256 |
+
tavily_tool,
|
| 1257 |
#google_search_tool, # Tool for performing a Google search and retrieving search result snippets
|
| 1258 |
]
|
| 1259 |
|