Wajahat698 committed on
Commit
536c3f9
·
verified ·
1 Parent(s): 368f938

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -44
app.py CHANGED
@@ -1196,68 +1196,64 @@ tavily_tool = TavilySearchResults(
1196
  search_depth="advanced",
1197
  days=1,
1198
  include_answer=True,
 
1199
  include_raw_content=True,
1200
  exclude_domains=['example.com'],
1201
  )
1202
 
1203
- def validate_tavily_results(query):
1204
- """
1205
- Fetch and validate results from TavilySearchResults.
1206
- Ensures that results do not lead to 'Page Not Found' pages.
1207
- """
1208
- # Use the Tavily tool to fetch results
1209
- results = tavily_tool.run(query) # Pass the query as tool_input to the Tavily tool
1210
-
1211
- # Check if results are in expected format (list of dicts). Adjust if necessary.
1212
- if isinstance(results, str):
1213
- # If results are returned as a string, parse them into a structured format
1214
- # For example, if results are JSON-formatted strings
1215
- import json
1216
- try:
1217
- results = json.loads(results)
1218
- except json.JSONDecodeError:
1219
- # If parsing fails, split by newlines assuming URLs are separated by lines
1220
- results = [{"title": f"Result {i+1}", "url": url.strip()} for i, url in enumerate(results.splitlines())]
1221
 
1222
- valid_results = []
 
 
 
 
 
 
 
 
 
1223
 
1224
- for result in results:
1225
- url = result.get("url")
1226
- if not url:
1227
- continue
1228
 
1229
- try:
1230
- # Fetch page content
1231
- response = requests.get(url, timeout=10)
1232
- if response.status_code != 200:
1233
- continue
1234
 
1235
- # Parse page content to check for "Page Not Found"
1236
- soup = BeautifulSoup(response.content, "html.parser")
1237
- page_text = soup.get_text(separator=' ', strip=True)
 
 
1238
 
1239
- # Common markers of "Page Not Found"
1240
- if any(marker.lower() in page_text.lower() for marker in ["Page Not Found", "404", "Error", "Not Available"]):
1241
- continue
1242
 
1243
- # Add valid result
1244
- valid_results.append(result)
 
1245
 
1246
- except requests.RequestException:
1247
- continue
1248
 
1249
- return valid_results
 
1250
 
1251
- # Define a custom tool for Tavily
1252
- def tavily_tool_with_validation(query):
1253
- valid_results = validate_tavily_results(query)
1254
- return "\n".join([f"{res.get('title')}: {res.get('url')}" for res in valid_results])
1255
 
1256
 
1257
  #Compile all tool functions into a list
1258
  tools = [
1259
  knowledge_base_tool, # Tool for querying the knowledge base and retrieving responses
1260
- google_search_tool,
1261
  #google_search_tool, # Tool for performing a Google search and retrieving search result snippets
1262
  ]
1263
 
 
1196
  search_depth="advanced",
1197
  days=1,
1198
  include_answer=True,
1199
+ include_images=True,
1200
  include_raw_content=True,
1201
  exclude_domains=['example.com'],
1202
  )
1203
 
1204
+ # def validate_tavily_results(query):
1205
+ # """
1206
+ # Fetch and validate results from TavilySearchResults.
1207
+ # Ensures that results do not lead to 'Page Not Found' pages.
1208
+ # """
1209
+ # # Use the Tavily tool to fetch results
1210
+ # results = tavily_tool.run(query) # Pass the query as tool_input to the Tavily tool
 
 
 
 
 
 
 
 
 
 
 
1211
 
1212
+ # # Check if results are in expected format (list of dicts). Adjust if necessary.
1213
+ # if isinstance(results, str):
1214
+ # # If results are returned as a string, parse them into a structured format
1215
+ # # For example, if results are JSON-formatted strings
1216
+ # import json
1217
+ # try:
1218
+ # results = json.loads(results)
1219
+ # except json.JSONDecodeError:
1220
+ # # If parsing fails, split by newlines assuming URLs are separated by lines
1221
+ # results = [{"title": f"Result {i+1}", "url": url.strip()} for i, url in enumerate(results.splitlines())]
1222
 
1223
+ # valid_results = []
 
 
 
1224
 
1225
+ # for result in results:
1226
+ # url = result.get("url")
1227
+ # if not url:
1228
+ # continue
 
1229
 
1230
+ # try:
1231
+ # # Fetch page content
1232
+ # response = requests.get(url, timeout=10)
1233
+ # if response.status_code != 200:
1234
+ # continue
1235
 
1236
+ # # Parse page content to check for "Page Not Found"
1237
+ # soup = BeautifulSoup(response.content, "html.parser")
1238
+ # page_text = soup.get_text(separator=' ', strip=True)
1239
 
1240
+ # # Common markers of "Page Not Found"
1241
+ # if any(marker.lower() in page_text.lower() for marker in ["Page Not Found", "404", "Error", "Not Available"]):
1242
+ # continue
1243
 
1244
+ # # Add valid result
1245
+ # valid_results.append(result)
1246
 
1247
+ # except requests.RequestException:
1248
+ # continue
1249
 
1250
+ # return valid_results
 
 
 
1251
 
1252
 
1253
  #Compile all tool functions into a list
1254
  tools = [
1255
  knowledge_base_tool, # Tool for querying the knowledge base and retrieving responses
1256
+ tavily_tool,
1257
  #google_search_tool, # Tool for performing a Google search and retrieving search result snippets
1258
  ]
1259