Spaces:
Runtime error
Runtime error
multiple updates updated prompt + updated toolset
Browse files
helper.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
from langchain_experimental.utilities import PythonREPL
|
| 2 |
from langchain.tools import Tool
|
| 3 |
from langchain_community.tools import TavilySearchResults
|
|
|
|
| 4 |
|
| 5 |
# For newer LangChain versions, sometimes it's directly from langchain.tools.python
|
| 6 |
# from langchain.tools.python.tool import PythonREPLTool
|
|
@@ -233,212 +234,11 @@ from langchain_community.utilities import WikipediaAPIWrapper
|
|
| 233 |
from langchain.tools import Tool # Ensure Tool is imported
|
| 234 |
|
| 235 |
|
| 236 |
-
def get_wikipedia_tool() -> Tool:
    """
    Build the LangChain Tool that answers queries from Wikipedia snippets.

    Returns:
        Tool: registered under the name "wikipedia_search_tool"; its callable
        is the ``run`` method of a WikipediaQueryRun backed by a
        WikipediaAPIWrapper limited to 3 results and 4000 content characters.
    """
    # Wrapper limits are passed inline; the query runner is the only object
    # the returned Tool needs to keep alive.
    query_runner = WikipediaQueryRun(
        api_wrapper=WikipediaAPIWrapper(top_k_results=3, doc_content_chars_max=4000)
    )

    # This text is what the agent reads when deciding whether to call the tool.
    usage_notes = """
    A specialized search tool for retrieving information from Wikipedia.
    Use this tool when you need:
    - Authoritative and well-established facts.
    - Historical information, biographies, or scientific explanations.
    - General knowledge about specific concepts, people, places, or events.
    - Background information that is unlikely to be very recent.

    **Input Format (CRITICAL):**
    The input MUST be a concise and clear query string representing the topic you want to search on Wikipedia.
    Think of a noun phrase or a short question that directly names the subject.
    Example: "Albert Einstein"
    Example: "Battle of Gettysburg"
    Example: "photosynthesis process"

    **DO NOT:**
    - Ask natural language questions that are not search queries.
    - Seek real-time information (e.g., current news, today's weather). For real-time data, use 'tavily_search'.
    - Provide incomplete sentences or ambiguous terms.
    - Expect this tool to perform calculations or access external websites beyond Wikipedia.

    **Output:**
    The tool returns a string containing snippets of relevant Wikipedia articles.
    The output is limited in length to save tokens. If the answer is not found in the snippet,
    you might need to refine your query or **consider using 'tavily_search' or other available tools for a broader search.**
    """

    return Tool(
        func=query_runner.run,
        name="wikipedia_search_tool",
        description=usage_notes,
    )


# Module-level instance, built once at import time.
wikipedia_search_tool = get_wikipedia_tool()
|
| 279 |
-
|
| 280 |
-
import wikipedia
|
| 281 |
-
|
| 282 |
-
def wikipedia_full_content(query: str) -> str:
    """
    Fetch the text of the top Wikipedia article for a query, or one section of it.

    The query may carry an optional section selector in the form
    ``"<page query> section: <section name>"``. Without a selector the start
    of the article is returned.

    Args:
        query: Search text, optionally followed by `` section:`` and a
            section heading (matched case-insensitively).

    Returns:
        At most 2000 characters of article or section text. If the section is
        not found, a notice followed by the start of the article. If nothing
        matches the search, a "not found" message. Any exception raised while
        searching or loading is reported as ``"Wikipedia tool error: ..."``
        instead of propagating, so the calling agent always gets a string.
    """
    import re  # local import keeps the function usable on its own

    max_chars = 2000  # cap on returned text, for token safety

    try:
        # Split "<page> section:<name>"; partition leaves `marker` empty when
        # no selector is present.
        page_query, marker, section = query.partition(" section:")
        page_query = page_query.strip()
        # A selector that is only whitespace is treated as "no section".
        section = section.strip() if marker else ""

        if not page_query:
            return "No Wikipedia article found for your query."

        results = wikipedia.search(page_query)
        if not results:
            return "No Wikipedia article found for your query."

        # The title comes straight from the search results, so it is already
        # exact; auto-suggest could silently swap it for a different page.
        page = wikipedia.page(results[0], auto_suggest=False)
        content = page.content

        if not section:
            return content[:max_chars]

        # Headings appear as "== Name ==" / "=== Name ===" on their own line;
        # capture the run of '=' so the heading level is known.
        heading = re.search(
            rf"^(={{2,}})[ \t]*{re.escape(section)}[ \t]*\1[ \t]*$",
            content,
            re.MULTILINE | re.IGNORECASE,
        )
        if heading is None:
            return (
                f"Section '{section}' not found. "
                f"Returning start of article:\n\n{content[:max_chars]}"
            )

        # The section runs until the next heading at the same or a shallower
        # level, so nested subsections stay part of the requested section.
        level = len(heading.group(1))
        next_heading = re.compile(rf"^={{2,{level}}}[^=\n].*$", re.MULTILINE).search(
            content, heading.end()
        )
        section_end = next_heading.start() if next_heading else len(content)
        return content[heading.end():section_end].strip()[:max_chars]

    except Exception as e:
        # Tool boundary: report the failure as text rather than crash the agent.
        return f"Wikipedia tool error: {e}"
|
| 317 |
-
|
| 318 |
-
# LangChain Tool wrapper around wikipedia_full_content; the description
# teaches the agent the "Page Title section: Section Name" input convention.
wikipedia_full_content_tool = Tool(
    func=wikipedia_full_content,
    name="wikipedia_full_content_tool",
    description="""
    Fetches the full content (or a specific section) of a Wikipedia article for a given query.
    Use this tool for questions about nominations, discographies, lists, or when you need more than a summary.
    To get a section, use the format: "Page Title section: Section Name"
    Example: "Mercedes Sosa section: Discography"
    Example: "Dinosaur featured articles section: Featured article nominations"
    """,
)
|
| 329 |
-
|
| 330 |
import os
|
| 331 |
from serpapi import GoogleSearch # Or use SerpApiClient for other engines
|
| 332 |
from typing import Dict, Any
|
| 333 |
from langchain.tools import Tool # Import the Tool class
|
| 334 |
|
| 335 |
-
class SerpApiSearchTool:
    """
    Run Google searches through SerpApi and render the results as plain text.

    The API key is read from the ``SERPAPI_API_KEY`` environment variable when
    the object is constructed; construction fails if it is missing.
    """

    def __init__(self):
        # Read the key from the environment so it never has to live in code.
        self.api_key = os.getenv("SERPAPI_API_KEY")

        if not self.api_key:
            raise ValueError(
                "SERPAPI_API_KEY must be set as an environment variable. "
                "Get your API key from https://serpapi.com/dashboard"
            )

    def search_google(self, query: str, num_results: int = 5) -> str:
        """
        Perform a Google search via SerpApi and return a formatted string.

        Args:
            query (str): The search query string.
            num_results (int): The number of organic search results to request.

        Returns:
            str: Answer box and knowledge graph text when present, followed by
            the title, link, and snippet of each organic result. Returns an
            error message (never raises) if the query is blank, the request
            fails, or SerpApi reports an error; returns
            "No relevant search results found." when nothing usable came back.
        """
        # Reject blank queries, including whitespace-only ones, before
        # spending an API call.
        if not query or not query.strip():
            return "Error: Search query cannot be empty."

        params = {
            "api_key": self.api_key,
            "engine": "google",
            "q": query,
            "num": num_results,  # Number of organic results
            "gl": "in",  # Geo-location for the search (India in this case)
            "hl": "en",  # Host language for the search
        }

        try:
            print(f"[TOOL: SerpApiSearch] Searching Google for: '{query}'")
            results = GoogleSearch(params).get_dict()

            # SerpApi reports failures (bad key, quota, no results) inside the
            # payload; surface them instead of a generic "no results" message.
            api_error = results.get("error")
            if api_error:
                return f"Error performing SerpApi search: {api_error}"

            return self._format_results(results)

        except Exception as e:
            # Tool boundary: the agent expects a string, not an exception.
            return f"Error performing SerpApi search: {e}"

    @staticmethod
    def _format_results(results: Dict[str, Any]) -> str:
        """Render a SerpApi result dictionary as the text block shown to the agent."""
        lines = []

        # Structured results first. Answer boxes do not always carry an
        # "answer" key, so fall back to the other text-bearing fields.
        # NOTE(review): "snippet"/"result" fallbacks follow SerpApi's answer
        # box layouts - confirm against the response types you rely on.
        box = results.get("answer_box") or {}
        box_text = box.get("answer") or box.get("snippet") or box.get("result")
        if box_text:
            lines.append(f"Answer Box: {box_text}")

        graph = results.get("knowledge_graph") or {}
        if graph.get("description"):
            lines.append(f"Knowledge Graph: {graph['description']}")
            if graph.get("title"):
                lines.append(f" Title: {graph['title']}")
            if graph.get("link"):
                lines.append(f" Link: {graph['link']}")

        # Then the organic results, under a header that depends on whether
        # structured sections were already emitted.
        organic_results = results.get("organic_results", [])
        if organic_results:
            lines.append("\n--- Organic Results ---" if lines else "Organic Results:")
            for position, item in enumerate(organic_results, start=1):
                title = item.get("title", "No Title")
                link = item.get("link", "#")
                snippet = item.get("snippet", "No Snippet")
                lines.append(
                    f"Result {position}:\n"
                    f" Title: {title}\n"
                    f" Link: {link}\n"
                    f" Snippet: {snippet}\n"
                )

        if not lines:  # neither structured data nor organic results
            return "No relevant search results found."

        return "\n".join(lines)
|
| 417 |
-
|
| 418 |
-
# Shared searcher, created at import time.
# NOTE(review): the constructor raises ValueError when SERPAPI_API_KEY is
# unset, so importing this module fails without the key - confirm intended.
serpapi_search_instance = SerpApiSearchTool()

# LangChain Tool that exposes the searcher's Google search to the agent.
# NOTE(review): the tool name contains a space; some function-calling
# backends restrict tool-name characters - verify against the model in use.
serpapi_Google_Search_tool = Tool(
    func=serpapi_search_instance.search_google,
    name="serpapi_Google Search",
    description="""
    Performs a Google search using SerpApi to get current and detailed information from the web.
    Use this for factual queries, general knowledge, recent events, or when TavilySearch might not be sufficient.
    It can return rich results including answer boxes, knowledge graphs, and multiple organic search results.
    Input should be a clear, concise search query string.
    """,
)
|
| 432 |
-
|
| 433 |
-
# Remember to set your SERPAPI_API_KEY environment variable before running!
|
| 434 |
-
# Example: os.environ["SERPAPI_API_KEY"] = "YOUR_API_KEY_HERE"
|
| 435 |
-
|
| 436 |
-
# To use this tool, you would add `serpapi_Google_Search_tool` to your `tools` list
|
| 437 |
-
# in your `BasicAgent` initialization, like this:
|
| 438 |
-
# tools = [travily_api_search_tool, python_repl, ..., serpapi_Google_Search_tool]
|
| 439 |
-
#
|
| 440 |
-
# And you would need to update your prompt's "Available Tools" section
|
| 441 |
-
# to describe `serpapi_Google Search` to the LLM.
|
| 442 |
|
| 443 |
# In helper.py
|
| 444 |
|
|
@@ -538,4 +338,49 @@ gemini_multimodal_tool = Tool(
|
|
| 538 |
name="gemini_multimodal_tool",
|
| 539 |
description=analyze_image_with_gemini.__doc__, # Use the docstring as description
|
| 540 |
func=analyze_image_with_gemini,
|
| 541 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from langchain_experimental.utilities import PythonREPL
|
| 2 |
from langchain.tools import Tool
|
| 3 |
from langchain_community.tools import TavilySearchResults
|
| 4 |
+
import re
|
| 5 |
|
| 6 |
# For newer LangChain versions, sometimes it's directly from langchain.tools.python
|
| 7 |
# from langchain.tools.python.tool import PythonREPLTool
|
|
|
|
| 234 |
from langchain.tools import Tool # Ensure Tool is imported
|
| 235 |
|
| 236 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
import os
|
| 238 |
from serpapi import GoogleSearch # Or use SerpApiClient for other engines
|
| 239 |
from typing import Dict, Any
|
| 240 |
from langchain.tools import Tool # Import the Tool class
|
| 241 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
|
| 243 |
# In helper.py
|
| 244 |
|
|
|
|
| 338 |
name="gemini_multimodal_tool",
|
| 339 |
description=analyze_image_with_gemini.__doc__, # Use the docstring as description
|
| 340 |
func=analyze_image_with_gemini,
|
| 341 |
+
)
|
| 342 |
+
|
| 343 |
+
from langchain_community.document_loaders import WikipediaLoader
|
| 344 |
+
|
| 345 |
+
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query.
    Returns:
        A string with formatted Wikipedia search results.
    """
    # NOTE: this docstring is also passed as the tool description
    # (description=wiki_search.__doc__), so its wording is left unchanged.
    try:
        search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    except Exception as e:
        # Tool boundary: report lookup failures as text, like the other tools
        # in this file, instead of raising into the agent loop.
        return f"Wikipedia tool error: {e}"

    if not search_docs:
        # An empty string gives the agent nothing to act on; say so explicitly.
        return "No Wikipedia article found for your query."

    # Each document is wrapped in a <Document ...> ... </Document> pair. The
    # opening tag must not be self-closing or the trailing </Document> is
    # left unmatched.
    return "\n\n---\n\n".join(
        f'<Document source="{doc.metadata.get("source", "")}" '
        f'page="{doc.metadata.get("page", "")}">\n{doc.page_content}\n</Document>'
        for doc in search_docs
    )
|
| 360 |
+
|
| 361 |
+
# Expose wiki_search to the agent; the function's docstring is reused
# verbatim as the tool description.
wikipedia_search_tool = Tool(
    func=wiki_search,
    description=wiki_search.__doc__,
    name="wikipedia_search_tool",
)
|
| 366 |
+
|
| 367 |
+
def load_local_text_file(path: str) -> str:
    """
    Load the content of a text file and return its contents as a string.
    This tool is not appropriate for pdf, xlsx, jpg, or other binary formats - it only works for text files like txt and py files.

    Args:
        path: the path to the file to be read
    """
    # NOTE: this docstring is also passed as the tool description
    # (description=load_local_text_file.__doc__), so its wording is unchanged.
    try:
        # Decode as UTF-8 explicitly; without it the result depends on the
        # host's locale encoding and can fail or garble non-ASCII text.
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except Exception as e:
        # Tool boundary: missing files, permission problems and decode errors
        # are all reported as text so the agent can react to them.
        return f"Error loading file '{path}': {e}"
|
| 381 |
+
|
| 382 |
+
# Expose load_local_text_file to the agent; the function's docstring is
# reused verbatim as the tool description.
load_text_file_tool = Tool(
    func=load_local_text_file,
    description=load_local_text_file.__doc__,
    name="load_text_file_tool",
)
|