nikhmr1235 committed on
Commit
b8ccbf5
·
verified ·
1 Parent(s): 5101b9c

multiple updates updated prompt + updated toolset

Browse files
Files changed (1) hide show
  1. helper.py +47 -202
helper.py CHANGED
@@ -1,6 +1,7 @@
1
  from langchain_experimental.utilities import PythonREPL
2
  from langchain.tools import Tool
3
  from langchain_community.tools import TavilySearchResults
 
4
 
5
  # For newer LangChain versions, sometimes it's directly from langchain.tools.python
6
  # from langchain.tools.python.tool import PythonREPLTool
@@ -233,212 +234,11 @@ from langchain_community.utilities import WikipediaAPIWrapper
233
  from langchain.tools import Tool # Ensure Tool is imported
234
 
235
 
236
- def get_wikipedia_tool() -> Tool:
237
- """
238
- Creates and returns a LangChain Tool for querying Wikipedia.
239
- """
240
- wikipedia_api_wrapper = WikipediaAPIWrapper(
241
- top_k_results=3,
242
- doc_content_chars_max=4000
243
- )
244
-
245
- wikipedia_query_tool = WikipediaQueryRun(api_wrapper=wikipedia_api_wrapper)
246
-
247
- return Tool(
248
- name="wikipedia_search_tool",
249
- description="""
250
- A specialized search tool for retrieving information from Wikipedia.
251
- Use this tool when you need:
252
- - Authoritative and well-established facts.
253
- - Historical information, biographies, or scientific explanations.
254
- - General knowledge about specific concepts, people, places, or events.
255
- - Background information that is unlikely to be very recent.
256
-
257
- **Input Format (CRITICAL):**
258
- The input MUST be a concise and clear query string representing the topic you want to search on Wikipedia.
259
- Think of a noun phrase or a short question that directly names the subject.
260
- Example: "Albert Einstein"
261
- Example: "Battle of Gettysburg"
262
- Example: "photosynthesis process"
263
-
264
- **DO NOT:**
265
- - Ask natural language questions that are not search queries.
266
- - Seek real-time information (e.g., current news, today's weather). For real-time data, use 'tavily_search'.
267
- - Provide incomplete sentences or ambiguous terms.
268
- - Expect this tool to perform calculations or access external websites beyond Wikipedia.
269
-
270
- **Output:**
271
- The tool returns a string containing snippets of relevant Wikipedia articles.
272
- The output is limited in length to save tokens. If the answer is not found in the snippet,
273
- you might need to refine your query or **consider using 'tavily_search' or other available tools for a broader search.**
274
- """,
275
- func=wikipedia_query_tool.run,
276
- )
277
-
278
- wikipedia_search_tool = get_wikipedia_tool()
279
-
280
- import wikipedia
281
-
282
- def wikipedia_full_content(query: str) -> str:
283
- """
284
- Fetches the full content of the top Wikipedia article for a query.
285
- If a section is specified in the query (e.g., "Mercedes Sosa Discography"),
286
- it tries to extract that section.
287
- """
288
- try:
289
- # Try to split query into page and section
290
- if " section:" in query:
291
- page_query, section = query.split(" section:", 1)
292
- else:
293
- page_query, section = query, None
294
-
295
- results = wikipedia.search(page_query)
296
- if not results:
297
- return "No Wikipedia article found for your query."
298
- page = wikipedia.page(results[0])
299
- content = page.content
300
-
301
- # If a section is specified, try to extract it
302
- if section:
303
- import re
304
- # Simple regex to extract section
305
- pattern = rf"==+\s*{re.escape(section.strip())}\s*==+(.*?)(==+|$)"
306
- match = re.search(pattern, content, re.DOTALL | re.IGNORECASE)
307
- if match:
308
- section_content = match.group(1).strip()
309
- return section_content[:2000] # Limit for token safety
310
- else:
311
- return f"Section '{section}' not found. Returning start of article:\n\n{content[:2000]}"
312
- else:
313
- return content[:2000] # Limit for token safety
314
-
315
- except Exception as e:
316
- return f"Wikipedia tool error: {e}"
317
-
318
- wikipedia_full_content_tool = Tool(
319
- name="wikipedia_full_content_tool",
320
- description="""
321
- Fetches the full content (or a specific section) of a Wikipedia article for a given query.
322
- Use this tool for questions about nominations, discographies, lists, or when you need more than a summary.
323
- To get a section, use the format: "Page Title section: Section Name"
324
- Example: "Mercedes Sosa section: Discography"
325
- Example: "Dinosaur featured articles section: Featured article nominations"
326
- """,
327
- func=wikipedia_full_content,
328
- )
329
-
330
  import os
331
  from serpapi import GoogleSearch # Or use SerpApiClient for other engines
332
  from typing import Dict, Any
333
  from langchain.tools import Tool # Import the Tool class
334
 
335
- class SerpApiSearchTool:
336
- """
337
- A tool to perform searches using SerpApi.
338
- Supports various search engines and extracts structured data.
339
- """
340
- def __init__(self):
341
- # Retrieve API key from environment variables for security
342
- self.api_key = os.getenv("SERPAPI_API_KEY")
343
-
344
- if not self.api_key:
345
- raise ValueError(
346
- "SERPAPI_API_KEY must be set as an environment variable. "
347
- "Get your API key from https://serpapi.com/dashboard"
348
- )
349
-
350
- def search_google(self, query: str, num_results: int = 5) -> str:
351
- """
352
- Performs a Google search via SerpApi and returns a formatted string of organic results.
353
-
354
- Args:
355
- query (str): The search query string.
356
- num_results (int): The number of organic search results to return (max 100).
357
-
358
- Returns:
359
- str: A formatted string containing the title, link, and snippet of each result.
360
- Also includes any featured snippet or knowledge graph if available.
361
- Returns an error message if the search fails or no results are found.
362
- """
363
- if not query:
364
- return "Error: Search query cannot be empty."
365
-
366
- params = {
367
- "api_key": self.api_key,
368
- "engine": "google",
369
- "q": query,
370
- "num": num_results, # Number of organic results
371
- "gl": "in", # Geo-location for the search (India in this case)
372
- "hl": "en" # Host language for the search
373
- }
374
-
375
- try:
376
- print(f"[TOOL: SerpApiSearch] Searching Google for: '{query}'")
377
- search = GoogleSearch(params)
378
- results = search.get_dict() # Execute the search and get results as a dictionary
379
-
380
- formatted_output = []
381
-
382
- # Check for common structured results first
383
- if 'answer_box' in results and results['answer_box'].get('answer'):
384
- formatted_output.append(f"Answer Box: {results['answer_box']['answer']}")
385
- if 'knowledge_graph' in results and results['knowledge_graph'].get('description'):
386
- formatted_output.append(f"Knowledge Graph: {results['knowledge_graph']['description']}")
387
- if results['knowledge_graph'].get('title'):
388
- formatted_output.append(f" Title: {results['knowledge_graph']['title']}")
389
- if results['knowledge_graph'].get('link'):
390
- formatted_output.append(f" Link: {results['knowledge_graph']['link']}")
391
-
392
- # Then process organic results
393
- organic_results = results.get('organic_results', [])
394
- if organic_results:
395
- if formatted_output: # Add a separator if other sections were added
396
- formatted_output.append("\n--- Organic Results ---")
397
- else:
398
- formatted_output.append("Organic Results:")
399
- for i, item in enumerate(organic_results):
400
- title = item.get('title', 'No Title')
401
- link = item.get('link', '#')
402
- snippet = item.get('snippet', 'No Snippet')
403
- formatted_output.append(
404
- f"Result {i+1}:\n"
405
- f" Title: {title}\n"
406
- f" Link: {link}\n"
407
- f" Snippet: {snippet}\n"
408
- )
409
-
410
- if not formatted_output: # If no structured data or organic results
411
- return "No relevant search results found."
412
-
413
- return "\n".join(formatted_output)
414
-
415
- except Exception as e:
416
- return f"Error performing SerpApi search: {e}"
417
-
418
- # Instantiate the SerpApiSearchTool class
419
- serpapi_search_instance = SerpApiSearchTool()
420
-
421
- # Create the LangChain Tool object
422
- serpapi_Google_Search_tool = Tool(
423
- name="serpapi_Google Search",
424
- description="""
425
- Performs a Google search using SerpApi to get current and detailed information from the web.
426
- Use this for factual queries, general knowledge, recent events, or when TavilySearch might not be sufficient.
427
- It can return rich results including answer boxes, knowledge graphs, and multiple organic search results.
428
- Input should be a clear, concise search query string.
429
- """,
430
- func=serpapi_search_instance.search_google,
431
- )
432
-
433
- # Remember to set your SERPAPI_API_KEY environment variable before running!
434
- # Example: os.environ["SERPAPI_API_KEY"] = "YOUR_API_KEY_HERE"
435
-
436
- # To use this tool, you would add `serpapi_Google Search_tool` to your `tools` list
437
- # in your `BasicAgent` initialization, like this:
438
- # tools = [travily_api_search_tool, python_repl, ..., serpapi_Google Search_tool]
439
- #
440
- # And you would need to update your prompt's "Available Tools" section
441
- # to describe `serpapi_Google Search` to the LLM.
442
 
443
  # In helper.py
444
 
@@ -538,4 +338,49 @@ gemini_multimodal_tool = Tool(
538
  name="gemini_multimodal_tool",
539
  description=analyze_image_with_gemini.__doc__, # Use the docstring as description
540
  func=analyze_image_with_gemini,
541
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from langchain_experimental.utilities import PythonREPL
2
  from langchain.tools import Tool
3
  from langchain_community.tools import TavilySearchResults
4
+ import re
5
 
6
  # For newer LangChain versions, sometimes it's directly from langchain.tools.python
7
  # from langchain.tools.python.tool import PythonREPLTool
 
234
  from langchain.tools import Tool # Ensure Tool is imported
235
 
236
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  import os
238
  from serpapi import GoogleSearch # Or use SerpApiClient for other engines
239
  from typing import Dict, Any
240
  from langchain.tools import Tool # Import the Tool class
241
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
 
243
  # In helper.py
244
 
 
338
  name="gemini_multimodal_tool",
339
  description=analyze_image_with_gemini.__doc__, # Use the docstring as description
340
  func=analyze_image_with_gemini,
341
+ )
342
+
343
+ from langchain_community.document_loaders import WikipediaLoader
344
+
345
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query.
    Returns:
        A string with formatted Wikipedia search results.
    """
    # Load at most 2 matching Wikipedia pages via the LangChain loader.
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    # Wrap each document in an opening/closing <Document> tag pair so the
    # LLM sees source metadata next to the content. (The original template
    # used a self-closing "/>" on the opening tag while still emitting a
    # closing </Document>, producing malformed markup — fixed here.)
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{doc.metadata.get("source", "")}" page="{doc.metadata.get("page", "")}">\n'
        f"{doc.page_content}\n</Document>"
        for doc in search_docs
    )
    return formatted_search_docs
360
+
361
# Register wiki_search with the agent framework; the function docstring
# is reused verbatim as the tool description the LLM sees.
wikipedia_search_tool = Tool(
    func=wiki_search,
    name="wikipedia_search_tool",
    description=wiki_search.__doc__,
)
366
+
367
def load_local_text_file(path: str) -> str:
    """
    Load the content of a text file and return its contents as a string.
    This tool is not appropriate for pdf, xlsx, jpg, or other binary formats - it only works for text files like txt and py files.

    Args:
        path: the path to the file to be read

    Returns:
        The file's full text on success, otherwise a human-readable error
        message string (this tool is consumed by an LLM agent, so failures
        are reported as text rather than raised).
    """
    try:
        # Explicit UTF-8 avoids platform-dependent default encodings;
        # a decode failure falls through to the error string below.
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except Exception as e:  # broad by design: report any failure to the agent
        return f"Error loading file '{path}': {e}"
381
+
382
# Register load_local_text_file as an agent tool; its docstring serves
# as the description presented to the LLM.
load_text_file_tool = Tool(
    func=load_local_text_file,
    name="load_text_file_tool",
    description=load_local_text_file.__doc__,
)