nikhmr1235 committed on
Commit
b8ccbf5
·
verified ·
1 Parent(s): 5101b9c

multiple updates updated prompt + updated toolset

Browse files
Files changed (1) hide show
  1. helper.py +47 -202
helper.py CHANGED
@@ -1,6 +1,7 @@
1
  from langchain_experimental.utilities import PythonREPL
2
  from langchain.tools import Tool
3
  from langchain_community.tools import TavilySearchResults
 
4
 
5
  # For newer LangChain versions, sometimes it's directly from langchain.tools.python
6
  # from langchain.tools.python.tool import PythonREPLTool
@@ -233,212 +234,11 @@ from langchain_community.utilities import WikipediaAPIWrapper
233
  from langchain.tools import Tool # Ensure Tool is imported
234
 
235
 
236
- def get_wikipedia_tool() -> Tool:
237
- """
238
- Creates and returns a LangChain Tool for querying Wikipedia.
239
- """
240
- wikipedia_api_wrapper = WikipediaAPIWrapper(
241
- top_k_results=3,
242
- doc_content_chars_max=4000
243
- )
244
-
245
- wikipedia_query_tool = WikipediaQueryRun(api_wrapper=wikipedia_api_wrapper)
246
-
247
- return Tool(
248
- name="wikipedia_search_tool",
249
- description="""
250
- A specialized search tool for retrieving information from Wikipedia.
251
- Use this tool when you need:
252
- - Authoritative and well-established facts.
253
- - Historical information, biographies, or scientific explanations.
254
- - General knowledge about specific concepts, people, places, or events.
255
- - Background information that is unlikely to be very recent.
256
-
257
- **Input Format (CRITICAL):**
258
- The input MUST be a concise and clear query string representing the topic you want to search on Wikipedia.
259
- Think of a noun phrase or a short question that directly names the subject.
260
- Example: "Albert Einstein"
261
- Example: "Battle of Gettysburg"
262
- Example: "photosynthesis process"
263
-
264
- **DO NOT:**
265
- - Ask natural language questions that are not search queries.
266
- - Seek real-time information (e.g., current news, today's weather). For real-time data, use 'tavily_search'.
267
- - Provide incomplete sentences or ambiguous terms.
268
- - Expect this tool to perform calculations or access external websites beyond Wikipedia.
269
-
270
- **Output:**
271
- The tool returns a string containing snippets of relevant Wikipedia articles.
272
- The output is limited in length to save tokens. If the answer is not found in the snippet,
273
- you might need to refine your query or **consider using 'tavily_search' or other available tools for a broader search.**
274
- """,
275
- func=wikipedia_query_tool.run,
276
- )
277
-
278
- wikipedia_search_tool = get_wikipedia_tool()
279
-
280
- import wikipedia
281
-
282
- def wikipedia_full_content(query: str) -> str:
283
- """
284
- Fetches the full content of the top Wikipedia article for a query.
285
- If a section is specified in the query (e.g., "Mercedes Sosa Discography"),
286
- it tries to extract that section.
287
- """
288
- try:
289
- # Try to split query into page and section
290
- if " section:" in query:
291
- page_query, section = query.split(" section:", 1)
292
- else:
293
- page_query, section = query, None
294
-
295
- results = wikipedia.search(page_query)
296
- if not results:
297
- return "No Wikipedia article found for your query."
298
- page = wikipedia.page(results[0])
299
- content = page.content
300
-
301
- # If a section is specified, try to extract it
302
- if section:
303
- import re
304
- # Simple regex to extract section
305
- pattern = rf"==+\s*{re.escape(section.strip())}\s*==+(.*?)(==+|$)"
306
- match = re.search(pattern, content, re.DOTALL | re.IGNORECASE)
307
- if match:
308
- section_content = match.group(1).strip()
309
- return section_content[:2000] # Limit for token safety
310
- else:
311
- return f"Section '{section}' not found. Returning start of article:\n\n{content[:2000]}"
312
- else:
313
- return content[:2000] # Limit for token safety
314
-
315
- except Exception as e:
316
- return f"Wikipedia tool error: {e}"
317
-
318
- wikipedia_full_content_tool = Tool(
319
- name="wikipedia_full_content_tool",
320
- description="""
321
- Fetches the full content (or a specific section) of a Wikipedia article for a given query.
322
- Use this tool for questions about nominations, discographies, lists, or when you need more than a summary.
323
- To get a section, use the format: "Page Title section: Section Name"
324
- Example: "Mercedes Sosa section: Discography"
325
- Example: "Dinosaur featured articles section: Featured article nominations"
326
- """,
327
- func=wikipedia_full_content,
328
- )
329
-
330
  import os
331
  from serpapi import GoogleSearch # Or use SerpApiClient for other engines
332
  from typing import Dict, Any
333
  from langchain.tools import Tool # Import the Tool class
334
 
335
- class SerpApiSearchTool:
336
- """
337
- A tool to perform searches using SerpApi.
338
- Supports various search engines and extracts structured data.
339
- """
340
- def __init__(self):
341
- # Retrieve API key from environment variables for security
342
- self.api_key = os.getenv("SERPAPI_API_KEY")
343
-
344
- if not self.api_key:
345
- raise ValueError(
346
- "SERPAPI_API_KEY must be set as an environment variable. "
347
- "Get your API key from https://serpapi.com/dashboard"
348
- )
349
-
350
- def search_google(self, query: str, num_results: int = 5) -> str:
351
- """
352
- Performs a Google search via SerpApi and returns a formatted string of organic results.
353
-
354
- Args:
355
- query (str): The search query string.
356
- num_results (int): The number of organic search results to return (max 100).
357
-
358
- Returns:
359
- str: A formatted string containing the title, link, and snippet of each result.
360
- Also includes any featured snippet or knowledge graph if available.
361
- Returns an error message if the search fails or no results are found.
362
- """
363
- if not query:
364
- return "Error: Search query cannot be empty."
365
-
366
- params = {
367
- "api_key": self.api_key,
368
- "engine": "google",
369
- "q": query,
370
- "num": num_results, # Number of organic results
371
- "gl": "in", # Geo-location for the search (India in this case)
372
- "hl": "en" # Host language for the search
373
- }
374
-
375
- try:
376
- print(f"[TOOL: SerpApiSearch] Searching Google for: '{query}'")
377
- search = GoogleSearch(params)
378
- results = search.get_dict() # Execute the search and get results as a dictionary
379
-
380
- formatted_output = []
381
-
382
- # Check for common structured results first
383
- if 'answer_box' in results and results['answer_box'].get('answer'):
384
- formatted_output.append(f"Answer Box: {results['answer_box']['answer']}")
385
- if 'knowledge_graph' in results and results['knowledge_graph'].get('description'):
386
- formatted_output.append(f"Knowledge Graph: {results['knowledge_graph']['description']}")
387
- if results['knowledge_graph'].get('title'):
388
- formatted_output.append(f" Title: {results['knowledge_graph']['title']}")
389
- if results['knowledge_graph'].get('link'):
390
- formatted_output.append(f" Link: {results['knowledge_graph']['link']}")
391
-
392
- # Then process organic results
393
- organic_results = results.get('organic_results', [])
394
- if organic_results:
395
- if formatted_output: # Add a separator if other sections were added
396
- formatted_output.append("\n--- Organic Results ---")
397
- else:
398
- formatted_output.append("Organic Results:")
399
- for i, item in enumerate(organic_results):
400
- title = item.get('title', 'No Title')
401
- link = item.get('link', '#')
402
- snippet = item.get('snippet', 'No Snippet')
403
- formatted_output.append(
404
- f"Result {i+1}:\n"
405
- f" Title: {title}\n"
406
- f" Link: {link}\n"
407
- f" Snippet: {snippet}\n"
408
- )
409
-
410
- if not formatted_output: # If no structured data or organic results
411
- return "No relevant search results found."
412
-
413
- return "\n".join(formatted_output)
414
-
415
- except Exception as e:
416
- return f"Error performing SerpApi search: {e}"
417
-
418
- # Instantiate the SerpApiSearchTool class
419
- serpapi_search_instance = SerpApiSearchTool()
420
-
421
- # Create the LangChain Tool object
422
- serpapi_Google_Search_tool = Tool(
423
- name="serpapi_Google Search",
424
- description="""
425
- Performs a Google search using SerpApi to get current and detailed information from the web.
426
- Use this for factual queries, general knowledge, recent events, or when TavilySearch might not be sufficient.
427
- It can return rich results including answer boxes, knowledge graphs, and multiple organic search results.
428
- Input should be a clear, concise search query string.
429
- """,
430
- func=serpapi_search_instance.search_google,
431
- )
432
-
433
- # Remember to set your SERPAPI_API_KEY environment variable before running!
434
- # Example: os.environ["SERPAPI_API_KEY"] = "YOUR_API_KEY_HERE"
435
-
436
- # To use this tool, you would add `serpapi_Google Search_tool` to your `tools` list
437
- # in your `BasicAgent` initialization, like this:
438
- # tools = [travily_api_search_tool, python_repl, ..., serpapi_Google Search_tool]
439
- #
440
- # And you would need to update your prompt's "Available Tools" section
441
- # to describe `serpapi_Google Search` to the LLM.
442
 
443
  # In helper.py
444
 
@@ -538,4 +338,49 @@ gemini_multimodal_tool = Tool(
538
  name="gemini_multimodal_tool",
539
  description=analyze_image_with_gemini.__doc__, # Use the docstring as description
540
  func=analyze_image_with_gemini,
541
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from langchain_experimental.utilities import PythonREPL
2
  from langchain.tools import Tool
3
  from langchain_community.tools import TavilySearchResults
4
+ import re
5
 
6
  # For newer LangChain versions, sometimes it's directly from langchain.tools.python
7
  # from langchain.tools.python.tool import PythonREPLTool
 
234
  from langchain.tools import Tool # Ensure Tool is imported
235
 
236
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  import os
238
  from serpapi import GoogleSearch # Or use SerpApiClient for other engines
239
  from typing import Dict, Any
240
  from langchain.tools import Tool # Import the Tool class
241
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
 
243
  # In helper.py
244
 
 
338
  name="gemini_multimodal_tool",
339
  description=analyze_image_with_gemini.__doc__, # Use the docstring as description
340
  func=analyze_image_with_gemini,
341
+ )
342
+
343
+ from langchain_community.document_loaders import WikipediaLoader
344
+
345
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query.
    Returns:
        A string with formatted Wikipedia search results.
    """
    # Load at most 2 matching Wikipedia pages via the LangChain loader.
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    # Wrap each document in an opening/closing <Document> tag pair so the
    # LLM sees source metadata next to the content. (The original template
    # used a self-closing "/>" on the opening tag while still emitting a
    # closing </Document>, producing malformed markup — fixed here.)
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{doc.metadata.get("source", "")}" page="{doc.metadata.get("page", "")}">\n'
        f"{doc.page_content}\n</Document>"
        for doc in search_docs
    )
    return formatted_search_docs
360
+
361
# Register wiki_search with the agent framework; the function docstring
# is reused verbatim as the tool description the LLM sees.
wikipedia_search_tool = Tool(
    func=wiki_search,
    name="wikipedia_search_tool",
    description=wiki_search.__doc__,
)
366
+
367
def load_local_text_file(path: str) -> str:
    """
    Load the content of a text file and return its contents as a string.
    This tool is not appropriate for pdf, xlsx, jpg, or other binary formats - it only works for text files like txt and py files.

    Args:
        path: the path to the file to be read

    Returns:
        The file's full text on success, otherwise a human-readable error
        message string (this tool is consumed by an LLM agent, so failures
        are reported as text rather than raised).
    """
    try:
        # Explicit UTF-8 avoids platform-dependent default encodings;
        # a decode failure falls through to the error string below.
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except Exception as e:  # broad by design: report any failure to the agent
        return f"Error loading file '{path}': {e}"
381
+
382
# Register load_local_text_file as an agent tool; its docstring serves
# as the description presented to the LLM.
load_text_file_tool = Tool(
    func=load_local_text_file,
    name="load_text_file_tool",
    description=load_local_text_file.__doc__,
)