Final_Assignment_Template

Runtime error

App Files Files Community

nikhmr1235 committed on Jun 5, 2025

Commit

480b629

verified ·

1 Parent(s): 824eaad

Update helper.py

Browse files

Files changed (1) hide show

helper.py +0 -80

helper.py CHANGED Viewed

@@ -451,83 +451,3 @@ import os
 # Your existing tools (PythonREPL, TavilySearchResults, file_saver, audio_transcriber, Wikipedia, SerpAPI) go here...
 # ... (rest of your helper.py code for other tools) ...
-def analyze_image_with_gemini(args: dict) -> str:
-    """
-    Analyzes an image using Google's Gemini Multimodal LLM to answer a given question.
-    This tool is designed for tasks requiring visual understanding, such as
-    describing image content, identifying objects, or answering questions about
-    information presented visually (e.g., charts, diagrams, chess boards).
-    **Input Format (CRITICAL):**
-    The input MUST be a JSON string with 'image_path' and 'question' keys.
-    - 'image_path': The local file path to the image (e.g., 'path/to/my_image.png').
-      This image MUST have been previously downloaded and saved locally using the 'file_saver' tool.
-    - 'question': The question to answer based on the image content.
-    Example: '{"image_path": "downloaded_image.png", "question": "What is depicted in this image?"}'
-    Example: '{"image_path": "chess_board.jpg", "question": "What is the next best move in this chess position?"}'
-    **DO NOT:**
-    - Pass URLs directly to this tool; always use 'file_saver' first.
-    - Ask questions unrelated to the image content.
-    - Expect real-time actions or external website access.
-    **Output:**
-    The tool returns the answer generated by the Gemini Multimodal LLM based on the image and question.
-    Returns an informative error message if the image file is not found,
-    the API key is missing, or the LLM encounters an issue.
-    """
-    try:
-        # Ensure the input is parsed if it comes as a string (common from LLMs)
-        if isinstance(args, str):
-            import json
-            args = json.loads(args)
-        image_path = args.get("image_path")
-        question = args.get("question")
-        if not image_path or not question:
-            return "Error: Both 'image_path' and 'question' must be provided."
-        if not os.path.exists(image_path):
-            return f"Error: Local image file not found at '{image_path}'. Did you save it with 'file_saver'?"
-        google_api_key = os.getenv("GOOGLE_API_KEY")
-        if not google_api_key:
-            return "Error: GOOGLE_API_KEY not found in environment variables for multimodal tool."
-        # Initialize the multimodal LLM (Gemini-Pro-Vision is recommended for image understanding)
-        # Using a fallback to 'gemini-pro' if 'gemini-pro-vision' isn't directly available or preferred
-        llm = ChatGoogleGenerativeAI(
-            model="gemini-pro-vision" if "gemini-pro-vision" in ChatGoogleGenerativeAI.get_available_models(google_api_key) else "gemini-2.0-flash",
-            google_api_key=google_api_key,
-            temperature=0.0 # Set temperature to 0 for more factual/deterministic responses
-        )
-        # Load the image as base64 for multimodal input
-        with open(image_path, "rb") as f:
-            image_bytes = f.read()
-            # Encode image to base64
-            image_base64 = base64.b64encode(image_bytes).decode('utf-8')
-        # Create a multimodal message for the LLM
-        message = HumanMessage(
-            content=[
-                {"type": "text", "text": question},
-                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
-            ]
-        )
-        # Invoke the LLM
-        response = llm.invoke([message])
-        return response.content
-    except Exception as e:
-        return f"Error in gemini_multimodal_tool: {e}"
-# Define the Tool object for the agent
-gemini_multimodal_tool = Tool(
-    name="gemini_multimodal_tool",
-    description=analyze_image_with_gemini.__doc__, # Use the docstring as description
-    func=analyze_image_with_gemini,
-)


451	# Your existing tools (PythonREPL, TavilySearchResults, file_saver, audio_transcriber, Wikipedia, SerpAPI) go here...
452	# ... (rest of your helper.py code for other tools) ...
453