Final_Assignment_Template

Runtime error

App Files Files Community

nikhmr1235 committed on Jun 5, 2025

Commit

4409e88

verified ·

1 Parent(s): 480b629

Update app.py

Browse files

Files changed (1) hide show

app.py +90 -1

app.py CHANGED Viewed

@@ -22,7 +22,7 @@ from langchain_openai import ChatOpenAI
 from openai import OpenAI
 # tools imported from helper.py
-from helper import repl_tool, get_travily_api_search_tool,audio_transcriber_tool,wikipedia_search_tool,file_saver_tool,wikipedia_full_content_tool,serpapi_Google_Search_tool, gemini_multimodal_tool
@@ -102,6 +102,86 @@ class BasicAgent:
         return self.invoke_with_retry(question)
 def run_and_submit_all( profile: gr.OAuthProfile | None):
@@ -135,6 +215,13 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         return "OpenAI API key not found. Please set OPENAI_API_KEY environment variable.", None
     print(f"Using OpenAI API key: {openai_api_key[:4]}... (truncated for security)")
     #NMODEL
     #'''
     llm_client = ChatGoogleGenerativeAI(
@@ -159,6 +246,8 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         return "Tavily API key not found. Please set TAVILY_API_KEY environment variable.", None
     print(f"Using Tavily API key: {tavily_api_key[:4]}... (truncated for security)")
     travily_api_search_tool = get_travily_api_search_tool(tavily_api_key)
     #tools = [travily_api_search_tool, repl_tool, file_saver_tool,audio_transcriber_tool,wikipedia_search_tool,wikipedia_full_content_tool]
     tools = [ repl_tool, file_saver_tool,audio_transcriber_tool,travily_api_search_tool, gemini_multimodal_tool]

 from openai import OpenAI
 # tools imported from helper.py
+from helper import repl_tool, get_travily_api_search_tool,audio_transcriber_tool,wikipedia_search_tool,file_saver_tool,wikipedia_full_content_tool,serpapi_Google_Search_tool
         return self.invoke_with_retry(question)
+import base64
+from langchain.tools import Tool
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_core.messages import HumanMessage
+import os
+def analyze_image_with_gemini(args: dict) -> str:
+    """
+    Analyzes an image using Google's Gemini Multimodal LLM to answer a given question.
+    This tool is designed for tasks requiring visual understanding, such as
+    describing image content, identifying objects, or answering questions about
+    information presented visually (e.g., charts, diagrams, chess boards).
+    **Input Format (CRITICAL):**
+    The input MUST be a JSON string with 'image_path' and 'question' keys.
+    - 'image_path': The local file path to the image (e.g., 'path/to/my_image.png').
+      This image MUST have been previously downloaded and saved locally using the 'file_saver' tool.
+    - 'question': The question to answer based on the image content.
+    Example: '{"image_path": "downloaded_image.png", "question": "What is depicted in this image?"}'
+    Example: '{"image_path": "chess_board.jpg", "question": "What is the next best move in this chess position?"}'
+    **DO NOT:**
+    - Pass URLs directly to this tool; always use 'file_saver' first.
+    - Ask questions unrelated to the image content.
+    - Expect real-time actions or external website access.
+    **Output:**
+    The tool returns the answer generated by the Gemini Multimodal LLM based on the image and question.
+    Returns an informative error message if the image file is not found,
+    the API key is missing, or the LLM encounters an issue.
+    """
+    try:
+        # Ensure the input is parsed if it comes as a string (common from LLMs)
+        if isinstance(args, str):
+            import json
+            args = json.loads(args)
+        image_path = args.get("image_path")
+        question = args.get("question")
+        if not image_path or not question:
+            return "Error: Both 'image_path' and 'question' must be provided."
+        if not os.path.exists(image_path):
+            return f"Error: Local image file not found at '{image_path}'. Did you save it with 'file_saver'?"
+        google_api_key = os.getenv("GOOGLE_API_KEY")
+        if not google_api_key:
+            return "Error: GOOGLE_API_KEY not found in environment variables for multimodal tool."
+        # Initialize the multimodal LLM (Gemini-Pro-Vision is recommended for image understanding)
+        # Using a fallback to 'gemini-pro' if 'gemini-pro-vision' isn't directly available or preferred
+        llm = ChatGoogleGenerativeAI(
+            model="gemini-pro-vision" if "gemini-pro-vision" in ChatGoogleGenerativeAI.get_available_models(google_api_key) else "gemini-2.0-flash",
+            google_api_key=google_api_key,
+            temperature=0.0 # Set temperature to 0 for more factual/deterministic responses
+        )
+        # Load the image as base64 for multimodal input
+        with open(image_path, "rb") as f:
+            image_bytes = f.read()
+            # Encode image to base64
+            image_base64 = base64.b64encode(image_bytes).decode('utf-8')
+        # Create a multimodal message for the LLM
+        message = HumanMessage(
+            content=[
+                {"type": "text", "text": question},
+                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
+            ]
+        )
+        # Invoke the LLM
+        response = llm.invoke([message])
+        return response.content
+    except Exception as e:
+        return f"Error in gemini_multimodal_tool: {e}"
 def run_and_submit_all( profile: gr.OAuthProfile | None):
         return "OpenAI API key not found. Please set OPENAI_API_KEY environment variable.", None
     print(f"Using OpenAI API key: {openai_api_key[:4]}... (truncated for security)")
+    # Define the Tool object for the agent
+    gemini_multimodal_tool = Tool(
+        name="gemini_multimodal_tool",
+        description=analyze_image_with_gemini.__doc__, # Use the docstring as description
+        func=analyze_image_with_gemini,
+    )
     #NMODEL
     #'''
     llm_client = ChatGoogleGenerativeAI(
         return "Tavily API key not found. Please set TAVILY_API_KEY environment variable.", None
     print(f"Using Tavily API key: {tavily_api_key[:4]}... (truncated for security)")
     travily_api_search_tool = get_travily_api_search_tool(tavily_api_key)
     #tools = [travily_api_search_tool, repl_tool, file_saver_tool,audio_transcriber_tool,wikipedia_search_tool,wikipedia_full_content_tool]
     tools = [ repl_tool, file_saver_tool,audio_transcriber_tool,travily_api_search_tool, gemini_multimodal_tool]