Final_Assignment_Template

Runtime error

App Files Files Community

nikhmr1235 committed on Jun 5, 2025

Commit

169060d

verified ·

1 Parent(s): 9192353

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -181

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import gradio as gr
 import inspect
 import pandas as pd
 import time
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_community.tools import TavilySearchResults
 from langchain import hub # Used to pull predefined prompts from LangChain Hub
@@ -22,7 +23,7 @@ from langchain_openai import ChatOpenAI
 from openai import OpenAI
 # tools imported from helper.py
-from helper import repl_tool, get_travily_api_search_tool,audio_transcriber_tool,wikipedia_search_tool,file_saver_tool,wikipedia_full_content_tool,serpapi_Google_Search_tool
@@ -102,87 +103,6 @@ class BasicAgent:
         return self.invoke_with_retry(question)
-import base64
-from langchain.tools import Tool
-from langchain_google_genai import ChatGoogleGenerativeAI
-from langchain_core.messages import HumanMessage
-import os
-def analyze_image_with_gemini(args: dict) -> str:
-    """
-    Analyzes an image using Google's Gemini Multimodal LLM to answer a given question.
-    This tool is designed for tasks requiring visual understanding, such as
-    describing image content, identifying objects, or answering questions about
-    information presented visually (e.g., charts, diagrams, chess boards).
-    **Input Format (CRITICAL):**
-    The input MUST be a JSON string with 'image_path' and 'question' keys.
-    - 'image_path': The local file path to the image (e.g., 'path/to/my_image.png').
-      This image MUST have been previously downloaded and saved locally using the 'file_saver' tool.
-    - 'question': The question to answer based on the image content.
-    Example: '{"image_path": "downloaded_image.png", "question": "What is depicted in this image?"}'
-    Example: '{"image_path": "chess_board.jpg", "question": "What is the next best move in this chess position?"}'
-    **DO NOT:**
-    - Pass URLs directly to this tool; always use 'file_saver' first.
-    - Ask questions unrelated to the image content.
-    - Expect real-time actions or external website access.
-    **Output:**
-    The tool returns the answer generated by the Gemini Multimodal LLM based on the image and question.
-    Returns an informative error message if the image file is not found,
-    the API key is missing, or the LLM encounters an issue.
-    """
-    try:
-        # Ensure the input is parsed if it comes as a string (common from LLMs)
-        if isinstance(args, str):
-            import json
-            args = json.loads(args)
-        image_path = args.get("image_path")
-        question = args.get("question")
-        if not image_path or not question:
-            return "Error: Both 'image_path' and 'question' must be provided."
-        if not os.path.exists(image_path):
-            return f"Error: Local image file not found at '{image_path}'. Did you save it with 'file_saver'?"
-        google_api_key = os.getenv("GOOGLE_API_KEY")
-        if not google_api_key:
-            return "Error: GOOGLE_API_KEY not found in environment variables for multimodal tool."
-        # Initialize the multimodal LLM (Gemini-Pro-Vision is recommended for image understanding)
-        # Using a fallback to 'gemini-pro' if 'gemini-pro-vision' isn't directly available or preferred
-        llm = ChatGoogleGenerativeAI(
-            #model="gemini-pro-vision" if "gemini-pro-vision" in ChatGoogleGenerativeAI.get_available_models(google_api_key) else "gemini-2.0-flash",
-            model="gemini-2.0-flash",
-            google_api_key=google_api_key,
-            temperature=0.0 # Set temperature to 0 for more factual/deterministic responses
-        )
-        # Load the image as base64 for multimodal input
-        with open(image_path, "rb") as f:
-            image_bytes = f.read()
-            # Encode image to base64
-            image_base64 = base64.b64encode(image_bytes).decode('utf-8')
-        # Create a multimodal message for the LLM
-        message = HumanMessage(
-            content=[
-                {"type": "text", "text": question},
-                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
-            ]
-        )
-        # Invoke the LLM
-        response = llm.invoke([message])
-        return response.content
-    except Exception as e:
-        return f"Error in gemini_multimodal_tool: {e}"
 def run_and_submit_all( profile: gr.OAuthProfile | None):
@@ -217,12 +137,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     print(f"Using OpenAI API key: {openai_api_key[:4]}... (truncated for security)")
-    # Define the Tool object for the agent
-    gemini_multimodal_tool = Tool(
-        name="gemini_multimodal_tool",
-        description=analyze_image_with_gemini.__doc__, # Use the docstring as description
-        func=analyze_image_with_gemini,
-    )
     #NMODEL
     #'''
     llm_client = ChatGoogleGenerativeAI(
@@ -256,95 +171,6 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     # Pull a predefined prompt from LangChain Hub
     # "hwchase17/react-chat" is a prompt template designed for ReAct-style conversational agents.
     #prompt = hub.pull("hwchase17/react-chat")
-    '''
-    prompt = PromptTemplate(
-    input_variables=["input", "agent_scratchpad", "chat_history", "tool_names"],
-    template="""
-    You are a smart and helpful AI Agent/Assistant. You are allowed and encouraged to use one or more tools as needed to answer complex questions and perform tasks.
-    It is CRUCIAL that you ALWAYS follow the exact format below. Do not deviate.
-    NOTE: it is MANDATORY for you to be precise and concise in your response. Respond directly with ONLY the answer, without any introductory phrases or additional details.
-    For example, if asked for the number of letters in the English alphabet, respond with '26'. Do NOT say "The number of letters is 26."
-    You have access to the following tools:
-    {tools}
-    To use a tool, you MUST follow this precise format:
-    Thought: I need to use a tool to find the answer.
-    Action: [tool_name] # This will be one of [{tool_names}]
-    Action Input: [input_for_the_tool]
-    Observation: [result_from_the_tool]
-    IMPORTANT NOTE ON TOOL USAGE:
-    - If an 'Observation' from a tool does NOT directly contain the specific answer to your question, you MUST refine your query or switch to a different, more suitable tool (e.g., 'tavily_search' for broader or more current information if 'wikipedia_search_tool' was insufficient). Do NOT get stuck repeatedly using the same tool if it's not yielding the direct answer.
-    - If the input contains the exact phrase "Attachment '{{file_name}}' available at: {{attachment_url}}" (where '{{file_name}}' and '{{attachment_url}}' are placeholders for actual values), consider the file type:
-      - If the file type is binary/text (e.g., .xlsx, .docx, .mp3, .jpg, .pdf,.png), you MUST use the 'file_saver' tool to download and save it.
-        For 'file_saver', the Action Input must be a JSON string like: '{{"url": "the_attachment_url", "local_filename": "the_file_name_from_attachment"}}'
-        example: for input, Attachment '1f975693-876d-457b-a649-393859e79bf3.mp3' available at EXACT URL: https://agents-course-unit4-scoring.hf.space/files/1f975693-876d-457b-a649-393859e79bf3, Action Input for file_saver would be '{{"url": "https://agents-course-unit4-scoring.hf.space/files/1f975693-876d-457b-a649-393859e79bf3", "local_filename": "1f975693-876d-457b-a649-393859e79bf3.mp3"}}'
-    IMPORTANT: When processing audio files (like .mp3) that have been saved using 'file_saver', the 'audio_transcriber_tool' MUST be used with the 'local_filename' of the saved audio file as its Action Input. Do NOT pass URLs or remote paths directly to 'audio_transcriber_tool'.
-    For any incoming image files (e.g., .jpg, .png), it's crucial to download and save them locally using the 'file_saver' tool. Once the image is saved, you should then decide whether to utilize other available tools or your Multimodal LLM to formulate a response.    If you have sufficient information and can provide a CONCISE response, or if no tool is needed, you MUST use this precise format:
-    if you can use a LLM to answer the question, think step-by-step and then answer the question.
-    Example: given a chess board image and asked to predict the next best move, if Multi-modal LLM is available, you can use it to answer the question.
-    Thought: I have enough information, or no tool is needed.
-    Final Answer: [your concise/short response here]
-    NOTE: it is MANDATORY for you to be precise and concise in your response. Respond directly with ONLY the answer, without any introductory phrases or additional details.
-    For example, if asked for the number of letters in the English alphabet, respond with '26'. Do NOT say "The number of letters is 26."
-    VERY IMPORTANT: Your response MUST always start with 'Thought:'.
-    Here are some examples of how you should respond:
-    Example 1:
-    Question: What is the capital of France?
-    Thought: I need to use a tool to find the capital of France.
-    Action: tavily_search_results
-    Action Input: capital of France
-    Observation: The capital of France is Paris.
-    Thought: I have found the answer.
-    Final Answer: Paris
-    Example 2:
-    Question: What is 2 + 2?
-    Thought: This is a simple arithmetic question, no tool is needed.
-    Final Answer: 4
-    Example 3:
-    Question: How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.
-    Thought: The user is asking for specific information from Wikipedia, likely requiring a list or discography. The `travily_api_search_tool` is best for this to get the detailed section. After getting the content, I will need to parse it using `python_repl` to count the albums within the specified years.
-    Action: serpapi_Google Search
-    Action Input: Mercedes Sosa section: Discography
-    Observation: [Discography text content]
-    Thought: I have retrieved the discography text. Now I need to parse this text to identify and count studio albums released between 2000 and 2009. I will use the `python_repl` tool for this.
-    Action: python_repl
-    Action Input:
-    ```python
-    import re
-    text = "[Discography text content from previous observation]" # Replace with actual text
-    albums_2000_2009 = []
-    # This is a simplified regex example; actual parsing might be more complex depending on text format
-    pattern = r"\((\d{{4}})\s*(.*?)(?:\[|\n|$)"
-    for match in re.finditer(pattern, text):
-    year = int(match.group(1))
-    if 2000 <= year <= 2009:
-        albums_2000_2009.append(match.group(2).strip())
-    print(len(albums_2000_2009))
-    Observation: 3
-    Thought: I have parsed the discography and counted the albums. I have found the answer.
-    Final Answer: 3
-    ---
-    Previous conversation history:
-    {chat_history}
-    New input: {input}
-    ---
-    {agent_scratchpad}
-    """
-    )
-    '''
     prompt = PromptTemplate(
     input_variables=["input", "agent_scratchpad", "chat_history", "tool_names"],
@@ -526,18 +352,20 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
             full_question_for_agent += f"\n\nAttachment '{file_name}' available at EXACT URL: {attachment_url}"
             print(f"Running agent on task {task_id}: {full_question_for_agent}",flush=True)
-        '''
         allowed_ids = {
-            "7bd855d8-463d-4ed5-93ca-5fe35145f733",
             "cca530fc-4052-43b2-b130-b30968d8aa44",
             #"1f975693-876d-457b-a649-393859e79bf3",
             #"99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
             #"4fc2f1ae-8625-45b5-ab34-ad4433bc21f8",
             #"8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
         }
         if task_id not in allowed_ids:
             continue
-        '''
         try:
             submitted_answer = agent(full_question_for_agent)
@@ -571,8 +399,17 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
             f"Message: {result_data.get('message', 'No message received.')}"
         )
         print("Submission successful.")
         results_df = pd.DataFrame(results_log)
-        return final_status, results_df
     except requests.exceptions.HTTPError as e:
         error_detail = f"Server responded with status {e.response.status_code}."
         try:

 import inspect
 import pandas as pd
 import time
+import re
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_community.tools import TavilySearchResults
 from langchain import hub # Used to pull predefined prompts from LangChain Hub
 from openai import OpenAI
 # tools imported from helper.py
+from helper import repl_tool, get_travily_api_search_tool,audio_transcriber_tool,wikipedia_search_tool,file_saver_tool,wikipedia_full_content_tool,serpapi_Google_Search_tool,gemini_multimodal_tool
         return self.invoke_with_retry(question)
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     print(f"Using OpenAI API key: {openai_api_key[:4]}... (truncated for security)")
     #NMODEL
     #'''
     llm_client = ChatGoogleGenerativeAI(
     # Pull a predefined prompt from LangChain Hub
     # "hwchase17/react-chat" is a prompt template designed for ReAct-style conversational agents.
     #prompt = hub.pull("hwchase17/react-chat")
     prompt = PromptTemplate(
     input_variables=["input", "agent_scratchpad", "chat_history", "tool_names"],
             full_question_for_agent += f"\n\nAttachment '{file_name}' available at EXACT URL: {attachment_url}"
             print(f"Running agent on task {task_id}: {full_question_for_agent}",flush=True)
         allowed_ids = {
+            #"7bd855d8-463d-4ed5-93ca-5fe35145f733",
             "cca530fc-4052-43b2-b130-b30968d8aa44",
             #"1f975693-876d-457b-a649-393859e79bf3",
             #"99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
             #"4fc2f1ae-8625-45b5-ab34-ad4433bc21f8",
             #"8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
+            "a1e91b78-d3d8-4675-bb8d-62741b4b68a6",
+            "3f57289b-8c60-48be-bd80-01f8099ca449",
         }
         if task_id not in allowed_ids:
             continue
         try:
             submitted_answer = agent(full_question_for_agent)
             f"Message: {result_data.get('message', 'No message received.')}"
         )
         print("Submission successful.")
+        # Step 1: Remove common problematic characters (like null bytes, non-breaking spaces, etc.)
+        # This regex removes characters that are not printable ASCII.
+        # \x20-\x7E covers space through tilde (~)
+        # \n\r\t covers newlines and tabs
+        # You might need to adjust this regex based on what 'wonky chars' you specifically observe.
+        cleaned_final_status = re.sub(r'[^\x20-\x7E\n\r\t]+', '', final_status)
+        # Step 2: Strip leading/trailing whitespace (including newlines from formatting)
+        cleaned_final_status = cleaned_final_status.strip()
         results_df = pd.DataFrame(results_log)
+        return cleaned_final_status, results_df
     except requests.exceptions.HTTPError as e:
         error_detail = f"Server responded with status {e.response.status_code}."
         try: