nikhmr1235 committed on
Commit
e12ca19
·
verified ·
1 Parent(s): 45f56a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -7
app.py CHANGED
@@ -22,7 +22,8 @@ from langchain_openai import ChatOpenAI
22
  from openai import OpenAI
23
 
24
  # tools imported from helper.py
25
- from helper import repl_tool, get_travily_api_search_tool,audio_transcriber_tool,wikipedia_search_tool,file_saver_tool,wikipedia_full_content_tool,serpapi_Google_Search_tool
 
26
 
27
 
28
 
@@ -160,12 +161,12 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
160
 
161
  travily_api_search_tool = get_travily_api_search_tool(tavily_api_key)
162
  #tools = [travily_api_search_tool, repl_tool, file_saver_tool,audio_transcriber_tool,wikipedia_search_tool,wikipedia_full_content_tool]
163
- tools = [ repl_tool, file_saver_tool,audio_transcriber_tool,travily_api_search_tool]
164
 
165
  # Pull a predefined prompt from LangChain Hub
166
  # "hwchase17/react-chat" is a prompt template designed for ReAct-style conversational agents.
167
  #prompt = hub.pull("hwchase17/react-chat")
168
- #'''
169
  prompt = PromptTemplate(
170
  input_variables=["input", "agent_scratchpad", "chat_history", "tool_names"],
171
  template="""
@@ -193,7 +194,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
193
 
194
 
195
  IMPORTANT: When processing audio files (like .mp3) that have been saved using 'file_saver', the 'audio_transcriber_tool' MUST be used with the 'local_filename' of the saved audio file as its Action Input. Do NOT pass URLs or remote paths directly to 'audio_transcriber_tool'.
196
- For any incoming image files (e.g., .jpg, .png), it's crucial to download and save them locally using the 'file_saver' tool. Once the image is saved, you should then analyze its content and decide whether to utilize other available tools or your LLM to formulate a response. If you have sufficient information and can provide a CONCISE response, or if no tool is needed, you MUST use this precise format:
197
 
198
  if you can use a LLM to answer the question, think step-by-step and then answer the question.
199
  Example: given a chess board image and asked to predict the next best move, if Multi-modal LLM is available, you can use it to answer the question.
@@ -253,7 +254,111 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
253
  {agent_scratchpad}
254
  """
255
  )
256
- #'''
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  summary_memory = ConversationSummaryMemory(llm=llm_client, memory_key="chat_history")
258
  '''summary_memory = ConversationSummaryBufferMemory(llm=llm_client, memory_key="chat_history",
259
  max_token_limit=4000) # Adjust this value based on your observations and model's context window'''
@@ -335,9 +440,9 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
335
  #"7bd855d8-463d-4ed5-93ca-5fe35145f733",
336
  "cca530fc-4052-43b2-b130-b30968d8aa44",
337
  #"1f975693-876d-457b-a649-393859e79bf3",
338
- "99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
339
  #"4fc2f1ae-8625-45b5-ab34-ad4433bc21f8",
340
- "8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
341
  }
342
  if task_id not in allowed_ids:
343
  continue
 
22
  from openai import OpenAI
23
 
24
  # tools imported from helper.py
25
+ from helper import repl_tool, get_travily_api_search_tool,audio_transcriber_tool,wikipedia_search_tool,file_saver_tool,wikipedia_full_content_tool,serpapi_Google_Search_tool, gemini_multimodal_tool
26
+
27
 
28
 
29
 
 
161
 
162
  travily_api_search_tool = get_travily_api_search_tool(tavily_api_key)
163
  #tools = [travily_api_search_tool, repl_tool, file_saver_tool,audio_transcriber_tool,wikipedia_search_tool,wikipedia_full_content_tool]
164
+ tools = [ repl_tool, file_saver_tool,audio_transcriber_tool,travily_api_search_tool, gemini_multimodal_tool]
165
 
166
  # Pull a predefined prompt from LangChain Hub
167
  # "hwchase17/react-chat" is a prompt template designed for ReAct-style conversational agents.
168
  #prompt = hub.pull("hwchase17/react-chat")
169
+ '''
170
  prompt = PromptTemplate(
171
  input_variables=["input", "agent_scratchpad", "chat_history", "tool_names"],
172
  template="""
 
194
 
195
 
196
  IMPORTANT: When processing audio files (like .mp3) that have been saved using 'file_saver', the 'audio_transcriber_tool' MUST be used with the 'local_filename' of the saved audio file as its Action Input. Do NOT pass URLs or remote paths directly to 'audio_transcriber_tool'.
197
+ For any incoming image files (e.g., .jpg, .png), it's crucial to download and save them locally using the 'file_saver' tool. Once the image is saved, you should then decide whether to utilize other available tools or your Multimodal LLM to formulate a response. If you have sufficient information and can provide a CONCISE response, or if no tool is needed, you MUST use this precise format:
198
 
199
  if you can use a LLM to answer the question, think step-by-step and then answer the question.
200
  Example: given a chess board image and asked to predict the next best move, if Multi-modal LLM is available, you can use it to answer the question.
 
254
  {agent_scratchpad}
255
  """
256
  )
257
+ '''
258
+
259
+ prompt = PromptTemplate(
260
+ input_variables=["input", "agent_scratchpad", "chat_history", "tool_names"],
261
+ template="""
262
+ You are a smart and helpful AI Agent/Assistant. You are allowed and encouraged to use one or more tools as needed to answer complex questions and perform tasks.
263
+ It is CRUCIAL that you ALWAYS follow the exact format below. Do not deviate.
264
+ NOTE: it is MANDATORY for you to be precise and concise in your response. Respond directly with ONLY the answer, without any introductory phrases or additional details.
265
+ For example, if asked for the number of letters in the English alphabet, respond with '26'. Do NOT say "The number of letters is 26."
266
+
267
+ You have access to the following tools:
268
+ {tools}
269
+
270
+ To use a tool, you MUST follow this precise format:
271
+
272
+ Thought: I need to use a tool to find the answer.
273
+ Action: [tool_name] # This will be one of [{tool_names}]
274
+ Action Input: [input_for_the_tool]
275
+ Observation: [result_from_the_tool]
276
+
277
+ IMPORTANT NOTE ON TOOL USAGE:
278
+ - If an 'Observation' from a tool does NOT directly contain the specific answer to your question, you MUST refine your query or switch to a different, more suitable tool (e.g., 'tavily_search' for broader or more current information if 'wikipedia_search_tool' was insufficient). Do NOT get stuck repeatedly using the same tool if it's not yielding the direct answer.
279
+ - If the input contains the exact phrase "Attachment '{{file_name}}' available at: {{attachment_url}}" (where '{{file_name}}' and '{{attachment_url}}' are placeholders for actual values), consider the file type:
280
+ - If the file type is binary/text (e.g., .xlsx, .docx, .mp3, .jpg, .pdf,.png), you MUST use the 'file_saver' tool to download and save it.
281
+ For 'file_saver', the Action Input must be a JSON string like: '{{"url": "the_attachment_url", "local_filename": "the_file_name_from_attachment"}}'
282
+ example: for input, Attachment '1f975693-876d-457b-a649-393859e79bf3.mp3' available at EXACT URL: https://agents-course-unit4-scoring.hf.space/files/1f975693-876d-457b-a649-393859e79bf3, Action Input for file_saver would be '{{"url": "https://agents-course-unit4-scoring.hf.space/files/1f975693-876d-457b-a649-393859e79bf3", "local_filename": "1f975693-876d-457b-a649-393859e79bf3.mp3"}}'
283
+
284
+ IMPORTANT: When processing audio files (like .mp3) that have been saved using 'file_saver', the 'audio_transcriber_tool' MUST be used with the 'local_filename' of the saved audio file as its Action Input. Do NOT pass URLs or remote paths directly to 'audio_transcriber_tool'.
285
+
286
+ **For image files (like .jpg, .png) that have been saved using 'file_saver', the 'gemini_multimodal_tool' MUST be used to analyze their content and answer questions based on the image. The Action Input for 'gemini_multimodal_tool' must be a JSON string like: '{{"image_path": "the_local_filename", "question": "the_user_question"}}'**
287
+
288
+ If you have sufficient information and can provide a CONCISE response, or if no tool is needed, you MUST use this precise format:
289
+
290
+ if you can use a LLM to answer the question, think step-by-step and then answer the question.
291
+ Example: given a chess board image and asked to predict the next best move, if Multi-modal LLM is available, you can use it to answer the question.
292
+
293
+ Thought: I have enough information, or no tool is needed.
294
+ Final Answer: [your concise/short response here]
295
+
296
+ NOTE: it is MANDATORY for you to be precise and concise in your response. Respond directly with ONLY the answer, without any introductory phrases or additional details.
297
+ For example, if asked for the number of letters in the English alphabet, respond with '26'. Do NOT say "The number of letters is 26."
298
+ VERY IMPORTANT: Your response MUST always start with 'Thought:'.
299
+
300
+ Here are some examples of how you should respond:
301
+
302
+ Example 1:
303
+ Question: What is the capital of France?
304
+ Thought: I need to use a tool to find the capital of France.
305
+ Action: tavily_search
306
+ Action Input: capital of France
307
+ Observation: The capital of France is Paris.
308
+ Thought: I have found the answer.
309
+ Final Answer: Paris
310
+
311
+ Example 2:
312
+ Question: What is 2 + 2?
313
+ Thought: This is a simple arithmetic question, no tool is needed.
314
+ Final Answer: 4
315
+
316
+ Example 3:
317
+ Question: How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.
318
+ Thought: The user is asking for specific information about discography, which might be found with a search tool. The `serpapi_Google Search_tool` can fetch detailed sections. After getting the content, I will need to parse it using `python_repl` to count the albums within the specified years.
319
+ Action: serpapi_Google Search
320
+ Action Input: Mercedes Sosa discography
321
+ Observation: [Discography text content from search result]
322
+ Thought: I have retrieved discography text. Now I need to parse this text to identify and count studio albums released between 2000 and 2009. I will use the `python_repl` tool for this.
323
+ Action: python_repl
324
+ Action Input:
325
+ ```python
326
+ import re
327
+ text = "[Discography text content from previous observation]" # Replace with actual text
328
+ albums_2000_2009 = []
329
+ pattern = r"\((\d{{4}})\)\s*(.*?)(?:\[|\n|$)" # Ensures year is captured. Double braces {{}} to escape regex literal braces
330
+ for match in re.finditer(pattern, text):
331
+ year = int(match.group(1))
332
+ if 2000 <= year <= 2009:
333
+ albums_2000_2009.append(match.group(2).strip())
334
+ print(len(albums_2000_2009))
335
+ ```
336
+ Observation: 3
337
+ Thought: I have parsed the discography and counted the albums. I have found the answer.
338
+ Final Answer: 3
339
+
340
+ **Example 4: (Crucial new example for image processing)**
341
+ Question: What is the next best move in this chess position? Attachment 'chess_board.png' available at EXACT URL: https://agents-course-unit4-scoring.hf.space/files/cca530fc-4052-43b2-b130-b30968d8aa44
342
+ Thought: The user is asking a question about a chess position and has provided an image. I need to first save the image locally using the 'file_saver' tool, and then use the 'gemini_multimodal_tool' to analyze the image and answer the question.
343
+ Action: file_saver
344
+ Action Input: {{"url": "https://agents-course-unit4-scoring.hf.space/files/cca530fc-4052-43b2-b130-b30968d8aa44", "local_filename": "cca530fc-4052-43b2-b130-b30968d8aa44.png"}}
345
+ Observation: File downloaded successfully to cca530fc-4052-43b2-b130-b30968d8aa44.png
346
+ Thought: The image has been successfully downloaded. Now I need to analyze its content to determine the next best chess move using the 'gemini_multimodal_tool'.
347
+ Action: gemini_multimodal_tool
348
+ Action Input: {{"image_path": "cca530fc-4052-43b2-b130-b30968d8aa44.png", "question": "What is the next best move in this chess position?"}}
349
+ Observation: The next best move is e4.
350
+ Thought: I have used the 'gemini_multimodal_tool' to get the best move based on the image.
351
+ Final Answer: e4
352
+
353
+ ---
354
+ Previous conversation history:
355
+ {chat_history}
356
+
357
+ New input: {input}
358
+ ---
359
+ {agent_scratchpad}
360
+ """
361
+ )
362
  summary_memory = ConversationSummaryMemory(llm=llm_client, memory_key="chat_history")
363
  '''summary_memory = ConversationSummaryBufferMemory(llm=llm_client, memory_key="chat_history",
364
  max_token_limit=4000) # Adjust this value based on your observations and model's context window'''
 
440
  #"7bd855d8-463d-4ed5-93ca-5fe35145f733",
441
  "cca530fc-4052-43b2-b130-b30968d8aa44",
442
  #"1f975693-876d-457b-a649-393859e79bf3",
443
+ #"99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
444
  #"4fc2f1ae-8625-45b5-ab34-ad4433bc21f8",
445
+ #"8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
446
  }
447
  if task_id not in allowed_ids:
448
  continue