nikhmr1235 committed on
Commit
e12ca19
·
verified ·
1 Parent(s): 45f56a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -7
app.py CHANGED
@@ -22,7 +22,8 @@ from langchain_openai import ChatOpenAI
22
  from openai import OpenAI
23
 
24
  # tools imported from helper.py
25
- from helper import repl_tool, get_travily_api_search_tool,audio_transcriber_tool,wikipedia_search_tool,file_saver_tool,wikipedia_full_content_tool,serpapi_Google_Search_tool
 
26
 
27
 
28
 
@@ -160,12 +161,12 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
160
 
161
  travily_api_search_tool = get_travily_api_search_tool(tavily_api_key)
162
  #tools = [travily_api_search_tool, repl_tool, file_saver_tool,audio_transcriber_tool,wikipedia_search_tool,wikipedia_full_content_tool]
163
- tools = [ repl_tool, file_saver_tool,audio_transcriber_tool,travily_api_search_tool]
164
 
165
  # Pull a predefined prompt from LangChain Hub
166
  # "hwchase17/react-chat" is a prompt template designed for ReAct-style conversational agents.
167
  #prompt = hub.pull("hwchase17/react-chat")
168
- #'''
169
  prompt = PromptTemplate(
170
  input_variables=["input", "agent_scratchpad", "chat_history", "tool_names"],
171
  template="""
@@ -193,7 +194,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
193
 
194
 
195
  IMPORTANT: When processing audio files (like .mp3) that have been saved using 'file_saver', the 'audio_transcriber_tool' MUST be used with the 'local_filename' of the saved audio file as its Action Input. Do NOT pass URLs or remote paths directly to 'audio_transcriber_tool'.
196
- For any incoming image files (e.g., .jpg, .png), it's crucial to download and save them locally using the 'file_saver' tool. Once the image is saved, you should then analyze its content and decide whether to utilize other available tools or your LLM to formulate a response. If you have sufficient information and can provide a CONCISE response, or if no tool is needed, you MUST use this precise format:
197
 
198
  if you can use a LLM to answer the question, think step-by-step and then answer the question.
199
  Example: given a chess board image and asked to predict the next best move, if Multi-modal LLM is available, you can use it to answer the question.
@@ -253,7 +254,111 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
253
  {agent_scratchpad}
254
  """
255
  )
256
- #'''
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  summary_memory = ConversationSummaryMemory(llm=llm_client, memory_key="chat_history")
258
  '''summary_memory = ConversationSummaryBufferMemory(llm=llm_client, memory_key="chat_history",
259
  max_token_limit=4000) # Adjust this value based on your observations and model's context window'''
@@ -335,9 +440,9 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
335
  #"7bd855d8-463d-4ed5-93ca-5fe35145f733",
336
  "cca530fc-4052-43b2-b130-b30968d8aa44",
337
  #"1f975693-876d-457b-a649-393859e79bf3",
338
- "99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
339
  #"4fc2f1ae-8625-45b5-ab34-ad4433bc21f8",
340
- "8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
341
  }
342
  if task_id not in allowed_ids:
343
  continue
 
22
  from openai import OpenAI
23
 
24
  # tools imported from helper.py
25
+ from helper import repl_tool, get_travily_api_search_tool,audio_transcriber_tool,wikipedia_search_tool,file_saver_tool,wikipedia_full_content_tool,serpapi_Google_Search_tool, gemini_multimodal_tool
26
+
27
 
28
 
29
 
 
161
 
162
  travily_api_search_tool = get_travily_api_search_tool(tavily_api_key)
163
  #tools = [travily_api_search_tool, repl_tool, file_saver_tool,audio_transcriber_tool,wikipedia_search_tool,wikipedia_full_content_tool]
164
+ tools = [ repl_tool, file_saver_tool,audio_transcriber_tool,travily_api_search_tool, gemini_multimodal_tool]
165
 
166
  # Pull a predefined prompt from LangChain Hub
167
  # "hwchase17/react-chat" is a prompt template designed for ReAct-style conversational agents.
168
  #prompt = hub.pull("hwchase17/react-chat")
169
+ '''
170
  prompt = PromptTemplate(
171
  input_variables=["input", "agent_scratchpad", "chat_history", "tool_names"],
172
  template="""
 
194
 
195
 
196
  IMPORTANT: When processing audio files (like .mp3) that have been saved using 'file_saver', the 'audio_transcriber_tool' MUST be used with the 'local_filename' of the saved audio file as its Action Input. Do NOT pass URLs or remote paths directly to 'audio_transcriber_tool'.
197
+ For any incoming image files (e.g., .jpg, .png), it's crucial to download and save them locally using the 'file_saver' tool. Once the image is saved, you should then decide whether to utilize other available tools or your Multimodal LLM to formulate a response. If you have sufficient information and can provide a CONCISE response, or if no tool is needed, you MUST use this precise format:
198
 
199
  if you can use a LLM to answer the question, think step-by-step and then answer the question.
200
  Example: given a chess board image and asked to predict the next best move, if Multi-modal LLM is available, you can use it to answer the question.
 
254
  {agent_scratchpad}
255
  """
256
  )
257
+ '''
258
+
259
+ prompt = PromptTemplate(
260
+ input_variables=["input", "agent_scratchpad", "chat_history", "tool_names"],
261
+ template="""
262
+ You are a smart and helpful AI Agent/Assistant. You are allowed and encouraged to use one or more tools as needed to answer complex questions and perform tasks.
263
+ It is CRUCIAL that you ALWAYS follow the exact format below. Do not deviate.
264
+ NOTE: it is MANDATORY for you to be precise and concise in your response. Respond directly with ONLY the answer, without any introductory phrases or additional details.
265
+ For example, if asked for the number of letters in the English alphabet, respond with '26'. Do NOT say "The number of letters is 26."
266
+
267
+ You have access to the following tools:
268
+ {tools}
269
+
270
+ To use a tool, you MUST follow this precise format:
271
+
272
+ Thought: I need to use a tool to find the answer.
273
+ Action: [tool_name] # This will be one of [{tool_names}]
274
+ Action Input: [input_for_the_tool]
275
+ Observation: [result_from_the_tool]
276
+
277
+ IMPORTANT NOTE ON TOOL USAGE:
278
+ - If an 'Observation' from a tool does NOT directly contain the specific answer to your question, you MUST refine your query or switch to a different, more suitable tool (e.g., 'tavily_search' for broader or more current information if 'wikipedia_search_tool' was insufficient). Do NOT get stuck repeatedly using the same tool if it's not yielding the direct answer.
279
+ - If the input contains the exact phrase "Attachment '{{file_name}}' available at: {{attachment_url}}" (where '{{file_name}}' and '{{attachment_url}}' are placeholders for actual values), consider the file type:
280
+ - If the file type is binary/text (e.g., .xlsx, .docx, .mp3, .jpg, .pdf,.png), you MUST use the 'file_saver' tool to download and save it.
281
+ For 'file_saver', the Action Input must be a JSON string like: '{{"url": "the_attachment_url", "local_filename": "the_file_name_from_attachment"}}'
282
+ example: for input, Attachment '1f975693-876d-457b-a649-393859e79bf3.mp3' available at EXACT URL: https://agents-course-unit4-scoring.hf.space/files/1f975693-876d-457b-a649-393859e79bf3, Action Input for file_saver would be '{{"url": "https://agents-course-unit4-scoring.hf.space/files/1f975693-876d-457b-a649-393859e79bf3", "local_filename": "1f975693-876d-457b-a649-393859e79bf3.mp3"}}'
283
+
284
+ IMPORTANT: When processing audio files (like .mp3) that have been saved using 'file_saver', the 'audio_transcriber_tool' MUST be used with the 'local_filename' of the saved audio file as its Action Input. Do NOT pass URLs or remote paths directly to 'audio_transcriber_tool'.
285
+
286
+ **For image files (like .jpg, .png) that have been saved using 'file_saver', the 'gemini_multimodal_tool' MUST be used to analyze their content and answer questions based on the image. The Action Input for 'gemini_multimodal_tool' must be a JSON string like: '{{"image_path": "the_local_filename", "question": "the_user_question"}}'**
287
+
288
+ If you have sufficient information and can provide a CONCISE response, or if no tool is needed, you MUST use this precise format:
289
+
290
+ if you can use a LLM to answer the question, think step-by-step and then answer the question.
291
+ Example: given a chess board image and asked to predict the next best move, if Multi-modal LLM is available, you can use it to answer the question.
292
+
293
+ Thought: I have enough information, or no tool is needed.
294
+ Final Answer: [your concise/short response here]
295
+
296
+ NOTE: it is MANDATORY for you to be precise and concise in your response. Respond directly with ONLY the answer, without any introductory phrases or additional details.
297
+ For example, if asked for the number of letters in the English alphabet, respond with '26'. Do NOT say "The number of letters is 26."
298
+ VERY IMPORTANT: Your response MUST always start with 'Thought:'.
299
+
300
+ Here are some examples of how you should respond:
301
+
302
+ Example 1:
303
+ Question: What is the capital of France?
304
+ Thought: I need to use a tool to find the capital of France.
305
+ Action: tavily_search
306
+ Action Input: capital of France
307
+ Observation: The capital of France is Paris.
308
+ Thought: I have found the answer.
309
+ Final Answer: Paris
310
+
311
+ Example 2:
312
+ Question: What is 2 + 2?
313
+ Thought: This is a simple arithmetic question, no tool is needed.
314
+ Final Answer: 4
315
+
316
+ Example 3:
317
+ Question: How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.
318
+ Thought: The user is asking for specific information about discography, which might be found with a search tool. The `serpapi_Google Search_tool` can fetch detailed sections. After getting the content, I will need to parse it using `python_repl` to count the albums within the specified years.
319
+ Action: serpapi_Google Search
320
+ Action Input: Mercedes Sosa discography
321
+ Observation: [Discography text content from search result]
322
+ Thought: I have retrieved discography text. Now I need to parse this text to identify and count studio albums released between 2000 and 2009. I will use the `python_repl` tool for this.
323
+ Action: python_repl
324
+ Action Input:
325
+ ```python
326
+ import re
327
+ text = "[Discography text content from previous observation]" # Replace with actual text
328
+ albums_2000_2009 = []
329
+ pattern = r"\((\d{{4}})\)\s*(.*?)(?:\[|\n|$)" # Ensures year is captured. Double braces {{}} to escape regex literal braces
330
+ for match in re.finditer(pattern, text):
331
+ year = int(match.group(1))
332
+ if 2000 <= year <= 2009:
333
+ albums_2000_2009.append(match.group(2).strip())
334
+ print(len(albums_2000_2009))
335
+ ```
336
+ Observation: 3
337
+ Thought: I have parsed the discography and counted the albums. I have found the answer.
338
+ Final Answer: 3
339
+
340
+ **Example 4: (Crucial new example for image processing)**
341
+ Question: What is the next best move in this chess position? Attachment 'chess_board.png' available at EXACT URL: https://agents-course-unit4-scoring.hf.space/files/cca530fc-4052-43b2-b130-b30968d8aa44
342
+ Thought: The user is asking a question about a chess position and has provided an image. I need to first save the image locally using the 'file_saver' tool, and then use the 'gemini_multimodal_tool' to analyze the image and answer the question.
343
+ Action: file_saver
344
+ Action Input: {{"url": "https://agents-course-unit4-scoring.hf.space/files/cca530fc-4052-43b2-b130-b30968d8aa44", "local_filename": "cca530fc-4052-43b2-b130-b30968d8aa44.png"}}
345
+ Observation: File downloaded successfully to cca530fc-4052-43b2-b130-b30968d8aa44.png
346
+ Thought: The image has been successfully downloaded. Now I need to analyze its content to determine the next best chess move using the 'gemini_multimodal_tool'.
347
+ Action: gemini_multimodal_tool
348
+ Action Input: {{"image_path": "cca530fc-4052-43b2-b130-b30968d8aa44.png", "question": "What is the next best move in this chess position?"}}
349
+ Observation: The next best move is e4.
350
+ Thought: I have used the 'gemini_multimodal_tool' to get the best move based on the image.
351
+ Final Answer: e4
352
+
353
+ ---
354
+ Previous conversation history:
355
+ {chat_history}
356
+
357
+ New input: {input}
358
+ ---
359
+ {agent_scratchpad}
360
+ """
361
+ )
362
  summary_memory = ConversationSummaryMemory(llm=llm_client, memory_key="chat_history")
363
  '''summary_memory = ConversationSummaryBufferMemory(llm=llm_client, memory_key="chat_history",
364
  max_token_limit=4000) # Adjust this value based on your observations and model's context window'''
 
440
  #"7bd855d8-463d-4ed5-93ca-5fe35145f733",
441
  "cca530fc-4052-43b2-b130-b30968d8aa44",
442
  #"1f975693-876d-457b-a649-393859e79bf3",
443
+ #"99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
444
  #"4fc2f1ae-8625-45b5-ab34-ad4433bc21f8",
445
+ #"8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
446
  }
447
  if task_id not in allowed_ids:
448
  continue