Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -22,7 +22,8 @@ from langchain_openai import ChatOpenAI
|
|
| 22 |
from openai import OpenAI
|
| 23 |
|
| 24 |
# tools imported from helper.py
|
| 25 |
-
from helper import repl_tool, get_travily_api_search_tool,audio_transcriber_tool,wikipedia_search_tool,file_saver_tool,wikipedia_full_content_tool,serpapi_Google_Search_tool
|
|
|
|
| 26 |
|
| 27 |
|
| 28 |
|
|
@@ -160,12 +161,12 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 160 |
|
| 161 |
travily_api_search_tool = get_travily_api_search_tool(tavily_api_key)
|
| 162 |
#tools = [travily_api_search_tool, repl_tool, file_saver_tool,audio_transcriber_tool,wikipedia_search_tool,wikipedia_full_content_tool]
|
| 163 |
-
tools = [ repl_tool, file_saver_tool,audio_transcriber_tool,travily_api_search_tool]
|
| 164 |
|
| 165 |
# Pull a predefined prompt from LangChain Hub
|
| 166 |
# "hwchase17/react-chat" is a prompt template designed for ReAct-style conversational agents.
|
| 167 |
#prompt = hub.pull("hwchase17/react-chat")
|
| 168 |
-
|
| 169 |
prompt = PromptTemplate(
|
| 170 |
input_variables=["input", "agent_scratchpad", "chat_history", "tool_names"],
|
| 171 |
template="""
|
|
@@ -193,7 +194,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 193 |
|
| 194 |
|
| 195 |
IMPORTANT: When processing audio files (like .mp3) that have been saved using 'file_saver', the 'audio_transcriber_tool' MUST be used with the 'local_filename' of the saved audio file as its Action Input. Do NOT pass URLs or remote paths directly to 'audio_transcriber_tool'.
|
| 196 |
-
For any incoming image files (e.g., .jpg, .png), it's crucial to download and save them locally using the 'file_saver' tool. Once the image is saved, you should then
|
| 197 |
|
| 198 |
if you can use a LLM to answer the question, think step-by-step and then answer the question.
|
| 199 |
Example: given a chess board image and asked to predict the next best move, if Multi-modal LLM is available, you can use it to answer the question.
|
|
@@ -253,7 +254,111 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 253 |
{agent_scratchpad}
|
| 254 |
"""
|
| 255 |
)
|
| 256 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
summary_memory = ConversationSummaryMemory(llm=llm_client, memory_key="chat_history")
|
| 258 |
'''summary_memory = ConversationSummaryBufferMemory(llm=llm_client, memory_key="chat_history",
|
| 259 |
max_token_limit=4000) # Adjust this value based on your observations and model's context window'''
|
|
@@ -335,9 +440,9 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 335 |
#"7bd855d8-463d-4ed5-93ca-5fe35145f733",
|
| 336 |
"cca530fc-4052-43b2-b130-b30968d8aa44",
|
| 337 |
#"1f975693-876d-457b-a649-393859e79bf3",
|
| 338 |
-
"99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
|
| 339 |
#"4fc2f1ae-8625-45b5-ab34-ad4433bc21f8",
|
| 340 |
-
"8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
|
| 341 |
}
|
| 342 |
if task_id not in allowed_ids:
|
| 343 |
continue
|
|
|
|
| 22 |
from openai import OpenAI
|
| 23 |
|
| 24 |
# tools imported from helper.py
|
| 25 |
+
from helper import repl_tool, get_travily_api_search_tool,audio_transcriber_tool,wikipedia_search_tool,file_saver_tool,wikipedia_full_content_tool,serpapi_Google_Search_tool, gemini_multimodal_tool
|
| 26 |
+
|
| 27 |
|
| 28 |
|
| 29 |
|
|
|
|
| 161 |
|
| 162 |
travily_api_search_tool = get_travily_api_search_tool(tavily_api_key)
|
| 163 |
#tools = [travily_api_search_tool, repl_tool, file_saver_tool,audio_transcriber_tool,wikipedia_search_tool,wikipedia_full_content_tool]
|
| 164 |
+
tools = [ repl_tool, file_saver_tool,audio_transcriber_tool,travily_api_search_tool, gemini_multimodal_tool]
|
| 165 |
|
| 166 |
# Pull a predefined prompt from LangChain Hub
|
| 167 |
# "hwchase17/react-chat" is a prompt template designed for ReAct-style conversational agents.
|
| 168 |
#prompt = hub.pull("hwchase17/react-chat")
|
| 169 |
+
'''
|
| 170 |
prompt = PromptTemplate(
|
| 171 |
input_variables=["input", "agent_scratchpad", "chat_history", "tool_names"],
|
| 172 |
template="""
|
|
|
|
| 194 |
|
| 195 |
|
| 196 |
IMPORTANT: When processing audio files (like .mp3) that have been saved using 'file_saver', the 'audio_transcriber_tool' MUST be used with the 'local_filename' of the saved audio file as its Action Input. Do NOT pass URLs or remote paths directly to 'audio_transcriber_tool'.
|
| 197 |
+
For any incoming image files (e.g., .jpg, .png), it's crucial to download and save them locally using the 'file_saver' tool. Once the image is saved, you should then decide whether to utilize other available tools or your Multimodal LLM to formulate a response. If you have sufficient information and can provide a CONCISE response, or if no tool is needed, you MUST use this precise format:
|
| 198 |
|
| 199 |
if you can use a LLM to answer the question, think step-by-step and then answer the question.
|
| 200 |
Example: given a chess board image and asked to predict the next best move, if Multi-modal LLM is available, you can use it to answer the question.
|
|
|
|
| 254 |
{agent_scratchpad}
|
| 255 |
"""
|
| 256 |
)
|
| 257 |
+
'''
|
| 258 |
+
|
| 259 |
+
prompt = PromptTemplate(
|
| 260 |
+
input_variables=["input", "agent_scratchpad", "chat_history", "tool_names"],
|
| 261 |
+
template="""
|
| 262 |
+
You are a smart and helpful AI Agent/Assistant. You are allowed and encouraged to use one or more tools as needed to answer complex questions and perform tasks.
|
| 263 |
+
It is CRUCIAL that you ALWAYS follow the exact format below. Do not deviate.
|
| 264 |
+
NOTE: it is MANDATORY for you to be precise and concise in your response. Respond directly with ONLY the answer, without any introductory phrases or additional details.
|
| 265 |
+
For example, if asked for the number of letters in the English alphabet, respond with '26'. Do NOT say "The number of letters is 26."
|
| 266 |
+
|
| 267 |
+
You have access to the following tools:
|
| 268 |
+
{tools}
|
| 269 |
+
|
| 270 |
+
To use a tool, you MUST follow this precise format:
|
| 271 |
+
|
| 272 |
+
Thought: I need to use a tool to find the answer.
|
| 273 |
+
Action: [tool_name] # This will be one of [{tool_names}]
|
| 274 |
+
Action Input: [input_for_the_tool]
|
| 275 |
+
Observation: [result_from_the_tool]
|
| 276 |
+
|
| 277 |
+
IMPORTANT NOTE ON TOOL USAGE:
|
| 278 |
+
- If an 'Observation' from a tool does NOT directly contain the specific answer to your question, you MUST refine your query or switch to a different, more suitable tool (e.g., 'tavily_search' for broader or more current information if 'wikipedia_search_tool' was insufficient). Do NOT get stuck repeatedly using the same tool if it's not yielding the direct answer.
|
| 279 |
+
- If the input contains the exact phrase "Attachment '{{file_name}}' available at: {{attachment_url}}" (where '{{file_name}}' and '{{attachment_url}}' are placeholders for actual values), consider the file type:
|
| 280 |
+
- If the file type is binary/text (e.g., .xlsx, .docx, .mp3, .jpg, .pdf,.png), you MUST use the 'file_saver' tool to download and save it.
|
| 281 |
+
For 'file_saver', the Action Input must be a JSON string like: '{{"url": "the_attachment_url", "local_filename": "the_file_name_from_attachment"}}'
|
| 282 |
+
example: for input, Attachment '1f975693-876d-457b-a649-393859e79bf3.mp3' available at EXACT URL: https://agents-course-unit4-scoring.hf.space/files/1f975693-876d-457b-a649-393859e79bf3, Action Input for file_saver would be '{{"url": "https://agents-course-unit4-scoring.hf.space/files/1f975693-876d-457b-a649-393859e79bf3", "local_filename": "1f975693-876d-457b-a649-393859e79bf3.mp3"}}'
|
| 283 |
+
|
| 284 |
+
IMPORTANT: When processing audio files (like .mp3) that have been saved using 'file_saver', the 'audio_transcriber_tool' MUST be used with the 'local_filename' of the saved audio file as its Action Input. Do NOT pass URLs or remote paths directly to 'audio_transcriber_tool'.
|
| 285 |
+
|
| 286 |
+
**For image files (like .jpg, .png) that have been saved using 'file_saver', the 'gemini_multimodal_tool' MUST be used to analyze their content and answer questions based on the image. The Action Input for 'gemini_multimodal_tool' must be a JSON string like: '{{"image_path": "the_local_filename", "question": "the_user_question"}}'**
|
| 287 |
+
|
| 288 |
+
If you have sufficient information and can provide a CONCISE response, or if no tool is needed, you MUST use this precise format:
|
| 289 |
+
|
| 290 |
+
if you can use a LLM to answer the question, think step-by-step and then answer the question.
|
| 291 |
+
Example: given a chess board image and asked to predict the next best move, if Multi-modal LLM is available, you can use it to answer the question.
|
| 292 |
+
|
| 293 |
+
Thought: I have enough information, or no tool is needed.
|
| 294 |
+
Final Answer: [your concise/short response here]
|
| 295 |
+
|
| 296 |
+
NOTE: it is MANDATORY for you to be precise and concise in your response. Respond directly with ONLY the answer, without any introductory phrases or additional details.
|
| 297 |
+
For example, if asked for the number of letters in the English alphabet, respond with '26'. Do NOT say "The number of letters is 26."
|
| 298 |
+
VERY IMPORTANT: Your response MUST always start with 'Thought:'.
|
| 299 |
+
|
| 300 |
+
Here are some examples of how you should respond:
|
| 301 |
+
|
| 302 |
+
Example 1:
|
| 303 |
+
Question: What is the capital of France?
|
| 304 |
+
Thought: I need to use a tool to find the capital of France.
|
| 305 |
+
Action: tavily_search
|
| 306 |
+
Action Input: capital of France
|
| 307 |
+
Observation: The capital of France is Paris.
|
| 308 |
+
Thought: I have found the answer.
|
| 309 |
+
Final Answer: Paris
|
| 310 |
+
|
| 311 |
+
Example 2:
|
| 312 |
+
Question: What is 2 + 2?
|
| 313 |
+
Thought: This is a simple arithmetic question, no tool is needed.
|
| 314 |
+
Final Answer: 4
|
| 315 |
+
|
| 316 |
+
Example 3:
|
| 317 |
+
Question: How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.
|
| 318 |
+
Thought: The user is asking for specific information about discography, which might be found with a search tool. The `serpapi_Google Search_tool` can fetch detailed sections. After getting the content, I will need to parse it using `python_repl` to count the albums within the specified years.
|
| 319 |
+
Action: serpapi_Google Search
|
| 320 |
+
Action Input: Mercedes Sosa discography
|
| 321 |
+
Observation: [Discography text content from search result]
|
| 322 |
+
Thought: I have retrieved discography text. Now I need to parse this text to identify and count studio albums released between 2000 and 2009. I will use the `python_repl` tool for this.
|
| 323 |
+
Action: python_repl
|
| 324 |
+
Action Input:
|
| 325 |
+
```python
|
| 326 |
+
import re
|
| 327 |
+
text = "[Discography text content from previous observation]" # Replace with actual text
|
| 328 |
+
albums_2000_2009 = []
|
| 329 |
+
pattern = r"\((\d{{4}})\)\s*(.*?)(?:\[|\n|$)" # Ensures year is captured. Double braces {{}} to escape regex literal braces
|
| 330 |
+
for match in re.finditer(pattern, text):
|
| 331 |
+
year = int(match.group(1))
|
| 332 |
+
if 2000 <= year <= 2009:
|
| 333 |
+
albums_2000_2009.append(match.group(2).strip())
|
| 334 |
+
print(len(albums_2000_2009))
|
| 335 |
+
```
|
| 336 |
+
Observation: 3
|
| 337 |
+
Thought: I have parsed the discography and counted the albums. I have found the answer.
|
| 338 |
+
Final Answer: 3
|
| 339 |
+
|
| 340 |
+
**Example 4: (Crucial new example for image processing)**
|
| 341 |
+
Question: What is the next best move in this chess position? Attachment 'chess_board.png' available at EXACT URL: https://agents-course-unit4-scoring.hf.space/files/cca530fc-4052-43b2-b130-b30968d8aa44
|
| 342 |
+
Thought: The user is asking a question about a chess position and has provided an image. I need to first save the image locally using the 'file_saver' tool, and then use the 'gemini_multimodal_tool' to analyze the image and answer the question.
|
| 343 |
+
Action: file_saver
|
| 344 |
+
Action Input: {{"url": "https://agents-course-unit4-scoring.hf.space/files/cca530fc-4052-43b2-b130-b30968d8aa44", "local_filename": "cca530fc-4052-43b2-b130-b30968d8aa44.png"}}
|
| 345 |
+
Observation: File downloaded successfully to cca530fc-4052-43b2-b130-b30968d8aa44.png
|
| 346 |
+
Thought: The image has been successfully downloaded. Now I need to analyze its content to determine the next best chess move using the 'gemini_multimodal_tool'.
|
| 347 |
+
Action: gemini_multimodal_tool
|
| 348 |
+
Action Input: {{"image_path": "cca530fc-4052-43b2-b130-b30968d8aa44.png", "question": "What is the next best move in this chess position?"}}
|
| 349 |
+
Observation: The next best move is e4.
|
| 350 |
+
Thought: I have used the 'gemini_multimodal_tool' to get the best move based on the image.
|
| 351 |
+
Final Answer: e4
|
| 352 |
+
|
| 353 |
+
---
|
| 354 |
+
Previous conversation history:
|
| 355 |
+
{chat_history}
|
| 356 |
+
|
| 357 |
+
New input: {input}
|
| 358 |
+
---
|
| 359 |
+
{agent_scratchpad}
|
| 360 |
+
"""
|
| 361 |
+
)
|
| 362 |
summary_memory = ConversationSummaryMemory(llm=llm_client, memory_key="chat_history")
|
| 363 |
'''summary_memory = ConversationSummaryBufferMemory(llm=llm_client, memory_key="chat_history",
|
| 364 |
max_token_limit=4000) # Adjust this value based on your observations and model's context window'''
|
|
|
|
| 440 |
#"7bd855d8-463d-4ed5-93ca-5fe35145f733",
|
| 441 |
"cca530fc-4052-43b2-b130-b30968d8aa44",
|
| 442 |
#"1f975693-876d-457b-a649-393859e79bf3",
|
| 443 |
+
#"99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
|
| 444 |
#"4fc2f1ae-8625-45b5-ab34-ad4433bc21f8",
|
| 445 |
+
#"8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
|
| 446 |
}
|
| 447 |
if task_id not in allowed_ids:
|
| 448 |
continue
|