Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,6 +4,7 @@ import gradio as gr
|
|
| 4 |
import inspect
|
| 5 |
import pandas as pd
|
| 6 |
import time
|
|
|
|
| 7 |
from langchain_google_genai import ChatGoogleGenerativeAI
|
| 8 |
from langchain_community.tools import TavilySearchResults
|
| 9 |
from langchain import hub # Used to pull predefined prompts from LangChain Hub
|
|
@@ -22,7 +23,7 @@ from langchain_openai import ChatOpenAI
|
|
| 22 |
from openai import OpenAI
|
| 23 |
|
| 24 |
# tools imported from helper.py
|
| 25 |
-
from helper import repl_tool, get_travily_api_search_tool,audio_transcriber_tool,wikipedia_search_tool,file_saver_tool,wikipedia_full_content_tool,serpapi_Google_Search_tool
|
| 26 |
|
| 27 |
|
| 28 |
|
|
@@ -102,87 +103,6 @@ class BasicAgent:
|
|
| 102 |
return self.invoke_with_retry(question)
|
| 103 |
|
| 104 |
|
| 105 |
-
import base64
|
| 106 |
-
from langchain.tools import Tool
|
| 107 |
-
from langchain_google_genai import ChatGoogleGenerativeAI
|
| 108 |
-
from langchain_core.messages import HumanMessage
|
| 109 |
-
import os
|
| 110 |
-
|
| 111 |
-
def analyze_image_with_gemini(args: dict) -> str:
|
| 112 |
-
"""
|
| 113 |
-
Analyzes an image using Google's Gemini Multimodal LLM to answer a given question.
|
| 114 |
-
This tool is designed for tasks requiring visual understanding, such as
|
| 115 |
-
describing image content, identifying objects, or answering questions about
|
| 116 |
-
information presented visually (e.g., charts, diagrams, chess boards).
|
| 117 |
-
|
| 118 |
-
**Input Format (CRITICAL):**
|
| 119 |
-
The input MUST be a JSON string with 'image_path' and 'question' keys.
|
| 120 |
-
- 'image_path': The local file path to the image (e.g., 'path/to/my_image.png').
|
| 121 |
-
This image MUST have been previously downloaded and saved locally using the 'file_saver' tool.
|
| 122 |
-
- 'question': The question to answer based on the image content.
|
| 123 |
-
|
| 124 |
-
Example: '{"image_path": "downloaded_image.png", "question": "What is depicted in this image?"}'
|
| 125 |
-
Example: '{"image_path": "chess_board.jpg", "question": "What is the next best move in this chess position?"}'
|
| 126 |
-
|
| 127 |
-
**DO NOT:**
|
| 128 |
-
- Pass URLs directly to this tool; always use 'file_saver' first.
|
| 129 |
-
- Ask questions unrelated to the image content.
|
| 130 |
-
- Expect real-time actions or external website access.
|
| 131 |
-
|
| 132 |
-
**Output:**
|
| 133 |
-
The tool returns the answer generated by the Gemini Multimodal LLM based on the image and question.
|
| 134 |
-
Returns an informative error message if the image file is not found,
|
| 135 |
-
the API key is missing, or the LLM encounters an issue.
|
| 136 |
-
"""
|
| 137 |
-
try:
|
| 138 |
-
# Ensure the input is parsed if it comes as a string (common from LLMs)
|
| 139 |
-
if isinstance(args, str):
|
| 140 |
-
import json
|
| 141 |
-
args = json.loads(args)
|
| 142 |
-
|
| 143 |
-
image_path = args.get("image_path")
|
| 144 |
-
question = args.get("question")
|
| 145 |
-
|
| 146 |
-
if not image_path or not question:
|
| 147 |
-
return "Error: Both 'image_path' and 'question' must be provided."
|
| 148 |
-
|
| 149 |
-
if not os.path.exists(image_path):
|
| 150 |
-
return f"Error: Local image file not found at '{image_path}'. Did you save it with 'file_saver'?"
|
| 151 |
-
|
| 152 |
-
google_api_key = os.getenv("GOOGLE_API_KEY")
|
| 153 |
-
|
| 154 |
-
if not google_api_key:
|
| 155 |
-
return "Error: GOOGLE_API_KEY not found in environment variables for multimodal tool."
|
| 156 |
-
|
| 157 |
-
# Initialize the multimodal LLM (Gemini-Pro-Vision is recommended for image understanding)
|
| 158 |
-
# NOTE: the 'gemini-pro-vision' availability check below is commented out, so 'gemini-2.0-flash' is always used
|
| 159 |
-
llm = ChatGoogleGenerativeAI(
|
| 160 |
-
#model="gemini-pro-vision" if "gemini-pro-vision" in ChatGoogleGenerativeAI.get_available_models(google_api_key) else "gemini-2.0-flash",
|
| 161 |
-
model="gemini-2.0-flash",
|
| 162 |
-
google_api_key=google_api_key,
|
| 163 |
-
temperature=0.0 # Set temperature to 0 for more factual/deterministic responses
|
| 164 |
-
)
|
| 165 |
-
|
| 166 |
-
# Load the image as base64 for multimodal input
|
| 167 |
-
with open(image_path, "rb") as f:
|
| 168 |
-
image_bytes = f.read()
|
| 169 |
-
# Encode image to base64
|
| 170 |
-
image_base64 = base64.b64encode(image_bytes).decode('utf-8')
|
| 171 |
-
|
| 172 |
-
# Create a multimodal message for the LLM
|
| 173 |
-
message = HumanMessage(
|
| 174 |
-
content=[
|
| 175 |
-
{"type": "text", "text": question},
|
| 176 |
-
{"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}},
|
| 177 |
-
]
|
| 178 |
-
)
|
| 179 |
-
|
| 180 |
-
# Invoke the LLM
|
| 181 |
-
response = llm.invoke([message])
|
| 182 |
-
return response.content
|
| 183 |
-
|
| 184 |
-
except Exception as e:
|
| 185 |
-
return f"Error in gemini_multimodal_tool: {e}"
|
| 186 |
|
| 187 |
|
| 188 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
@@ -217,12 +137,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 217 |
print(f"Using OpenAI API key: {openai_api_key[:4]}... (truncated for security)")
|
| 218 |
|
| 219 |
|
| 220 |
-
|
| 221 |
-
gemini_multimodal_tool = Tool(
|
| 222 |
-
name="gemini_multimodal_tool",
|
| 223 |
-
description=analyze_image_with_gemini.__doc__, # Use the docstring as description
|
| 224 |
-
func=analyze_image_with_gemini,
|
| 225 |
-
)
|
| 226 |
#NMODEL
|
| 227 |
#'''
|
| 228 |
llm_client = ChatGoogleGenerativeAI(
|
|
@@ -256,95 +171,6 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 256 |
# Pull a predefined prompt from LangChain Hub
|
| 257 |
# "hwchase17/react-chat" is a prompt template designed for ReAct-style conversational agents.
|
| 258 |
#prompt = hub.pull("hwchase17/react-chat")
|
| 259 |
-
'''
|
| 260 |
-
prompt = PromptTemplate(
|
| 261 |
-
input_variables=["input", "agent_scratchpad", "chat_history", "tool_names"],
|
| 262 |
-
template="""
|
| 263 |
-
You are a smart and helpful AI Agent/Assistant. You are allowed and encouraged to use one or more tools as needed to answer complex questions and perform tasks.
|
| 264 |
-
It is CRUCIAL that you ALWAYS follow the exact format below. Do not deviate.
|
| 265 |
-
NOTE: it is MANDATORY for you to be precise and concise in your response. Respond directly with ONLY the answer, without any introductory phrases or additional details.
|
| 266 |
-
For example, if asked for the number of letters in the English alphabet, respond with '26'. Do NOT say "The number of letters is 26."
|
| 267 |
-
|
| 268 |
-
You have access to the following tools:
|
| 269 |
-
{tools}
|
| 270 |
-
|
| 271 |
-
To use a tool, you MUST follow this precise format:
|
| 272 |
-
|
| 273 |
-
Thought: I need to use a tool to find the answer.
|
| 274 |
-
Action: [tool_name] # This will be one of [{tool_names}]
|
| 275 |
-
Action Input: [input_for_the_tool]
|
| 276 |
-
Observation: [result_from_the_tool]
|
| 277 |
-
|
| 278 |
-
IMPORTANT NOTE ON TOOL USAGE:
|
| 279 |
-
- If an 'Observation' from a tool does NOT directly contain the specific answer to your question, you MUST refine your query or switch to a different, more suitable tool (e.g., 'tavily_search' for broader or more current information if 'wikipedia_search_tool' was insufficient). Do NOT get stuck repeatedly using the same tool if it's not yielding the direct answer.
|
| 280 |
-
- If the input contains the exact phrase "Attachment '{{file_name}}' available at: {{attachment_url}}" (where '{{file_name}}' and '{{attachment_url}}' are placeholders for actual values), consider the file type:
|
| 281 |
-
- If the file type is binary/text (e.g., .xlsx, .docx, .mp3, .jpg, .pdf,.png), you MUST use the 'file_saver' tool to download and save it.
|
| 282 |
-
For 'file_saver', the Action Input must be a JSON string like: '{{"url": "the_attachment_url", "local_filename": "the_file_name_from_attachment"}}'
|
| 283 |
-
example: for input, Attachment '1f975693-876d-457b-a649-393859e79bf3.mp3' available at EXACT URL: https://agents-course-unit4-scoring.hf.space/files/1f975693-876d-457b-a649-393859e79bf3, Action Input for file_saver would be '{{"url": "https://agents-course-unit4-scoring.hf.space/files/1f975693-876d-457b-a649-393859e79bf3", "local_filename": "1f975693-876d-457b-a649-393859e79bf3.mp3"}}'
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
IMPORTANT: When processing audio files (like .mp3) that have been saved using 'file_saver', the 'audio_transcriber_tool' MUST be used with the 'local_filename' of the saved audio file as its Action Input. Do NOT pass URLs or remote paths directly to 'audio_transcriber_tool'.
|
| 287 |
-
For any incoming image files (e.g., .jpg, .png), it's crucial to download and save them locally using the 'file_saver' tool. Once the image is saved, you should then decide whether to utilize other available tools or your Multimodal LLM to formulate a response. If you have sufficient information and can provide a CONCISE response, or if no tool is needed, you MUST use this precise format:
|
| 288 |
-
|
| 289 |
-
if you can use a LLM to answer the question, think step-by-step and then answer the question.
|
| 290 |
-
Example: given a chess board image and asked to predict the next best move, if Multi-modal LLM is available, you can use it to answer the question.
|
| 291 |
-
|
| 292 |
-
Thought: I have enough information, or no tool is needed.
|
| 293 |
-
Final Answer: [your concise/short response here]
|
| 294 |
-
|
| 295 |
-
NOTE: it is MANDATORY for you to be precise and concise in your response. Respond directly with ONLY the answer, without any introductory phrases or additional details.
|
| 296 |
-
For example, if asked for the number of letters in the English alphabet, respond with '26'. Do NOT say "The number of letters is 26."
|
| 297 |
-
VERY IMPORTANT: Your response MUST always start with 'Thought:'.
|
| 298 |
-
|
| 299 |
-
Here are some examples of how you should respond:
|
| 300 |
-
|
| 301 |
-
Example 1:
|
| 302 |
-
Question: What is the capital of France?
|
| 303 |
-
Thought: I need to use a tool to find the capital of France.
|
| 304 |
-
Action: tavily_search_results
|
| 305 |
-
Action Input: capital of France
|
| 306 |
-
Observation: The capital of France is Paris.
|
| 307 |
-
Thought: I have found the answer.
|
| 308 |
-
Final Answer: Paris
|
| 309 |
-
|
| 310 |
-
Example 2:
|
| 311 |
-
Question: What is 2 + 2?
|
| 312 |
-
Thought: This is a simple arithmetic question, no tool is needed.
|
| 313 |
-
Final Answer: 4
|
| 314 |
-
|
| 315 |
-
Example 3:
|
| 316 |
-
Question: How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.
|
| 317 |
-
Thought: The user is asking for specific information from Wikipedia, likely requiring a list or discography. The `travily_api_search_tool` is best for this to get the detailed section. After getting the content, I will need to parse it using `python_repl` to count the albums within the specified years.
|
| 318 |
-
Action: serpapi_Google Search
|
| 319 |
-
Action Input: Mercedes Sosa section: Discography
|
| 320 |
-
Observation: [Discography text content]
|
| 321 |
-
Thought: I have retrieved the discography text. Now I need to parse this text to identify and count studio albums released between 2000 and 2009. I will use the `python_repl` tool for this.
|
| 322 |
-
Action: python_repl
|
| 323 |
-
Action Input:
|
| 324 |
-
```python
|
| 325 |
-
import re
|
| 326 |
-
text = "[Discography text content from previous observation]" # Replace with actual text
|
| 327 |
-
albums_2000_2009 = []
|
| 328 |
-
# This is a simplified regex example; actual parsing might be more complex depending on text format
|
| 329 |
-
pattern = r"\((\d{{4}})\s*(.*?)(?:\[|\n|$)"
|
| 330 |
-
for match in re.finditer(pattern, text):
|
| 331 |
-
year = int(match.group(1))
|
| 332 |
-
if 2000 <= year <= 2009:
|
| 333 |
-
albums_2000_2009.append(match.group(2).strip())
|
| 334 |
-
print(len(albums_2000_2009))
|
| 335 |
-
Observation: 3
|
| 336 |
-
Thought: I have parsed the discography and counted the albums. I have found the answer.
|
| 337 |
-
Final Answer: 3
|
| 338 |
-
---
|
| 339 |
-
Previous conversation history:
|
| 340 |
-
{chat_history}
|
| 341 |
-
|
| 342 |
-
New input: {input}
|
| 343 |
-
---
|
| 344 |
-
{agent_scratchpad}
|
| 345 |
-
"""
|
| 346 |
-
)
|
| 347 |
-
'''
|
| 348 |
|
| 349 |
prompt = PromptTemplate(
|
| 350 |
input_variables=["input", "agent_scratchpad", "chat_history", "tool_names"],
|
|
@@ -526,18 +352,20 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 526 |
full_question_for_agent += f"\n\nAttachment '{file_name}' available at EXACT URL: {attachment_url}"
|
| 527 |
print(f"Running agent on task {task_id}: {full_question_for_agent}",flush=True)
|
| 528 |
|
| 529 |
-
|
| 530 |
allowed_ids = {
|
| 531 |
-
"7bd855d8-463d-4ed5-93ca-5fe35145f733",
|
| 532 |
"cca530fc-4052-43b2-b130-b30968d8aa44",
|
| 533 |
#"1f975693-876d-457b-a649-393859e79bf3",
|
| 534 |
#"99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
|
| 535 |
#"4fc2f1ae-8625-45b5-ab34-ad4433bc21f8",
|
| 536 |
#"8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
|
|
|
|
|
|
|
| 537 |
}
|
| 538 |
if task_id not in allowed_ids:
|
| 539 |
continue
|
| 540 |
-
|
| 541 |
|
| 542 |
try:
|
| 543 |
submitted_answer = agent(full_question_for_agent)
|
|
@@ -571,8 +399,17 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 571 |
f"Message: {result_data.get('message', 'No message received.')}"
|
| 572 |
)
|
| 573 |
print("Submission successful.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 574 |
results_df = pd.DataFrame(results_log)
|
| 575 |
-
return
|
| 576 |
except requests.exceptions.HTTPError as e:
|
| 577 |
error_detail = f"Server responded with status {e.response.status_code}."
|
| 578 |
try:
|
|
|
|
| 4 |
import inspect
|
| 5 |
import pandas as pd
|
| 6 |
import time
|
| 7 |
+
import re
|
| 8 |
from langchain_google_genai import ChatGoogleGenerativeAI
|
| 9 |
from langchain_community.tools import TavilySearchResults
|
| 10 |
from langchain import hub # Used to pull predefined prompts from LangChain Hub
|
|
|
|
| 23 |
from openai import OpenAI
|
| 24 |
|
| 25 |
# tools imported from helper.py
|
| 26 |
+
from helper import repl_tool, get_travily_api_search_tool,audio_transcriber_tool,wikipedia_search_tool,file_saver_tool,wikipedia_full_content_tool,serpapi_Google_Search_tool,gemini_multimodal_tool
|
| 27 |
|
| 28 |
|
| 29 |
|
|
|
|
| 103 |
return self.invoke_with_retry(question)
|
| 104 |
|
| 105 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
|
| 108 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
|
|
| 137 |
print(f"Using OpenAI API key: {openai_api_key[:4]}... (truncated for security)")
|
| 138 |
|
| 139 |
|
| 140 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
#NMODEL
|
| 142 |
#'''
|
| 143 |
llm_client = ChatGoogleGenerativeAI(
|
|
|
|
| 171 |
# Pull a predefined prompt from LangChain Hub
|
| 172 |
# "hwchase17/react-chat" is a prompt template designed for ReAct-style conversational agents.
|
| 173 |
#prompt = hub.pull("hwchase17/react-chat")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
|
| 175 |
prompt = PromptTemplate(
|
| 176 |
input_variables=["input", "agent_scratchpad", "chat_history", "tool_names"],
|
|
|
|
| 352 |
full_question_for_agent += f"\n\nAttachment '{file_name}' available at EXACT URL: {attachment_url}"
|
| 353 |
print(f"Running agent on task {task_id}: {full_question_for_agent}",flush=True)
|
| 354 |
|
| 355 |
+
|
| 356 |
allowed_ids = {
|
| 357 |
+
#"7bd855d8-463d-4ed5-93ca-5fe35145f733",
|
| 358 |
"cca530fc-4052-43b2-b130-b30968d8aa44",
|
| 359 |
#"1f975693-876d-457b-a649-393859e79bf3",
|
| 360 |
#"99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
|
| 361 |
#"4fc2f1ae-8625-45b5-ab34-ad4433bc21f8",
|
| 362 |
#"8e867cd7-cff9-4e6c-867a-ff5ddc2550be",
|
| 363 |
+
"a1e91b78-d3d8-4675-bb8d-62741b4b68a6",
|
| 364 |
+
"3f57289b-8c60-48be-bd80-01f8099ca449",
|
| 365 |
}
|
| 366 |
if task_id not in allowed_ids:
|
| 367 |
continue
|
| 368 |
+
|
| 369 |
|
| 370 |
try:
|
| 371 |
submitted_answer = agent(full_question_for_agent)
|
|
|
|
| 399 |
f"Message: {result_data.get('message', 'No message received.')}"
|
| 400 |
)
|
| 401 |
print("Submission successful.")
|
| 402 |
+
# Step 1: Remove common problematic characters (like null bytes, non-breaking spaces, etc.)
|
| 403 |
+
# This regex removes every character that is not printable ASCII, \n, \r, or \t (this also drops all non-ASCII text, e.g. accented letters).
|
| 404 |
+
# \x20-\x7E covers space through tilde (~)
|
| 405 |
+
# \n\r\t covers newlines, carriage returns, and tabs
|
| 406 |
+
# You might need to adjust this regex based on what 'wonky chars' you specifically observe.
|
| 407 |
+
cleaned_final_status = re.sub(r'[^\x20-\x7E\n\r\t]+', '', final_status)
|
| 408 |
+
|
| 409 |
+
# Step 2: Strip leading/trailing whitespace (including newlines from formatting)
|
| 410 |
+
cleaned_final_status = cleaned_final_status.strip()
|
| 411 |
results_df = pd.DataFrame(results_log)
|
| 412 |
+
return cleaned_final_status, results_df
|
| 413 |
except requests.exceptions.HTTPError as e:
|
| 414 |
error_detail = f"Server responded with status {e.response.status_code}."
|
| 415 |
try:
|