Update app.py
Browse files
app.py
CHANGED
|
@@ -4,6 +4,7 @@ import requests
|
|
| 4 |
import pandas as pd
|
| 5 |
import traceback
|
| 6 |
import time
|
|
|
|
| 7 |
|
| 8 |
# Import smol-agent and tool components
|
| 9 |
from smolagents import CodeAgent, LiteLLMModel, tool
|
|
@@ -13,39 +14,62 @@ from unstructured.partition.auto import partition
|
|
| 13 |
# --- Constants ---
|
| 14 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 15 |
|
| 16 |
-
# --- Tool Definition ---
|
| 17 |
@tool
|
| 18 |
def file_reader(file_path: str) -> str:
|
| 19 |
-
"""
|
| 20 |
-
|
| 21 |
-
This tool supports various file types
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
Args:
|
| 25 |
file_path (str): The local path or web URL of the file to be read.
|
| 26 |
"""
|
|
|
|
| 27 |
try:
|
|
|
|
| 28 |
if file_path.startswith("http://") or file_path.startswith("https://"):
|
|
|
|
| 29 |
response = requests.get(file_path, timeout=20)
|
| 30 |
response.raise_for_status()
|
| 31 |
-
with open(
|
| 32 |
f.write(response.content)
|
| 33 |
-
|
| 34 |
-
os.remove("temp_file") # Clean up
|
| 35 |
else:
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
return "\n\n".join([str(el) for el in elements])
|
| 38 |
except Exception as e:
|
|
|
|
|
|
|
|
|
|
| 39 |
return f"Error reading or processing file '{file_path}': {e}"
|
| 40 |
|
| 41 |
-
|
|
|
|
| 42 |
class GaiaSmolAgent:
|
| 43 |
def __init__(self):
|
| 44 |
"""
|
| 45 |
Initializes the optimized agent.
|
| 46 |
-
|
| 47 |
-
Optimization 2: Use a single, powerful agent with a detailed system prompt
|
| 48 |
-
to eliminate the slow two-step (plan -> execute) process.
|
| 49 |
"""
|
| 50 |
print("Initializing Optimized GaiaSmolAgent...")
|
| 51 |
api_key = os.getenv("GEMINI_API_KEY")
|
|
@@ -62,14 +86,14 @@ class GaiaSmolAgent:
|
|
| 62 |
|
| 63 |
# Store the sophisticated system prompt as an instance variable.
|
| 64 |
self.system_prompt = """
|
| 65 |
-
You are an expert-level research assistant AI. Your sole purpose is to answer the user's question by breaking it down into logical steps and using the provided tools.
|
| 66 |
|
| 67 |
**Available Tools:**
|
| 68 |
- `duck_duck_go_search(query: str) -> str`: Use this to find information, file URLs, or anything on the web.
|
| 69 |
-
- `file_reader(file_path: str) -> str`: Use this to read the contents of a file from a local path or a web URL.
|
| 70 |
|
| 71 |
**Your Thought Process:**
|
| 72 |
-
1. **Deconstruct the Goal:** Carefully analyze the question to understand what information is needed.
|
| 73 |
2. **Formulate a Plan:** Think step-by-step about which tools to use in what order. For example, you might need to search for a URL first, then read the content of that URL.
|
| 74 |
3. **Execute & Analyze:** Call the necessary tools. Carefully examine the output of each tool to extract the required facts. You can write Python code to process the data returned by the tools.
|
| 75 |
4. **Synthesize the Answer:** Once you have gathered sufficient information, formulate a final, concise answer to the original question.
|
|
@@ -81,25 +105,36 @@ class GaiaSmolAgent:
|
|
| 81 |
- Do not ask for clarification. Directly proceed to solve the problem.
|
| 82 |
"""
|
| 83 |
|
| 84 |
-
# Initialize the agent
|
| 85 |
self.agent = CodeAgent(
|
| 86 |
model=model,
|
| 87 |
tools=[file_reader, DuckDuckGoSearchTool()],
|
| 88 |
add_base_tools=True, # Provides the python interpreter and the final_answer function
|
|
|
|
| 89 |
)
|
| 90 |
-
|
|
|
|
| 91 |
|
| 92 |
-
def __call__(self, question: str) -> str:
|
| 93 |
"""
|
| 94 |
Directly runs the agent to generate and execute a plan to answer the question.
|
| 95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
"""
|
| 97 |
print(f"Optimized Agent received question: {question[:100]}...")
|
|
|
|
| 98 |
try:
|
| 99 |
-
# Combine the system prompt with the
|
| 100 |
-
full_prompt = f"{self.system_prompt}\n\nUser Question: \"{question}\""
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
| 103 |
except Exception as e:
|
| 104 |
print(f"FATAL AGENT ERROR: An exception occurred during agent execution: {e}")
|
| 105 |
print(traceback.format_exc()) # Print full traceback for easier debugging
|
|
|
|
| 4 |
import pandas as pd
|
| 5 |
import traceback
|
| 6 |
import time
|
| 7 |
+
import mimetypes
|
| 8 |
|
| 9 |
# Import smol-agent and tool components
|
| 10 |
from smolagents import CodeAgent, LiteLLMModel, tool
|
|
|
|
| 14 |
# --- Constants ---
|
| 15 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 16 |
|
| 17 |
+
# --- Tool Definition (Updated for Multimodality) ---
@tool
def file_reader(file_path: str) -> str:
    """
    Reads the content of a file and returns its text content.

    This tool supports various file types, including text (PDF, TXT, CSV)
    and can perform Optical Character Recognition (OCR) on images (PNG, JPG).
    It can be used with either a local path or a web URL.
    For non-text/image formats like audio or video, it will return a message
    indicating the file type, as it cannot analyze their content directly.

    Args:
        file_path (str): The local path or web URL of the file to be read.
    """
    import tempfile
    from urllib.parse import urlparse

    temp_file_path = None
    try:
        # Handle web URLs by downloading to a unique temp file. Keep the
        # URL's extension: mimetypes.guess_type and 'unstructured' both key
        # off the suffix, so an extension-less name ("temp_downloaded_file")
        # would silently bypass the unsupported-format guard below.
        if file_path.startswith(("http://", "https://")):
            suffix = os.path.splitext(urlparse(file_path).path)[1]
            response = requests.get(file_path, timeout=20)
            response.raise_for_status()
            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as f:
                f.write(response.content)
                temp_file_path = f.name
            local_path = temp_file_path
        else:
            local_path = file_path

        # Gracefully handle unsupported file types (e.g. audio, video).
        mime_type, _ = mimetypes.guess_type(local_path)
        supported_apps = ("application/pdf", "application/zip")
        if mime_type and not (
            mime_type.startswith(("text/", "image/")) or mime_type in supported_apps
        ):
            return (
                f"File is of a non-visual, non-text format ({mime_type}). "
                "Content analysis is not supported by this tool."
            )

        # Use 'unstructured', which has built-in OCR for images.
        # This will extract text from images where possible.
        elements = partition(local_path)
        return "\n\n".join(str(el) for el in elements)
    except Exception as e:
        return f"Error reading or processing file '{file_path}': {e}"
    finally:
        # Single cleanup point for the downloaded temp file — runs on the
        # unsupported-format return, the success return, and any exception,
        # replacing the three duplicated cleanup sites.
        if temp_file_path and os.path.exists(temp_file_path):
            os.remove(temp_file_path)
|
| 65 |
|
| 66 |
+
|
| 67 |
+
# --- Agent Class (Updated with Native Memory Management) ---
|
| 68 |
class GaiaSmolAgent:
|
| 69 |
def __init__(self):
|
| 70 |
"""
|
| 71 |
Initializes the optimized agent.
|
| 72 |
+
Now uses the agent's native conversation memory capabilities.
|
|
|
|
|
|
|
| 73 |
"""
|
| 74 |
print("Initializing Optimized GaiaSmolAgent...")
|
| 75 |
api_key = os.getenv("GEMINI_API_KEY")
|
|
|
|
| 86 |
|
| 87 |
# Store the sophisticated system prompt as an instance variable.
|
| 88 |
self.system_prompt = """
|
| 89 |
+
You are an expert-level research assistant AI. Your sole purpose is to answer the user's question by breaking it down into logical steps and using the provided tools. You will have access to the conversation history, so use it for context.
|
| 90 |
|
| 91 |
**Available Tools:**
|
| 92 |
- `duck_duck_go_search(query: str) -> str`: Use this to find information, file URLs, or anything on the web.
|
| 93 |
+
- `file_reader(file_path: str) -> str`: Use this to read the contents of a file from a local path or a web URL. It can read text and extract text from images (OCR).
|
| 94 |
|
| 95 |
**Your Thought Process:**
|
| 96 |
+
1. **Deconstruct the Goal:** Carefully analyze the question to understand what information is needed, considering the previous turns in the conversation.
|
| 97 |
2. **Formulate a Plan:** Think step-by-step about which tools to use in what order. For example, you might need to search for a URL first, then read the content of that URL.
|
| 98 |
3. **Execute & Analyze:** Call the necessary tools. Carefully examine the output of each tool to extract the required facts. You can write Python code to process the data returned by the tools.
|
| 99 |
4. **Synthesize the Answer:** Once you have gathered sufficient information, formulate a final, concise answer to the original question.
|
|
|
|
| 105 |
- Do not ask for clarification. Directly proceed to solve the problem.
|
| 106 |
"""
|
| 107 |
|
| 108 |
+
# Initialize the agent with the updated file_reader tool and memory settings.
|
| 109 |
self.agent = CodeAgent(
|
| 110 |
model=model,
|
| 111 |
tools=[file_reader, DuckDuckGoSearchTool()],
|
| 112 |
add_base_tools=True, # Provides the python interpreter and the final_answer function
|
| 113 |
+
planning_interval=3 # Re-plan every 3 steps, considering memory.
|
| 114 |
)
|
| 115 |
+
|
| 116 |
+
print("Optimized GaiaSmolAgent initialized successfully with native memory and multimodal capabilities.")
|
| 117 |
|
| 118 |
+
def __call__(self, question: str, reset_memory: bool = False) -> str:
|
| 119 |
"""
|
| 120 |
Directly runs the agent to generate and execute a plan to answer the question.
|
| 121 |
+
It leverages the agent's built-in memory, controlled by the `reset` parameter.
|
| 122 |
+
|
| 123 |
+
Args:
|
| 124 |
+
question (str): The user's question.
|
| 125 |
+
reset_memory (bool): If True, the agent's conversation memory will be cleared
|
| 126 |
+
before running. Maps to the agent's `reset` parameter.
|
| 127 |
"""
|
| 128 |
print(f"Optimized Agent received question: {question[:100]}...")
|
| 129 |
+
|
| 130 |
try:
|
| 131 |
+
# Combine the system prompt with the current question. The agent will handle the history.
|
| 132 |
+
full_prompt = f"{self.system_prompt}\n\nCURRENT TASK:\nUser Question: \"{question}\""
|
| 133 |
+
|
| 134 |
+
# Use the agent's `reset` parameter to control conversation memory.
|
| 135 |
+
# `reset=False` keeps the memory from previous calls.
|
| 136 |
+
final_answer = self.agent.run(full_prompt, reset=reset_memory)
|
| 137 |
+
|
| 138 |
except Exception as e:
|
| 139 |
print(f"FATAL AGENT ERROR: An exception occurred during agent execution: {e}")
|
| 140 |
print(traceback.format_exc()) # Print full traceback for easier debugging
|