Update app.py
Browse files
app.py
CHANGED
|
@@ -37,22 +37,23 @@ def file_reader(file_path: str) -> str:
|
|
| 37 |
except Exception as e:
|
| 38 |
return f"Error reading or processing file '{file_path}': {e}"
|
| 39 |
|
| 40 |
-
# --- Agent Class (
|
| 41 |
class GaiaSmolAgent:
|
| 42 |
def __init__(self):
|
| 43 |
-
|
| 44 |
-
|
|
|
|
| 45 |
if not api_key:
|
| 46 |
-
raise ValueError("API key '
|
| 47 |
|
| 48 |
self.planner_model = LiteLLMModel(
|
| 49 |
-
|
|
|
|
| 50 |
api_key=api_key,
|
| 51 |
temperature=0.0,
|
| 52 |
)
|
| 53 |
|
| 54 |
# Initialize the agent with the tools it can use.
|
| 55 |
-
# The agent will make these available to the script it runs.
|
| 56 |
self.executor_agent = CodeAgent(
|
| 57 |
model=self.planner_model,
|
| 58 |
tools=[file_reader, DuckDuckGoSearchTool()],
|
|
@@ -64,7 +65,6 @@ class GaiaSmolAgent:
|
|
| 64 |
"""Generates a self-contained Python script to answer the question."""
|
| 65 |
print(f"Generating script for question: {question[:100]}...")
|
| 66 |
|
| 67 |
-
# This new prompt asks for a single, complete script.
|
| 68 |
prompt = f"""
|
| 69 |
You are an expert Python programmer. Your task is to write a single, self-contained Python script to answer the user's question.
|
| 70 |
|
|
@@ -89,11 +89,9 @@ class GaiaSmolAgent:
|
|
| 89 |
|
| 90 |
Now, write the Python script to answer the user's question.
|
| 91 |
"""
|
| 92 |
-
# The generate method expects a list of message dictionaries, not a raw string.
|
| 93 |
messages = [{"role": "user", "content": prompt}]
|
| 94 |
response = self.planner_model.generate(messages)
|
| 95 |
|
| 96 |
-
# Clean up the response from the LLM, which sometimes wraps it in markdown
|
| 97 |
if "```python" in response:
|
| 98 |
response = response.split("```python")[1].split("```")[0].strip()
|
| 99 |
|
|
@@ -105,12 +103,7 @@ class GaiaSmolAgent:
|
|
| 105 |
print(f"Agent received question: {question[:100]}...")
|
| 106 |
|
| 107 |
try:
|
| 108 |
-
# Step 1: Generate a single, complete script
|
| 109 |
script_to_execute = self._generate_script(question)
|
| 110 |
-
|
| 111 |
-
# Step 2: Execute the entire script in one go.
|
| 112 |
-
# The agent will match the function calls in the script (e.g., duck_duck_go_search)
|
| 113 |
-
# to the tools it was initialized with.
|
| 114 |
final_answer = self.executor_agent.run(script_to_execute)
|
| 115 |
|
| 116 |
except Exception as e:
|
|
@@ -167,7 +160,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 167 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
| 168 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
| 169 |
except Exception as e:
|
| 170 |
-
# This catches errors in the __call__ method itself
|
| 171 |
error_message = f"AGENT ERROR: {e}"
|
| 172 |
print(f"Error running agent on task {task_id}: {e}")
|
| 173 |
print(traceback.format_exc())
|
|
@@ -193,13 +185,13 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 193 |
except Exception as e:
|
| 194 |
return f"Submission Failed: {e}", pd.DataFrame(results_log)
|
| 195 |
|
| 196 |
-
# --- Gradio Interface (
|
| 197 |
with gr.Blocks() as demo:
|
| 198 |
gr.Markdown("# GAIA Agent Evaluation Runner (smol-agent)")
|
| 199 |
gr.Markdown(
|
| 200 |
"""
|
| 201 |
**Instructions:**
|
| 202 |
-
1. Ensure you have added your **
|
| 203 |
2. Log in to your Hugging Face account using the button below.
|
| 204 |
3. Click 'Run Evaluation & Submit All Answers' to run your agent and see the score.
|
| 205 |
"""
|
|
@@ -209,7 +201,6 @@ with gr.Blocks() as demo:
|
|
| 209 |
status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
|
| 210 |
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
| 211 |
|
| 212 |
-
|
| 213 |
run_button.click(
|
| 214 |
fn=run_and_submit_all,
|
| 215 |
outputs=[status_output, results_table]
|
|
@@ -217,4 +208,4 @@ with gr.Blocks() as demo:
|
|
| 217 |
|
| 218 |
if __name__ == "__main__":
|
| 219 |
print("Launching Gradio Interface for GAIA Agent Evaluation...")
|
| 220 |
-
demo.launch(debug=True, share=False)
|
|
|
|
| 37 |
except Exception as e:
|
| 38 |
return f"Error reading or processing file '{file_path}': {e}"
|
| 39 |
|
| 40 |
+
# --- Agent Class (Now using a free Open-Source LLM) ---
|
| 41 |
class GaiaSmolAgent:
|
| 42 |
def __init__(self):
|
| 43 |
+
# --- MODIFICATION: Switched to Groq for free, fast inference ---
|
| 44 |
+
print("Initializing GaiaSmolAgent with a free Open-Source LLM via Groq...")
|
| 45 |
+
api_key = os.getenv("GROQ_API_KEY")
|
| 46 |
if not api_key:
|
| 47 |
+
raise ValueError("API key 'GROQ_API_KEY' not found in environment secrets.")
|
| 48 |
|
| 49 |
self.planner_model = LiteLLMModel(
|
| 50 |
+
# Using Llama 3 8B via Groq's free API
|
| 51 |
+
model_id="groq/llama3-8b-8192",
|
| 52 |
api_key=api_key,
|
| 53 |
temperature=0.0,
|
| 54 |
)
|
| 55 |
|
| 56 |
# Initialize the agent with the tools it can use.
|
|
|
|
| 57 |
self.executor_agent = CodeAgent(
|
| 58 |
model=self.planner_model,
|
| 59 |
tools=[file_reader, DuckDuckGoSearchTool()],
|
|
|
|
| 65 |
"""Generates a self-contained Python script to answer the question."""
|
| 66 |
print(f"Generating script for question: {question[:100]}...")
|
| 67 |
|
|
|
|
| 68 |
prompt = f"""
|
| 69 |
You are an expert Python programmer. Your task is to write a single, self-contained Python script to answer the user's question.
|
| 70 |
|
|
|
|
| 89 |
|
| 90 |
Now, write the Python script to answer the user's question.
|
| 91 |
"""
|
|
|
|
| 92 |
messages = [{"role": "user", "content": prompt}]
|
| 93 |
response = self.planner_model.generate(messages)
|
| 94 |
|
|
|
|
| 95 |
if "```python" in response:
|
| 96 |
response = response.split("```python")[1].split("```")[0].strip()
|
| 97 |
|
|
|
|
| 103 |
print(f"Agent received question: {question[:100]}...")
|
| 104 |
|
| 105 |
try:
|
|
|
|
| 106 |
script_to_execute = self._generate_script(question)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
final_answer = self.executor_agent.run(script_to_execute)
|
| 108 |
|
| 109 |
except Exception as e:
|
|
|
|
| 160 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
| 161 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
| 162 |
except Exception as e:
|
|
|
|
| 163 |
error_message = f"AGENT ERROR: {e}"
|
| 164 |
print(f"Error running agent on task {task_id}: {e}")
|
| 165 |
print(traceback.format_exc())
|
|
|
|
| 185 |
except Exception as e:
|
| 186 |
return f"Submission Failed: {e}", pd.DataFrame(results_log)
|
| 187 |
|
| 188 |
+
# --- Gradio Interface (Updated Instructions) ---
|
| 189 |
with gr.Blocks() as demo:
|
| 190 |
gr.Markdown("# GAIA Agent Evaluation Runner (smol-agent)")
|
| 191 |
gr.Markdown(
|
| 192 |
"""
|
| 193 |
**Instructions:**
|
| 194 |
+
1. Ensure you have added your **Groq API key** (as `GROQ_API_KEY`) in the Space's secrets.
|
| 195 |
2. Log in to your Hugging Face account using the button below.
|
| 196 |
3. Click 'Run Evaluation & Submit All Answers' to run your agent and see the score.
|
| 197 |
"""
|
|
|
|
| 201 |
status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
|
| 202 |
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
| 203 |
|
|
|
|
| 204 |
run_button.click(
|
| 205 |
fn=run_and_submit_all,
|
| 206 |
outputs=[status_output, results_table]
|
|
|
|
| 208 |
|
| 209 |
if __name__ == "__main__":
|
| 210 |
print("Launching Gradio Interface for GAIA Agent Evaluation...")
|
| 211 |
+
demo.launch(debug=True, share=False)
|