MickyWin22 committed on
Commit
c2f952d
·
verified ·
1 Parent(s): 0f2b299

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -127
app.py CHANGED
@@ -2,20 +2,18 @@ import os
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
 
5
 
6
  # Import smol-agent and tool components
7
  from smolagents import CodeAgent, LiteLLMModel, tool
8
- from smolagents import DuckDuckGoSearchTool
 
9
  from unstructured.partition.auto import partition
10
 
11
-
12
  # --- Constants ---
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
 
15
- # --- Agent Definition ---
16
-
17
- # 1. Define Your Tools
18
-
19
  @tool
20
  def file_reader(file_path: str) -> str:
21
  """Reads the content of a file and returns its text content.
@@ -30,179 +28,155 @@ def file_reader(file_path: str) -> str:
30
  if file_path.startswith("http://") or file_path.startswith("https://"):
31
  response = requests.get(file_path, timeout=20)
32
  response.raise_for_status()
33
- # Use a temporary file to process with unstructured
34
  with open("temp_file", "wb") as f:
35
  f.write(response.content)
36
  elements = partition("temp_file")
37
  os.remove("temp_file") # Clean up
38
  else:
39
- # Assumes it's a local path within the Space
40
  elements = partition(file_path)
41
-
42
  return "\n\n".join([str(el) for el in elements])
43
  except Exception as e:
44
  return f"Error reading or processing file '{file_path}': {e}"
45
 
46
- # 2. Define Your Agent Class
47
  class GaiaSmolAgent:
48
  def __init__(self):
49
  print("Initializing GaiaSmolAgent with OpenAI...")
50
- # Ensure you have set your OPENAI_API_KEY as a secret in your HF Space
51
  api_key = os.getenv("OPENAI_API_KEY")
52
  if not api_key:
53
  raise ValueError("API key 'OPENAI_API_KEY' not found in environment secrets.")
54
 
55
- # The "Planner" model - for high-level reasoning
56
  self.planner_model = LiteLLMModel(
57
- model_id="gpt-4o", # Using OpenAI's gpt-4o model
58
  api_key=api_key,
59
  temperature=0.0,
60
  )
61
 
62
- from smolagents import DuckDuckGoSearchTool
63
-
64
- # The "Executor" agent - for executing tasks with tools
65
  self.executor_agent = CodeAgent(
66
- model=self.planner_model, # Can use the same model
67
- # --- MODIFICATION 2: Use the DEFINITIVELY correct tool name in the list ---
68
- tools=[file_reader, DuckDuckGoSearchTool()], # Corrected class name
69
- add_base_tools=True,
70
  )
71
- print("GaiaSmolAgent initialized successfully with OpenAI.")
72
 
73
- def _generate_plan(self, question: str) -> list[str]:
74
- """Generates a step-by-step plan to answer the question."""
75
- print(f"Generating plan for question: {question[:100]}...")
76
 
 
77
  prompt = f"""
78
- You are a master planner that creates Python code plans for an agent.
79
- You have access to the following tools:
80
- - `DuckDuckGoSearch(query: str) -> str`: Searches the web and returns a string with the results.
81
- - `file_reader(file_path: str) -> str`: Reads a file from a URL or local path and returns its contents as a string.
82
- - A full Python interpreter to process strings, perform calculations, etc.
83
 
84
- Your task is to create a plan to answer the user's question. The plan must be a Python list of strings, where each string is a single line of Python code.
 
 
85
 
86
- **Crucial Instructions:**
87
- 1. The output of `DuckDuckGoSearch` and `file_reader` is always a STRING. You MUST use Python code (e.g., string manipulation, regex) in a subsequent step to extract information from this string. **DO NOT treat the tool output like a dictionary or JSON.**
88
- 2. Store the output of tools in variables (e.g., `search_results = DuckDuckGoSearch(...)`).
89
- 3. The final step of the plan MUST be a call to `final_answer("your final answer here")`. The answer must be a single, concise string.
 
90
 
91
  Question: "{question}"
92
 
93
- Example of a good plan for the question "What is the main topic of the document at http://example.com/paper.pdf?":
94
- Plan:
95
- [
96
- "file_content = file_reader('http://example.com/paper.pdf')",
97
- "summary = 'The main topic seems to be about: ' + file_content[:200]",
98
- "final_answer(summary)"
99
- ]
 
100
  """
101
  response = self.planner_model.generate(prompt)
102
- print(f"Generated plan: {response}")
103
- try:
104
- # Safely evaluate the string to a Python list
105
- plan = eval(response)
106
- if isinstance(plan, list):
107
- return plan
108
- else:
109
- # If the LLM doesn't return a list, create a fallback plan
110
- return [f"final_answer('Error: Plan generation failed. The model did not return a valid list.')"]
111
- except Exception as e:
112
- print(f"Error parsing plan with eval(): {e}")
113
- # If eval fails, create a fallback plan
114
- return [f"final_answer('Error: Plan generation failed. The model returned malformed code: {response}')"]
115
-
116
 
117
  def __call__(self, question: str) -> str:
118
- """Runs the planner and executor to answer the question."""
119
- # ... (This method remains unchanged)
120
  print(f"Agent received question: {question[:100]}...")
121
- plan = self._generate_plan(question)
122
- final_answer = self.executor_agent.run(plan)
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  print(f"Agent returning final answer: {final_answer}")
124
  return str(final_answer)
125
 
126
-
127
  def run_and_submit_all(profile: gr.OAuthProfile | None):
128
- """
129
- Fetches all questions, runs the GaiaSmolAgent on them, submits all answers,
130
- and displays the results.
131
- """
132
- # --- Determine HF Space Runtime URL and Repo URL ---
133
  space_id = os.getenv("SPACE_ID")
134
-
135
- if profile:
136
- username = f"{profile.username}"
137
- print(f"User logged in: {username}")
138
- else:
139
- print("User not logged in.")
140
  return "Please Login to Hugging Face with the button.", None
 
 
 
141
 
142
  api_url = DEFAULT_API_URL
143
  questions_url = f"{api_url}/questions"
144
  submit_url = f"{api_url}/submit"
145
 
146
- # 1. Instantiate Agent
147
  try:
148
- # **MODIFIED PART: Instantiate your new agent**
149
  agent = GaiaSmolAgent()
150
  except Exception as e:
151
- print(f"Error instantiating agent: {e}")
152
  return f"Error initializing agent: {e}", None
153
 
154
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
155
- print(agent_code)
156
 
157
- # 2. Fetch Questions
158
- print(f"Fetching questions from: {questions_url}")
159
  try:
160
  response = requests.get(questions_url, timeout=15)
161
  response.raise_for_status()
162
  questions_data = response.json()
163
  if not questions_data:
164
- print("Fetched questions list is empty.")
165
  return "Fetched questions list is empty or invalid format.", None
166
- print(f"Fetched {len(questions_data)} questions.")
167
  except Exception as e:
168
- print(f"Error fetching questions: {e}")
169
  return f"Error fetching questions: {e}", None
170
 
171
-
172
- # 3. Run your Agent
173
  results_log = []
174
  answers_payload = []
175
- print(f"Running agent on {len(questions_data)} questions...")
176
  for item in questions_data:
177
  task_id = item.get("task_id")
178
- # GAIA questions can include file paths
179
  question_text = item.get("question")
180
- file_path = item.get("file") # Get the file URL if it exists
181
  if file_path:
182
  question_text += f"\n\nRelevant file is available at: {file_path}"
183
-
184
  if not task_id or question_text is None:
185
- print(f"Skipping item with missing task_id or question: {item}")
186
  continue
 
187
  try:
188
  submitted_answer = agent(question_text)
189
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
190
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
191
  except Exception as e:
 
 
192
  print(f"Error running agent on task {task_id}: {e}")
193
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
194
 
195
  if not answers_payload:
196
- print("Agent did not produce any answers to submit.")
197
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
198
 
199
- # 4. Prepare Submission
200
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
201
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
202
- print(status_update)
203
-
204
- # 5. Submit
205
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
206
  try:
207
  response = requests.post(submit_url, json=submission_data, timeout=60)
208
  response.raise_for_status()
@@ -214,56 +188,38 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
214
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
215
  f"Message: {result_data.get('message', 'No message received.')}"
216
  )
217
- print("Submission successful.")
218
- results_df = pd.DataFrame(results_log)
219
- return final_status, results_df
220
- except requests.exceptions.HTTPError as e:
221
- error_detail = f"Server responded with status {e.response.status_code}."
222
- try:
223
- error_json = e.response.json()
224
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
225
- except requests.exceptions.JSONDecodeError:
226
- error_detail += f" Response: {e.response.text[:500]}"
227
- status_message = f"Submission Failed: {error_detail}"
228
- print(status_message)
229
- results_df = pd.DataFrame(results_log)
230
- return status_message, results_df
231
  except Exception as e:
232
- status_message = f"An unexpected error occurred during submission: {e}"
233
- print(status_message)
234
- results_df = pd.DataFrame(results_log)
235
- return status_message, results_df
236
 
237
- # --- Gradio Interface ---
238
- # (This part remains unchanged)
239
  with gr.Blocks() as demo:
240
  gr.Markdown("# GAIA Agent Evaluation Runner (smol-agent)")
241
  gr.Markdown(
242
  """
243
  **Instructions:**
244
-
245
  1. Ensure you have added your **OpenAI API key** (as `OPENAI_API_KEY`) in the Space's secrets.
246
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
247
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
248
  """
249
  )
250
-
251
  gr.LoginButton()
252
-
253
  run_button = gr.Button("Run Evaluation & Submit All Answers")
254
-
255
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
256
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
257
-
258
- # This is the new way to bind the OAuth profile to the function
259
- run_button.click(
260
  fn=run_and_submit_all,
261
- inputs=None, # No direct input components
262
  outputs=[status_output, results_table],
263
- api_name="run_evaluation" # Add an API name for programmatic access
 
 
 
 
 
264
  )
265
 
266
- # The __main__ block remains the same
267
  if __name__ == "__main__":
268
  print("Launching Gradio Interface for GAIA Agent Evaluation...")
269
  demo.launch(debug=True, share=False)
 
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
+ import traceback
6
 
7
  # Import smol-agent and tool components
8
  from smolagents import CodeAgent, LiteLLMModel, tool
9
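+ # NOTE(review): the import this commit replaces was `from smolagents import DuckDuckGoSearchTool`; confirm that `smolagents.tools.DuckDuckGoSearch` actually exists before relying on it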
10
+ from smolagents.tools import DuckDuckGoSearch
11
  from unstructured.partition.auto import partition
12
 
 
13
  # --- Constants ---
14
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
 
16
+ # --- Tool Definition ---
 
 
 
17
  @tool
18
  def file_reader(file_path: str) -> str:
19
  """Reads the content of a file and returns its text content.
 
28
  if file_path.startswith("http://") or file_path.startswith("https://"):
29
  response = requests.get(file_path, timeout=20)
30
  response.raise_for_status()
 
31
  with open("temp_file", "wb") as f:
32
  f.write(response.content)
33
  elements = partition("temp_file")
34
  os.remove("temp_file") # Clean up
35
  else:
 
36
  elements = partition(file_path)
 
37
  return "\n\n".join([str(el) for el in elements])
38
  except Exception as e:
39
  return f"Error reading or processing file '{file_path}': {e}"
40
 
41
+ # --- Agent Class (Completely Rewritten) ---
42
  class GaiaSmolAgent:
43
  def __init__(self):
44
  print("Initializing GaiaSmolAgent with OpenAI...")
 
45
  api_key = os.getenv("OPENAI_API_KEY")
46
  if not api_key:
47
  raise ValueError("API key 'OPENAI_API_KEY' not found in environment secrets.")
48
 
 
49
  self.planner_model = LiteLLMModel(
50
+ model_id="gpt-4o",
51
  api_key=api_key,
52
  temperature=0.0,
53
  )
54
 
55
+ # Initialize the agent with the tools it can use.
56
+ # The agent will make these available to the script it runs.
 
57
  self.executor_agent = CodeAgent(
58
+ model=self.planner_model,
59
+ tools=[file_reader, DuckDuckGoSearch()],
60
+ add_base_tools=True, # Provides a python interpreter
 
61
  )
62
+ print("GaiaSmolAgent initialized successfully.")
63
 
64
+ def _generate_script(self, question: str) -> str:
65
+ """Generates a self-contained Python script to answer the question."""
66
+ print(f"Generating script for question: {question[:100]}...")
67
 
68
+ # This new prompt asks for a single, complete script.
69
  prompt = f"""
70
+ You are an expert Python programmer. Your task is to write a single, self-contained Python script to answer the user's question.
 
 
 
 
71
 
72
+ You have access to the following functions which are pre-imported and ready to use:
73
+ - `duck_duck_go_search(query: str) -> str`: Searches the web and returns a string with the results.
74
+ - `file_reader(file_path: str) -> str`: Reads a file and returns its contents as a string.
75
 
76
+ CRITICAL INSTRUCTIONS:
77
+ 1. Your output must be ONLY the Python code for the script. Do not add any explanation or markdown formatting like ```python.
78
+ 2. The script MUST end with a call to a function `final_answer(answer: str)`.
79
+ 3. The `answer` passed to `final_answer` must be a single, concise string.
80
+ 4. All logic, including processing the string outputs from the tools, must be included in this single script. State is preserved within the script.
81
 
82
  Question: "{question}"
83
 
84
+ Example for "What is the capital of France?":
85
+ search_result = duck_duck_go_search("capital of France")
86
+ # In a real scenario, you would parse this string to find the answer.
87
+ # For this example, we'll just summarize the string.
88
+ answer = "Based on the search, the capital is likely Paris." # Replace with actual logic
89
+ final_answer(answer)
90
+
91
+ Now, write the Python script to answer the user's question.
92
  """
93
  response = self.planner_model.generate(prompt)
94
+
95
+ # Clean up the response from the LLM, which sometimes wraps it in markdown
96
+ if "```python" in response:
97
+ response = response.split("```python")[1].split("```")[0].strip()
98
+
99
+ print(f"--- Generated Script ---\n{response}\n------------------------")
100
+ return response
 
 
 
 
 
 
 
101
 
102
  def __call__(self, question: str) -> str:
103
+ """Generates and executes a single script to answer the question."""
 
104
  print(f"Agent received question: {question[:100]}...")
105
+
106
+ try:
107
+ # Step 1: Generate a single, complete script
108
+ script_to_execute = self._generate_script(question)
109
+
110
+ # Step 2: Execute the entire script in one go.
111
+ # The agent will match the function calls in the script (e.g., duck_duck_go_search)
112
+ # to the tools it was initialized with.
113
+ final_answer = self.executor_agent.run(script_to_execute)
114
+
115
+ except Exception as e:
116
+ print(f"FATAL AGENT ERROR: An exception occurred during agent execution: {e}")
117
+ print(traceback.format_exc()) # Print the full traceback for debugging
118
+ return f"FATAL AGENT ERROR: {e}"
119
+
120
  print(f"Agent returning final answer: {final_answer}")
121
  return str(final_answer)
122
 
123
+ # --- Main Application Logic ---
124
  def run_and_submit_all(profile: gr.OAuthProfile | None):
 
 
 
 
 
125
  space_id = os.getenv("SPACE_ID")
126
+ if not profile:
 
 
 
 
 
127
  return "Please Login to Hugging Face with the button.", None
128
+
129
+ username = profile.username
130
+ print(f"User logged in: {username}")
131
 
132
  api_url = DEFAULT_API_URL
133
  questions_url = f"{api_url}/questions"
134
  submit_url = f"{api_url}/submit"
135
 
 
136
  try:
 
137
  agent = GaiaSmolAgent()
138
  except Exception as e:
 
139
  return f"Error initializing agent: {e}", None
140
 
141
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
 
142
 
 
 
143
  try:
144
  response = requests.get(questions_url, timeout=15)
145
  response.raise_for_status()
146
  questions_data = response.json()
147
  if not questions_data:
 
148
  return "Fetched questions list is empty or invalid format.", None
 
149
  except Exception as e:
 
150
  return f"Error fetching questions: {e}", None
151
 
 
 
152
  results_log = []
153
  answers_payload = []
 
154
  for item in questions_data:
155
  task_id = item.get("task_id")
 
156
  question_text = item.get("question")
157
+ file_path = item.get("file")
158
  if file_path:
159
  question_text += f"\n\nRelevant file is available at: {file_path}"
160
+
161
  if not task_id or question_text is None:
 
162
  continue
163
+
164
  try:
165
  submitted_answer = agent(question_text)
166
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
167
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
168
  except Exception as e:
169
+ # This catches errors in the __call__ method itself
170
+ error_message = f"AGENT ERROR: {e}"
171
  print(f"Error running agent on task {task_id}: {e}")
172
+ print(traceback.format_exc())
173
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": error_message})
174
 
175
  if not answers_payload:
 
176
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
177
 
 
178
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
179
+
 
 
 
 
180
  try:
181
  response = requests.post(submit_url, json=submission_data, timeout=60)
182
  response.raise_for_status()
 
188
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
189
  f"Message: {result_data.get('message', 'No message received.')}"
190
  )
191
+ return final_status, pd.DataFrame(results_log)
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  except Exception as e:
193
+ return f"Submission Failed: {e}", pd.DataFrame(results_log)
 
 
 
194
 
195
+ # --- Gradio Interface ---
 
196
  with gr.Blocks() as demo:
197
  gr.Markdown("# GAIA Agent Evaluation Runner (smol-agent)")
198
  gr.Markdown(
199
  """
200
  **Instructions:**
 
201
  1. Ensure you have added your **OpenAI API key** (as `OPENAI_API_KEY`) in the Space's secrets.
202
+ 2. Log in to your Hugging Face account using the button below.
203
+ 3. Click 'Run Evaluation & Submit All Answers' to run your agent and see the score.
204
  """
205
  )
 
206
  gr.LoginButton()
 
207
  run_button = gr.Button("Run Evaluation & Submit All Answers")
 
208
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
209
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
210
+
211
+ demo.load(
 
212
  fn=run_and_submit_all,
213
+ inputs=None,
214
  outputs=[status_output, results_table],
215
+ every=None, # Remove automatic running on load
216
+ )
217
+
218
+ run_button.click(
219
+ fn=run_and_submit_all,
220
+ outputs=[status_output, results_table]
221
  )
222
 
 
223
  if __name__ == "__main__":
224
  print("Launching Gradio Interface for GAIA Agent Evaluation...")
225
  demo.launch(debug=True, share=False)