gabriel-melki committed on
Commit
2a41ea2
·
1 Parent(s): 860424e

Modify package structure

Browse files
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
  title: Gaia_benchmark_agent
3
- emoji: 🕵🏻‍♂️
4
- colorFrom: indigo
5
- colorTo: indigo
6
  sdk: gradio
7
  sdk_version: 5.25.2
8
  app_file: src/app.py
 
1
  ---
2
  title: Gaia_benchmark_agent
3
+ emoji: 🤖
4
+ colorFrom: red
5
+ colorTo: red
6
  sdk: gradio
7
  sdk_version: 5.25.2
8
  app_file: src/app.py
src/{agent.py → agent/QuestionAnsweringAgent.py} RENAMED
@@ -1,7 +1,7 @@
1
  import os
2
  import glob
3
  from smolagents import CodeAgent
4
- from prompt import get_prompt
5
 
6
  class QuestionAnsweringAgent(CodeAgent):
7
  def __init__(self, *args, **kwargs):
 
1
  import os
2
  import glob
3
  from smolagents import CodeAgent
4
+ from prompt.prompt import get_prompt
5
 
6
  class QuestionAnsweringAgent(CodeAgent):
7
  def __init__(self, *args, **kwargs):
src/app.py CHANGED
@@ -8,16 +8,16 @@ from tools.file_tools import read_file_as_text
8
  from tools.youtube_tools import download_youtube_url_images, download_youtube_url_audio
9
  from tools.image_processing_tools import ask_question_about_image
10
 
11
- from agent import QuestionAnsweringAgent
12
 
13
- from submission import build_gradio_interface
14
 
15
  model = InferenceClientModel(
16
  provider="auto",
17
  model_id="Qwen/Qwen3-Coder-30B-A3B-Instruct",
18
  temperature=0,
19
  top_p=1.0,
20
- seed=42
21
  )
22
 
23
  agent_tools = [
@@ -28,16 +28,26 @@ agent_tools = [
28
  ask_question_about_image
29
  ]
30
 
 
 
 
 
 
 
 
 
 
 
31
  agent = QuestionAnsweringAgent(
32
  name="question_answering_expert",
33
  model=model,
34
- tools=agent_tools,
35
  add_base_tools=True,
 
 
36
  planning_interval=None,
37
- additional_authorized_imports=["os", "bs4", "PIL", "transformers", "torch", "requests", "glob"],
38
  max_steps=10,
39
  verbosity_level=2, # For better debugging
40
  )
41
 
42
  if __name__ == "__main__":
43
- build_gradio_interface(agent)
 
8
  from tools.youtube_tools import download_youtube_url_images, download_youtube_url_audio
9
  from tools.image_processing_tools import ask_question_about_image
10
 
11
+ from agent.QuestionAnsweringAgent import QuestionAnsweringAgent
12
 
13
+ from ui.builder import GradioUI
14
 
15
  model = InferenceClientModel(
16
  provider="auto",
17
  model_id="Qwen/Qwen3-Coder-30B-A3B-Instruct",
18
  temperature=0,
19
  top_p=1.0,
20
+ seed=42
21
  )
22
 
23
  agent_tools = [
 
28
  ask_question_about_image
29
  ]
30
 
31
+ additional_authorized_imports=[
32
+ "os",
33
+ "bs4",
34
+ "PIL",
35
+ "transformers",
36
+ "torch",
37
+ "requests",
38
+ "glob"
39
+ ]
40
+
41
  agent = QuestionAnsweringAgent(
42
  name="question_answering_expert",
43
  model=model,
 
44
  add_base_tools=True,
45
+ tools=agent_tools,
46
+ additional_authorized_imports=additional_authorized_imports,
47
  planning_interval=None,
 
48
  max_steps=10,
49
  verbosity_level=2, # For better debugging
50
  )
51
 
52
  if __name__ == "__main__":
53
+ GradioUI(agent).launch()
src/{submission.py → eval/submission.py} RENAMED
@@ -7,6 +7,7 @@ import numpy as np
7
 
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
  SELECTED_QUESTIONS = [3]
 
10
  def run_and_submit_all(agent, profile: gr.OAuthProfile | None):
11
  """
12
  Fetches all questions, runs the BasicAgent on them, submits all answers,
@@ -141,7 +142,7 @@ def run_and_submit_all(agent, profile: gr.OAuthProfile | None):
141
  return status_message, results_df
142
 
143
 
144
- def build_gradio_interface(agent):
145
  # --- Build Gradio Interface using Blocks ---
146
  with gr.Blocks() as demo:
147
  gr.Markdown("# Basic Agent Evaluation Runner")
 
7
 
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
  SELECTED_QUESTIONS = [3]
10
+
11
  def run_and_submit_all(agent, profile: gr.OAuthProfile | None):
12
  """
13
  Fetches all questions, runs the BasicAgent on them, submits all answers,
 
142
  return status_message, results_df
143
 
144
 
145
+
146
  # --- Build Gradio Interface using Blocks ---
147
  with gr.Blocks() as demo:
148
  gr.Markdown("# Basic Agent Evaluation Runner")
src/{prompt.py → prompt/prompt.py} RENAMED
File without changes
src/ui/builder.py ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import mimetypes
2
+ import os
3
+ import re
4
+ import shutil
5
+ from typing import Optional
6
+ import gradio as gr
7
+
8
+ from smolagents.agent_types import AgentAudio, AgentImage, AgentText, handle_agent_output_types
9
+ from smolagents.agents import ActionStep, MultiStepAgent
10
+ from smolagents.memory import MemoryStep
11
+ from smolagents.utils import _is_package_available
12
+
13
+ from eval.submission import run_and_submit_all
14
+
15
def pull_messages_from_step(
    step_log: MemoryStep,
):
    """Extract ChatMessage objects from agent steps with proper nesting.

    Only ``ActionStep`` instances produce messages; any other step type yields
    nothing. For an action step the generator yields, in order: the step
    header, the cleaned model output (if any), the tool call with its nested
    execution logs and error (if any), a standalone error when there was no
    tool call, a small footnote with token/duration info, and a separator.

    Args:
        step_log: The memory step to render.

    Yields:
        ``gr.ChatMessage`` objects with role ``"assistant"``.
    """
    import gradio as gr

    if not isinstance(step_log, ActionStep):
        return

    # Output the step number
    step_number = f"Step {step_log.step_number}" if step_log.step_number is not None else ""
    yield gr.ChatMessage(role="assistant", content=f"**{step_number}**")

    # First yield the thought/reasoning from the LLM
    if getattr(step_log, "model_output", None) is not None:
        # Clean up the LLM output
        model_output = step_log.model_output.strip()
        # Remove any trailing <end_code> and extra backticks, handling multiple possible formats
        model_output = re.sub(r"```\s*<end_code>", "```", model_output)  # handles ```<end_code>
        model_output = re.sub(r"<end_code>\s*```", "```", model_output)  # handles <end_code>```
        model_output = re.sub(r"```\s*\n\s*<end_code>", "```", model_output)  # handles ```\n<end_code>
        model_output = model_output.strip()
        yield gr.ChatMessage(role="assistant", content=model_output)

    step_error = getattr(step_log, "error", None)
    tool_calls = getattr(step_log, "tool_calls", None)

    # For tool calls, create a parent message.
    # Truthiness (not just `is not None`) so an empty list does not hit `[0]`.
    if tool_calls:
        first_tool_call = tool_calls[0]
        used_code = first_tool_call.name == "python_interpreter"
        parent_id = f"call_{len(tool_calls)}"

        # Tool call becomes the parent message with timing info
        # First we will handle arguments based on type
        args = first_tool_call.arguments
        if isinstance(args, dict):
            content = str(args.get("answer", str(args)))
        else:
            content = str(args).strip()

        if used_code:
            # Clean up the content by removing any end code tags
            content = re.sub(r"```.*?\n", "", content)  # Remove existing code blocks
            content = re.sub(r"\s*<end_code>\s*", "", content)  # Remove end_code tags
            content = content.strip()
            if not content.startswith("```python"):
                content = f"```python\n{content}\n```"

        parent_message_tool = gr.ChatMessage(
            role="assistant",
            content=content,
            metadata={
                "title": f"🛠️ Used tool {first_tool_call.name}",
                "id": parent_id,
                "status": "pending",
            },
        )
        yield parent_message_tool

        # Nesting execution logs under the tool call if they exist
        observations = getattr(step_log, "observations", None)
        log_content = observations.strip() if observations is not None else ""
        if log_content:  # Only yield execution logs if there's actual content
            log_content = re.sub(r"^Execution logs:\s*", "", log_content)
            yield gr.ChatMessage(
                role="assistant",
                content=f"{log_content}",
                metadata={"title": "📝 Execution Logs", "parent_id": parent_id, "status": "done"},
            )

        # Nesting any errors under the tool call
        if step_error is not None:
            yield gr.ChatMessage(
                role="assistant",
                content=str(step_error),
                metadata={"title": "💥 Error", "parent_id": parent_id, "status": "done"},
            )

        # Update parent message metadata to done status without yielding a new message
        parent_message_tool.metadata["status"] = "done"

    # Handle standalone errors but not from tool calls
    elif step_error is not None:
        yield gr.ChatMessage(role="assistant", content=str(step_error), metadata={"title": "💥 Error"})

    # Calculate duration and token information.
    # Each piece is appended only when its value is usable: formatting None
    # with ":," or concatenating None to a str would raise TypeError.
    footnote_parts = [step_number]
    input_tokens = getattr(step_log, "input_token_count", None)
    output_tokens = getattr(step_log, "output_token_count", None)
    if input_tokens is not None and output_tokens is not None:
        footnote_parts.append(f" | Input-tokens:{input_tokens:,} | Output-tokens:{output_tokens:,}")
    duration = getattr(step_log, "duration", None)
    if duration:
        footnote_parts.append(f" | Duration: {round(float(duration), 2)}")
    step_footnote = "".join(footnote_parts)
    step_footnote = f"""<span style="color: #bbbbc2; font-size: 12px;">{step_footnote}</span> """
    yield gr.ChatMessage(role="assistant", content=f"{step_footnote}")
    yield gr.ChatMessage(role="assistant", content="-----")
111
+
112
+
113
def stream_to_gradio(
    agent,
    task: str,
    reset_agent_memory: bool = False,
    additional_args: Optional[dict] = None
):
    """Runs an agent with the given task and streams the messages from the agent as gradio ChatMessages.

    Args:
        agent: Object exposing ``run(task, stream=True, reset=..., additional_args=...)``
            and a ``model`` attribute.
        task: The prompt handed to ``agent.run``.
        reset_agent_memory: Forwarded to ``agent.run`` as ``reset``.
        additional_args: Forwarded unchanged to ``agent.run``.

    Yields:
        ``gr.ChatMessage`` objects for every streamed step, followed by one
        message carrying the final answer (the last streamed item).

    Raises:
        ModuleNotFoundError: If the ``gradio`` package is not available.
    """
    if not _is_package_available("gradio"):
        raise ModuleNotFoundError(
            "Please install 'gradio' extra to use the GradioUI: `pip install 'smolagents[gradio]'`"
        )
    import gradio as gr

    step_log = None  # stays None if the run yields nothing at all
    for step_log in agent.run(task, stream=True, reset=reset_agent_memory, additional_args=additional_args):
        # Attach the token counts to the step only when the model reports
        # both of them, so downstream number formatting never sees None.
        input_tokens = getattr(agent.model, "last_input_token_count", None)
        output_tokens = getattr(agent.model, "last_output_token_count", None)
        if isinstance(step_log, ActionStep) and input_tokens is not None and output_tokens is not None:
            step_log.input_token_count = input_tokens
            step_log.output_token_count = output_tokens

        yield from pull_messages_from_step(
            step_log,
        )

    if step_log is None:
        # Nothing was streamed, so there is no final answer to render.
        return

    final_answer = step_log  # Last log is the run's final_answer
    final_answer = handle_agent_output_types(final_answer)

    if isinstance(final_answer, AgentText):
        yield gr.ChatMessage(
            role="assistant",
            content=f"**Final answer:**\n{final_answer.to_string()}\n",
        )
    elif isinstance(final_answer, AgentImage):
        yield gr.ChatMessage(
            role="assistant",
            content={"path": final_answer.to_string(), "mime_type": "image/png"},
        )
    elif isinstance(final_answer, AgentAudio):
        yield gr.ChatMessage(
            role="assistant",
            content={"path": final_answer.to_string(), "mime_type": "audio/wav"},
        )
    else:
        yield gr.ChatMessage(role="assistant", content=f"**Final answer:** {str(final_answer)}")
163
+
164
+
165
class GradioUI:
    """A one-line interface to launch your agent in Gradio"""

    def __init__(self, agent: MultiStepAgent, file_upload_folder: str | None = None):
        """Store the agent and prepare the optional upload folder.

        Args:
            agent: The agent driven by the chat and evaluation sections.
            file_upload_folder: Directory where uploaded files are copied.
                ``None`` disables the upload widget in ``launch``.

        Raises:
            ModuleNotFoundError: If the ``gradio`` package is not available.
        """
        if not _is_package_available("gradio"):
            raise ModuleNotFoundError(
                "Please install 'gradio' extra to use the GradioUI: `pip install 'smolagents[gradio]'`"
            )
        self.agent = agent
        self.file_upload_folder = file_upload_folder
        if self.file_upload_folder is not None:
            # makedirs also handles nested paths, which os.mkdir does not.
            os.makedirs(self.file_upload_folder, exist_ok=True)

    def interact_with_agent(self, prompt, messages):
        """Append the user prompt, then stream agent messages into ``messages``.

        Yields the growing ``messages`` list after every appended message so
        the chatbot refreshes incrementally.
        """
        import gradio as gr

        messages.append(gr.ChatMessage(role="user", content=prompt))
        yield messages
        for msg in stream_to_gradio(self.agent, task=prompt, reset_agent_memory=False):
            messages.append(msg)
            yield messages
        yield messages

    def upload_file(
        self,
        file,
        file_uploads_log,
        allowed_file_types=None,
    ):
        """
        Handle file uploads, default allowed types are .pdf, .docx, and .txt

        Args:
            file: Uploaded file object exposing a ``name`` path, or ``None``.
            file_uploads_log: List of paths uploaded so far.
            allowed_file_types: MIME types to accept. ``None`` selects the
                default pdf/docx/plain-text set.

        Returns:
            A ``(gr.Textbox, list)`` pair: a status textbox and the upload
            log, extended with the new path on success.
        """
        import gradio as gr

        if allowed_file_types is None:
            # Built per call so the default is never a shared mutable object.
            allowed_file_types = [
                "application/pdf",
                "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                "text/plain",
            ]

        if file is None:
            return gr.Textbox("No file uploaded", visible=True), file_uploads_log

        try:
            mime_type, _ = mimetypes.guess_type(file.name)
        except Exception as e:
            return gr.Textbox(f"Error: {e}", visible=True), file_uploads_log

        if mime_type not in allowed_file_types:
            return gr.Textbox("File type disallowed", visible=True), file_uploads_log

        # Sanitize file name
        original_name = os.path.basename(file.name)
        sanitized_name = re.sub(
            r"[^\w\-.]", "_", original_name
        )  # Replace any non-alphanumeric, non-dash, or non-dot characters with underscores

        # First extension registered for each MIME type wins.
        type_to_ext = {}
        for ext, t in mimetypes.types_map.items():
            type_to_ext.setdefault(t, ext)

        # Ensure the extension correlates to the mime type
        name_parts = sanitized_name.split(".")[:-1]
        # Fall back to the file's own extension if the type has no known mapping.
        name_parts.append(type_to_ext.get(mime_type, os.path.splitext(sanitized_name)[1]))
        sanitized_name = "".join(name_parts)

        # Save the uploaded file to the specified folder
        file_path = os.path.join(self.file_upload_folder, os.path.basename(sanitized_name))
        shutil.copy(file.name, file_path)

        return gr.Textbox(f"File uploaded: {file_path}", visible=True), file_uploads_log + [file_path]

    def log_user_message(self, text_input, file_uploads_log):
        """Return ``(prompt, "")``: the prompt with the upload list appended, and an empty string that clears the input box."""
        files_note = (
            f"\nYou have been provided with these files, which might be helpful or not: {file_uploads_log}"
            if len(file_uploads_log) > 0
            else ""
        )
        return (
            text_input + files_note,
            "",
        )

    def launch(self, **kwargs):
        """Build the Gradio Blocks app (login, chat, evaluation) and launch it.

        Args:
            **kwargs: Forwarded to ``demo.launch``. ``debug`` defaults to
                ``True`` and ``share`` to ``False`` unless supplied.
        """
        with gr.Blocks() as demo:
            gr.Markdown("# Question Answering Agent Evaluation Runner")
            gr.Markdown(
                """
                **Welcome to the Question Answering Agent !**
                ## 1. Please start by logging in to your Hugging Face account using the button below. This uses your HF username for submission.
                """
            )
            gr.LoginButton()
            gr.Markdown(
                """
                ---
                ## 2. Interact with the agent below.
                """
            )
            stored_messages = gr.State([])
            file_uploads_log = gr.State([])
            chatbot = gr.Chatbot(
                label="Agent",
                type="messages",
                avatar_images=(
                    None,
                    "https://huggingface.co/datasets/agents-course/course-images/resolve/main/en/communication/Alfred.png",
                ),
                resizeable=True,
                scale=1,
            )
            # If an upload folder is provided, enable the upload feature
            if self.file_upload_folder is not None:
                upload_file = gr.File(label="Upload a file")
                upload_status = gr.Textbox(label="Upload Status", interactive=False, visible=False)
                upload_file.change(
                    self.upload_file,
                    [upload_file, file_uploads_log],
                    [upload_status, file_uploads_log],
                )
            text_input = gr.Textbox(lines=1, label="Chat Message")
            text_input.submit(
                self.log_user_message,
                [text_input, file_uploads_log],
                [stored_messages, text_input],
            ).then(self.interact_with_agent, [stored_messages, chatbot], [chatbot])

            gr.Markdown(
                """
                ---
                ## 3.Run Evaluation on GAIA Benchmark & Submit All Answers
                """
            )
            run_button = gr.Button("Run Evaluation on GAIA Benchmark & Submit All Answers")

            status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
            # Removed max_rows=10 from DataFrame constructor
            results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

            # Optional profile: matches run_and_submit_all's own
            # `gr.OAuthProfile | None` parameter, so a logged-out click can
            # pass None through instead of requiring a logged-in user.
            def run_with_login_state(profile: gr.OAuthProfile | None):
                return run_and_submit_all(self.agent, profile)

            run_button.click(
                fn=run_with_login_state,
                outputs=[status_output, results_table]
            )

        # setdefault keeps the original defaults while avoiding a duplicate
        # keyword TypeError when the caller passes debug/share explicitly.
        kwargs.setdefault("debug", True)
        kwargs.setdefault("share", False)
        demo.launch(**kwargs)
311
+
312
+