Ben Beinke committed
Commit 40eee5b · 1 Parent(s): eeb84e8

Updated UI to only show final message

Files changed (3)
  1. app.py +25 -3
  2. kiss_agent.py +1 -1
  3. ui_helpers.py +356 -0
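
For context, the change leans on Gradio's metadata-based nested "thoughts": a `gr.ChatMessage` whose `metadata` carries an `id` renders as a collapsible parent entry, any message whose `metadata["parent_id"]` matches is nested inside it, and `status="pending"` shows a spinner until it is flipped to `"done"`. A minimal standalone sketch of that mechanism (not part of this commit; assumes a Gradio version with `gr.ChatMessage` metadata support and a `type="messages"` Chatbot):

```python
import time

import gradio as gr


def respond(history):
    # Parent entry: collapsible, with a spinner while status is "pending".
    # Same millisecond-timestamp ID scheme as the commit uses.
    parent = gr.ChatMessage(
        role="assistant",
        content="",
        metadata={"id": int(time.time() * 1000), "title": "...", "status": "pending"},
    )
    history.append(parent)
    yield history
    # Intermediate step: nested under the parent because parent_id matches its id
    history.append(
        gr.ChatMessage(
            role="assistant",
            content="intermediate reasoning...",
            metadata={
                "title": "🤔 Thinking",
                "status": "done",
                "parent_id": parent.metadata["id"],
            },
        )
    )
    yield history
    # Final answer stays top-level; flipping the parent to "done" stops its spinner
    parent.metadata["status"] = "done"
    history.append(gr.ChatMessage(role="assistant", content="**Final answer:** 42"))
    yield history


with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    demo.load(respond, chatbot, chatbot)

demo.launch()
```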
app.py CHANGED
@@ -7,7 +7,7 @@ from pathlib import Path
 import gradio as gr
 import requests
 from smolagents.agents import MultiStepAgent
-from smolagents.gradio_ui import stream_to_gradio
+from ui_helpers import stream_to_gradio
 
 # from src.manager_agent import GradioManagerAgent
 from src.utils import load_file
@@ -181,10 +181,19 @@ class GradioUI:
 
     def __init__(self, agent: MultiStepAgent):
        self.agent = agent
+        self.parent_id = None
 
     def interact_with_agent(self, prompt, messages, session_state):
         import gradio as gr
 
+        self.parent_id = int(time.time() * 1000)
+        messages.append(
+            gr.ChatMessage(
+                role="assistant",
+                content="",
+                metadata={"id": self.parent_id, "title": "...", "status": "pending"},
+            )
+        )
         # Get the agent type from the template agent
         if "agent" not in session_state:
             session_state["agent"] = self.agent
@@ -196,12 +205,25 @@ class GradioUI:
         yield messages
 
         for msg in stream_to_gradio(
-            session_state["agent"], task=prompt, reset_agent_memory=False
+            session_state["agent"],
+            task=prompt,
+            reset_agent_memory=False,
+            parent_id=self.parent_id,
         ):
             if isinstance(msg, gr.ChatMessage):
-                messages[-1].metadata["status"] = "done"
+                # messages[-1].metadata["status"] = "done"
                 # TODO make it so that only the final answer is shown, rest in drop down
                 messages.append(msg)
+                messages[-1].metadata["status"] = "done"
+                if msg.content.startswith("**Final answer:**"):
+                    # Set the parent message status to done when final answer is reached
+                    for message in messages:
+                        if (
+                            isinstance(message, gr.ChatMessage)
+                            and message.metadata.get("id") == self.parent_id
+                        ):
+                            message.metadata["status"] = "done"
+                            break
             elif isinstance(msg, str):  # Then it's only a completion delta
                 msg = msg.replace("<", r"\<").replace(
                     ">", r"\>"
kiss_agent.py CHANGED
@@ -271,7 +271,7 @@ def test_app_py() -> str:
     try:
         # Wait for a short time to see if the process starts successfully
        # If it exits immediately with an error, we'll catch it
-        stdout, stderr = process.communicate(timeout=3)
+        stdout, stderr = process.communicate(timeout=5)
 
         # If we get here, the process exited within 10 seconds
         # Check for errors in stderr or stdout, not just return code
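
The bump from `timeout=3` to `timeout=5` widens the startup window, so slow startup failures have more time to surface before the test declares success. A standalone sketch of the pattern (hypothetical script; the real check lives in `test_app_py`), where `subprocess.TimeoutExpired` is the healthy outcome for a long-running Gradio app:

```python
import subprocess
import sys

process = subprocess.Popen(
    [sys.executable, "app.py"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True,
)
try:
    # Give the app a few seconds to fail fast on import/config errors
    stdout, stderr = process.communicate(timeout=5)
    # Reaching this point means the process exited within the timeout,
    # which usually signals a startup crash for a server-like app
    print(f"app exited early (code {process.returncode}):\n{stderr}")
except subprocess.TimeoutExpired:
    process.kill()  # still running after the window: startup looks healthy
    print("app started successfully")
```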
ui_helpers.py ADDED
@@ -0,0 +1,356 @@
+#!/usr/bin/env python
+# coding=utf-8
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import re
+import shutil
+from pathlib import Path
+from typing import Generator
+import time
+from smolagents.agent_types import AgentAudio, AgentImage, AgentText
+from smolagents.agents import MultiStepAgent, PlanningStep
+from smolagents.memory import ActionStep, FinalAnswerStep
+from smolagents.models import ChatMessageStreamDelta
+from smolagents.utils import _is_package_available
+
+
+def get_step_footnote_content(
+    step_log: ActionStep | PlanningStep, step_name: str
+) -> str:
+    """Get a footnote string for a step log with duration and token information"""
+    step_footnote = f"**{step_name}**"
+    if step_log.token_usage is not None:
+        step_footnote += f" | Input tokens: {step_log.token_usage.input_tokens:,} | Output tokens: {step_log.token_usage.output_tokens:,}"
+    step_footnote += (
+        f" | Duration: {round(float(step_log.timing.duration), 2)}s"
+        if step_log.timing.duration
+        else ""
+    )
+    step_footnote_content = (
+        f"""<span style="color: #bbbbc2; font-size: 12px;">{step_footnote}</span> """
+    )
+    return step_footnote_content
+
+
+def _clean_model_output(model_output: str) -> str:
+    """
+    Clean up model output by removing trailing tags and extra backticks.
+
+    Args:
+        model_output (`str`): Raw model output.
+
+    Returns:
+        `str`: Cleaned model output.
+    """
+    if not model_output:
+        return ""
+    model_output = model_output.strip()
+    # Remove any trailing <end_code> and extra backticks, handling multiple possible formats
+    model_output = re.sub(
+        r"```\s*<end_code>", "```", model_output
+    )  # handles ```<end_code>
+    model_output = re.sub(
+        r"<end_code>\s*```", "```", model_output
+    )  # handles <end_code>```
+    model_output = re.sub(
+        r"```\s*\n\s*<end_code>", "```", model_output
+    )  # handles ```\n<end_code>
+    return model_output.strip()
+
+
+def _format_code_content(content: str) -> str:
+    """
+    Format code content as Python code block if it's not already formatted.
+
+    Args:
+        content (`str`): Code content to format.
+
+    Returns:
+        `str`: Code content formatted as a Python code block.
+    """
+    content = content.strip()
+    # Remove existing code blocks and end_code tags
+    content = re.sub(r"```.*?\n", "", content)
+    content = re.sub(r"\s*<end_code>\s*", "", content)
+    content = content.strip()
+    # Add Python code block formatting if not already present
+    if not content.startswith("```python"):
+        content = f"```python\n{content}\n```"
+    return content
+
+
+def _process_action_step(
+    step_log: ActionStep, skip_model_outputs: bool = False, parent_id: int | None = None
+) -> Generator:
+    """
+    Process an [`ActionStep`] and yield appropriate Gradio ChatMessage objects.
+
+    Args:
+        step_log ([`ActionStep`]): ActionStep to process.
+        skip_model_outputs (`bool`): Whether to skip model outputs.
+
+    Yields:
+        `gradio.ChatMessage`: Gradio ChatMessages representing the action step.
+    """
+    import gradio as gr
+
+    # Output the step number
+    step_number = f"Step {step_log.step_number}"
+    # if not skip_model_outputs:
+    #     # yield gr.ChatMessage(
+    #     #     role="assistant", content=f"**{step_number}**", metadata={"status": "done"}
+    #     # )
+
+    # First yield the thought/reasoning from the LLM
+    if not skip_model_outputs and getattr(step_log, "model_output", ""):
+        model_output = _clean_model_output(step_log.model_output)
+        yield gr.ChatMessage(
+            role="assistant",
+            content=model_output,
+            metadata={
+                "title": "🤔 Thinking",
+                "status": "done",
+                "id": int(time.time() * 1000),
+                "parent_id": parent_id,
+            },
+        )
+
+    # For tool calls, create a parent message
+    if getattr(step_log, "tool_calls", []):
+        first_tool_call = step_log.tool_calls[0]
+        used_code = first_tool_call.name == "python_interpreter"
+
+        # Process arguments based on type
+        args = first_tool_call.arguments
+        if isinstance(args, dict):
+            content = str(args.get("answer", str(args)))
+        else:
+            content = str(args).strip()
+
+        # Format code content if needed
+        if used_code:
+            content = _format_code_content(content)
+
+        # Create the tool call message
+        parent_message_tool = gr.ChatMessage(
+            role="assistant",
+            content=content,
+            metadata={
+                "title": f"🛠️ Used tool {first_tool_call.name}",
+                "status": "done",
+                "parent_id": parent_id,
+                "id": int(time.time() * 1000),
+            },
+        )
+        yield parent_message_tool
+
+    # Display execution logs if they exist
+    if getattr(step_log, "observations", "") and step_log.observations.strip():
+        log_content = step_log.observations.strip()
+        if log_content:
+            log_content = re.sub(r"^Execution logs:\s*", "", log_content)
+            yield gr.ChatMessage(
+                role="assistant",
+                content=f"```bash\n{log_content}\n",
+                metadata={
+                    "title": "📝 Execution Logs",
+                    "status": "done",
+                    "parent_id": parent_id,
+                    "id": int(time.time() * 1000),
+                },
+            )
+
+    # Display any images in observations
+    if getattr(step_log, "observations_images", []):
+        for image in step_log.observations_images:
+            path_image = AgentImage(image).to_string()
+            yield gr.ChatMessage(
+                role="assistant",
+                content={
+                    "path": path_image,
+                    "mime_type": f"image/{path_image.split('.')[-1]}",
+                },
+                metadata={
+                    "title": "🖼️ Output Image",
+                    "status": "done",
+                    "parent_id": parent_id,
+                    "id": int(time.time() * 1000),
+                },
+            )
+
+    # Handle errors
+    if getattr(step_log, "error", None):
+        yield gr.ChatMessage(
+            role="assistant",
+            content=str(step_log.error),
+            metadata={
+                "title": "💥 Error",
+                "status": "done",
+                "parent_id": parent_id,
+                "id": int(time.time() * 1000),
+            },
+        )
+
+    # Add step footnote and separator
+    # yield gr.ChatMessage(
+    #     role="assistant",
+    #     content=get_step_footnote_content(step_log, step_number),
+    #     metadata={
+    #         "status": "done",
+    #         "parent_id": parent_id,
+    #         "id": int(time.time() * 1000),
+    #     },
+    # )
+    # yield gr.ChatMessage(
+    #     role="assistant",
+    #     content="-----",
+    #     metadata={
+    #         "status": "done",
+    #         "parent_id": parent_id,
+    #         "id": int(time.time() * 1000),
+    #     },
+    # )
+
+
+def _process_planning_step(
+    step_log: PlanningStep, skip_model_outputs: bool = False
+) -> Generator:
+    """
+    Process a [`PlanningStep`] and yield appropriate gradio.ChatMessage objects.
+
+    Args:
+        step_log ([`PlanningStep`]): PlanningStep to process.
+
+    Yields:
+        `gradio.ChatMessage`: Gradio ChatMessages representing the planning step.
+    """
+    import gradio as gr
+
+    if not skip_model_outputs:
+        yield gr.ChatMessage(
+            role="assistant", content="**Planning step**", metadata={"status": "done"}
+        )
+        yield gr.ChatMessage(
+            role="assistant", content=step_log.plan, metadata={"status": "done"}
+        )
+    yield gr.ChatMessage(
+        role="assistant",
+        content=get_step_footnote_content(step_log, "Planning step"),
+        metadata={"status": "done"},
+    )
+    yield gr.ChatMessage(role="assistant", content="-----", metadata={"status": "done"})
+
+
+def _process_final_answer_step(step_log: FinalAnswerStep) -> Generator:
+    """
+    Process a [`FinalAnswerStep`] and yield appropriate gradio.ChatMessage objects.
+
+    Args:
+        step_log ([`FinalAnswerStep`]): FinalAnswerStep to process.
+
+    Yields:
+        `gradio.ChatMessage`: Gradio ChatMessages representing the final answer.
+    """
+    import gradio as gr
+
+    final_answer = step_log.output
+    if isinstance(final_answer, AgentText):
+        yield gr.ChatMessage(
+            role="assistant",
+            content=f"**Final answer:**\n{final_answer.to_string()}\n",
+            metadata={"status": "done"},
+        )
+    elif isinstance(final_answer, AgentImage):
+        yield gr.ChatMessage(
+            role="assistant",
+            content={"path": final_answer.to_string(), "mime_type": "image/png"},
+            metadata={"status": "done"},
+        )
+    elif isinstance(final_answer, AgentAudio):
+        yield gr.ChatMessage(
+            role="assistant",
+            content={"path": final_answer.to_string(), "mime_type": "audio/wav"},
+            metadata={"status": "done"},
+        )
+    else:
+        yield gr.ChatMessage(
+            role="assistant",
+            content=f"**Final answer:** {str(final_answer)}",
+            metadata={"status": "done"},
+        )
+
+
+def pull_messages_from_step(
+    step_log: ActionStep | PlanningStep | FinalAnswerStep,
+    skip_model_outputs: bool = False,
+    parent_id: int | None = None,
+):
+    """Extract Gradio ChatMessage objects from agent steps with proper nesting.
+
+    Args:
+        step_log: The step log to display as gr.ChatMessage objects.
+        skip_model_outputs: If True, skip the model outputs when creating the gr.ChatMessage objects.
+            This is used for instance when streaming model outputs have already been displayed.
+        parent_id: The ID of the parent message; a message is nested under another by setting its parent_id to the id of that parent message.
+    """
+    if not _is_package_available("gradio"):
+        raise ModuleNotFoundError(
+            "Please install 'gradio' extra to use the GradioUI: `pip install 'smolagents[gradio]'`"
+        )
+    if isinstance(step_log, ActionStep):
+        yield from _process_action_step(step_log, skip_model_outputs, parent_id)
+    elif isinstance(step_log, PlanningStep):
+        yield from _process_planning_step(step_log, skip_model_outputs)
+    elif isinstance(step_log, FinalAnswerStep):
+        yield from _process_final_answer_step(step_log)
+    else:
+        raise ValueError(f"Unsupported step type: {type(step_log)}")
+
+
+def stream_to_gradio(
+    agent,
+    task: str,
+    task_images: list | None = None,
+    reset_agent_memory: bool = False,
+    additional_args: dict | None = None,
+    parent_id: int | None = None,
+) -> Generator:
+    """Runs an agent with the given task and streams the messages from the agent as gradio ChatMessages."""
+    if not _is_package_available("gradio"):
+        raise ModuleNotFoundError(
+            "Please install 'gradio' extra to use the GradioUI: `pip install 'smolagents[gradio]'`"
+        )
+    intermediate_text = ""
+
+    for event in agent.run(
+        task,
+        images=task_images,
+        stream=True,
+        reset=reset_agent_memory,
+        additional_args=additional_args,
+    ):
+        print(f"parent_id: {parent_id}")
+        if isinstance(event, ActionStep | PlanningStep | FinalAnswerStep):
+            intermediate_text = ""
+            for message in pull_messages_from_step(
+                event,
+                # If we're streaming model outputs, no need to display them twice
+                skip_model_outputs=getattr(agent, "stream_outputs", False),
+                parent_id=parent_id,
+            ):
+                yield message
+        elif isinstance(event, ChatMessageStreamDelta):
+            intermediate_text += event.content or ""
+            yield intermediate_text
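
A minimal usage sketch for the new helper (not part of the commit; it mirrors how `interact_with_agent` in app.py drives it, and assumes `CodeAgent` and `InferenceClientModel` from a recent smolagents release):

```python
import time

import gradio as gr
from smolagents import CodeAgent, InferenceClientModel  # assumed smolagents API

from ui_helpers import stream_to_gradio

agent = CodeAgent(tools=[], model=InferenceClientModel())

# Parent message that intermediate steps will be nested under
parent_id = int(time.time() * 1000)
messages = [
    gr.ChatMessage(
        role="assistant",
        content="",
        metadata={"id": parent_id, "title": "...", "status": "pending"},
    )
]
for msg in stream_to_gradio(agent, task="What is 2 + 2?", parent_id=parent_id):
    if isinstance(msg, gr.ChatMessage):
        messages.append(msg)  # step messages arrive tagged with parent_id
    else:
        latest_delta = msg  # plain strings are cumulative text for the current step
```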