Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python | |
| # coding=utf-8 | |
| # Copyright 2024 The HuggingFace Inc. team. All rights reserved. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| import base64 | |
| import mimetypes | |
| import os | |
| import re | |
| import shutil | |
| import uuid | |
| from typing import Optional | |
| import requests | |
| from smolagents.agent_types import AgentAudio, AgentImage, AgentText, handle_agent_output_types | |
| from smolagents.agents import ActionStep, MultiStepAgent | |
| from smolagents.memory import FinalAnswerStep, MemoryStep | |
| from smolagents.utils import _is_package_available | |
def pull_messages_from_step(step_log: MemoryStep):
    """Extract ChatMessage objects from agent steps with proper nesting.

    Only ``ActionStep`` instances produce output; any other step type yields
    nothing. For an ``ActionStep`` the messages are yielded in this order:

    1. a bold step header,
    2. the model output, with ``<end_code>`` markers next to code fences removed,
    3. the first tool call as a parent message, with execution logs and any
       error nested under it through ``parent_id`` (or a standalone error
       message when the step has no tool call),
    4. a small footnote with token counts and duration when available,
    5. a ``-----`` separator.

    Args:
        step_log: The memory step to render.

    Yields:
        ``gr.ChatMessage`` objects with ``role="assistant"``.
    """
    import gradio as gr

    if not isinstance(step_log, ActionStep):
        return

    step_number = f"Step {step_log.step_number}" if step_log.step_number is not None else ""
    yield gr.ChatMessage(role="assistant", content=f"**{step_number}**")

    # Show model output
    raw_output = getattr(step_log, "model_output", None)
    if raw_output is not None:
        model_output = raw_output.strip()
        # Drop <end_code> markers in the three placements handled here:
        # after a fence, before a fence, and on the line following a fence.
        model_output = re.sub(r"```\s*<end_code>", "```", model_output)
        model_output = re.sub(r"<end_code>\s*```", "```", model_output)
        model_output = re.sub(r"```\s*\n\s*<end_code>", "```", model_output)
        model_output = model_output.strip()
        yield gr.ChatMessage(role="assistant", content=model_output)

    tool_calls = getattr(step_log, "tool_calls", None)
    step_error = getattr(step_log, "error", None)

    # Tool call display. An empty list is treated like "no tool call" so that
    # indexing the first call cannot raise IndexError.
    if tool_calls:
        first_tool_call = tool_calls[0]
        used_code = first_tool_call.name == "python_interpreter"
        parent_id = f"call_{len(tool_calls)}"

        args = first_tool_call.arguments
        if isinstance(args, dict):
            content = str(args.get("answer", str(args)))
        else:
            content = str(args).strip()

        if used_code:
            # Strip existing fence-opening lines and end-of-code markers, then
            # wrap the remaining code in a python fence.
            content = re.sub(r"```.*?\n", "", content)
            content = re.sub(r"\s*<end_code>\s*", "", content)
            content = content.strip()
            if not content.startswith("```python"):
                content = f"```python\n{content}\n```"

        parent_message_tool = gr.ChatMessage(
            role="assistant",
            content=content,
            metadata={
                "title": f"🛠️ Used tool {first_tool_call.name}",
                "id": parent_id,
                "status": "pending",
            },
        )
        yield parent_message_tool

        # Tool observations (logs), nested under the tool call
        observations = getattr(step_log, "observations", None)
        log_content = observations.strip() if observations is not None else ""
        if log_content:
            log_content = re.sub(r"^Execution logs:\s*", "", log_content)
            yield gr.ChatMessage(
                role="assistant",
                content=f"{log_content}",
                metadata={"title": "📝 Execution Logs", "parent_id": parent_id, "status": "done"},
            )

        # Tool error, nested under the tool call
        if step_error is not None:
            yield gr.ChatMessage(
                role="assistant",
                content=str(step_error),
                metadata={"title": "💥 Error", "parent_id": parent_id, "status": "done"},
            )

        # The parent message was already yielded; its status is updated in place.
        parent_message_tool.metadata["status"] = "done"
    # Standalone error
    elif step_error is not None:
        yield gr.ChatMessage(role="assistant", content=str(step_error), metadata={"title": "💥 Error"})

    # Footnote with tokens and timing. Missing or None values are skipped
    # instead of being formatted, which would raise TypeError.
    step_footnote = f"{step_number}"
    input_tokens = getattr(step_log, "input_token_count", None)
    output_tokens = getattr(step_log, "output_token_count", None)
    if input_tokens is not None and output_tokens is not None:
        step_footnote += f" | Input-tokens:{input_tokens:,} | Output-tokens:{output_tokens:,}"
    duration = getattr(step_log, "duration", None)
    if duration:
        step_footnote += f" | Duration: {round(float(duration), 2)}"
    step_footnote = f"""<span style="color: #bbbbc2; font-size: 12px;">{step_footnote}</span> """
    yield gr.ChatMessage(role="assistant", content=f"{step_footnote}")
    yield gr.ChatMessage(role="assistant", content="-----")
def _save_agent_image(agent_img: AgentImage) -> str:
    """Resolve an AgentImage to a file path that Gradio can be pointed at.

    Strategies are tried in order and the first one that succeeds wins:

    1. ``to_string()`` is already an existing local path (returned as is),
    2. ``to_string()`` starts with ``http`` and can be downloaded,
    3. ``to_string()`` mentions ``base64`` in its first 50 characters and decodes,
    4. one of the attributes ``value`` / ``image`` / ``data`` / ``pil_image``
       holds an object with a ``save`` method or raw bytes,
    5. ``to_pil()`` returns an object with a ``save`` method.

    Every failure is swallowed on purpose so the next strategy can run. The
    target file is always named ``generated_images/image_<8 hex chars>.png``,
    whatever the real format of the written bytes. If nothing succeeds, that
    target path is still returned even though no file may exist there.
    """
    out_dir = "generated_images"
    os.makedirs(out_dir, exist_ok=True)
    target = os.path.join(out_dir, f"image_{uuid.uuid4().hex[:8]}.png")

    def _dump(payload) -> str:
        # Write raw bytes to the target file and hand back its path.
        with open(target, "wb") as fh:
            fh.write(payload)
        return target

    serialized = agent_img.to_string()
    if isinstance(serialized, str):
        # 1) Already a valid local file path
        if os.path.exists(serialized):
            return serialized

        # 2) Looks like a URL
        if serialized.startswith("http"):
            try:
                response = requests.get(serialized, timeout=30)
                response.raise_for_status()
                return _dump(response.content)
            except Exception:
                pass

        # 3) Looks like base64 (optionally a data URI)
        if "base64" in serialized[:50].lower():
            try:
                return _dump(base64.b64decode(serialized.split("base64,")[-1]))
            except Exception:
                pass

    # 4) Look for a PIL-like object or raw bytes on common attributes
    for field in ("value", "image", "data", "pil_image"):
        if not hasattr(agent_img, field):
            continue
        payload = getattr(agent_img, field)
        if payload is None:
            continue

        if hasattr(payload, "save"):
            try:
                payload.save(target)
                return target
            except Exception:
                pass

        if isinstance(payload, (bytes, bytearray)):
            try:
                return _dump(payload)
            except Exception:
                pass

    # 5) Ask the object for a PIL image
    if hasattr(agent_img, "to_pil"):
        try:
            rendered = agent_img.to_pil()
            if rendered is not None and hasattr(rendered, "save"):
                rendered.save(target)
                return target
        except Exception:
            pass

    # Nothing worked: still return the path so the caller does not crash here.
    return target
def _local_image_content(text: str) -> Optional[dict]:
    """Return Gradio file content for *text* if it names an existing local image.

    The text is stripped first. ``None`` is returned when it is empty, does
    not exist on disk, or its guessed MIME type does not start with ``image/``.
    """
    path = text.strip()
    if not path or not os.path.exists(path):
        return None
    mime_type, _ = mimetypes.guess_type(path)
    if not mime_type or not mime_type.startswith("image/"):
        return None
    return {"path": path, "mime_type": mime_type}


def stream_to_gradio(agent, task: str, reset_agent_memory: bool = False, additional_args: Optional[dict] = None):
    """Runs an agent with the given task and streams the messages from the agent as gradio ChatMessages.

    Each streamed step is rendered through ``pull_messages_from_step``. The
    last streamed item is then treated as the final answer and rendered as an
    inline image, text, audio, or a generic string.

    Args:
        agent: Object exposing ``run(task, stream=True, reset=..., additional_args=...)``.
        task: The prompt handed to the agent.
        reset_agent_memory: Forwarded to ``agent.run`` as ``reset``.
        additional_args: Forwarded unchanged to ``agent.run``.

    Yields:
        ``gr.ChatMessage`` objects with ``role="assistant"``.

    Raises:
        ModuleNotFoundError: If the ``gradio`` package is not available.
    """
    if not _is_package_available("gradio"):
        raise ModuleNotFoundError(
            "Please install 'gradio' extra to use the GradioUI: `pip install 'smolagents[gradio]'`"
        )
    import gradio as gr

    # Sentinel so an empty stream is detected instead of hitting an unbound
    # loop variable after the loop.
    nothing_streamed = object()
    step_log = nothing_streamed

    for step_log in agent.run(task, stream=True, reset=reset_agent_memory, additional_args=additional_args):
        if isinstance(step_log, ActionStep):
            model = getattr(agent, "model", None)
            input_tokens = getattr(model, "last_input_token_count", None)
            output_tokens = getattr(model, "last_output_token_count", None)
            # Attach counts only when both are known; a None count would make
            # the thousands-separator formatting in the step footnote fail.
            if input_tokens is not None and output_tokens is not None:
                step_log.input_token_count = input_tokens
                step_log.output_token_count = output_tokens
        yield from pull_messages_from_step(step_log)

    if step_log is nothing_streamed:
        # The agent produced no steps, so there is no final answer to show.
        return

    raw_final_answer = step_log.final_answer if isinstance(step_log, FinalAnswerStep) else step_log

    # If a tool returns a local image path (e.g. via `save_image`), render it inline in the chat.
    if isinstance(raw_final_answer, str):
        image_content = _local_image_content(raw_final_answer)
        if image_content is not None:
            yield gr.ChatMessage(role="assistant", content=image_content)
            return

    final_answer = handle_agent_output_types(raw_final_answer)

    if isinstance(final_answer, AgentText):
        # If the text is actually a local image path, render the image.
        text = final_answer.to_string().strip()
        image_content = _local_image_content(text)
        if image_content is not None:
            yield gr.ChatMessage(role="assistant", content=image_content)
            return
        yield gr.ChatMessage(role="assistant", content=f"**Final answer:**\n{text}\n")
    elif isinstance(final_answer, AgentImage):
        img_path = _save_agent_image(final_answer)
        yield gr.ChatMessage(role="assistant", content={"path": img_path, "mime_type": "image/png"})
    elif isinstance(final_answer, AgentAudio):
        yield gr.ChatMessage(role="assistant", content={"path": final_answer.to_string(), "mime_type": "audio/wav"})
    else:
        yield gr.ChatMessage(role="assistant", content=f"**Final answer:** {str(final_answer)}")
class GradioUI:
    """A one-line interface to launch your agent in Gradio"""

    # MIME types accepted by `upload_file` when the caller passes none
    # (.pdf, .docx and .txt). Kept as a tuple so the default cannot be mutated.
    _DEFAULT_ALLOWED_FILE_TYPES = (
        "application/pdf",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "text/plain",
    )

    def __init__(self, agent: "MultiStepAgent", file_upload_folder: Optional[str] = None):
        """Store the agent and prepare the optional upload folder.

        Args:
            agent: The agent that will answer chat messages.
            file_upload_folder: Directory where uploaded files are copied.
                When ``None``, the upload widget is not shown by ``launch``.

        Raises:
            ModuleNotFoundError: If the ``gradio`` package is not available.
        """
        if not _is_package_available("gradio"):
            raise ModuleNotFoundError(
                "Please install 'gradio' extra to use the GradioUI: `pip install 'smolagents[gradio]'`"
            )
        self.agent = agent
        self.file_upload_folder = file_upload_folder
        if self.file_upload_folder is not None:
            # makedirs also creates missing parents and tolerates a folder
            # that appears between a check and the creation.
            os.makedirs(self.file_upload_folder, exist_ok=True)

    def interact_with_agent(self, prompt, messages):
        """Append the user prompt and every agent message to ``messages``.

        The same ``messages`` list is yielded after each append so the chat
        display can refresh as the agent progresses.
        """
        import gradio as gr

        messages.append(gr.ChatMessage(role="user", content=prompt))
        yield messages
        for msg in stream_to_gradio(self.agent, task=prompt, reset_agent_memory=False):
            messages.append(msg)
            yield messages
        yield messages

    def upload_file(self, file, file_uploads_log, allowed_file_types=None):
        """Handle file uploads, default allowed types are .pdf, .docx, and .txt

        Args:
            file: Uploaded file object exposing a ``name`` path, or ``None``.
            file_uploads_log: List of paths uploaded so far.
            allowed_file_types: Collection of accepted MIME types. ``None``
                selects the default .pdf/.docx/.txt set.

        Returns:
            A ``(gr.Textbox, list)`` pair: the status box to display and the
            upload log, extended with the new path on success.
        """
        import gradio as gr

        if allowed_file_types is None:
            allowed_file_types = self._DEFAULT_ALLOWED_FILE_TYPES

        if file is None:
            return gr.Textbox("No file uploaded", visible=True), file_uploads_log

        try:
            mime_type, _ = mimetypes.guess_type(file.name)
        except Exception as e:
            return gr.Textbox(f"Error: {e}", visible=True), file_uploads_log

        if mime_type not in allowed_file_types:
            return gr.Textbox("File type disallowed", visible=True), file_uploads_log

        # Replace every character that is not a word character, dash or dot.
        original_name = os.path.basename(file.name)
        sanitized_name = re.sub(r"[^\w\-.]", "_", original_name)

        # Use the first extension registered for the detected MIME type; if
        # none is registered, keep the uploaded file's own extension.
        extension = next(
            (ext for ext, known_type in mimetypes.types_map.items() if known_type == mime_type),
            None,
        )
        if extension is None:
            extension = os.path.splitext(sanitized_name)[1]

        # Drop the original extension and all remaining dots from the stem so
        # the stored name carries exactly one extension.
        stem = "".join(sanitized_name.split(".")[:-1])
        sanitized_name = stem + extension

        file_path = os.path.join(self.file_upload_folder, os.path.basename(sanitized_name))
        shutil.copy(file.name, file_path)
        return gr.Textbox(f"File uploaded: {file_path}", visible=True), file_uploads_log + [file_path]

    def log_user_message(self, text_input, file_uploads_log):
        """Build the prompt sent to the agent and clear the input box.

        Returns:
            A ``(prompt, "")`` pair. The prompt is ``text_input`` followed by
            a note listing the uploaded files when there are any; the empty
            string is the new content of the text box.
        """
        if file_uploads_log:
            text_input += (
                f"\nYou have been provided with these files, which might be helpful or not: {file_uploads_log}"
            )
        return text_input, ""

    def launch(self, **kwargs):
        """Build the chat interface and start the Gradio server.

        Args:
            **kwargs: Forwarded to ``demo.launch``. ``debug`` defaults to
                ``True`` and ``share`` defaults to ``True`` unless the
                ``SPACE_ID`` environment variable is set; explicit values
                passed by the caller take precedence.
        """
        import gradio as gr

        with gr.Blocks(fill_height=True) as demo:
            stored_messages = gr.State([])
            file_uploads_log = gr.State([])
            chatbot = gr.Chatbot(
                label="Agent",
                avatar_images=(
                    None,
                    "https://huggingface.co/datasets/agents-course/course-images/resolve/main/en/communication/Alfred.png",
                ),
                resizable=True,
                scale=1,
            )
            # The upload widgets only exist when an upload folder was configured.
            if self.file_upload_folder is not None:
                upload_file = gr.File(label="Upload a file")
                upload_status = gr.Textbox(label="Upload Status", interactive=False, visible=False)
                upload_file.change(
                    self.upload_file,
                    [upload_file, file_uploads_log],
                    [upload_status, file_uploads_log],
                )
            text_input = gr.Textbox(lines=1, label="Chat Message")
            text_input.submit(
                self.log_user_message,
                [text_input, file_uploads_log],
                [stored_messages, text_input],
            ).then(self.interact_with_agent, [stored_messages, chatbot], [chatbot])

        # Disable share on Spaces automatically
        is_spaces = os.environ.get("SPACE_ID") is not None
        # setdefault keeps caller-supplied `debug`/`share` values instead of
        # failing with "got multiple values for keyword argument".
        kwargs.setdefault("debug", True)
        kwargs.setdefault("share", not is_spaces)
        demo.launch(**kwargs)
# Names exported by a star-import of this module.
__all__ = ["stream_to_gradio", "GradioUI"]