# Source: Hugging Face Space "RemVdH/Space2PQM" (status: Sleeping; file size: 9,766 bytes)

#!/usr/bin/env python

import gradio as gr

import mimetypes
import os
import re
import shutil
from typing import Optional

from smolagents.agent_types import AgentAudio, AgentImage, AgentText, handle_agent_output_types
from smolagents.agents import ActionStep, MultiStepAgent
from smolagents.memory import MemoryStep
from smolagents.utils import _is_package_available

def clean_up_LLM_output(p_output: str):
    """Normalise code-fence endings in raw LLM output.

    Every ``<end_code>`` marker that sits directly next to a triple-backtick
    fence (before or after it, optionally separated by whitespace) is
    collapsed into the bare fence, then surrounding whitespace is trimmed.

    Args:
        p_output: Raw text produced by the model.

    Returns:
        The cleaned text.
    """
    # Applied in this order; each match is rewritten to a plain closing fence.
    fence_patterns = (
        r"```\s*<end_code>",  # fence immediately followed by the marker
        r"<end_code>\s*```",  # marker immediately followed by the fence
        r"```\s*\n\s*<end_code>",  # fence, newline, then the marker
    )
    cleaned = p_output
    for pattern in fence_patterns:
        cleaned = re.sub(pattern, "```", cleaned)
    return cleaned.strip()

def clean_up_code_output(p_content: str):
    """Return the given code wrapped in exactly one ```python fenced block.

    Existing fence lines and ``<end_code>`` tags are stripped first so the
    result is not double-fenced.

    Args:
        p_content: Code text, possibly already fenced and/or carrying
            ``<end_code>`` tags.

    Returns:
        The code wrapped as a ```python block, or the stripped content
        unchanged if it still starts with ```python after clean-up.
    """
    # Remove fence lines: a fence plus anything up to and including the newline.
    r_content = re.sub(r"```.*?\n", "", p_content)
    r_content = re.sub(r"\s*<end_code>\s*", "", r_content)  # Remove end_code tags
    r_content = r_content.strip()
    if not r_content.startswith("```python"):
        # A closing fence at the very end of the text has no newline after it,
        # so the substitution above leaves it behind. Drop it here, otherwise
        # the wrapper below would produce a doubled closing fence.
        if r_content.endswith("```"):
            r_content = r_content[:-3].rstrip()
        r_content = f"```python\n{r_content}\n```"
    return r_content

def pull_messages_from_step(step_log: MemoryStep):
    """Extract ChatMessage objects from agent steps with proper nesting.

    Only ``ActionStep`` instances produce messages; any other step yields
    nothing.

    Args:
        step_log: The memory step to render.

    Yields:
        ``gr.ChatMessage`` objects in this order: the step header, the model
        output, the tool call with its nested execution logs and error (or a
        standalone error when there was no tool call), a footnote with token
        and duration information, and a ``-----`` separator.
    """
    if not isinstance(step_log, ActionStep):
        return

    # Output the step number
    step_number = f"Step {step_log.step_number}" if step_log.step_number is not None else ""
    yield gr.ChatMessage(role="assistant", content=f"**{step_number}**")

    # First yield the thought/reasoning from the LLM
    raw_model_output = getattr(step_log, "model_output", None)
    if raw_model_output is not None:
        model_output = clean_up_LLM_output(raw_model_output.strip())
        # TODO: experimental hook (marked "TEST TO BE CHANGED" by the author): when the
        # model output mentions "music", an audio file message is sent instead of the text.
        if "music" in model_output:
            an_output = {"path": "./resources/GenAI Revolution.mp3", "mime_type": "audio/wav", "autoplay": True}
            yield gr.ChatMessage(role="assistant", content=an_output)
        else:
            yield gr.ChatMessage(role="assistant", content=model_output)

    tool_calls = getattr(step_log, "tool_calls", None)
    error = getattr(step_log, "error", None)

    # For tool calls, create a parent message.
    # The truthiness test also skips an empty list, which would otherwise fail on [0].
    if tool_calls:
        first_tool_call = tool_calls[0]
        used_code = first_tool_call.name == "python_interpreter"
        parent_id = f"call_{len(tool_calls)}"

        # Render the arguments: prefer the "answer" entry of a dict, else stringify everything
        args = first_tool_call.arguments
        if isinstance(args, dict):
            content = str(args.get("answer", str(args)))
        else:
            content = str(args).strip()

        if used_code:
            content = clean_up_code_output(content)

        parent_message_tool = gr.ChatMessage(
            role="assistant",
            content=content,
            metadata={
                "title": f"🛠️ Used tool {first_tool_call.name}",
                "id": parent_id,
                "status": "pending",
            },
        )
        yield parent_message_tool

        # Nest execution logs under the tool call, only if there is actual content
        observations = getattr(step_log, "observations", None)
        if observations is not None and observations.strip():
            log_content = re.sub(r"^Execution logs:\s*", "", observations.strip())
            yield gr.ChatMessage(
                role="assistant",
                content=f"{log_content}",
                metadata={"title": "📝 Execution Logs", "parent_id": parent_id, "status": "done"},
            )

        # Nest any error under the tool call
        if error is not None:
            yield gr.ChatMessage(
                role="assistant",
                content=str(error),
                metadata={"title": "💥 Error", "parent_id": parent_id, "status": "done"},
            )

        # Update parent message metadata to done status without yielding a new message
        parent_message_tool.metadata["status"] = "done"

    # Handle standalone errors but not from tool calls
    elif error is not None:
        yield gr.ChatMessage(role="assistant", content=str(error), metadata={"title": "💥 Error"})

    # Build the footnote; each part is appended only when its value is actually known,
    # because formatting a None token count or concatenating a None duration raises TypeError.
    step_footnote = f"{step_number}"
    input_tokens = getattr(step_log, "input_token_count", None)
    output_tokens = getattr(step_log, "output_token_count", None)
    if input_tokens is not None and output_tokens is not None:
        step_footnote += f" | Input-tokens:{input_tokens:,} | Output-tokens:{output_tokens:,}"
    duration = getattr(step_log, "duration", None)
    if duration:
        step_footnote += f" | Duration: {round(float(duration), 2)}"
    step_footnote = f"""<span style="color: #bbbbc2; font-size: 12px;">{step_footnote}</span> """
    yield gr.ChatMessage(role="assistant", content=f"{step_footnote}")
    yield gr.ChatMessage(role="assistant", content="-----")


def stream_to_gradio(agent, task: str, reset_agent_memory: bool = False, additional_args: Optional[dict] = None):
    """Runs an agent with the given task and streams the messages from the agent as gradio ChatMessages.

    Args:
        agent: Object exposing ``run(task, stream=True, reset=..., additional_args=...)``
            and a ``model`` attribute.
        task: The prompt handed to the agent.
        reset_agent_memory: Passed to ``agent.run`` as ``reset``.
        additional_args: Passed through to ``agent.run`` unchanged.

    Yields:
        ``gr.ChatMessage`` objects for every streamed step, followed by one
        message carrying the final answer (text, image or audio).
    """
    no_step = object()  # sentinel: distinguishes "nothing streamed" from a streamed None
    step_log = no_step

    for step_log in agent.run(task, stream=True, reset=reset_agent_memory, additional_args=additional_args):
        # Attach token counts only when the model actually reports them; the
        # attributes may be missing or None, and None would break number formatting.
        input_tokens = getattr(agent.model, "last_input_token_count", None)
        output_tokens = getattr(agent.model, "last_output_token_count", None)
        if isinstance(step_log, ActionStep) and input_tokens is not None and output_tokens is not None:
            step_log.input_token_count = input_tokens
            step_log.output_token_count = output_tokens

        yield from pull_messages_from_step(step_log)

    if step_log is no_step:
        # The run produced no steps at all, so there is no final answer to show.
        return

    final_answer = step_log  # Last log is the run's final_answer
    final_answer = handle_agent_output_types(final_answer)

    if isinstance(final_answer, AgentText):
        yield gr.ChatMessage(
            role="assistant",
            content=f"**Final answer:**\n{final_answer.to_string()}\n",
        )
    elif isinstance(final_answer, AgentImage):
        yield gr.ChatMessage(
            role="assistant",
            content={"path": final_answer.to_string(), "mime_type": "image/png"},
        )
    elif isinstance(final_answer, AgentAudio):
        yield gr.ChatMessage(
            role="assistant",
            content={"path": final_answer.to_string(), "mime_type": "audio/wav"},
        )
    else:
        yield gr.ChatMessage(role="assistant", content=f"**Final answer:** {str(final_answer)}")


class GradioUI:
    """Chat front end that runs a ``MultiStepAgent`` inside a Gradio Blocks app."""

    def __init__(self, agent: MultiStepAgent):
        """Store the agent that will answer the user's prompts."""
        self.agent = agent

    def interact_with_agent(self, prompt, messages):
        """Append the user's prompt and stream the agent's replies into the chat.

        Args:
            prompt: Text submitted by the user.
            messages: Current chat history; it is mutated in place.

        Yields:
            The updated ``messages`` list after every appended message.
        """
        messages.append(gr.ChatMessage(role="user", content=prompt))
        yield messages
        for msg in stream_to_gradio(self.agent, task=prompt, reset_agent_memory=False):
            messages.append(msg)
            yield messages
        yield messages

    def log_user_message(self, text_input):
        """Return the submitted text plus an empty string that clears the textbox."""
        return text_input, ""

    def launch(self, **kwargs):
        """Build the chat interface and start the Gradio server.

        Args:
            **kwargs: Forwarded to ``demo.launch``. ``debug`` and ``share``
                default to ``True`` and can be overridden here.
        """
        # Merge defaults with caller options so that passing debug=/share= overrides
        # the default instead of raising "got multiple values for keyword argument".
        launch_options = {"debug": True, "share": True, **kwargs}

        with gr.Blocks(fill_height=True) as demo:
            stored_messages = gr.State([])

            gr.Audio(value="./resources/GenAI Revolution.mp3", autoplay=True)

            # Greeting shown before the user has typed anything.
            history = [gr.ChatMessage(role="assistant", content="How can I help you?")]

            chatbot = gr.Chatbot(
                history,
                label="Agent",
                type="messages",
                avatar_images=(
                    None,
                    "https://huggingface.co/datasets/agents-course/course-images/resolve/main/en/communication/Alfred.png",
                ),
                # NOTE(review): keyword spelling kept as written; confirm against the installed Gradio version.
                resizeable=True,
                scale=1,
            )
            text_input = gr.Textbox(lines=1, label="Chat Message")
            # On submit: stash the text and clear the textbox, then stream the agent's reply.
            text_input.submit(
                self.log_user_message,
                [text_input],
                [stored_messages, text_input],
            ).then(self.interact_with_agent, [stored_messages, chatbot], [chatbot])

        demo.launch(**launch_options)

# Names exported by `from <module> import *`: the streaming helper and the UI class.
__all__ = ["stream_to_gradio", "GradioUI"]