Spaces:

charles-azam
/

deepdraft

Runtime error

File size: 8,663 Bytes

52c7696
 
1eb9c9d
52c7696
 
 
 
 
 
 
 
 
1eb9c9d
52c7696
dd7fa38
6c0aeb9
0159aaf
52c7696
 
dd7fa38
52c7696
 
 
 
 
 
 
1eb9c9d
 
52c7696
 
6c0aeb9
395c3d4
 
 
 
 
 
 
 
 
 
 
0159aaf
 
395c3d4
 
 
0159aaf
395c3d4
 
 
 
dd7fa38
395c3d4
 
 
 
 
 
1eb9c9d
 
 
dd7fa38
52c7696
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1eb9c9d
395c3d4
1eb9c9d
 
52c7696
1eb9c9d
 
 
 
 
 
52c7696
1eb9c9d
 
 
52c7696
1eb9c9d
395c3d4
52c7696
 
 
 
1eb9c9d
52c7696
1eb9c9d
 
 
 
52c7696
 
 
dd7fa38
1eb9c9d
 
 
 
 
 
 
 
 
395c3d4
 
 
 
 
 
1eb9c9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6c0aeb9
395c3d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0159aaf
395c3d4
 
dd7fa38
 
 
395c3d4
 
 
 
 
 
1eb9c9d

from __future__ import annotations

from pathlib import Path
from io import BytesIO
from time import sleep

import matplotlib

matplotlib.use("Agg")  # headless backend
import matplotlib.pyplot as plt
from PIL import Image

from smolagents import CodeAgent, LiteLLMModel
from smolagents.agents import ActionStep
from deepengineer.webcrawler.crawl_database import DataBase
from deepengineer.logging_tools import LoggingTool
import queue


def _find_and_save_matplotlib_figure(
    image_path: str | Path = Path("figure.png"),
) -> str:
    """Save the current matplotlib figure to *image_path*.

    Args:
        image_path: Destination file for the PNG. A plain string is accepted
            as well as a ``Path``. Defaults to ``figure.png`` relative to the
            working directory. Missing parent directories are created.

    Returns:
        A short confirmation message naming the destination.

    Raises:
        RuntimeError: If pyplot currently tracks no figure.
    """
    if not plt.get_fignums():
        raise RuntimeError(
            "No active figure to save; create one before calling save_fig()."
        )
    destination = Path(image_path)
    # savefig does not create directories, so make sure the target folder exists.
    destination.parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(destination, bbox_inches="tight")
    return f"Figure saved to {image_path}."


class SaveMatplotlibFigTool(LoggingTool):
    """Agent tool that writes the current matplotlib figure into ``output_dir``.

    The figure is saved as a PNG, then closed (as the tool description promises),
    and a markdown image reference to the saved file is returned.
    """

    name = "save_matplotlib_fig"
    description = """Save the current matplotlib figure to the current directory. Then plt.close() is called to clear the figure. The image is returned as a markdown string, use this markdown inside the final answer to include the image.
    """
    inputs = {
        "image_name": {
            "type": "string",
            "description": "The name of the image to save.",
        },
    }
    output_type = "string"

    def __init__(self, output_dir: Path, log_queue: queue.Queue | None = None):
        """Store the target directory and forward *log_queue* to the base class."""
        super().__init__(log_queue=log_queue)
        self.output_dir: Path = output_dir

    def forward(self, image_name: str) -> str:
        """Save the current figure as ``output_dir / image_name`` and close it.

        Args:
            image_name: File name for the image; ``.png`` is appended when missing.

        Returns:
            A markdown image tag pointing at the file name on success, or an
            error message if the file does not exist after saving.

        Raises:
            RuntimeError: Propagated from the save helper when there is no
                figure to save.
        """
        self.push_log(f"🖼️ Saving matplotlib figure to {image_name}")
        if not image_name.endswith(".png"):
            image_name = image_name + ".png"
        output_path = self.output_dir / image_name
        # Remove any stale file first so the existence check below reflects this save.
        output_path.unlink(missing_ok=True)
        _find_and_save_matplotlib_figure(output_path)
        # The description tells the agent the figure is cleared after saving;
        # without this, later plotting calls would draw on top of the old figure.
        plt.close()
        if output_path.exists():
            return f"![]({image_name})"
        return f"Error: The image {image_name} was not saved."


def _capture_snapshot(
    memory_step: ActionStep, agent: CodeAgent, image_path: Path = Path("figure.png")
) -> None:
    """Step callback: persist the current figure and attach it to *memory_step*.

    When pyplot tracks no figure the callback is a no-op. Otherwise the figure
    is written to *image_path*, rendered once more into memory as a PNG, and the
    decoded image is stored in ``memory_step.observations_images``. Images held
    by action steps at least two steps older than *memory_step* are dropped, and
    a textual hint is appended to the step's observations.

    Args:
        memory_step: The step that has just run; it receives the snapshot.
        agent: The agent whose memory is pruned of older snapshots.
        image_path: Where the PNG is written on disk.
    """
    # Guard first: the save helper raises RuntimeError when there is no figure,
    # which would otherwise abort every step that has not produced a plot.
    if not plt.get_fignums():
        return

    _find_and_save_matplotlib_figure(image_path)

    with BytesIO() as buffer:
        plt.savefig(buffer, format="png", bbox_inches="tight")
        buffer.seek(0)
        # copy() forces a full decode, so the image stays valid after the buffer closes.
        snapshot = Image.open(buffer).copy()

    # Keep only recent snapshots in memory: clear images from steps two or more behind.
    cutoff = memory_step.step_number - 2
    for previous in agent.memory.steps:
        if isinstance(previous, ActionStep) and previous.step_number <= cutoff:
            previous.observations_images = None

    memory_step.observations_images = [snapshot]

    hint = "[snapshot: matplotlib figure captured]"
    if memory_step.observations is None:
        memory_step.observations = hint
    else:
        memory_step.observations = memory_step.observations + "\n" + hint


# Prompt template used by draw_matplotlib_image_from_prompt when
# multiple_steps is true. The {user_instructions} placeholder is filled in
# with str.format before the text is handed to the agent.
matplotlib_instructions_multiple_steps = r"""
You may use the entire **matplotlib** and **numpy** and **pandas** and **seaborn** API. Do not worry about saving the image, it is done automatically and you can't access the os library.

Between each step, the image is provided in memory. From step 2, you can use it to pass additional instructions to the model to improve the image.

Workflow
--------
1. Construct your figure with ordinary matplotlib calls.
2. Wait another iteration, watch the image. If the image is correct call `final_answer() directly`. Otherwise, just do it again.
3. Do **not** call `plt.show()`; a callback captures a PNG automatically.
4. Keep code blocks concise and avoid GUI back‑end imports (TkAgg, Qt, etc.).

User instructions:
{user_instructions}
"""

# Prompt template used by draw_matplotlib_image_from_prompt when
# multiple_steps is false. The {user_instructions} placeholder is filled in
# with str.format before the text is handed to the agent.
matplotlib_instructions_single_step = r"""
You may use the entire **matplotlib** and **numpy** and **pandas** and **seaborn** API. Do not worry about saving the image, it is done automatically and you can't access the os library.

Workflow
--------
1. Construct your figure with ordinary matplotlib calls.
2. If the task is easy and you are confident that the image is correct, call `final_answer() directly`. Otherwise, wait another iteration to watch the image.
3. Do **not** call `plt.show()`; a callback captures a PNG automatically.
4. Keep code blocks concise and avoid GUI back‑end imports (TkAgg, Qt, etc.).  

User instructions:
{user_instructions}
"""


def draw_matplotlib_image_from_prompt(
    prompt: str,
    image_path: str | Path = Path("figure.png"),
    model_id: str = "mistral/mistral-medium-latest",
    multiple_steps: bool = False,
) -> Path:
    """Run a code agent that draws a matplotlib figure described by *prompt*.

    A snapshot callback runs after each agent step and writes the current
    figure to *image_path*.

    Args:
        prompt: Natural-language description of the image to draw.
        image_path: Destination of the PNG; strings are converted to ``Path``.
        model_id: LiteLLM model identifier used by the agent.
        multiple_steps: Selects the prompt template that asks the agent to
            review the image on a following step instead of the single-step one.

    Returns:
        The destination path as a ``Path``. The function does not itself check
        that the file was written.
    """
    # Normalise up front so the declared ``Path`` return type holds for string
    # arguments too (callers invoke ``.exists()`` on the result).
    image_path = Path(image_path)

    model = LiteLLMModel(model_id=model_id)
    agent = CodeAgent(
        tools=[],
        model=model,
        additional_authorized_imports=[
            "matplotlib.*",
            "numpy.*",
            "pandas.*",
            "seaborn.*",
        ],
        step_callbacks=[
            lambda memory_step, agent: _capture_snapshot(memory_step, agent, image_path)
        ],
        max_steps=20,
        verbosity_level=2,
    )
    template = (
        matplotlib_instructions_multiple_steps
        if multiple_steps
        else matplotlib_instructions_single_step
    )
    agent.run(template.format(user_instructions=prompt))
    return image_path


class DrawImageTool(LoggingTool):
    """Agent tool that delegates drawing to a matplotlib code agent.

    The image is produced by ``draw_matplotlib_image_from_prompt`` and written
    into ``output_dir``; a markdown image reference is returned.
    """

    name = "draw_image"
    description = "Draw an image based on a prompt. The image is saved in the current directory. The image is returned as a markdown image, use this markdown inside the final answer to include the image. You must be very specific in your prompt."
    inputs = {
        "prompt": {
            "type": "string",
            "description": """
    Draw an image based on a prompt. The image is saved in the current directory. The image is returned as a markdown image, use this markdown inside the final answer to include the image. 
    
    You must be very specific in your prompt. This tool has access to matplotlib, numpy, pandas, seaborn.
                   
                   """,
        },
        "image_name": {
            "type": "string",
            "description": "The name of the image to save.",
        },
    }
    output_type = "string"

    def __init__(self, output_dir: Path, log_queue: queue.Queue | None = None):
        """Store the target directory and forward *log_queue* to the base class.

        ``log_queue`` is optional, mirroring ``SaveMatplotlibFigTool``, so
        existing ``DrawImageTool(output_dir)`` calls keep working.
        """
        super().__init__(log_queue=log_queue)
        self.output_dir: Path = output_dir

    def forward(self, prompt: str, image_name: str) -> str:
        """Draw the image described by *prompt* and save it under *image_name*.

        Args:
            prompt: Description of the image handed to the drawing agent.
            image_name: File name for the image; ``.png`` is appended when missing.

        Returns:
            A markdown image tag pointing at the file name on success, or an
            error message if the file does not exist afterwards.
        """
        self.push_log(f"🖊️ Drawing image from prompt: {prompt}")
        if not image_name.endswith(".png"):
            image_name = image_name + ".png"
        output_path = draw_matplotlib_image_from_prompt(
            prompt, self.output_dir / image_name
        )
        if output_path.exists():
            return f"![]({image_name})"
        return f"Error: The image {image_name} was not saved."


def multiple_steps_draw_image_agent(
    prompt: str,
    image_path: str | Path = Path("figure.png"),
    model_id: str = "mistral/mistral-medium-latest",
) -> Path:
    """Drive a drawing agent one step at a time, snapshotting the figure each step.

    The idea behind this function is to give to a multimodal agent the code and
    the image of the previous step to adapt it.

    Args:
        prompt: The task given to the agent.
        image_path: Destination of the PNG; strings are converted to ``Path``.
        model_id: LiteLLM model identifier used by the agent.

    Returns:
        The destination path as a ``Path``. The function does not itself check
        that the file was written.
    """
    from smolagents import CodeAgent, ActionStep, TaskStep, Timing
    import time

    # Normalise up front so the declared ``Path`` return type holds for string arguments.
    image_path = Path(image_path)

    model = LiteLLMModel(model_id=model_id)
    agent = CodeAgent(
        tools=[],
        model=model,
        additional_authorized_imports=["matplotlib.*", "numpy.*"],
        step_callbacks=[
            lambda memory_step, agent: _capture_snapshot(memory_step, agent, image_path)
        ],
        max_steps=20,
        verbosity_level=2,
    )

    # Send the tools to the agent (no tools here)
    agent.python_executor.send_tools({**agent.tools})

    # Print the system prompt
    print(agent.memory.system_prompt)

    # Set the task
    task = prompt

    # You could modify the memory as needed here by inputting the memory of another agent.
    # agent.memory.steps = previous_agent.memory.steps

    # Let's start a new task!
    agent.memory.steps.append(TaskStep(task=task, task_images=[]))

    final_answer = None
    step_number = 1
    # Manual loop: stop once a step yields a final answer or after 10 steps.
    while final_answer is None and step_number <= 10:
        memory_step = ActionStep(
            step_number=step_number,
            observations_images=[],
            timing=Timing(start_time=time.time(), end_time=time.time()),
        )
        # Run one step.
        final_answer = agent.step(memory_step)
        agent.memory.steps.append(memory_step)
        step_number += 1
        # Snapshot explicitly after recording the step in memory.
        _capture_snapshot(memory_step, agent, image_path)
        # Change the memory as you please!
        # For instance to update the latest step:
        # agent.memory.steps[-1] = ...

    print("The final answer is:", final_answer)

    return image_path