Spaces:

charles-azam
/

deepdraft

Runtime error

App Files Files Community

Charles Azam committed on Jul 13, 2025

Commit

1eb9c9d

1 Parent(s): 52c7696

fix: let the agent perform multiple steps

Browse files

Files changed (4) hide show

data/figure.png +0 -0
src/deepengineer/deepsearch/draw_agent.py +115 -53
src/deepengineer/webcrawler/async_crawl.py +0 -1
tests/webcrawler/test_draw_agent.py +22 -0

data/figure.png ADDED Viewed

src/deepengineer/deepsearch/draw_agent.py CHANGED Viewed

@@ -1,13 +1,6 @@
-"""
-drawing_agent.py (rev 3)
-A smolagents‑powered CodeAgent that grants the model **full matplotlib.pyplot**
-control *plus* a single high‑level `save_fig` tool. The tool must be called at
-the end of each drawing sequence to persist the artwork, while a callback still
-captures a snapshot for chat‑time previews.
-"""
 from __future__ import annotations
 from io import BytesIO
 from time import sleep
@@ -17,34 +10,28 @@ matplotlib.use("Agg")  # headless backend
 import matplotlib.pyplot as plt
 from PIL import Image
-from smolagents import CodeAgent, LiteLLMModel, tool
 from smolagents.agents import ActionStep
-# ---------------------------------------------------------------------------
-# Drawing tool (the *only* one): save_fig
-# ---------------------------------------------------------------------------
-@tool
-def save_fig() -> str:
     """Save the current matplotlib figure to *path*.
     Save fig takes no arguments. The output path is hardcoded to "figure.png".
     """
-    path = "figure.png"
     if not plt.get_fignums():
         raise RuntimeError(
             "No active figure to save; create one before calling save_fig()."
         )
-    plt.savefig(path, bbox_inches="tight")
-    return f"Figure saved to {path}."
-# ---------------------------------------------------------------------------
-# Callback: snapshot the figure after each executed step
-# ---------------------------------------------------------------------------
-def _capture_snapshot(memory_step: ActionStep, agent: CodeAgent) -> None:
     if not plt.get_fignums():
         return
@@ -71,44 +58,119 @@ def _capture_snapshot(memory_step: ActionStep, agent: CodeAgent) -> None:
     )
-# ---------------------------------------------------------------------------
-# Agent initialisation
-# ---------------------------------------------------------------------------
-model_id = "deepseek/deepseek-chat"
-model = LiteLLMModel(model_id=model_id)
-agent = CodeAgent(
-    tools=[save_fig],  # only one explicit tool
-    model=model,
-    additional_authorized_imports=["*"],
-    step_callbacks=[_capture_snapshot],
-    max_steps=20,
-    verbosity_level=2,
-)
-# ---------------------------------------------------------------------------
-# System prompt injected before every user request
-# ---------------------------------------------------------------------------
-matplotlib_instructions = r"""
-You may use the entire **matplotlib** API.
 Workflow
 --------
 1. Construct your figure with ordinary matplotlib calls.
-2. **Once the figure is complete, call `save_fig()` and `final_answer()`.** This is the
-   *only* external tool you have and must be invoked exactly once per final
-   graphic.
 3. Do **not** call `plt.show()`; a callback captures a PNG automatically.
-4. Keep code blocks concise and avoid GUI back‑end imports (TkAgg, Qt, etc.).
 """
-# ---------------------------------------------------------------------------
-# Example CLI usage
-# ---------------------------------------------------------------------------
-if __name__ == "__main__":
-    import numpy as np
-    prompt = "Propose moi un schéma simplifié d'un réacteur nucléaire à eau pressurisée (PWR) avec un schéma de l'installation."
-    result = agent.run(prompt + matplotlib_instructions)
-    print("Final output:\n", result)

 from __future__ import annotations
+from pathlib import Path
 from io import BytesIO
 from time import sleep
 import matplotlib.pyplot as plt
 from PIL import Image
+from smolagents import CodeAgent, LiteLLMModel
 from smolagents.agents import ActionStep
+import base64, mimetypes
def save_fig(image_path: str | Path = Path("figure.png")) -> str:
    """Save the current matplotlib figure to *image_path*.

    Args:
        image_path: Destination file for the rendered figure. Defaults to
            ``figure.png`` relative to the current working directory. Both
            ``str`` and ``Path`` values are accepted.

    Returns:
        A confirmation message naming the file that was written.

    Raises:
        RuntimeError: If no matplotlib figure is currently open.
    """
    if not plt.get_fignums():
        raise RuntimeError(
            "No active figure to save; create one before calling save_fig()."
        )
    target = Path(image_path)
    # Create missing parent directories so a caller-chosen output location
    # does not make savefig fail with FileNotFoundError.
    target.parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(target, bbox_inches="tight")
    return f"Figure saved to {image_path}."
+def _capture_snapshot(
+    memory_step: ActionStep, agent: CodeAgent, image_path: Path = Path("figure.png")
+) -> None:
+    save_fig(image_path)
     if not plt.get_fignums():
         return
     )
# Prompt template used when the agent is asked to iterate on its drawing:
# it must look at the snapshot of the previous step before finishing.
# Fill the ``{user_instructions}`` placeholder with ``str.format``.
matplotlib_instructions_multiple_steps = r"""
You may use the entire **matplotlib** and **numpy** API. Do not worry about saving the image, it is done automatically and you can't access the os library.
Between each step, the image is provided in memory. From step 2, you can use it to pass additional instructions to the model to improve the image.
Workflow
--------
1. Construct your figure with ordinary matplotlib calls.
2. Wait another iteration, watch the image. If the image is correct call `final_answer()` directly. Otherwise, just do it again.
3. Do **not** call `plt.show()`; a callback captures a PNG automatically.
4. Keep code blocks concise and avoid GUI back‑end imports (TkAgg, Qt, etc.).
User instructions:
{user_instructions}
"""
# Prompt template used when the agent may finish in a single step if it is
# confident in its drawing. Fill the ``{user_instructions}`` placeholder
# with ``str.format``.
matplotlib_instructions_single_step = r"""
You may use the entire **matplotlib** and **numpy** API. Do not worry about saving the image, it is done automatically and you can't access the os library.
Workflow
--------
1. Construct your figure with ordinary matplotlib calls.
2. If the task is easy and you are confident that the image is correct, call `final_answer()` directly. Otherwise, wait another iteration to watch the image.
3. Do **not** call `plt.show()`; a callback captures a PNG automatically.
4. Keep code blocks concise and avoid GUI back‑end imports (TkAgg, Qt, etc.).
User instructions:
{user_instructions}
"""
def draw_image_agent(
    prompt: str,
    image_path: str | Path = Path("figure.png"),
    model_id: str = "mistral/mistral-medium-latest",
    multiple_steps: bool = False,
) -> Path:
    """Run a code agent that draws a matplotlib figure for *prompt*.

    The agent gets no explicit tools. It may only import matplotlib and
    numpy, and a step callback passes *image_path* to ``_capture_snapshot``
    after every step.

    Args:
        prompt: User instructions inserted into the prompt template.
        image_path: Output location handed to the snapshot callback. ``str``
            values are converted to ``Path``.
        model_id: LiteLLM model identifier.
        multiple_steps: When true, use the template that asks the agent to
            review the image on a later iteration before answering;
            otherwise use the template that allows finishing in one step.

    Returns:
        *image_path* as a ``Path``. Whether the file exists depends on the
        agent having produced a figure.
    """
    # Normalise once so the callback and the caller both see a Path, which
    # is what the return annotation promises.
    image_path = Path(image_path)

    model = LiteLLMModel(model_id=model_id)
    agent = CodeAgent(
        tools=[],
        model=model,
        additional_authorized_imports=["matplotlib.*", "numpy.*"],
        step_callbacks=[
            lambda memory_step, agent: _capture_snapshot(memory_step, agent, image_path)
        ],
        max_steps=20,
        verbosity_level=2,
    )

    template = (
        matplotlib_instructions_multiple_steps
        if multiple_steps
        else matplotlib_instructions_single_step
    )
    agent.run(template.format(user_instructions=prompt))
    return image_path
def multiple_steps_draw_image_agent(
    prompt: str,
    image_path: str | Path = Path("figure.png"),
    model_id: str = "mistral/mistral-medium-latest",
) -> Path:
    """Drive a drawing agent manually, one step at a time.

    The idea behind this function is to give a multimodal agent the code and
    the image of the previous step so that it can adapt its drawing.

    Args:
        prompt: Task text given to the agent as its ``TaskStep``.
        image_path: Output location handed to ``_capture_snapshot`` after
            every step. ``str`` values are converted to ``Path``.
        model_id: LiteLLM model identifier.

    Returns:
        *image_path* as a ``Path``. Whether the file exists depends on the
        agent having produced a figure.
    """
    from smolagents import TaskStep, Timing
    import time

    # Upper bound on manually driven steps, so the loop always terminates
    # even if the agent never produces a final answer.
    max_manual_steps = 10

    # Normalise once so the callback and the caller both see a Path, which
    # is what the return annotation promises.
    image_path = Path(image_path)

    model = LiteLLMModel(model_id=model_id)
    agent = CodeAgent(
        tools=[],
        model=model,
        additional_authorized_imports=["matplotlib.*", "numpy.*"],
        step_callbacks=[
            lambda memory_step, agent: _capture_snapshot(memory_step, agent, image_path)
        ],
        max_steps=20,
        verbosity_level=2,
    )

    # Send the tools to the agent (no tools here)
    agent.python_executor.send_tools({**agent.tools})

    # Print the system prompt
    print(agent.memory.system_prompt)

    # You could modify the memory as needed here by inputting the memory of
    # another agent, e.g. agent.memory.steps = previous_agent.memory.steps
    # Let's start a new task!
    agent.memory.steps.append(TaskStep(task=prompt, task_images=[]))

    final_answer = None
    for step_number in range(1, max_manual_steps + 1):
        now = time.time()
        memory_step = ActionStep(
            step_number=step_number,
            observations_images=[],
            timing=Timing(start_time=now, end_time=now),
        )

        # Run one step.
        final_answer = agent.step(memory_step)
        agent.memory.steps.append(memory_step)

        # Snapshot explicitly after each manually driven step.
        _capture_snapshot(memory_step, agent, image_path)

        # The memory can be changed here as needed, for instance to update
        # the latest step: agent.memory.steps[-1] = ...
        if final_answer is not None:
            break

    print("The final answer is:", final_answer)
    return image_path

src/deepengineer/webcrawler/async_crawl.py CHANGED Viewed

@@ -6,7 +6,6 @@ import httpx
 import pytest
-@pytest.mark.skip(reason="Playwright is not installed on CI")
 async def crawl4ai_extract_markdown_of_url_async(url: str) -> str:
     """Extract markdown content from a URL using crawl4ai."""
     async with crawl4ai.AsyncWebCrawler() as crawler:

 import pytest
 async def crawl4ai_extract_markdown_of_url_async(url: str) -> str:
     """Extract markdown content from a URL using crawl4ai."""
     async with crawl4ai.AsyncWebCrawler() as crawler:

tests/webcrawler/test_draw_agent.py ADDED Viewed

	@@ -0,0 +1,22 @@

import pytest
from deepengineer.deepsearch.draw_agent import (
    draw_image_agent,
    multiple_steps_draw_image_agent,
)
from deepengineer.common_path import DATA_DIR
from pathlib import Path


@pytest.mark.expensive
def test_draw_image_agent():
    """The single-step drawing agent writes a figure to the requested path."""
    prompt = """Propose moi un schéma très détaillé d'un réacteur nucléaire hélium graphite."""
    output_path = Path(DATA_DIR) / "figure.png"
    # Remove any stale figure so the assertion proves this run wrote the file.
    output_path.unlink(missing_ok=True)
    output_path = draw_image_agent(prompt, output_path, multiple_steps=False)
    assert output_path.exists()


@pytest.mark.skip(reason="This function is not working yet")
def test_run_agent_step_by_step():
    """The manually stepped drawing agent writes a figure to the requested path."""
    prompt = """Propose moi un schéma très détaillé d'un réacteur nucléaire hélium graphite."""
    output_path = Path(DATA_DIR) / "figure.png"
    # Remove any stale figure so the assertion proves this run wrote the file.
    output_path.unlink(missing_ok=True)
    # The step-by-step runner is exposed as multiple_steps_draw_image_agent;
    # importing a non-existent name would break collection of this module.
    output_path = multiple_steps_draw_image_agent(prompt, output_path)
    assert output_path.exists()