Charles Azam committed on
Commit
1eb9c9d
·
1 Parent(s): 52c7696

fix: let the agent perform multiple steps

Browse files
data/figure.png ADDED
src/deepengineer/deepsearch/draw_agent.py CHANGED
@@ -1,13 +1,6 @@
1
- """
2
- drawing_agent.py (rev 3)
3
- A smolagents‑powered CodeAgent that grants the model **full matplotlib.pyplot**
4
- control *plus* a single high‑level `save_fig` tool. The tool must be called at
5
- the end of each drawing sequence to persist the artwork, while a callback still
6
- captures a snapshot for chat‑time previews.
7
- """
8
-
9
  from __future__ import annotations
10
 
 
11
  from io import BytesIO
12
  from time import sleep
13
 
@@ -17,34 +10,28 @@ matplotlib.use("Agg") # headless backend
17
  import matplotlib.pyplot as plt
18
  from PIL import Image
19
 
20
- from smolagents import CodeAgent, LiteLLMModel, tool
21
  from smolagents.agents import ActionStep
22
 
23
- # ---------------------------------------------------------------------------
24
- # Drawing tool (the *only* one): save_fig
25
- # ---------------------------------------------------------------------------
26
 
27
 
28
- @tool
29
- def save_fig() -> str:
30
  """Save the current matplotlib figure to *path*.
31
  Save fig takes no arguments. The output path is hardcoded to "figure.png".
32
  """
33
- path = "figure.png"
34
  if not plt.get_fignums():
35
  raise RuntimeError(
36
  "No active figure to save; create one before calling save_fig()."
37
  )
38
- plt.savefig(path, bbox_inches="tight")
39
- return f"Figure saved to {path}."
40
-
41
 
42
- # ---------------------------------------------------------------------------
43
- # Callback: snapshot the figure after each executed step
44
- # ---------------------------------------------------------------------------
45
 
46
-
47
- def _capture_snapshot(memory_step: ActionStep, agent: CodeAgent) -> None:
 
 
48
  if not plt.get_fignums():
49
  return
50
 
@@ -71,44 +58,119 @@ def _capture_snapshot(memory_step: ActionStep, agent: CodeAgent) -> None:
71
  )
72
 
73
 
74
- # ---------------------------------------------------------------------------
75
- # Agent initialisation
76
- # ---------------------------------------------------------------------------
 
77
 
78
- model_id = "deepseek/deepseek-chat"
79
- model = LiteLLMModel(model_id=model_id)
 
 
 
 
80
 
81
- agent = CodeAgent(
82
- tools=[save_fig], # only one explicit tool
83
- model=model,
84
- additional_authorized_imports=["*"],
85
- step_callbacks=[_capture_snapshot],
86
- max_steps=20,
87
- verbosity_level=2,
88
- )
89
 
90
- # ---------------------------------------------------------------------------
91
- # System prompt injected before every user request
92
- # ---------------------------------------------------------------------------
93
 
94
- matplotlib_instructions = r"""
95
- You may use the entire **matplotlib** API.
96
  Workflow
97
  --------
98
  1. Construct your figure with ordinary matplotlib calls.
99
- 2. **Once the figure is complete, call `save_fig()` and `final_answer()`.** This is the
100
- *only* external tool you have and must be invoked exactly once per final
101
- graphic.
102
  3. Do **not** call `plt.show()`; a callback captures a PNG automatically.
103
- 4. Keep code blocks concise and avoid GUI back‑end imports (TkAgg, Qt, etc.).
 
 
 
104
  """
105
 
106
- # ---------------------------------------------------------------------------
107
- # Example CLI usage
108
- # ---------------------------------------------------------------------------
109
- if __name__ == "__main__":
110
- import numpy as np
111
 
112
- prompt = "Propose moi un schéma simplifié d'un réacteur nucléaire à eau pressurisée (PWR) avec un schéma de l'installation."
113
- result = agent.run(prompt + matplotlib_instructions)
114
- print("Final output:\n", result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from __future__ import annotations
2
 
3
+ from pathlib import Path
4
  from io import BytesIO
5
  from time import sleep
6
 
 
10
  import matplotlib.pyplot as plt
11
  from PIL import Image
12
 
13
+ from smolagents import CodeAgent, LiteLLMModel
14
  from smolagents.agents import ActionStep
15
 
16
+ import base64, mimetypes
 
 
17
 
18
 
19
def save_fig(image_path: Path = Path("figure.png")) -> str:
    """Save the current matplotlib figure to *image_path*.

    Args:
        image_path: Destination file for the rendered figure. Defaults to
            ``figure.png`` (relative to the current working directory).

    Returns:
        A short confirmation message naming the file that was written.

    Raises:
        RuntimeError: If no matplotlib figure is currently open.
    """
    # plt.get_fignums() is empty when nothing has been drawn yet; fail loudly
    # instead of writing a blank image.
    if not plt.get_fignums():
        raise RuntimeError(
            "No active figure to save; create one before calling save_fig()."
        )
    plt.savefig(image_path, bbox_inches="tight")
    return f"Figure saved to {image_path}."
 
29
 
 
 
 
30
 
31
+ def _capture_snapshot(
32
+ memory_step: ActionStep, agent: CodeAgent, image_path: Path = Path("figure.png")
33
+ ) -> None:
34
+ save_fig(image_path)
35
  if not plt.get_fignums():
36
  return
37
 
 
58
  )
59
 
60
 
61
# Prompt template for the iterative drawing workflow: the agent is told to draw,
# look at the captured image on the next iteration, and only then answer.
# The single replacement field, {user_instructions}, is filled with str.format()
# by draw_image_agent() when multiple_steps=True.
matplotlib_instructions_multiple_steps = r"""
You may use the entire **matplotlib** and **numpy** API. Do not worry about saving the image, it is done automatically and you can't access the os library.

Between each step, the image is provided in memory. From step 2, you can use it to pass additional instructions to the model to improve the image.

Workflow
--------
1. Construct your figure with ordinary matplotlib calls.
2. Wait another iteration, watch the image. If the image is correct call `final_answer() directly`. Otherwise, just do it again.
3. Do **not** call `plt.show()`; a callback captures a PNG automatically.
4. Keep code blocks concise and avoid GUI back‑end imports (TkAgg, Qt, etc.).

User instructions:
{user_instructions}
"""
 
 
 
 
 
76
 
77
# Prompt template for the default drawing workflow: the agent may call
# final_answer() right away when it is confident, or take another iteration.
# The single replacement field, {user_instructions}, is filled with str.format()
# by draw_image_agent() when multiple_steps=False.
matplotlib_instructions_single_step = r"""
You may use the entire **matplotlib** and **numpy** API. Do not worry about saving the image, it is done automatically and you can't access the os library.

Workflow
--------
1. Construct your figure with ordinary matplotlib calls.
2. If the task is easy and you are confident that the image is correct, call `final_answer() directly`. Otherwise, wait another iteration to watch the image.
3. Do **not** call `plt.show()`; a callback captures a PNG automatically.
4. Keep code blocks concise and avoid GUI back‑end imports (TkAgg, Qt, etc.).

User instructions:
{user_instructions}
"""
90
 
 
 
 
 
 
91
 
92
def draw_image_agent(
    prompt: str,
    image_path: str | Path = Path("figure.png"),
    model_id: str = "mistral/mistral-medium-latest",
    multiple_steps: bool = False,
) -> Path:
    """Run a code agent that draws a matplotlib figure for *prompt*.

    The figure is persisted by the ``_capture_snapshot`` step callback, which
    receives ``image_path``; this function itself never writes the file.

    Args:
        prompt: The user's drawing request, inserted into the prompt template.
        image_path: Where the step callback should save the figure. A plain
            string is accepted and converted to a ``Path``.
        model_id: LiteLLM model identifier used to build the agent's model.
        multiple_steps: Select the multi-iteration prompt template instead of
            the single-step one.

    Returns:
        ``image_path`` as a ``Path`` object.
    """
    # Normalise once so both the callback and the caller get a real Path,
    # even when a string was supplied (the return annotation promises Path).
    image_path = Path(image_path)

    model = LiteLLMModel(model_id=model_id)
    agent = CodeAgent(
        tools=[],
        model=model,
        additional_authorized_imports=["matplotlib.*", "numpy.*"],
        step_callbacks=[
            lambda memory_step, agent: _capture_snapshot(memory_step, agent, image_path)
        ],
        max_steps=20,
        verbosity_level=2,
    )

    template = (
        matplotlib_instructions_multiple_steps
        if multiple_steps
        else matplotlib_instructions_single_step
    )
    agent.run(template.format(user_instructions=prompt))
    return image_path
116
+
117
+
118
def multiple_steps_draw_image_agent(
    prompt: str,
    image_path: str | Path = Path("figure.png"),
    model_id: str = "mistral/mistral-medium-latest",
    max_iterations: int = 10,
) -> Path:
    """Drive a drawing agent one step at a time, snapshotting after each step.

    The idea behind this function is to give a multimodal agent the code and
    the image of the previous step so it can adapt its drawing.

    Args:
        prompt: The task handed to the agent as-is (no prompt template is
            applied here).
        image_path: Where ``_capture_snapshot`` should save the figure. A plain
            string is accepted and converted to a ``Path``.
        model_id: LiteLLM model identifier used to build the agent's model.
        max_iterations: Upper bound on the number of manual steps; the loop
            also stops as soon as a step returns a final answer.

    Returns:
        ``image_path`` as a ``Path`` object.
    """
    # CodeAgent and ActionStep are already imported at module level; only the
    # names needed exclusively by this function are imported locally.
    from smolagents import TaskStep, Timing
    import time

    # Normalise once so the snapshot helper and the caller both get a Path.
    image_path = Path(image_path)

    model = LiteLLMModel(model_id=model_id)
    agent = CodeAgent(
        tools=[],
        model=model,
        additional_authorized_imports=["matplotlib.*", "numpy.*"],
        step_callbacks=[
            lambda memory_step, agent: _capture_snapshot(memory_step, agent, image_path)
        ],
        max_steps=20,
        verbosity_level=2,
    )

    # Send the tools to the agent (no tools here)
    agent.python_executor.send_tools({**agent.tools})

    # Print the system prompt
    print(agent.memory.system_prompt)

    # Set the task
    task = prompt

    # You could modify the memory as needed here by inputting the memory of another agent.
    # agent.memory.steps = previous_agent.memory.steps

    # Let's start a new task!
    agent.memory.steps.append(TaskStep(task=task, task_images=[]))

    final_answer = None
    step_number = 1
    while final_answer is None and step_number <= max_iterations:
        memory_step = ActionStep(
            step_number=step_number,
            observations_images=[],
            timing=Timing(start_time=time.time(), end_time=time.time()),
        )
        # Run one step.
        final_answer = agent.step(memory_step)
        agent.memory.steps.append(memory_step)
        step_number += 1
        # Snapshot explicitly after every manually driven step.
        _capture_snapshot(memory_step, agent, image_path)
        # Change the memory as you please!
        # For instance to update the latest step:
        # agent.memory.steps[-1] = ...

    print("The final answer is:", final_answer)

    return image_path
src/deepengineer/webcrawler/async_crawl.py CHANGED
@@ -6,7 +6,6 @@ import httpx
6
  import pytest
7
 
8
 
9
- @pytest.mark.skip(reason="Playwright is not installed on CI")
10
  async def crawl4ai_extract_markdown_of_url_async(url: str) -> str:
11
  """Extract markdown content from a URL using crawl4ai."""
12
  async with crawl4ai.AsyncWebCrawler() as crawler:
 
6
  import pytest
7
 
8
 
 
9
  async def crawl4ai_extract_markdown_of_url_async(url: str) -> str:
10
  """Extract markdown content from a URL using crawl4ai."""
11
  async with crawl4ai.AsyncWebCrawler() as crawler:
tests/webcrawler/test_draw_agent.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pytest
from deepengineer.deepsearch.draw_agent import (
    draw_image_agent,
    multiple_steps_draw_image_agent,
)
from deepengineer.common_path import DATA_DIR
from pathlib import Path


@pytest.mark.expensive
def test_draw_image_agent():
    """The default agent run must leave a figure file at the requested path."""
    prompt = """Propose moi un schéma très détaillé d'un réacteur nucléaire hélium graphite."""
    output_path = Path(DATA_DIR) / "figure.png"
    # Remove any stale artefact so the assertion proves this run wrote the file.
    output_path.unlink(missing_ok=True)
    output_path = draw_image_agent(prompt, output_path, multiple_steps=False)
    assert output_path.exists()


@pytest.mark.skip(reason="This function is not working yet")
def test_run_agent_step_by_step():
    """The manually stepped agent must leave a figure file at the requested path."""
    prompt = """Propose moi un schéma très détaillé d'un réacteur nucléaire hélium graphite."""
    output_path = Path(DATA_DIR) / "figure.png"
    output_path.unlink(missing_ok=True)
    # The step-by-step implementation in draw_agent.py is named
    # multiple_steps_draw_image_agent; importing a non-existent name would break
    # collection of this whole module, including the test above.
    output_path = multiple_steps_draw_image_agent(prompt, output_path)
    assert output_path.exists()