First_agent_smolagent

Runtime error

App Files Files Community

Ludo7127 committed on Aug 9, 2025

Commit

3e622c0

verified ·

1 Parent(s): 2713b70

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -47

app.py CHANGED Viewed

@@ -52,64 +52,84 @@ model = HfApiModel(
 HF_TOKEN = os.getenv("HF_TOKEN")
 assert HF_TOKEN, "HF_TOKEN is not set in the environment."
-# Load the remote image generation tool (kept private; the agent will use our wrapper below)
 _hf_image_tool = load_tool(
     "agents-course/text-to-image",
     trust_remote_code=True,
     hf_token=HF_TOKEN,
 )
-# ---------- Our wrapper tool that ALWAYS returns a PIL image ----------
-@tool
-def generate_image(prompt: str, width: int = 768, height: int = 768, steps: int = 25) -> str:
-    """Generate an image from text and return a displayable image.
-    Args:
-        prompt (str): What to draw (e.g., "a photorealistic cat on a windowsill").
-        width (int): Output width in pixels.
-        height (int): Output height in pixels.
-        steps (int): Inference steps (quality/speed tradeoff).
-    Returns:
-        PIL.Image.Image: The generated image ready for display in Gradio.
-    """
-    # Call the HF tool
-    img = _hf_image_tool(
-        prompt=prompt,
-        width=width,
-        height=height,
-        num_inference_steps=steps
-    )
-    # Normalize whatever came back into a PIL image
     from PIL import Image
-    import io, base64
-    import os as _os
-    try:
-        # AgentImage -> PIL
-        if hasattr(img, "to_pil"):
-            pil = img.to_pil()
-        # Bytes -> PIL
-        elif isinstance(img, (bytes, bytearray)):
-            pil = Image.open(io.BytesIO(img)).convert("RGB")
-        # Path -> PIL
-        elif isinstance(img, str) and _os.path.exists(img):
-            pil = Image.open(img).convert("RGB")
-        # Already PIL-like
-        elif hasattr(img, "size") and callable(getattr(img, "save", None)):
-            pil = img
-        else:
-            # last resort: try base64
-            pil = Image.open(io.BytesIO(base64.b64decode(str(img)))).convert("RGB")
-    except Exception as e:
-        return f"Image generation failed to produce a displayable image: {e}"
     if getattr(pil, "size", (0, 0)) == (0, 0):
-        return "Image generation produced an empty image."
     return pil
 # ---------- Prompts / Agent / UI ----------
 with open("prompts.yaml", "r") as f:
     prompt_templates = yaml.safe_load(f)
@@ -117,8 +137,8 @@ with open("prompts.yaml", "r") as f:
 agent = CodeAgent(
     model=model,
     tools=[
-        final_answer,            # keep this so the agent can end its turn
-        generate_image,          # <- use this to make pictures appear
         get_current_time_in_timezone,
         DuckDuckGoSearchTool(),
     ],

 HF_TOKEN = os.getenv("HF_TOKEN")
 assert HF_TOKEN, "HF_TOKEN is not set in the environment."
+# Load the remote image generation tool from the Hub
 _hf_image_tool = load_tool(
     "agents-course/text-to-image",
     trust_remote_code=True,
     hf_token=HF_TOKEN,
 )
+# ---------- Helpers to normalize return values into a PIL image ----------
def _to_pil(obj):
    """Normalize a tool's return value into a PIL image.

    The following shapes are tried in order: smolagents ``AgentImage``
    wrappers, PIL-like objects (anything exposing ``size`` and a callable
    ``save``), raw bytes, paths to existing files, base64-encoded strings,
    and dicts carrying the payload under ``"image"``, ``"images"`` (first
    entry) or ``"data"``.

    Args:
        obj: The value returned by an image-producing tool.

    Returns:
        A PIL image. Bytes, file paths and base64 payloads are decoded and
        converted to RGB; objects that already look like PIL images are
        returned unchanged.

    Raises:
        ValueError: If ``obj`` matches none of the supported shapes.
    """
    from PIL import Image
    import base64
    import io
    import os

    # smolagents AgentImage -> PIL. smolagents documents to_raw() as the
    # accessor for the wrapped PIL image, so it is tried first; to_pil() is
    # kept as a fallback for wrappers that provide it. Unwrapping must happen
    # before the duck-typed PIL check below, which would otherwise return the
    # wrapper itself.
    if isinstance(obj, AgentImage):
        for accessor_name in ("to_raw", "to_pil"):
            accessor = getattr(obj, accessor_name, None)
            if callable(accessor):
                return accessor()

    # Already a PIL image (duck-typed: has a size and can be saved).
    if hasattr(obj, "size") and callable(getattr(obj, "save", None)):
        return obj

    # Raw bytes -> PIL
    if isinstance(obj, (bytes, bytearray)):
        return Image.open(io.BytesIO(obj)).convert("RGB")

    if isinstance(obj, str):
        # File path -> PIL
        if os.path.exists(obj):
            return Image.open(obj).convert("RGB")
        # base64 string -> PIL. Best effort: a string that is not valid
        # base64 (ValueError) or does not decode to an image (OSError) falls
        # through to the final "unsupported" error instead of crashing here.
        try:
            return Image.open(io.BytesIO(base64.b64decode(obj))).convert("RGB")
        except (ValueError, OSError):
            pass

    # Dict structures occasionally returned by tools (image / images / data).
    if isinstance(obj, dict):
        candidates = []
        if "image" in obj:
            candidates.append(obj["image"])
        if "images" in obj and obj["images"]:
            candidates.append(obj["images"][0])
        if "data" in obj:
            candidates.append(obj["data"])
        for candidate in candidates:
            try:
                return _to_pil(candidate)
            except (ValueError, OSError):
                # This candidate is not an image; try the next key.
                continue

    raise ValueError(f"Unsupported image output type: {type(obj).__name__}")
+# ---------- Plain Python function the agent can call in python_interpreter ----------
+# IMPORTANT: no @tool decorator here.
def generate_image(prompt: str):
    """
    Produce a picture for ``prompt`` via the Hub text-to-image tool.

    The tool is called with the prompt as its only keyword argument and its
    return value is normalized by ``_to_pil``. Intended to be callable from
    the agent's Python code, e.g.:
        img = generate_image("a photorealistic cat")
        final_answer(img)

    Raises RuntimeError when the normalized image reports a size of (0, 0)
    or exposes no ``size`` attribute at all.
    """
    # Only 'prompt' is forwarded to the Hub tool; no extra kwargs are passed.
    image = _to_pil(_hf_image_tool(prompt=prompt))

    # A missing size attribute is treated the same as a zero-sized image.
    is_empty = getattr(image, "size", (0, 0)) == (0, 0)
    if is_empty:
        raise RuntimeError("Image generation produced an empty image.")

    return image
+# (Optional) Also expose a tool version if the agent decides to use a tool instead of Python.
# Imported next to the tool because the return annotation below is evaluated
# when the function is defined, so a function-scope import would be too late.
from typing import Any


@tool
def make_image(prompt: str) -> Any:
    """Generate an image from a text prompt and return a displayable PIL image.

    The smolagents tool decorator builds the tool schema from this signature
    and docstring, so the return annotation and the per-argument description
    below are required for the decoration to succeed.

    Args:
        prompt: Text description of the picture to generate.

    Returns:
        The PIL image produced by generate_image for the given prompt.
    """
    return generate_image(prompt)
 # ---------- Prompts / Agent / UI ----------
 with open("prompts.yaml", "r") as f:
     prompt_templates = yaml.safe_load(f)
 agent = CodeAgent(
     model=model,
     tools=[
+        final_answer,              # required to end turns
+        make_image,                # tool route (if the LLM picks a tool)
         get_current_time_in_timezone,
         DuckDuckGoSearchTool(),
     ],