Ludo7127 committed on
Commit
3e622c0
·
verified ·
1 Parent(s): 2713b70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -47
app.py CHANGED
@@ -52,64 +52,84 @@ model = HfApiModel(
52
  HF_TOKEN = os.getenv("HF_TOKEN")
53
  assert HF_TOKEN, "HF_TOKEN is not set in the environment."
54
 
55
- # Load the remote image generation tool (kept private; the agent will use our wrapper below)
56
  _hf_image_tool = load_tool(
57
  "agents-course/text-to-image",
58
  trust_remote_code=True,
59
  hf_token=HF_TOKEN,
60
  )
61
 
62
- # ---------- Our wrapper tool that ALWAYS returns a PIL image ----------
63
- @tool
64
- def generate_image(prompt: str, width: int = 768, height: int = 768, steps: int = 25) -> str:
65
- """Generate an image from text and return a displayable image.
66
-
67
- Args:
68
- prompt (str): What to draw (e.g., "a photorealistic cat on a windowsill").
69
- width (int): Output width in pixels.
70
- height (int): Output height in pixels.
71
- steps (int): Inference steps (quality/speed tradeoff).
72
-
73
- Returns:
74
- PIL.Image.Image: The generated image ready for display in Gradio.
75
- """
76
- # Call the HF tool
77
- img = _hf_image_tool(
78
- prompt=prompt,
79
- width=width,
80
- height=height,
81
- num_inference_steps=steps
82
- )
83
-
84
- # Normalize whatever came back into a PIL image
85
  from PIL import Image
86
- import io, base64
87
- import os as _os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
- try:
90
- # AgentImage -> PIL
91
- if hasattr(img, "to_pil"):
92
- pil = img.to_pil()
93
- # Bytes -> PIL
94
- elif isinstance(img, (bytes, bytearray)):
95
- pil = Image.open(io.BytesIO(img)).convert("RGB")
96
- # Path -> PIL
97
- elif isinstance(img, str) and _os.path.exists(img):
98
- pil = Image.open(img).convert("RGB")
99
- # Already PIL-like
100
- elif hasattr(img, "size") and callable(getattr(img, "save", None)):
101
- pil = img
102
- else:
103
- # last resort: try base64
104
- pil = Image.open(io.BytesIO(base64.b64decode(str(img)))).convert("RGB")
105
- except Exception as e:
106
- return f"Image generation failed to produce a displayable image: {e}"
107
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  if getattr(pil, "size", (0, 0)) == (0, 0):
109
- return "Image generation produced an empty image."
110
  return pil
111
 
112
 
 
 
 
 
 
 
 
113
  # ---------- Prompts / Agent / UI ----------
114
  with open("prompts.yaml", "r") as f:
115
  prompt_templates = yaml.safe_load(f)
@@ -117,8 +137,8 @@ with open("prompts.yaml", "r") as f:
117
  agent = CodeAgent(
118
  model=model,
119
  tools=[
120
- final_answer, # keep this so the agent can end its turn
121
- generate_image, # <- use this to make pictures appear
122
  get_current_time_in_timezone,
123
  DuckDuckGoSearchTool(),
124
  ],
 
52
  HF_TOKEN = os.getenv("HF_TOKEN")
53
  assert HF_TOKEN, "HF_TOKEN is not set in the environment."
54
 
55
+ # Load the remote image generation tool from the Hub
56
  _hf_image_tool = load_tool(
57
  "agents-course/text-to-image",
58
  trust_remote_code=True,
59
  hf_token=HF_TOKEN,
60
  )
61
 
62
+ # ---------- Helpers to normalize return values into a PIL image ----------
63
def _to_pil(obj):
    """Normalize an image-like tool result into a PIL image.

    The following input shapes are tried in order:

    * wrapper objects exposing ``to_raw()`` or ``to_pil()`` (smolagents'
      ``AgentImage`` provides ``to_raw()``) -- the method's result is returned;
    * objects that already look like a PIL image (a ``size`` attribute and a
      callable ``save``) -- returned unchanged;
    * ``bytes`` / ``bytearray`` holding an encoded image;
    * ``str`` naming an existing file, otherwise treated as base64 data;
    * ``dict`` carrying the payload under ``"image"``, ``"images"`` (first
      element) or ``"data"``.

    Args:
        obj: Raw value returned by the image generation tool.

    Returns:
        The image. Decoded inputs (bytes, file path, base64) are converted
        to RGB; wrapper and PIL-like inputs are returned as provided.

    Raises:
        ValueError: If ``obj`` matches none of the supported shapes. The last
            decoding error, if any, is attached as ``__cause__``.
        Exception: Errors raised by PIL while opening raw bytes or an
            existing file propagate unchanged.
    """
    import base64
    import io
    import os

    def _open_rgb(source):
        # PIL is imported only when decoding is actually needed, so inputs
        # that are already images never depend on it here.
        from PIL import Image

        return Image.open(source).convert("RGB")

    # Wrapper types: duck-typed so no wrapper class has to be in scope.
    # The wrapper's own accessor is preferred over treating the wrapper
    # itself as the image.
    for accessor_name in ("to_raw", "to_pil"):
        accessor = getattr(obj, accessor_name, None)
        if callable(accessor):
            return accessor()

    # Already a PIL image (or something that quacks like one).
    if hasattr(obj, "size") and callable(getattr(obj, "save", None)):
        return obj

    # Raw encoded bytes.
    if isinstance(obj, (bytes, bytearray)):
        return _open_rgb(io.BytesIO(obj))

    failure = None  # most recent best-effort decoding error, kept for context

    if isinstance(obj, str):
        # File path on disk.
        if os.path.exists(obj):
            return _open_rgb(obj)
        # Otherwise try base64; this is best-effort, so remember the error
        # instead of discarding it and fall through to the final ValueError.
        try:
            return _open_rgb(io.BytesIO(base64.b64decode(obj)))
        except Exception as exc:
            failure = exc

    # Dict structures occasionally returned by tools (image / images / data).
    if isinstance(obj, dict):
        candidates = []
        if "image" in obj:
            candidates.append(obj["image"])
        images = obj.get("images")
        # Only index into real sequences so an unexpected value cannot crash.
        if isinstance(images, (list, tuple)) and images:
            candidates.append(images[0])
        if "data" in obj:
            candidates.append(obj["data"])
        for candidate in candidates:
            try:
                return _to_pil(candidate)
            except Exception as exc:
                failure = exc

    raise ValueError(
        f"Unsupported image output type: {type(obj).__name__}"
    ) from failure
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
+
108
+ # ---------- Plain Python function the agent can call in python_interpreter ----------
109
+ # IMPORTANT: no @tool decorator here.
110
def generate_image(prompt: str):
    """Generate an image with the Hub text-to-image tool and return a PIL image.

    Intended to be called from agent-written code, for example::

        img = generate_image("a photorealistic cat")
        final_answer(img)

    Args:
        prompt: Text description of the image to draw.

    Returns:
        The image obtained by passing the tool's raw output through
        ``_to_pil``.

    Raises:
        ValueError: Propagated from ``_to_pil`` when the tool output is not a
            supported image shape.
        RuntimeError: If the resulting image has a zero width or height.
    """
    # Only `prompt` is forwarded: the original author notes that extra kwargs
    # can fail silently with Hub tools.
    raw = _hf_image_tool(prompt=prompt)
    pil = _to_pil(raw)

    # Guard against empty images. A missing `size` is treated as (0, 0), and
    # a zero in either dimension means there are no pixels to display.
    dims = getattr(pil, "size", (0, 0))
    if isinstance(dims, (tuple, list)) and 0 in dims:
        raise RuntimeError("Image generation produced an empty image.")
    return pil
124
 
125
 
126
+ # (Optional) Also expose a tool version if the agent decides to use a tool instead of Python.
127
@tool
def make_image(prompt: str) -> object:
    """Generate an image from text and return a displayable image (PIL).

    Args:
        prompt: Text description of the image to generate.

    Returns:
        The image returned by generate_image for this prompt.
    """
    # The return annotation and the Args entry above are what the @tool
    # decorator reads to build the tool's schema; keep them in sync with
    # the signature.
    return generate_image(prompt)
131
+
132
+
133
  # ---------- Prompts / Agent / UI ----------
134
  with open("prompts.yaml", "r") as f:
135
  prompt_templates = yaml.safe_load(f)
 
137
  agent = CodeAgent(
138
  model=model,
139
  tools=[
140
+ final_answer, # required to end turns
141
+ make_image, # tool route (if the LLM picks a tool)
142
  get_current_time_in_timezone,
143
  DuckDuckGoSearchTool(),
144
  ],