Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -52,64 +52,84 @@ model = HfApiModel(
|
|
| 52 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 53 |
assert HF_TOKEN, "HF_TOKEN is not set in the environment."
|
| 54 |
|
| 55 |
-
# Load the remote image generation tool
|
| 56 |
_hf_image_tool = load_tool(
|
| 57 |
"agents-course/text-to-image",
|
| 58 |
trust_remote_code=True,
|
| 59 |
hf_token=HF_TOKEN,
|
| 60 |
)
|
| 61 |
|
| 62 |
-
# ----------
|
| 63 |
-
|
| 64 |
-
def generate_image(prompt: str, width: int = 768, height: int = 768, steps: int = 25) -> str:
|
| 65 |
-
"""Generate an image from text and return a displayable image.
|
| 66 |
-
|
| 67 |
-
Args:
|
| 68 |
-
prompt (str): What to draw (e.g., "a photorealistic cat on a windowsill").
|
| 69 |
-
width (int): Output width in pixels.
|
| 70 |
-
height (int): Output height in pixels.
|
| 71 |
-
steps (int): Inference steps (quality/speed tradeoff).
|
| 72 |
-
|
| 73 |
-
Returns:
|
| 74 |
-
PIL.Image.Image: The generated image ready for display in Gradio.
|
| 75 |
-
"""
|
| 76 |
-
# Call the HF tool
|
| 77 |
-
img = _hf_image_tool(
|
| 78 |
-
prompt=prompt,
|
| 79 |
-
width=width,
|
| 80 |
-
height=height,
|
| 81 |
-
num_inference_steps=steps
|
| 82 |
-
)
|
| 83 |
-
|
| 84 |
-
# Normalize whatever came back into a PIL image
|
| 85 |
from PIL import Image
|
| 86 |
-
import io, base64
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
-
|
| 90 |
-
# AgentImage -> PIL
|
| 91 |
-
if hasattr(img, "to_pil"):
|
| 92 |
-
pil = img.to_pil()
|
| 93 |
-
# Bytes -> PIL
|
| 94 |
-
elif isinstance(img, (bytes, bytearray)):
|
| 95 |
-
pil = Image.open(io.BytesIO(img)).convert("RGB")
|
| 96 |
-
# Path -> PIL
|
| 97 |
-
elif isinstance(img, str) and _os.path.exists(img):
|
| 98 |
-
pil = Image.open(img).convert("RGB")
|
| 99 |
-
# Already PIL-like
|
| 100 |
-
elif hasattr(img, "size") and callable(getattr(img, "save", None)):
|
| 101 |
-
pil = img
|
| 102 |
-
else:
|
| 103 |
-
# last resort: try base64
|
| 104 |
-
pil = Image.open(io.BytesIO(base64.b64decode(str(img)))).convert("RGB")
|
| 105 |
-
except Exception as e:
|
| 106 |
-
return f"Image generation failed to produce a displayable image: {e}"
|
| 107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
if getattr(pil, "size", (0, 0)) == (0, 0):
|
| 109 |
-
|
| 110 |
return pil
|
| 111 |
|
| 112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
# ---------- Prompts / Agent / UI ----------
|
| 114 |
with open("prompts.yaml", "r") as f:
|
| 115 |
prompt_templates = yaml.safe_load(f)
|
|
@@ -117,8 +137,8 @@ with open("prompts.yaml", "r") as f:
|
|
| 117 |
agent = CodeAgent(
|
| 118 |
model=model,
|
| 119 |
tools=[
|
| 120 |
-
final_answer,
|
| 121 |
-
|
| 122 |
get_current_time_in_timezone,
|
| 123 |
DuckDuckGoSearchTool(),
|
| 124 |
],
|
|
|
|
| 52 |
# Read the Hugging Face access token from the environment. An explicit raise is
# used instead of `assert` because assertions are stripped under `python -O`,
# which would let a missing token slip through to the Hub call below.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise RuntimeError("HF_TOKEN is not set in the environment.")

# Load the remote image generation tool from the Hub.
# NOTE(review): trust_remote_code=True allows code from the
# "agents-course/text-to-image" repository to run locally -- confirm that
# repository is trusted before deploying.
_hf_image_tool = load_tool(
    "agents-course/text-to-image",
    trust_remote_code=True,
    hf_token=HF_TOKEN,
)
|
| 61 |
|
| 62 |
+
# ---------- Helpers to normalize return values into a PIL image ----------
|
| 63 |
+
def _to_pil(obj):
    """Normalize an image tool's return value into a PIL-style image object.

    Shapes are tried in this order:

    1. Wrapper objects exposing a callable ``to_raw()`` or ``to_pil()``;
       the unwrapped value is normalized recursively.
    2. Objects that already look like a PIL image (a ``size`` attribute and
       a callable ``save``); returned unchanged.
    3. ``bytes`` / ``bytearray`` holding an encoded image.
    4. ``str`` naming an existing file, otherwise ``str`` holding base64 data.
    5. ``dict`` carrying an ``"image"``, ``"images"`` or ``"data"`` entry.

    Args:
        obj: The value returned by the image tool.

    Returns:
        The unwrapped or original image object for cases 1-2, or a Pillow
        image converted to RGB for cases 3-5.

    Raises:
        ValueError: If ``obj`` matches none of the supported shapes.
        OSError: If raw bytes or an existing file cannot be opened as an image.
    """
    import base64
    import io
    import os

    # Unwrap wrapper objects before the duck-typed PIL check: a wrapper may
    # itself expose `size`/`save` while the real pixels live on the wrapped
    # object, and its own `size` can be (0, 0).
    # NOTE(review): `to_raw` is the accessor smolagents agent types document;
    # `to_pil` is kept for backward compatibility -- confirm against the
    # installed smolagents version.
    for accessor in ("to_raw", "to_pil"):
        unwrap = getattr(obj, accessor, None)
        if callable(unwrap):
            inner = unwrap()
            # Guard against wrappers that return themselves (or nothing).
            if inner is not None and inner is not obj:
                return _to_pil(inner)

    # Already a PIL-like image.
    if _looks_like_pil(obj):
        return obj

    # Raw encoded bytes.
    if isinstance(obj, (bytes, bytearray)):
        return _open_rgb(io.BytesIO(obj))

    if isinstance(obj, str):
        # File path.
        if os.path.exists(obj):
            return _open_rgb(obj)
        # Base64 payload. binascii.Error is a ValueError and Pillow's
        # "cannot identify image" error is an OSError; either one only means
        # the string was not a base64 image, so fall through to the final
        # ValueError instead of hiding unrelated failures.
        try:
            return _open_rgb(io.BytesIO(base64.b64decode(obj)))
        except (ValueError, OSError):
            pass

    # Dict structures occasionally returned by tools (image / images / data).
    if isinstance(obj, dict):
        candidates = []
        if "image" in obj:
            candidates.append(obj["image"])
        images = obj.get("images")
        if images:
            # Accept either a sequence of images or a single image value.
            if isinstance(images, (list, tuple)):
                candidates.append(images[0])
            else:
                candidates.append(images)
        if "data" in obj:
            candidates.append(obj["data"])
        for candidate in candidates:
            try:
                return _to_pil(candidate)
            except (ValueError, OSError, TypeError):
                continue  # try the next candidate entry

    raise ValueError(f"Unsupported image output type: {type(obj).__name__}")


def _looks_like_pil(obj):
    """Return True if ``obj`` has a ``size`` attribute and a callable ``save``."""
    return hasattr(obj, "size") and callable(getattr(obj, "save", None))


def _open_rgb(source):
    """Open a path or binary stream with Pillow and convert it to RGB."""
    # Imported lazily so inputs that need no decoding do not require Pillow.
    from PIL import Image

    return Image.open(source).convert("RGB")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
+
|
| 108 |
+
# ---------- Plain Python function the agent can call in python_interpreter ----------
|
| 109 |
+
# IMPORTANT: no @tool decorator here.
|
| 110 |
+
def generate_image(prompt: str):
    """
    Create an image for ``prompt`` with the Hub text-to-image tool.

    Meant to be called as a plain function from agent-written Python, e.g.:
        img = generate_image("a photorealistic cat")
        final_answer(img)

    Args:
        prompt: Text description of the image to generate.

    Returns:
        The image object produced by passing the tool's output through
        ``_to_pil``.

    Raises:
        RuntimeError: If the normalized result reports a size of ``(0, 0)``
            or has no ``size`` attribute at all.
    """
    # Only `prompt` is forwarded to the Hub tool.
    # NOTE(review): the original author observed that extra kwargs can fail
    # silently with Hub tools -- confirm before adding width/height/steps.
    image = _to_pil(_hf_image_tool(prompt=prompt))

    # A missing `size` attribute is treated the same as an empty image.
    dimensions = getattr(image, "size", (0, 0))
    if dimensions == (0, 0):
        raise RuntimeError("Image generation produced an empty image.")
    return image
|
| 124 |
|
| 125 |
|
| 126 |
+
# (Optional) Also expose a tool version if the agent decides to use a tool instead of Python.
|
| 127 |
+
# Pillow's Image module is needed at module level for the return annotation
# below; the @tool decorator inspects the function's type hints.
from PIL import Image


@tool
def make_image(prompt: str) -> Image.Image:
    """Generate an image from a text prompt and return it as a PIL image.

    Args:
        prompt: Text description of the image to generate.
    """
    # The decorator builds the tool schema from the signature and docstring,
    # so the return annotation and the `Args:` entry above are required.
    return generate_image(prompt)
|
| 131 |
+
|
| 132 |
+
|
| 133 |
# ---------- Prompts / Agent / UI ----------
|
| 134 |
# Load the agent's prompt templates. The encoding is pinned so parsing does
# not depend on the host's locale default.
with open("prompts.yaml", "r", encoding="utf-8") as f:
    prompt_templates = yaml.safe_load(f)
|
|
|
|
| 137 |
agent = CodeAgent(
|
| 138 |
model=model,
|
| 139 |
tools=[
|
| 140 |
+
final_answer, # required to end turns
|
| 141 |
+
make_image, # tool route (if the LLM picks a tool)
|
| 142 |
get_current_time_in_timezone,
|
| 143 |
DuckDuckGoSearchTool(),
|
| 144 |
],
|