# Ludo7127 -- "Update app.py" (commit fa3e07e, verified)
import os
import datetime
import yaml
import pytz
from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel , load_tool, tool
from tools.final_answer import FinalAnswerTool # keep your existing file
from Gradio_UI import GradioUI # our UI that unwraps FinalAnswerStep and shows images
# ---------------------------
# Tools
# ---------------------------
@tool
def my_custom_tool(arg1: str, arg2: int) -> str:
    """A toy tool that echoes args.
    Args:
        arg1: the first argument
        arg2: the second argument
    """
    # NOTE(review): the docstring is kept verbatim because `@tool` presumably
    # derives the tool description shown to the model from it -- confirm.
    echoed = f"my_custom_tool got: arg1={arg1}, arg2={arg2}"
    return echoed
@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """Fetch the current local time in a specified timezone.
    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    # NOTE(review): the docstring is kept verbatim because `@tool` presumably
    # derives the tool description shown to the model from it -- confirm.
    try:
        zone = pytz.timezone(timezone)
        now_in_zone = datetime.datetime.now(zone)
        local_time = now_in_zone.strftime("%Y-%m-%d %H:%M:%S")
        return f"The current local time in {timezone} is: {local_time}"
    except Exception as e:
        # Any failure (e.g. an unknown zone name) is reported as text instead
        # of being raised, so the caller always receives a string.
        return f"Error fetching time for timezone '{timezone}': {str(e)}"
# Tool instance through which the agent returns its final answer; it is
# registered in the CodeAgent `tools` list further down.
final_answer = FinalAnswerTool()
# ---------------------------
# Model (works on Spaces)
# ---------------------------
# If the model is gated or you use a private Inference Endpoint, add a Space secret "HF_TOKEN".
# Language model backing the agent; all generation settings are fixed here.
model = InferenceClientModel(
    model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
    temperature=0.5,
    max_tokens=2096,
    custom_role_conversions=None,
)
# ---------------------------
# Text-to-image (Hub tool) + wrapper that returns a local file path
# ---------------------------
# Raw text-to-image tool loaded from the Hub repo "agents-course/text-to-image".
# It is only called through the `generate_image` wrapper below.
# NOTE(security): trust_remote_code=True presumably lets code from that repo
# run in this process -- keep the repo id pinned to a source you trust.
_raw_text_to_image = load_tool("agents-course/text-to-image", trust_remote_code=True)
@tool
def generate_image(prompt: str) -> str:
    """Generate an image from text and save it locally. Always returns `IMAGE:<abs_path>`.
    Args:
        prompt: The image description to generate.
    """
    # NOTE(review): the docstring is kept verbatim because `@tool` presumably
    # derives the tool description shown to the model from it -- confirm.
    #
    # Contract implemented below:
    #   * on success the return value is "IMAGE:<absolute path>";
    #   * if normalising the tool output fails, the return value is
    #     "Error generating image: <reason>";
    #   * exceptions raised by the underlying text-to-image call propagate.
    import base64
    import io

    from PIL import Image, ImageDraw

    result = _raw_text_to_image(prompt=prompt)

    out_dir = os.path.abspath("generated_images")
    os.makedirs(out_dir, exist_ok=True)
    # Microsecond resolution: a whole-second timestamp made two images
    # generated within the same second overwrite each other.
    stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    out_path = os.path.join(out_dir, f"img_{stamp}.png")

    def _save_image(img):
        # Persist an in-memory PIL image to out_path.
        img.save(out_path)
        return f"IMAGE:{out_path}"

    def _save_bytes(data):
        # Decode encoded image bytes (PNG/JPEG/...) and re-save them to out_path.
        with Image.open(io.BytesIO(data)) as img:
            img.save(out_path)
        return f"IMAGE:{out_path}"

    def _save_text(text):
        # Last-resort rendering: draw plain text on a light-grey canvas.
        canvas = Image.new("RGB", (1024, 512), color=(245, 245, 245))
        ImageDraw.Draw(canvas).multiline_text((20, 20), text, fill=(0, 0, 0))
        canvas.save(out_path)
        return f"IMAGE:{out_path}"

    try:
        # isinstance instead of matching "PIL.Image" in str(type(...)): images
        # opened from bytes/files are subclasses such as
        # PIL.PngImagePlugin.PngImageFile, whose type name does not contain
        # "PIL.Image", so the string test sent real images to the text fallback.
        if isinstance(result, Image.Image):
            return _save_image(result)

        # dict variants seen in the wild
        if isinstance(result, dict):
            for key in ("image", "pil_image"):
                if isinstance(result.get(key), Image.Image):
                    return _save_image(result[key])
            for key in ("image_base64", "b64", "base64"):
                if isinstance(result.get(key), str):
                    return _save_bytes(base64.b64decode(result[key], validate=False))
            for key in ("path", "image_path"):
                candidate = result.get(key)
                if not isinstance(candidate, str):
                    continue
                if os.path.isfile(candidate):
                    return f"IMAGE:{os.path.abspath(candidate)}"
                try:
                    with Image.open(candidate) as img:
                        img.save(out_path)
                    return f"IMAGE:{out_path}"
                except OSError:
                    # Unreadable path: try the next key, then the fallbacks below.
                    continue

        # raw bytes
        if isinstance(result, (bytes, bytearray)):
            return _save_bytes(result)

        # string: file path or base64 or text
        if isinstance(result, str):
            if os.path.isfile(result):
                return f"IMAGE:{os.path.abspath(result)}"
            try:
                return _save_bytes(base64.b64decode(result, validate=False))
            except Exception:
                # Deliberately broad: anything that is not decodable image
                # data is treated as plain text and rendered instead.
                return _save_text(result[:4000])

        # ultimate fallback: render the prompt as an image
        return _save_text(prompt)
    except Exception as e:
        return f"Error generating image: {e}"
# NOTE(review): `search` is instantiated here but is not referenced anywhere
# else in the visible source -- in particular it is not in the agent's `tools`
# list below. Confirm whether it should be registered or removed.
search = DuckDuckGoSearchTool()
# ---------------------------
# Prompts
# ---------------------------
# Optional user overrides: prompts.yaml may be absent, in which case no
# overrides apply. An empty file (safe_load -> None) is treated the same way.
try:
    with open("prompts.yaml", "r", encoding="utf-8") as stream:
        user_prompts = yaml.safe_load(stream) or {}
except FileNotFoundError:
    user_prompts = {}
# The code below merges user_prompts with `**` and calls `.get` on it, so a
# YAML list or scalar at the top level cannot work; fail here with a clear
# message instead of an obscure error further down.
if not isinstance(user_prompts, dict):
    raise TypeError(
        "prompts.yaml must contain a mapping at the top level, "
        f"got {type(user_prompts).__name__}"
    )
# Instruction text that is concatenated into the default system prompt below.
# It tells the model to copy the `IMAGE:<abs_path>` line returned by the
# `generate_image` tool into its final answer.
IMAGE_RULE = (
    "If you generate an image using the `generate_image` tool, "
    "include the exact returned line `IMAGE:<abs_path>` on a new line in your final answer. "
    "Always finish by calling final_answer(<plain text> with any IMAGE lines)."
)
# Built-in prompt templates, used for every key prompts.yaml does not override.
#
# The original code called PromptTemplates / PlanningPromptTemplate /
# ManagedAgentPromptTemplate / FinalAnswerPromptTemplate and then read
# DEFAULT_PROMPTS, none of which is imported or defined in this file, so the
# module failed with NameError on import. Plain dicts need no extra names.
# NOTE(review): smolagents declares these templates as TypedDicts, so dict
# literals should be equivalent at runtime -- confirm against the installed
# version. Also confirm the key names under "planning" (e.g. "plan") match
# what that version expects.
DEFAULT_PROMPTS = {
    "system_prompt": (
        "You are a helpful coding agent. Use tools when helpful. Prefer concise answers. "
        + IMAGE_RULE
    ),
    "planning": {
        "plan": "Briefly plan steps before acting when needed.",
        "update_plan_pre_messages": "Update the plan considering the latest observation.",
        "update_plan_post_messages": "Summarize the updated plan.",
    },
    "managed_agent": {
        "task": "You are delegated a subtask: {task}. Follow the manager's instructions.",
        "report": "Return a clear, concise report of what you did and the result.",
    },
    "final_answer": {
        "pre_messages": "Provide the final answer the user should see.",
        "post_messages": "End of answer.",
    },
}
# Start with defaults, let user overrides replace them (shallow merge: a
# top-level key from prompts.yaml replaces the whole default entry).
prompt_templates = {**DEFAULT_PROMPTS, **(user_prompts or {})}
# If user had a separate 'rules' string, append it to system_prompt
extra_rules = user_prompts.get("rules")
if isinstance(extra_rules, str) and extra_rules.strip():
    prompt_templates["system_prompt"] += "\n" + extra_rules.strip()
# ---------------------------
# Agent
# ---------------------------
# The agent: runs on `model`, may call the four tools listed (order preserved),
# is capped at 6 steps, and uses the merged prompt templates built above.
agent = CodeAgent(
    tools=[
        final_answer,
        get_current_time_in_timezone,
        my_custom_tool,
        generate_image,
    ],
    model=model,
    prompt_templates=prompt_templates,
    max_steps=6,
    verbosity_level=1,
)
# ---------------------------
# Launch Gradio (Spaces-friendly)
# ---------------------------
if __name__ == "__main__":
    # Only start the UI when run as a script; host/port are left at the
    # launcher's defaults (no explicit binding is configured here).
    ui = GradioUI(agent)
    ui.launch()