import os
import datetime

import yaml
import pytz
from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel, load_tool, tool

from tools.final_answer import FinalAnswerTool  # keep your existing file
from Gradio_UI import GradioUI  # our UI that unwraps FinalAnswerStep and shows images


# ---------------------------
# Tools
# ---------------------------
@tool
def my_custom_tool(arg1: str, arg2: int) -> str:
    """A toy tool that echoes args.

    Args:
        arg1: the first argument
        arg2: the second argument
    """
    return f"my_custom_tool got: arg1={arg1}, arg2={arg2}"


@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """Fetch the current local time in a specified timezone.

    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    try:
        tz = pytz.timezone(timezone)
        local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
        return f"The current local time in {timezone} is: {local_time}"
    except Exception as e:
        # pytz raises UnknownTimeZoneError for bad names; report it as text so
        # the agent can recover instead of crashing the step.
        return f"Error fetching time for timezone '{timezone}': {str(e)}"


final_answer = FinalAnswerTool()

# ---------------------------
# Model (works on Spaces)
# ---------------------------
# If the model is gated or you use a private Inference Endpoint, add a Space secret "HF_TOKEN".
model = InferenceClientModel(
    max_tokens=2096,
    temperature=0.5,
    model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
    custom_role_conversions=None,
)

# ---------------------------
# Text-to-image (Hub tool) + wrapper that returns a local file path
# ---------------------------
_raw_text_to_image = load_tool("agents-course/text-to-image", trust_remote_code=True)


@tool
def generate_image(prompt: str) -> str:
    """Generate an image from text and save it locally. Always returns `IMAGE:`.

    Args:
        prompt: The image description to generate.
    """
    result = _raw_text_to_image(prompt=prompt)

    # Imported lazily so the module loads even if Pillow is only needed here.
    import base64, io
    from PIL import Image, ImageDraw

    out_dir = os.path.abspath("generated_images")
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, f"img_{int(datetime.datetime.now().timestamp())}.png")

    def _save_pil(img):
        # Persist a PIL image and return the destination path.
        img.save(out_path)
        return out_path

    try:
        # PIL Image?
        if "PIL.Image" in str(type(result)):
            return f"IMAGE:{_save_pil(result)}"

        # dict variants seen in the wild
        if isinstance(result, dict):
            for key in ["image", "pil_image"]:
                if key in result and "PIL.Image" in str(type(result[key])):
                    return f"IMAGE:{_save_pil(result[key])}"
            for key in ["image_base64", "b64", "base64"]:
                if key in result and isinstance(result[key], str):
                    data = base64.b64decode(result[key], validate=False)
                    Image.open(io.BytesIO(data)).save(out_path)
                    return f"IMAGE:{out_path}"
            for key in ["path", "image_path"]:
                if key in result and isinstance(result[key], str):
                    p = result[key]
                    if os.path.isfile(p):
                        return f"IMAGE:{os.path.abspath(p)}"
                    try:
                        Image.open(p).save(out_path)
                        return f"IMAGE:{out_path}"
                    except Exception:
                        pass

        # raw bytes
        if isinstance(result, (bytes, bytearray)):
            Image.open(io.BytesIO(result)).save(out_path)
            return f"IMAGE:{out_path}"

        # string: file path or base64 or text
        if isinstance(result, str):
            if os.path.isfile(result):
                return f"IMAGE:{os.path.abspath(result)}"
            try:
                data = base64.b64decode(result, validate=False)
                Image.open(io.BytesIO(data)).save(out_path)
                return f"IMAGE:{out_path}"
            except Exception:
                # Not decodable as an image: render the text itself so the
                # caller still gets a viewable artifact.
                img = Image.new("RGB", (1024, 512), color=(245, 245, 245))
                d = ImageDraw.Draw(img)
                d.multiline_text((20, 20), result[:4000], fill=(0, 0, 0))
                img.save(out_path)
                return f"IMAGE:{out_path}"

        # ultimate fallback: render the prompt as an image
        img = Image.new("RGB", (1024, 512), color=(245, 245, 245))
        d = ImageDraw.Draw(img)
        d.multiline_text((20, 20), prompt, fill=(0, 0, 0))
        img.save(out_path)
        return f"IMAGE:{out_path}"
    except Exception as e:
        return f"Error generating image: {e}"


search = DuckDuckGoSearchTool()
# ---------------------------
# Prompts
# ---------------------------
try:
    with open("prompts.yaml", "r", encoding="utf-8") as stream:
        user_prompts = yaml.safe_load(stream) or {}
except FileNotFoundError:
    user_prompts = {}

# Rule that teaches the agent to surface images generated by `generate_image`
# in a form the Gradio UI can detect and render.
IMAGE_RULE = (
    "If you generate an image using the `generate_image` tool, "
    "include the exact returned line `IMAGE:` on a new line in your final answer. "
    "Always finish by calling final_answer( with any IMAGE lines)."
)

# Defaults; user overrides from prompts.yaml replace them key-by-key.
# (Fixes a NameError: the previous code referenced PromptTemplates /
# DEFAULT_PROMPTS classes that were never imported or defined, and the
# constructed object was immediately overwritten by the dict merge anyway.)
DEFAULT_PROMPTS = {
    "system_prompt": (
        "You are a helpful coding agent. Use tools when helpful. Prefer concise answers."
    ),
}

# Start with defaults, let user overrides replace them
prompt_templates = {**DEFAULT_PROMPTS, **user_prompts}

# Always append the image rule AFTER the merge so it survives a user-supplied
# system_prompt; .get guards against a YAML file with no system_prompt key.
prompt_templates["system_prompt"] = (
    prompt_templates.get("system_prompt", "") + "\n" + IMAGE_RULE
).strip()

# If user had a separate 'rules' string, append it to system_prompt
if isinstance(user_prompts.get("rules"), str) and user_prompts["rules"].strip():
    prompt_templates["system_prompt"] += "\n" + user_prompts["rules"].strip()

# ---------------------------
# Agent
# ---------------------------
agent = CodeAgent(
    model=model,
    tools=[
        final_answer,
        get_current_time_in_timezone,
        my_custom_tool,
        generate_image,
        search,  # fix: the DuckDuckGo search tool was instantiated but never registered
    ],
    max_steps=6,
    verbosity_level=1,
    prompt_templates=prompt_templates,
)

# ---------------------------
# Launch Gradio (Spaces-friendly)
# ---------------------------
if __name__ == "__main__":
    # Gradio will bind correctly on Spaces; no need to set host/port manually.
    GradioUI(agent).launch()