First_agent_smolagent

Runtime error

File size: 6,864 Bytes

8012ada
5c9c2bd
69b3e25
8012ada
 
496c8fa
8012ada
 
69e96e6
8012ada
 
 
0cbdf4b
69e96e6
8012ada
 
0cbdf4b
69e96e6
 
 
8012ada
0cbdf4b
69e96e6
 
8012ada
69e96e6
 
0cbdf4b
9b5b26a
 
69e96e6
9b5b26a
 
69e96e6
 
5c9c2bd
2713b70
8012ada
 
 
 
496c8fa
8012ada
 
 
 
69b3e25
 
8012ada
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69b3e25
8012ada
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69b3e25
8012ada
 
 
821d222
 
 
 
 
 
8012ada
 
 
 
 
 
821d222
fa3e07e
 
 
821d222
 
fa3e07e
 
 
 
821d222
fa3e07e
 
 
821d222
fa3e07e
 
 
821d222
fa3e07e
821d222
 
 
 
981e13a
821d222
 
 
8012ada
 
 
 
69b3e25
 
8012ada
 
 
 
 
 
69b3e25
69e96e6
8012ada
69b3e25
8012ada

import datetime
import os

import pytz
import yaml
from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel , load_tool, tool
from smolagents.agents import (
    FinalAnswerPromptTemplate,
    ManagedAgentPromptTemplate,
    PlanningPromptTemplate,
    PromptTemplates,
)

from tools.final_answer import FinalAnswerTool  # keep your existing file
from Gradio_UI import GradioUI  # our UI that unwraps FinalAnswerStep and shows images

# ---------------------------
# Tools
# ---------------------------

@tool
def my_custom_tool(arg1: str, arg2: int) -> str:
    """A toy tool that echoes args.
    Args:
        arg1: the first argument
        arg2: the second argument
    """
    # The docstring above is consumed by the @tool decorator, so its wording is
    # left untouched; this body simply echoes both arguments back.
    echoed_args = f"arg1={arg1}, arg2={arg2}"
    return f"my_custom_tool got: {echoed_args}"

@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """Fetch the current local time in a specified timezone.
    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    # Any failure (e.g. an unknown zone name) is reported back as text instead
    # of being raised, so the caller always receives a string.
    try:
        zone = pytz.timezone(timezone)
        stamp = datetime.datetime.now(zone).strftime("%Y-%m-%d %H:%M:%S")
    except Exception as e:
        return f"Error fetching time for timezone '{timezone}': {str(e)}"
    else:
        return f"The current local time in {timezone} is: {stamp}"

# Single shared instance of the project's FinalAnswerTool (tools/final_answer.py);
# it is passed to the agent's tool list further down in this file.
final_answer = FinalAnswerTool()

# ---------------------------
# Model (works on Spaces)
# ---------------------------
# If the model is gated or you use a private Inference Endpoint, add a Space secret "HF_TOKEN".
# NOTE(review): max_tokens=2096 may be a typo for 2048 — confirm the intended limit.
model = InferenceClientModel (
    max_tokens=2096,  # presumably the per-call generation token cap — confirm
    temperature=0.5,
    model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
    custom_role_conversions=None,
)

# ---------------------------
# Text-to-image (Hub tool) + wrapper that returns a local file path
# ---------------------------
# Loaded once at import time and called by `generate_image` below.
# NOTE(review): trust_remote_code=True presumably allows code from the Hub repo
# to run locally — confirm the repo is trusted.
_raw_text_to_image = load_tool("agents-course/text-to-image", trust_remote_code=True)

@tool
def generate_image(prompt: str) -> str:
    """Generate an image from text and save it locally. Always returns `IMAGE:<abs_path>`.
    Args:
        prompt: The image description to generate.
    """
    # Contract notes (kept out of the docstring, which @tool turns into the
    # tool description shown to the model):
    #   * The Hub tool's result is normalised to a file on disk, whatever shape
    #     it arrives in (PIL image, dict wrapper, raw bytes, path, base64 text).
    #   * On success the return value is "IMAGE:<absolute path>".
    #   * If normalisation itself fails, "Error generating image: <reason>" is
    #     returned instead of raising.
    #   * Errors raised by the Hub tool call are not caught here.
    import base64
    import io

    from PIL import Image, ImageDraw

    result = _raw_text_to_image(prompt=prompt)

    out_dir = os.path.abspath("generated_images")
    os.makedirs(out_dir, exist_ok=True)
    # Microsecond resolution: with whole seconds, two images generated within
    # the same second would silently overwrite each other.
    stamp = int(datetime.datetime.now().timestamp() * 1_000_000)
    out_path = os.path.join(out_dir, f"img_{stamp}.png")

    def _tagged(path):
        # Marker format the rest of the app looks for in the final answer.
        return f"IMAGE:{path}"

    def _save_pil(img):
        # Persist an in-memory PIL image to the output path.
        img.save(out_path)
        return _tagged(out_path)

    def _save_encoded(raw):
        # Decode encoded image bytes (PNG/JPEG/...) and re-save them as PNG.
        Image.open(io.BytesIO(raw)).save(out_path)
        return _tagged(out_path)

    def _save_text(text):
        # Last-resort placeholder: draw the text on a light grey canvas.
        canvas = Image.new("RGB", (1024, 512), color=(245, 245, 245))
        ImageDraw.Draw(canvas).multiline_text((20, 20), text, fill=(0, 0, 0))
        canvas.save(out_path)
        return _tagged(out_path)

    try:
        # isinstance also matches Pillow subclasses (e.g. PngImageFile,
        # JpegImageFile) whose type name does not contain "PIL.Image"; a
        # string match on the type name would miss them and fall through to
        # the text placeholder.
        if isinstance(result, Image.Image):
            return _save_pil(result)

        # dict variants seen in the wild
        if isinstance(result, dict):
            for key in ("image", "pil_image"):
                if isinstance(result.get(key), Image.Image):
                    return _save_pil(result[key])
            for key in ("image_base64", "b64", "base64"):
                encoded = result.get(key)
                if isinstance(encoded, str):
                    return _save_encoded(base64.b64decode(encoded, validate=False))
            for key in ("path", "image_path"):
                candidate = result.get(key)
                # A path that is not an existing file cannot be opened as an
                # image either, so just move on to the next candidate key.
                if isinstance(candidate, str) and os.path.isfile(candidate):
                    return _tagged(os.path.abspath(candidate))

        # raw bytes
        if isinstance(result, (bytes, bytearray)):
            return _save_encoded(result)

        # string: file path or base64 or text
        if isinstance(result, str):
            if os.path.isfile(result):
                return _tagged(os.path.abspath(result))
            try:
                return _save_encoded(base64.b64decode(result, validate=False))
            except Exception:
                # Deliberate best-effort: anything that is not a decodable
                # image is treated as plain text and rendered (truncated).
                return _save_text(result[:4000])

        # ultimate fallback: render the prompt as an image
        return _save_text(prompt)

    except Exception as e:
        return f"Error generating image: {e}"

# Web-search tool instance.
# NOTE(review): `search` is not included in the agent's `tools` list in this
# file — confirm whether it should be registered or removed.
search = DuckDuckGoSearchTool()

# ---------------------------
# Prompts
# ---------------------------

# Optional user overrides: a missing prompts.yaml simply means "use the defaults".
try:
    with open("prompts.yaml", "r", encoding="utf-8") as stream:
        user_prompts = yaml.safe_load(stream) or {}
except FileNotFoundError:
    user_prompts = {}

# Everything below treats the overrides as a mapping; ignore any other YAML
# top-level shape (list, scalar) instead of failing later on `.get`.
if not isinstance(user_prompts, dict):
    user_prompts = {}

IMAGE_RULE = (
    "If you generate an image using the `generate_image` tool, "
    "include the exact returned line `IMAGE:<abs_path>` on a new line in your final answer. "
    "Always finish by calling final_answer(<plain text> with any IMAGE lines)."
)

# Built-in defaults. These were previously assigned straight to
# `prompt_templates` and then overwritten by a merge that referenced an
# undefined `DEFAULT_PROMPTS`; naming them DEFAULT_PROMPTS makes the merge work.
DEFAULT_PROMPTS = PromptTemplates(
    system_prompt=(
        "You are a helpful coding agent. Use tools when helpful. Prefer concise answers. "
        + IMAGE_RULE
    ),
    planning=PlanningPromptTemplate(
        # `initial_plan` is the key smolagents' PlanningPromptTemplate declares;
        # the former `plan` key is not part of that schema.
        initial_plan="Briefly plan steps before acting when needed.",
        update_plan_pre_messages="Update the plan considering the latest observation.",
        update_plan_post_messages="Summarize the updated plan.",
    ),
    managed_agent=ManagedAgentPromptTemplate(
        task="You are delegated a subtask: {task}. Follow the manager's instructions.",
        report="Return a clear, concise report of what you did and the result.",
    ),
    final_answer=FinalAnswerPromptTemplate(
        pre_messages="Provide the final answer the user should see.",
        post_messages="End of answer.",
    ),
)


def _merge_prompt_templates(defaults, overrides):
    """Return `defaults` with `overrides` applied on top.

    Sections that are mappings on both sides are merged key by key, so a
    partial override (e.g. only `planning.initial_plan`) keeps the remaining
    default sub-keys. Any other value replaces the default outright.
    """
    merged = dict(defaults)
    for section, override in overrides.items():
        base = merged.get(section)
        if isinstance(base, dict) and isinstance(override, dict):
            merged[section] = {**base, **override}
        else:
            merged[section] = override
    return merged


# Start with defaults, let user overrides replace them
prompt_templates = _merge_prompt_templates(DEFAULT_PROMPTS, user_prompts)


# If user had a separate 'rules' string, append it to system_prompt
_user_rules = user_prompts.get("rules")
if isinstance(_user_rules, str) and _user_rules.strip():
    prompt_templates["system_prompt"] += "\n" + _user_rules.strip()

# ---------------------------
# Agent
# ---------------------------
# Build the agent from the model, the tool instances defined above and the
# prompt templates assembled in the "Prompts" section.
agent = CodeAgent(
    model=model,
    tools=[
        final_answer,
        get_current_time_in_timezone,
        my_custom_tool,
        generate_image,
    ],
    max_steps=6,  # presumably caps the number of agent steps per run — confirm
    verbosity_level=1,
    prompt_templates=prompt_templates,
)
# ---------------------------
# Launch Gradio (Spaces-friendly)
# ---------------------------
# Only start the UI when this file is executed as a script, not when imported.
if __name__ == "__main__":
    # Gradio will bind correctly on Spaces; no need to set host/port manually.
    GradioUI(agent).launch()