File size: 6,864 Bytes
8012ada
5c9c2bd
69b3e25
8012ada
 
496c8fa
8012ada
 
69e96e6
8012ada
 
 
0cbdf4b
69e96e6
8012ada
 
0cbdf4b
69e96e6
 
 
8012ada
0cbdf4b
69e96e6
 
8012ada
69e96e6
 
0cbdf4b
9b5b26a
 
69e96e6
9b5b26a
 
69e96e6
 
5c9c2bd
2713b70
8012ada
 
 
 
496c8fa
8012ada
 
 
 
69b3e25
 
8012ada
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69b3e25
8012ada
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69b3e25
8012ada
 
 
821d222
 
 
 
 
 
8012ada
 
 
 
 
 
821d222
fa3e07e
 
 
821d222
 
fa3e07e
 
 
 
821d222
fa3e07e
 
 
821d222
fa3e07e
 
 
821d222
fa3e07e
821d222
 
 
 
981e13a
821d222
 
 
8012ada
 
 
 
69b3e25
 
8012ada
 
 
 
 
 
69b3e25
69e96e6
8012ada
69b3e25
8012ada
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
import os
import datetime
import yaml
import pytz

from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel , load_tool, tool
from tools.final_answer import FinalAnswerTool  # keep your existing file
from Gradio_UI import GradioUI  # our UI that unwraps FinalAnswerStep and shows images

# ---------------------------
# Tools
# ---------------------------

@tool
def my_custom_tool(arg1: str, arg2: int) -> str:
    """A toy tool that echoes args.
    Args:
        arg1: the first argument
        arg2: the second argument
    """
    # NOTE(review): the docstring is left verbatim because `@tool` presumably
    # reads it to build the tool description - confirm before rewording it.
    # The function only formats both arguments into a fixed message.
    echoed = "my_custom_tool got: arg1={}, arg2={}".format(arg1, arg2)
    return echoed

@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """Fetch the current local time in a specified timezone.
    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    # NOTE(review): docstring kept verbatim; `@tool` presumably exposes it to
    # the model - confirm before rewording it.
    # Any failure (for instance an unrecognised zone name) is reported back as
    # a plain string rather than raised.
    try:
        zone = pytz.timezone(timezone)
        now_in_zone = datetime.datetime.now(zone)
        formatted = now_in_zone.strftime("%Y-%m-%d %H:%M:%S")
        return f"The current local time in {timezone} is: {formatted}"
    except Exception as exc:
        return f"Error fetching time for timezone '{timezone}': {exc!s}"

# Tool instance handed to the agent so it can return its final result.
final_answer = FinalAnswerTool()

# ---------------------------
# Model (works on Spaces)
# ---------------------------
# If the model is gated or you use a private Inference Endpoint, add a Space secret "HF_TOKEN".
model = InferenceClientModel(
    model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
    max_tokens=2096,
    temperature=0.5,
    custom_role_conversions=None,
)

# ---------------------------
# Text-to-image (Hub tool) + wrapper that returns a local file path
# ---------------------------
# NOTE(review): trust_remote_code=True presumably allows the Hub tool's own
# code to run locally - confirm the repository is trusted.
_raw_text_to_image = load_tool(
    "agents-course/text-to-image",
    trust_remote_code=True,
)

@tool
def generate_image(prompt: str) -> str:
    """Generate an image from text and save it locally. Always returns `IMAGE:<abs_path>`.
    Args:
        prompt: The image description to generate.
    """
    # NOTE(review): the docstring is kept verbatim because `@tool` presumably
    # exposes it to the model as the tool description - confirm before
    # rewording. Despite its wording, a failure while decoding or saving makes
    # this function return an "Error generating image: ..." string instead.
    import base64
    import io
    import uuid

    from PIL import Image, ImageDraw

    result = _raw_text_to_image(prompt=prompt)

    out_dir = os.path.abspath("generated_images")
    os.makedirs(out_dir, exist_ok=True)
    # A whole-second timestamp alone collides when two images are produced
    # within the same second; the random suffix keeps every file distinct.
    stamp = int(datetime.datetime.now().timestamp())
    out_path = os.path.join(out_dir, f"img_{stamp}_{uuid.uuid4().hex[:8]}.png")

    def _is_pil(obj):
        # isinstance also matches the format-specific subclasses returned by
        # Image.open (e.g. PngImageFile). Their type name does not contain
        # "PIL.Image", so a substring test on str(type(obj)) misses them.
        return isinstance(obj, Image.Image)

    def _save_pil(img):
        # Write the image to out_path and return the tagged result line.
        img.save(out_path)
        return f"IMAGE:{out_path}"

    def _save_encoded(data):
        # Decode an in-memory encoded image (PNG/JPEG/... bytes) and save it.
        return _save_pil(Image.open(io.BytesIO(data)))

    def _render_text(text):
        # Placeholder: draw the text on a light-grey canvas and save that.
        canvas = Image.new("RGB", (1024, 512), color=(245, 245, 245))
        ImageDraw.Draw(canvas).multiline_text((20, 20), text, fill=(0, 0, 0))
        return _save_pil(canvas)

    try:
        # PIL image returned directly
        if _is_pil(result):
            return _save_pil(result)

        # dict variants: image object, then base64 payload, then file path
        if isinstance(result, dict):
            for key in ("image", "pil_image"):
                if _is_pil(result.get(key)):
                    return _save_pil(result[key])
            for key in ("image_base64", "b64", "base64"):
                encoded = result.get(key)
                if isinstance(encoded, str):
                    return _save_encoded(base64.b64decode(encoded, validate=False))
            for key in ("path", "image_path"):
                candidate = result.get(key)
                if not isinstance(candidate, str):
                    continue
                if os.path.isfile(candidate):
                    return f"IMAGE:{os.path.abspath(candidate)}"
                try:
                    return _save_pil(Image.open(candidate))
                except (OSError, ValueError):
                    # Unreadable or non-image path: try the next key, and
                    # eventually fall through to the placeholder below.
                    continue

        # raw encoded bytes
        if isinstance(result, (bytes, bytearray)):
            return _save_encoded(result)

        # string: existing file path, else base64 payload, else plain text
        if isinstance(result, str):
            if os.path.isfile(result):
                return f"IMAGE:{os.path.abspath(result)}"
            try:
                return _save_encoded(base64.b64decode(result, validate=False))
            except Exception:
                # Deliberately broad: anything that is not a decodable image
                # is treated as text and rendered (capped at 4000 chars).
                return _render_text(result[:4000])

        # ultimate fallback: render the prompt as an image
        return _render_text(prompt)

    except Exception as e:
        return f"Error generating image: {e}"

# NOTE(review): `search` is instantiated here but is not included in the
# `tools` list passed to the agent further down.
search = DuckDuckGoSearchTool()

# ---------------------------
# Prompts
# ---------------------------

# Optional overrides from prompts.yaml; a missing or empty file yields {}.
user_prompts = {}
try:
    with open("prompts.yaml", "r", encoding="utf-8") as stream:
        user_prompts = yaml.safe_load(stream) or {}
except FileNotFoundError:
    pass

# Instruction appended to the default system prompt so that image results
# produced by `generate_image` end up in the final answer.
IMAGE_RULE = "".join(
    [
        "If you generate an image using the `generate_image` tool, ",
        "include the exact returned line `IMAGE:<abs_path>` on a new line in your final answer. ",
        "Always finish by calling final_answer(<plain text> with any IMAGE lines).",
    ]
)

# Built-in prompt defaults, written as plain nested dicts.
# The previous code called PromptTemplates / PlanningPromptTemplate /
# ManagedAgentPromptTemplate / FinalAnswerPromptTemplate and then read
# DEFAULT_PROMPTS, none of which is imported or defined in this file, so the
# module failed with NameError on import.
# NOTE(review): this assumes CodeAgent accepts a plain dict for
# `prompt_templates` (the prompt template classes are presumably TypedDicts) -
# confirm against the installed smolagents version. Also confirm the expected
# planning keys; some versions may require `initial_plan` rather than `plan`.
DEFAULT_PROMPTS = {
    "system_prompt": (
        "You are a helpful coding agent. Use tools when helpful. Prefer concise answers. "
        + IMAGE_RULE
    ),
    "planning": {
        "plan": "Briefly plan steps before acting when needed.",
        "update_plan_pre_messages": "Update the plan considering the latest observation.",
        "update_plan_post_messages": "Summarize the updated plan.",
    },
    "managed_agent": {
        "task": "You are delegated a subtask: {task}. Follow the manager's instructions.",
        "report": "Return a clear, concise report of what you did and the result.",
    },
    "final_answer": {
        "pre_messages": "Provide the final answer the user should see.",
        "post_messages": "End of answer.",
    },
}

# Start with defaults, let user overrides replace them.
# The merge is shallow: a top-level key in prompts.yaml replaces the whole
# default entry, so a user-supplied system_prompt drops IMAGE_RULE.
prompt_templates = {**DEFAULT_PROMPTS, **(user_prompts or {})}


# If user had a separate 'rules' string, append it to system_prompt
if isinstance(user_prompts.get("rules"), str) and user_prompts["rules"].strip():
    prompt_templates["system_prompt"] += "\n" + user_prompts["rules"].strip()

# ---------------------------
# Agent
# ---------------------------
# The agent gets the final-answer tool plus the three custom tools defined
# above, in this order, and is limited to six steps per run.
agent = CodeAgent(
    tools=[final_answer, get_current_time_in_timezone, my_custom_tool, generate_image],
    model=model,
    prompt_templates=prompt_templates,
    max_steps=6,
    verbosity_level=1,
)
# ---------------------------
# Launch Gradio (Spaces-friendly)
# ---------------------------
if __name__ == "__main__":
    # Gradio will bind correctly on Spaces; no need to set host/port manually.
    ui = GradioUI(agent)
    ui.launch()