jkorstad committed on
Commit
24fb7b9
·
verified ·
1 Parent(s): 2994b0b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -164
app.py CHANGED
@@ -3,7 +3,7 @@ import os
3
  import shutil
4
  from gradio_client import Client, handle_file # handle_file might be used by the agent
5
  # Use InferenceClientModel instead of HfApiModel
6
- from smolagents import Tool, CodeAgent, InferenceClientModel, ToolCollection
7
  import uuid
8
  import httpx # Often a dependency for HTTP clients, good to have
9
  from tenacity import retry, stop_after_attempt, wait_exponential
@@ -12,10 +12,6 @@ from PIL import Image # For potential image manipulation by the agent
12
  import traceback # For more detailed error logging if needed
13
 
14
  # Define initial tools from Spaces
15
- # Commenting out problematic spaces for now.
16
- # You'll need to verify their api_name or compatibility if you re-enable them.
17
- # Ensure the api_name is correct if you uncomment these.
18
- # Visit the HF Space page and look for "API - via gradio_client" for hints.
19
  spaces = [
20
  {"repo_id": "black-forest-labs/FLUX.1-schnell",
21
  "name": "image_generator_flux_schnell",
@@ -33,110 +29,100 @@ spaces = [
33
  "name": "pdf_text_extraction_mineru",
34
  "description": "Extracts the text of a PDF up to 20 pages long using MinerU. Expects a PDF file.",
35
  "api_name": "/to_pdf"},
36
- # {"repo_id": "InstantX/InstantCharacter",
37
- # "name": "instant_character_customization",
38
- # "description": "Personalize Any Characters with a Scalable Diffusion Transformer Framework to any style or pose using InstantCharacter. Expects an input image and potentially pose/style images or prompts.",
39
- # "api_name": "/predict"}, # Example: Verify this api_name if re-enabling
40
- # {"repo_id": "fotographerai/Zen-Style-Shape",
41
- # "name": "img_to_img_style_transfer_zen_shape",
42
- # "description": "Flux[dev] Redux + Flux[dev] Canny. Implements a custom image-to-image style transfer pipeline blending style from Image A to structure of Image B. Expects two images.",
43
- # "api_name": "/predict"}, # Example: Verify this api_name if re-enabling
44
- # {"repo_id": "moonshotai/Kimi-VL-A3B-Thinking",
45
- # "name": "multimodal_vlm_llm_kimi",
46
- # "description": "Kimi-VL-A3B-Thinking is a multi-modal LLM that can understand text and images, and generate text with thinking processes. Ask any question about an image. Expects text and optionally an image.",
47
- # "api_name": "/chat"}, # Example: Verify this api_name if re-enabling
48
  ]
49
 
50
  # Create tools from predefined Spaces with retry logic
51
  tools = []
52
  for space_info in spaces:
53
  repo_id = space_info['repo_id']
54
- name = space_info.get('name', repo_id.split('/')[-1].replace('-', '_')) # Default name from repo_id
55
  description = space_info.get('description', f'A tool to interact with the Hugging Face Space: {repo_id}')
56
- api_name = space_info.get('api_name') # Can be None, Tool.from_space will try to infer
57
 
58
  @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
59
  def create_tool_with_retry(repo_id, name, description, api_name):
60
- # If api_name is None, Tool.from_space will try to find a public API endpoint.
61
  print(f"Attempting to create tool: '{name}' from space: {repo_id} with api_name: {api_name}")
62
  new_tool = Tool.from_space(repo_id, name=name, description=description, api_name=api_name)
63
- # Explicitly check if name attribute is set after creation by Tool.from_space
64
  if not hasattr(new_tool, 'name') or new_tool.name != name:
65
  print(f"WARNING: Tool '{name}' from space {repo_id} might have a name mismatch or missing name attribute after creation. Actual name: {getattr(new_tool, 'name', 'MISSING')}")
66
  return new_tool
67
 
68
  try:
69
- tool = create_tool_with_retry(repo_id, name, description, api_name)
70
- tools.append(tool)
71
  print(f"Successfully loaded predefined tool: {name} from {repo_id}")
72
  except Exception as e:
73
  print(f"Failed to load predefined tool from {repo_id}. Error: {str(e)}. Continuing with available tools.")
74
 
75
- # Load tools from a Hugging Face Collection (User has this commented out)
76
- #collection_slug = "jkorstad/tools-680127d17eed47e759549ff4"
77
- #try:
78
- # collection = ToolCollection.from_hub(collection_slug=collection_slug, trust_remote_code=True)
79
- # tools.extend(collection.tools)
80
- # print(f"Successfully loaded tools from collection: {collection_slug}")
81
- #except Exception as e:
82
- # print(f"Warning: Failed to load collection {collection_slug}. Error: {str(e)}")
83
-
84
-
85
- # Tool for searching Hugging Face Spaces
86
- def search_hf_spaces(query: str, top_k: int = 3) -> str:
87
- """
88
- Searches Hugging Face Spaces for a given query and returns the top_k results.
89
- Provides repo_id, description, likes, and last modified date for each space found.
90
- Use this to discover new tools if the existing ones are not suitable.
91
- To use a found space, try: new_tool = Tool.from_space(repo_id='the_space_id', name='a_descriptive_name')
92
- Then call it: result = new_tool(param1=value1, ...)
93
- """
94
- try:
95
- print(f"Searching spaces with query: {query}, top_k: {top_k}")
96
- spaces_found = list(list_spaces(search=query, full=True, limit=top_k, sort="likes", direction=-1))
97
- if not spaces_found:
98
- return "No Spaces found for your query."
99
-
100
- results = "Found the following Spaces (sorted by likes):\n"
101
- for i, space_data in enumerate(spaces_found):
102
- description = "No description provided."
103
- if hasattr(space_data, 'cardData') and space_data.cardData and 'description' in space_data.cardData:
104
- description = space_data.cardData['description']
105
- elif hasattr(space_data, 'title') and space_data.title:
106
- description = space_data.title
107
-
108
- results += (
109
- f"{i+1}. ID: {space_data.id}\n"
110
- f" Description: {description}\n"
111
- f" Likes: {space_data.likes if hasattr(space_data, 'likes') else 'N/A'}\n"
112
- f" Last Modified: {space_data.lastModified if hasattr(space_data, 'lastModified') else 'N/A'}\n\n"
113
- )
114
- results += ("\nTo use one of these, you can try creating a tool in the code like this: "
115
- "my_new_tool = Tool.from_space(repo_id='SPACE_ID_HERE', name='custom_tool_name'). "
116
- "Then you can call it: result = my_new_tool(argument_name=value). "
117
- "The arguments depend on the specific Space. If Tool.from_space fails or the tool doesn't work, "
118
- "the Space might not have a compatible public API or may require a specific api_name.")
119
- return results
120
- except Exception as e:
121
- print(f"Error searching Spaces: {str(e)}")
122
- return f"Error searching Spaces: {str(e)}"
123
-
124
- space_search_tool = Tool(
125
- name="huggingface_space_searcher",
126
- description="Searches for Hugging Face Spaces that can perform a specific task. Input is a search query string (e.g., 'text to image', 'speech recognition'). Returns a list of Space IDs, their descriptions, and instructions on how to try using them.",
127
- func=search_hf_spaces,
128
- )
 
 
 
 
 
129
  tools.append(space_search_tool)
130
 
 
131
  # --- Debugging: Inspect tools before CodeAgent initialization ---
132
  print("\n--- Inspecting tools before CodeAgent initialization ---")
133
  for i, t in enumerate(tools):
134
  if t is None:
135
  print(f"Tool at index {i} is None!")
136
- # This would cause an error later, but the current error is 'Tool' object has no attribute 'name'
137
  continue
138
  try:
139
- # Attempt to access the name attribute
140
  tool_name = t.name
141
  print(f"Tool {i}: Name='{tool_name}', Type={type(t)}")
142
  except AttributeError:
@@ -147,55 +133,43 @@ print("-------------------------------------------------------\n")
147
 
148
 
149
  # Initialize the model - Use InferenceClientModel
150
- model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct") # Or your preferred model
151
 
152
- # Create the agent - Removed system_prompt from constructor
153
  agent = CodeAgent(
154
  tools=tools,
155
  model=model,
156
  additional_authorized_imports=['PIL', 'Pillow', 'os', 'sys', 'numpy', 'huggingface_hub', 'gradio_client', 'uuid'],
157
- add_base_tools=True, # Includes web search, python interpreter
158
  )
159
 
160
- # This is the detailed instruction set that was previously in system_prompt
161
  AGENT_INSTRUCTIONS = """You are a highly capable AI assistant. Your primary goal is to accomplish tasks using a variety of tools, prioritizing Hugging Face Spaces.
162
 
163
  Follow these steps:
164
  1. **Understand the Request:** Carefully analyze the user's prompt (which will follow these instructions). Identify the core task and any specific requirements or inputs.
165
- 2. **Check Predefined Tools:** Review your list of available tools. If a predefined tool can directly address the request, use it.
166
  3. **Search for Spaces (If Needed):** If no predefined tool is suitable, use the `huggingface_space_searcher` tool. Provide a concise search query related to the task (e.g., "image classification", "voice cloning", "document question answering").
167
- 4. **Select and Instantiate a Space Tool:** From the search results, choose the most promising Space. Attempt to create a tool from it using `Tool.from_space(repo_id='SELECTED_SPACE_ID', name='a_unique_tool_name')`. You might need to give it a unique name. If `Tool.from_space` fails, the Space might not be compatible, or you could try another one from the search results. Note that some Spaces might not have a public API or may require a specific `api_name` that `Tool.from_space` cannot infer; in such cases, you might not be able to use them.
168
  5. **Execute the Tool:** Call the tool (either predefined or dynamically created) with the necessary arguments.
169
- * **File Inputs:** If the user uploads files, their paths will be available as global string variables: `input_image_path`, `input_audio_path`, `input_video_path`, `input_3d_model_path`, `input_file_path`. Before using these variables, check if they exist and are not None (e.g., `if 'input_image_path' in globals() and input_image_path:`). Pass these file paths as arguments to tools that require them. `Tool.from_space` handles file uploads for compatible Spaces when you pass the filepath string.
170
- * **Chaining Tools:** If the task requires multiple steps, chain the tools together, passing the output of one tool as the input to the next.
171
  6. **Output Management:**
172
- * If a tool generates a file (image, audio, etc.), save it to the current working directory using a unique filename (e.g., `output_filename = os.path.join(os.getcwd(), f"{uuid.uuid4()}.png")`).
173
- * **Return the RESULT:** Your final response should be either:
174
- * A string containing the direct text answer.
175
- * The string path to the generated output file (e.g., `return output_filename`).
176
- 7. **Clarity and Error Handling:** If you encounter issues (e.g., a Space tool fails, required inputs are missing), clearly explain the problem in your response. If a Space doesn't work, try to explain why or suggest an alternative if possible.
177
 
178
  Example of dynamically using a Space after searching:
179
  ```python
180
- # This is an example of how I, the agent, would think and act.
181
- # User's actual prompt would follow these instructions.
182
- # Example user prompt: "Find a space that can make an image of a cat and then use it."
183
- #
184
- # My thought process:
185
- # 1. The user wants an image of a cat, and wants me to find a Space for it.
186
- # 2. I'll use `huggingface_space_searcher`.
187
  # search_results = huggingface_space_searcher(query="text to image cat")
188
- # print(search_results) # This would show me some options. Let's say 'user/cat-generator' is found.
189
  # try:
190
  # cat_image_tool = Tool.from_space(repo_id="user/cat-generator", name="cat_generator_tool")
191
- # # The arguments for cat_image_tool depend on the Space. I'll assume it takes a 'prompt' argument.
192
- # image_path = cat_image_tool(prompt="A fluffy siamese cat")
193
- # # image_path should be a path to the generated image file
194
  # return image_path
195
  # except Exception as e:
196
  # return f"Failed to use the cat generator Space: {e}"
197
  ```
198
- Always ensure your generated Python code is complete and directly callable. Use `print()` for debugging if necessary, but the final returned value should be the result or file path.
199
  You have access to `os`, `uuid`, `PIL.Image`.
200
  """
201
 
@@ -203,34 +177,22 @@ You have access to `os`, `uuid`, `PIL.Image`.
203
  def gradio_interface(user_prompt, input_image_path, input_audio_path, input_video_path, input_3d_model_path, input_file_path, progress=gr.Progress(track_tqdm=True)):
204
  try:
205
  progress(0, desc="Initializing Agent...")
206
-
207
- # Combine instructions with the user's prompt
208
  full_prompt_with_instructions = f"{AGENT_INSTRUCTIONS}\n\nUSER PROMPT: {user_prompt}"
209
-
210
- # Prepare a dictionary of potential inputs for the agent's execution scope
211
  agent_kwargs = {}
212
- if input_image_path:
213
- agent_kwargs["input_image_path"] = str(input_image_path)
214
- if input_audio_path:
215
- agent_kwargs["input_audio_path"] = str(input_audio_path)
216
- if input_video_path:
217
- agent_kwargs["input_video_path"] = str(input_video_path)
218
- if input_3d_model_path:
219
- agent_kwargs["input_3d_model_path"] = str(input_3d_model_path)
220
- if input_file_path:
221
- agent_kwargs["input_file_path"] = str(input_file_path)
222
 
223
  progress(0.2, desc="Agent processing request...")
224
  result = agent.run(full_prompt_with_instructions, **agent_kwargs)
225
 
226
  progress(0.8, desc="Processing result...")
227
  outputs = {
228
- "image": gr.update(value=None, visible=False),
229
- "file": gr.update(value=None, visible=False),
230
- "path": gr.update(value=None, visible=False),
231
- "audio": gr.update(value=None, visible=False),
232
- "model3d": gr.update(value=None, visible=False),
233
- "text": gr.update(value=None, visible=True),
234
  }
235
 
236
  if isinstance(result, str):
@@ -239,59 +201,40 @@ def gradio_interface(user_prompt, input_image_path, input_audio_path, input_vide
239
  outputs["file"] = gr.update(value=file_path, visible=True)
240
  outputs["path"] = gr.update(value=file_path, visible=True)
241
  ext = os.path.splitext(file_path.lower())[1]
242
- if ext in ('.png', '.jpg', '.jpeg', '.gif', '.webp'):
243
- outputs["image"] = gr.update(value=file_path, visible=True)
244
- elif ext in ('.mp3', '.wav', '.ogg', '.flac'):
245
- outputs["audio"] = gr.update(value=file_path, visible=True)
246
- elif ext == '.glb':
247
- outputs["model3d"] = gr.update(value=file_path, visible=True)
248
- else:
249
- outputs["text"] = gr.update(value=f"Output is a file: {os.path.basename(file_path)}. Download it using the 'Download File Output' component.", visible=True)
250
- else:
251
- outputs["text"] = gr.update(value=result, visible=True)
252
- elif result is None:
253
- outputs["text"] = gr.update(value="Agent returned no result (None). This might indicate an issue or that the task didn't produce a specific output string/file.", visible=True)
254
- else:
255
- outputs["text"] = gr.update(value=f"Unexpected result type from agent: {type(result)}. Content: {str(result)}", visible=True)
256
-
257
  progress(1, desc="Done!")
258
- return (
259
- outputs["image"], outputs["file"], outputs["path"],
260
- outputs["audio"], outputs["model3d"], outputs["text"]
261
- )
262
 
263
  except Exception as e:
264
- error_msg = f"An error occurred in the Gradio interface or agent execution: {str(e)}"
265
  print(error_msg)
266
  traceback.print_exc()
267
- return (
268
- gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False),
269
- gr.update(value=None, visible=False), gr.update(value=None, visible=False),
270
- gr.update(value=error_msg, visible=True)
271
- )
272
 
273
  # Create the Gradio app
274
  with gr.Blocks(theme=gr.themes.Soft()) as app:
275
  gr.Markdown("## 🤖 Smolagent: Multi-Modal Agent with Hugging Face Space Discovery")
276
- gr.Markdown("Ask the agent to perform tasks. It will try to use its tools or find Hugging Face Spaces to help. You can provide optional file inputs below if your task requires them (e.g., 'Make this image Ghibli style', 'Summarize this PDF').")
277
 
278
  with gr.Row():
279
- prompt_input = gr.Textbox(
280
- label="Enter your prompt for the agent",
281
- placeholder="e.g., 'Generate an image of a futuristic city', 'Convert this text to speech: Hello world', or 'Search for a space that translates English to French and use it for: Good morning'",
282
- lines=3,
283
- elem_id="user_prompt_textbox"
284
- )
285
-
286
- with gr.Accordion("Optional File Inputs (for tasks requiring them)", open=False):
287
  with gr.Row():
288
  input_image = gr.Image(label="Image Input", type="filepath", sources=["upload", "clipboard"], elem_id="input_image_upload")
289
  input_audio = gr.Audio(label="Audio Input", type="filepath", sources=["upload", "microphone"], elem_id="input_audio_upload")
290
  with gr.Row():
291
  input_video = gr.Video(label="Video Input", type="filepath", sources=["upload"], elem_id="input_video_upload")
292
- input_model3d = gr.Model3D(label="3D Model Input (.glb, .obj, etc.)", type="filepath", elem_id="input_model3d_upload")
293
  with gr.Row():
294
- input_file = gr.File(label="Generic File Input (PDF, TXT, etc.)", type="filepath", elem_id="input_file_upload")
295
 
296
  submit_button = gr.Button("🚀 Generate", variant="primary", elem_id="submit_button_generate")
297
 
@@ -304,14 +247,14 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
304
  text_output = gr.Textbox(label="Text / Log Output", interactive=False, visible=True, lines=5, max_lines=20, elem_id="output_text_log")
305
  with gr.Row():
306
  file_output = gr.File(label="Download File Output", interactive=False, visible=False, elem_id="output_file_download")
307
- path_output = gr.Textbox(label="Output File Path (Copyable)", interactive=False, visible=False, elem_id="output_file_path_text")
308
 
309
  submit_button.click(
310
  fn=gradio_interface,
311
  inputs=[prompt_input, input_image, input_audio, input_video, input_model3d, input_file],
312
  outputs=[image_output, file_output, path_output, audio_output, model3d_output, text_output]
313
  )
314
-
315
  gr.Examples(
316
  examples=[
317
  ["Generate an image of a happy robot coding on a laptop, cyberpunk style.", None, None, None, None, None],
@@ -320,8 +263,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
320
  ["I have an image of a cat. Find a space that can make it look like a painting and apply it. You will need to use the 'input_image_path' variable which will contain the path to the uploaded cat image.", "path/to/your/cat_image.png", None, None, None, None],
321
  ],
322
  inputs=[prompt_input, input_image, input_audio, input_video, input_model3d, input_file],
323
- label="Example Prompts (Note: For examples with file inputs, you'll need to upload a relevant file first using the 'Optional File Inputs' section)"
324
  )
325
 
326
  if __name__ == "__main__":
327
- app.launch(debug=True)
 
3
  import shutil
4
  from gradio_client import Client, handle_file # handle_file might be used by the agent
5
  # Use InferenceClientModel instead of HfApiModel
6
+ from smolagents import Tool, CodeAgent, InferenceClientModel, ToolCollection # Tool is needed for subclassing
7
  import uuid
8
  import httpx # Often a dependency for HTTP clients, good to have
9
  from tenacity import retry, stop_after_attempt, wait_exponential
 
12
  import traceback # For more detailed error logging if needed
13
 
14
  # Define initial tools from Spaces
 
 
 
 
15
  spaces = [
16
  {"repo_id": "black-forest-labs/FLUX.1-schnell",
17
  "name": "image_generator_flux_schnell",
 
29
  "name": "pdf_text_extraction_mineru",
30
  "description": "Extracts the text of a PDF up to 20 pages long using MinerU. Expects a PDF file.",
31
  "api_name": "/to_pdf"},
 
 
 
 
 
 
 
 
 
 
 
 
32
  ]
33
 
34
  # Create tools from predefined Spaces with retry logic
35
# Defined once at module level (not inside the loop) so the retry wrapper is
# built a single time. `reraise=True` makes tenacity surface the last real
# exception instead of an opaque RetryError, so the failure log below shows
# the actual cause.
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10), reraise=True)
def create_tool_with_retry(repo_id, name, description, api_name):
    """Build a smolagents tool from a Hugging Face Space, retrying on failure.

    Args:
        repo_id: Space identifier passed to `Tool.from_space`.
        name: Name the resulting tool should carry.
        description: Description passed through to `Tool.from_space`.
        api_name: Endpoint name of the Space; may be None.

    Returns:
        The tool object returned by `Tool.from_space`.

    Raises:
        Whatever `Tool.from_space` raised on the third (final) attempt.
    """
    print(f"Attempting to create tool: '{name}' from space: {repo_id} with api_name: {api_name}")
    new_tool = Tool.from_space(repo_id, name=name, description=description, api_name=api_name)
    # Sanity check only: a missing or different name is logged, not treated as an error.
    if not hasattr(new_tool, 'name') or new_tool.name != name:
        print(f"WARNING: Tool '{name}' from space {repo_id} might have a name mismatch or missing name attribute after creation. Actual name: {getattr(new_tool, 'name', 'MISSING')}")
    return new_tool


# Create tools from predefined Spaces; a Space that cannot be loaded is
# skipped so the app can still start with the remaining tools.
tools = []
for space_info in spaces:
    repo_id = space_info['repo_id']
    # Fall back to a name derived from the repo id when none is configured.
    name = space_info.get('name', repo_id.split('/')[-1].replace('-', '_'))
    description = space_info.get('description', f'A tool to interact with the Hugging Face Space: {repo_id}')
    api_name = space_info.get('api_name')

    try:
        tool_instance = create_tool_with_retry(repo_id, name, description, api_name)
        tools.append(tool_instance)
    except Exception as e:
        # Deliberate best-effort boundary: log and continue with the next Space.
        print(f"Failed to load predefined tool from {repo_id}. Error: {str(e)}. Continuing with available tools.")
    else:
        print(f"Successfully loaded predefined tool: {name} from {repo_id}")
56
 
57
+ # --- Refactored HuggingFaceSpaceSearcherTool ---
58
class HuggingFaceSpaceSearcherTool(Tool):
    """Agent tool that searches Hugging Face Spaces by keyword.

    The search itself is delegated to `list_spaces`; results are returned to
    the agent as a human-readable string.
    """

    name = "huggingface_space_searcher"
    description = "Searches for Hugging Face Spaces that can perform a specific task. Input is a search query string (e.g., 'text to image', 'speech recognition'). Returns a list of Space IDs, their descriptions, and instructions on how to try using them."
    # smolagents validates `inputs` and `output_type` when the tool is
    # instantiated, and the keys must match the parameters of `forward`.
    # `top_k` has a default value, so it has to be declared nullable.
    inputs = {
        "query": {
            "type": "string",
            "description": "The search query for Hugging Face Spaces.",
        },
        "top_k": {
            "type": "integer",
            "description": "Maximum number of Spaces to return (defaults to 3).",
            "nullable": True,
        },
    }
    output_type = "string"

    def forward(self, query: str, top_k: int = 3) -> str:
        """Search Spaces for `query` and describe up to `top_k` results.

        Args:
            query: Search string passed to `list_spaces`.
            top_k: Maximum number of results; None is treated as 3.

        Returns:
            A formatted listing (ID, description, likes, last modified) with
            usage instructions, a "no results" message, or an error message.
            Exceptions are caught and reported in the returned string.
        """
        # A nullable input may arrive as None; without this, limit=None would
        # ask the Hub for an unbounded listing.
        if top_k is None:
            top_k = 3
        try:
            print(f"Searching spaces with query: {query}, top_k: {top_k}")
            spaces_found = list(list_spaces(search=query, full=True, limit=top_k, sort="likes", direction=-1))
            if not spaces_found:
                return "No Spaces found for your query."

            # NOTE(review): `cardData` / `lastModified` are read defensively;
            # confirm these attribute names against the installed
            # huggingface_hub version.
            entries = []
            for i, space_data in enumerate(spaces_found, start=1):
                card = getattr(space_data, 'cardData', None)
                title = getattr(space_data, 'title', None)
                if card and 'description' in card:
                    summary = card['description']
                elif title:
                    summary = title
                else:
                    summary = "No description provided."

                entries.append(
                    f"{i}. ID: {space_data.id}\n"
                    f" Description: {summary}\n"
                    f" Likes: {getattr(space_data, 'likes', 'N/A')}\n"
                    f" Last Modified: {getattr(space_data, 'lastModified', 'N/A')}\n\n"
                )

            usage_hint = (
                "\nTo use one of these, you can try creating a tool in the code like this: "
                "my_new_tool = Tool.from_space(repo_id='SPACE_ID_HERE', name='custom_tool_name'). "
                "Then you can call it: result = my_new_tool(argument_name=value). "
                "The arguments depend on the specific Space. If Tool.from_space fails or the tool doesn't work, "
                "the Space might not have a compatible public API or may require a specific api_name."
            )
            return "Found the following Spaces (sorted by likes):\n" + "".join(entries) + usage_hint
        except Exception as e:
            # Best-effort: report the failure to the agent as text instead of
            # aborting the agent run.
            print(f"Error searching Spaces: {str(e)}")
            return f"Error searching Spaces: {str(e)}"
101
+
102
+ # Instantiate the custom tool
103
+ space_search_tool = HuggingFaceSpaceSearcherTool()
104
+ # ---- Debug print for the refactored tool ----
105
+ try:
106
+ print(f"\nDEBUG: 'space_search_tool' (refactored class) immediately after creation.")
107
+ print(f"DEBUG: Name: {space_search_tool.name}") # Should now correctly access the class attribute
108
+ print(f"DEBUG: Type: {type(space_search_tool)}")
109
+ print(f"DEBUG: All attributes: {dir(space_search_tool)}\n")
110
+ except AttributeError as e:
111
+ print(f"\nDEBUG: 'space_search_tool' (refactored class) immediately after creation.")
112
+ print(f"DEBUG: Name attribute STILL MISSING. Error: {e}")
113
+ print(f"DEBUG: Type: {type(space_search_tool)}")
114
+ print(f"DEBUG: All attributes: {dir(space_search_tool)}\n")
115
+ # ---- END Debug print ----
116
  tools.append(space_search_tool)
117
 
118
+
119
  # --- Debugging: Inspect tools before CodeAgent initialization ---
120
  print("\n--- Inspecting tools before CodeAgent initialization ---")
121
  for i, t in enumerate(tools):
122
  if t is None:
123
  print(f"Tool at index {i} is None!")
 
124
  continue
125
  try:
 
126
  tool_name = t.name
127
  print(f"Tool {i}: Name='{tool_name}', Type={type(t)}")
128
  except AttributeError:
 
133
 
134
 
135
  # Initialize the model - Use InferenceClientModel
136
+ model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
137
 
138
+ # Create the agent
139
  agent = CodeAgent(
140
  tools=tools,
141
  model=model,
142
  additional_authorized_imports=['PIL', 'Pillow', 'os', 'sys', 'numpy', 'huggingface_hub', 'gradio_client', 'uuid'],
143
+ add_base_tools=True,
144
  )
145
 
 
146
  AGENT_INSTRUCTIONS = """You are a highly capable AI assistant. Your primary goal is to accomplish tasks using a variety of tools, prioritizing Hugging Face Spaces.
147
 
148
  Follow these steps:
149
  1. **Understand the Request:** Carefully analyze the user's prompt (which will follow these instructions). Identify the core task and any specific requirements or inputs.
150
+ 2. **Check Predefined Tools:** Review your list of available tools. If a predefined tool can directly address the request, use it. For the 'huggingface_space_searcher' tool, the input should be a dictionary like `{"query": "your search term"}` if you defined an inputs schema, or directly as arguments like `huggingface_space_searcher(query="your search term")` if using type hints in the forward method. The refactored `HuggingFaceSpaceSearcherTool` uses type hints in its `forward(self, query: str, top_k: int = 3)` method, so call it like `huggingface_space_searcher(query="your search term")`.
151
  3. **Search for Spaces (If Needed):** If no predefined tool is suitable, use the `huggingface_space_searcher` tool. Provide a concise search query related to the task (e.g., "image classification", "voice cloning", "document question answering").
152
+ 4. **Select and Instantiate a Space Tool:** From the search results, choose the most promising Space. Attempt to create a tool from it using `Tool.from_space(repo_id='SELECTED_SPACE_ID', name='a_unique_tool_name')`. You might need to give it a unique name. If `Tool.from_space` fails, the Space might not be compatible, or you could try another one from the search results.
153
  5. **Execute the Tool:** Call the tool (either predefined or dynamically created) with the necessary arguments.
154
+ * **File Inputs:** If the user uploads files, their paths will be available as global string variables: `input_image_path`, `input_audio_path`, `input_video_path`, `input_3d_model_path`, `input_file_path`. Before using these variables, check if they exist and are not None. Pass these file paths as arguments to tools that require them.
155
+ * **Chaining Tools:** If the task requires multiple steps, chain the tools together.
156
  6. **Output Management:**
157
+ * If a tool generates a file, save it to the current working directory using a unique filename (e.g., `output_filename = os.path.join(os.getcwd(), f"{uuid.uuid4()}.png")`).
158
+ * **Return the RESULT:** Your final response should be either a string text answer or the string path to the generated output file.
159
+ 7. **Clarity and Error Handling:** If you encounter issues, explain the problem.
 
 
160
 
161
  Example of dynamically using a Space after searching:
162
  ```python
 
 
 
 
 
 
 
163
  # search_results = huggingface_space_searcher(query="text to image cat")
164
+ # print(search_results)
165
  # try:
166
  # cat_image_tool = Tool.from_space(repo_id="user/cat-generator", name="cat_generator_tool")
167
+ # image_path = cat_image_tool(prompt="A fluffy siamese cat") # Arguments depend on the Space
 
 
168
  # return image_path
169
  # except Exception as e:
170
  # return f"Failed to use the cat generator Space: {e}"
171
  ```
172
+ Always ensure your generated Python code is complete and directly callable.
173
  You have access to `os`, `uuid`, `PIL.Image`.
174
  """
175
 
 
177
  def gradio_interface(user_prompt, input_image_path, input_audio_path, input_video_path, input_3d_model_path, input_file_path, progress=gr.Progress(track_tqdm=True)):
178
  try:
179
  progress(0, desc="Initializing Agent...")
 
 
180
  full_prompt_with_instructions = f"{AGENT_INSTRUCTIONS}\n\nUSER PROMPT: {user_prompt}"
 
 
181
  agent_kwargs = {}
182
+ if input_image_path: agent_kwargs["input_image_path"] = str(input_image_path)
183
+ if input_audio_path: agent_kwargs["input_audio_path"] = str(input_audio_path)
184
+ if input_video_path: agent_kwargs["input_video_path"] = str(input_video_path)
185
+ if input_3d_model_path: agent_kwargs["input_3d_model_path"] = str(input_3d_model_path)
186
+ if input_file_path: agent_kwargs["input_file_path"] = str(input_file_path)
 
 
 
 
 
187
 
188
  progress(0.2, desc="Agent processing request...")
189
  result = agent.run(full_prompt_with_instructions, **agent_kwargs)
190
 
191
  progress(0.8, desc="Processing result...")
192
  outputs = {
193
+ "image": gr.update(value=None, visible=False), "file": gr.update(value=None, visible=False),
194
+ "path": gr.update(value=None, visible=False), "audio": gr.update(value=None, visible=False),
195
+ "model3d": gr.update(value=None, visible=False), "text": gr.update(value=None, visible=True),
 
 
 
196
  }
197
 
198
  if isinstance(result, str):
 
201
  outputs["file"] = gr.update(value=file_path, visible=True)
202
  outputs["path"] = gr.update(value=file_path, visible=True)
203
  ext = os.path.splitext(file_path.lower())[1]
204
+ if ext in ('.png', '.jpg', '.jpeg', '.gif', '.webp'): outputs["image"] = gr.update(value=file_path, visible=True)
205
+ elif ext in ('.mp3', '.wav', '.ogg', '.flac'): outputs["audio"] = gr.update(value=file_path, visible=True)
206
+ elif ext == '.glb': outputs["model3d"] = gr.update(value=file_path, visible=True)
207
+ else: outputs["text"] = gr.update(value=f"Output is a file: {os.path.basename(file_path)}. Download it.", visible=True)
208
+ else: outputs["text"] = gr.update(value=result, visible=True)
209
+ elif result is None: outputs["text"] = gr.update(value="Agent returned no result (None).", visible=True)
210
+ else: outputs["text"] = gr.update(value=f"Unexpected result type: {type(result)}. Content: {str(result)}", visible=True)
211
+
 
 
 
 
 
 
 
212
  progress(1, desc="Done!")
213
+ return (outputs["image"], outputs["file"], outputs["path"], outputs["audio"], outputs["model3d"], outputs["text"])
 
 
 
214
 
215
  except Exception as e:
216
+ error_msg = f"An error occurred: {str(e)}"
217
  print(error_msg)
218
  traceback.print_exc()
219
+ return (None, None, None, None, None, gr.update(value=error_msg, visible=True))
 
 
 
 
220
 
221
  # Create the Gradio app
222
  with gr.Blocks(theme=gr.themes.Soft()) as app:
223
  gr.Markdown("## 🤖 Smolagent: Multi-Modal Agent with Hugging Face Space Discovery")
224
+ gr.Markdown("Ask the agent to perform tasks...")
225
 
226
  with gr.Row():
227
+ prompt_input = gr.Textbox(label="Enter your prompt", placeholder="e.g., 'Generate an image of a futuristic city'", lines=3, elem_id="user_prompt_textbox")
228
+
229
+ with gr.Accordion("Optional File Inputs", open=False):
 
 
 
 
 
230
  with gr.Row():
231
  input_image = gr.Image(label="Image Input", type="filepath", sources=["upload", "clipboard"], elem_id="input_image_upload")
232
  input_audio = gr.Audio(label="Audio Input", type="filepath", sources=["upload", "microphone"], elem_id="input_audio_upload")
233
  with gr.Row():
234
  input_video = gr.Video(label="Video Input", type="filepath", sources=["upload"], elem_id="input_video_upload")
235
+ input_model3d = gr.Model3D(label="3D Model Input", type="filepath", elem_id="input_model3d_upload")
236
  with gr.Row():
237
+ input_file = gr.File(label="Generic File Input", type="filepath", elem_id="input_file_upload")
238
 
239
  submit_button = gr.Button("🚀 Generate", variant="primary", elem_id="submit_button_generate")
240
 
 
247
  text_output = gr.Textbox(label="Text / Log Output", interactive=False, visible=True, lines=5, max_lines=20, elem_id="output_text_log")
248
  with gr.Row():
249
  file_output = gr.File(label="Download File Output", interactive=False, visible=False, elem_id="output_file_download")
250
+ path_output = gr.Textbox(label="Output File Path", interactive=False, visible=False, elem_id="output_file_path_text")
251
 
252
  submit_button.click(
253
  fn=gradio_interface,
254
  inputs=[prompt_input, input_image, input_audio, input_video, input_model3d, input_file],
255
  outputs=[image_output, file_output, path_output, audio_output, model3d_output, text_output]
256
  )
257
+
258
  gr.Examples(
259
  examples=[
260
  ["Generate an image of a happy robot coding on a laptop, cyberpunk style.", None, None, None, None, None],
 
263
  ["I have an image of a cat. Find a space that can make it look like a painting and apply it. You will need to use the 'input_image_path' variable which will contain the path to the uploaded cat image.", "path/to/your/cat_image.png", None, None, None, None],
264
  ],
265
  inputs=[prompt_input, input_image, input_audio, input_video, input_model3d, input_file],
266
+ label="Example Prompts (Note: For examples with file inputs, you'll need to upload a relevant file first)"
267
  )
268
 
269
  if __name__ == "__main__":
270
+ app.launch(debug=True)