Spaces:

jkorstad
/

Easy-Spaces

Runtime error

App Files Files Community

jkorstad committed on May 16

Commit

24fb7b9

verified ·

1 Parent(s): 2994b0b

Update app.py

Browse files

Files changed (1) hide show

app.py +107 -164

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import os
 import shutil
 from gradio_client import Client, handle_file # handle_file might be used by the agent
 # Use InferenceClientModel instead of HfApiModel
-from smolagents import Tool, CodeAgent, InferenceClientModel, ToolCollection
 import uuid
 import httpx # Often a dependency for HTTP clients, good to have
 from tenacity import retry, stop_after_attempt, wait_exponential
@@ -12,10 +12,6 @@ from PIL import Image # For potential image manipulation by the agent
 import traceback # For more detailed error logging if needed
 # Define initial tools from Spaces
-# Commenting out problematic spaces for now.
-# You'll need to verify their api_name or compatibility if you re-enable them.
-# Ensure the api_name is correct if you uncomment these.
-# Visit the HF Space page and look for "API - via gradio_client" for hints.
 spaces = [
     {"repo_id": "black-forest-labs/FLUX.1-schnell",
      "name": "image_generator_flux_schnell",
@@ -33,110 +29,100 @@ spaces = [
      "name": "pdf_text_extraction_mineru",
      "description": "Extracts the text of a PDF up to 20 pages long using MinerU. Expects a PDF file.",
      "api_name": "/to_pdf"},
-    # {"repo_id": "InstantX/InstantCharacter",
-    #  "name": "instant_character_customization",
-    #  "description": "Personalize Any Characters with a Scalable Diffusion Transformer Framework to any style or pose using InstantCharacter. Expects an input image and potentially pose/style images or prompts.",
-    #  "api_name": "/predict"}, # Example: Verify this api_name if re-enabling
-    # {"repo_id": "fotographerai/Zen-Style-Shape",
-    #  "name": "img_to_img_style_transfer_zen_shape",
-    #  "description": "Flux[dev] Redux + Flux[dev] Canny. Implements a custom image-to-image style transfer pipeline blending style from Image A to structure of Image B. Expects two images.",
-    #  "api_name": "/predict"}, # Example: Verify this api_name if re-enabling
-    # {"repo_id": "moonshotai/Kimi-VL-A3B-Thinking",
-    #  "name": "multimodal_vlm_llm_kimi",
-    #  "description": "Kimi-VL-A3B-Thinking is a multi-modal LLM that can understand text and images, and generate text with thinking processes. Ask any question about an image. Expects text and optionally an image.",
-    #  "api_name": "/chat"}, # Example: Verify this api_name if re-enabling
 ]
 # Create tools from predefined Spaces with retry logic
 tools = []
 for space_info in spaces:
     repo_id = space_info['repo_id']
-    name = space_info.get('name', repo_id.split('/')[-1].replace('-', '_')) # Default name from repo_id
     description = space_info.get('description', f'A tool to interact with the Hugging Face Space: {repo_id}')
-    api_name = space_info.get('api_name') # Can be None, Tool.from_space will try to infer
     @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
     def create_tool_with_retry(repo_id, name, description, api_name):
-        # If api_name is None, Tool.from_space will try to find a public API endpoint.
         print(f"Attempting to create tool: '{name}' from space: {repo_id} with api_name: {api_name}")
         new_tool = Tool.from_space(repo_id, name=name, description=description, api_name=api_name)
-        # Explicitly check if name attribute is set after creation by Tool.from_space
         if not hasattr(new_tool, 'name') or new_tool.name != name:
             print(f"WARNING: Tool '{name}' from space {repo_id} might have a name mismatch or missing name attribute after creation. Actual name: {getattr(new_tool, 'name', 'MISSING')}")
         return new_tool
     try:
-        tool = create_tool_with_retry(repo_id, name, description, api_name)
-        tools.append(tool)
         print(f"Successfully loaded predefined tool: {name} from {repo_id}")
     except Exception as e:
         print(f"Failed to load predefined tool from {repo_id}. Error: {str(e)}. Continuing with available tools.")
-# Load tools from a Hugging Face Collection (User has this commented out)
-#collection_slug = "jkorstad/tools-680127d17eed47e759549ff4"
-#try:
-#    collection = ToolCollection.from_hub(collection_slug=collection_slug, trust_remote_code=True)
-#    tools.extend(collection.tools)
-#    print(f"Successfully loaded tools from collection: {collection_slug}")
-#except Exception as e:
-#    print(f"Warning: Failed to load collection {collection_slug}. Error: {str(e)}")
-# Tool for searching Hugging Face Spaces
-def search_hf_spaces(query: str, top_k: int = 3) -> str:
-    """
-    Searches Hugging Face Spaces for a given query and returns the top_k results.
-    Provides repo_id, description, likes, and last modified date for each space found.
-    Use this to discover new tools if the existing ones are not suitable.
-    To use a found space, try: new_tool = Tool.from_space(repo_id='the_space_id', name='a_descriptive_name')
-    Then call it: result = new_tool(param1=value1, ...)
-    """
-    try:
-        print(f"Searching spaces with query: {query}, top_k: {top_k}")
-        spaces_found = list(list_spaces(search=query, full=True, limit=top_k, sort="likes", direction=-1))
-        if not spaces_found:
-            return "No Spaces found for your query."
-        results = "Found the following Spaces (sorted by likes):\n"
-        for i, space_data in enumerate(spaces_found):
-            description = "No description provided."
-            if hasattr(space_data, 'cardData') and space_data.cardData and 'description' in space_data.cardData:
-                description = space_data.cardData['description']
-            elif hasattr(space_data, 'title') and space_data.title:
-                description = space_data.title
-            results += (
-                f"{i+1}. ID: {space_data.id}\n"
-                f"   Description: {description}\n"
-                f"   Likes: {space_data.likes if hasattr(space_data, 'likes') else 'N/A'}\n"
-                f"   Last Modified: {space_data.lastModified if hasattr(space_data, 'lastModified') else 'N/A'}\n\n"
-            )
-        results += ("\nTo use one of these, you can try creating a tool in the code like this: "
-                    "my_new_tool = Tool.from_space(repo_id='SPACE_ID_HERE', name='custom_tool_name'). "
-                    "Then you can call it: result = my_new_tool(argument_name=value). "
-                    "The arguments depend on the specific Space. If Tool.from_space fails or the tool doesn't work, "
-                    "the Space might not have a compatible public API or may require a specific api_name.")
-        return results
-    except Exception as e:
-        print(f"Error searching Spaces: {str(e)}")
-        return f"Error searching Spaces: {str(e)}"
-space_search_tool = Tool(
-    name="huggingface_space_searcher",
-    description="Searches for Hugging Face Spaces that can perform a specific task. Input is a search query string (e.g., 'text to image', 'speech recognition'). Returns a list of Space IDs, their descriptions, and instructions on how to try using them.",
-    func=search_hf_spaces,
-)
 tools.append(space_search_tool)
 # --- Debugging: Inspect tools before CodeAgent initialization ---
 print("\n--- Inspecting tools before CodeAgent initialization ---")
 for i, t in enumerate(tools):
     if t is None:
         print(f"Tool at index {i} is None!")
-        # This would cause an error later, but the current error is 'Tool' object has no attribute 'name'
         continue
     try:
-        # Attempt to access the name attribute
         tool_name = t.name
         print(f"Tool {i}: Name='{tool_name}', Type={type(t)}")
     except AttributeError:
@@ -147,55 +133,43 @@ print("-------------------------------------------------------\n")
 # Initialize the model - Use InferenceClientModel
-model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct") # Or your preferred model
-# Create the agent - Removed system_prompt from constructor
 agent = CodeAgent(
     tools=tools,
     model=model,
     additional_authorized_imports=['PIL', 'Pillow', 'os', 'sys', 'numpy', 'huggingface_hub', 'gradio_client', 'uuid'],
-    add_base_tools=True, # Includes web search, python interpreter
 )
-# This is the detailed instruction set that was previously in system_prompt
 AGENT_INSTRUCTIONS = """You are a highly capable AI assistant. Your primary goal is to accomplish tasks using a variety of tools, prioritizing Hugging Face Spaces.
 Follow these steps:
 1.  **Understand the Request:** Carefully analyze the user's prompt (which will follow these instructions). Identify the core task and any specific requirements or inputs.
-2.  **Check Predefined Tools:** Review your list of available tools. If a predefined tool can directly address the request, use it.
 3.  **Search for Spaces (If Needed):** If no predefined tool is suitable, use the `huggingface_space_searcher` tool. Provide a concise search query related to the task (e.g., "image classification", "voice cloning", "document question answering").
-4.  **Select and Instantiate a Space Tool:** From the search results, choose the most promising Space. Attempt to create a tool from it using `Tool.from_space(repo_id='SELECTED_SPACE_ID', name='a_unique_tool_name')`. You might need to give it a unique name. If `Tool.from_space` fails, the Space might not be compatible, or you could try another one from the search results. Note that some Spaces might not have a public API or may require a specific `api_name` that `Tool.from_space` cannot infer; in such cases, you might not be able to use them.
 5.  **Execute the Tool:** Call the tool (either predefined or dynamically created) with the necessary arguments.
-    * **File Inputs:** If the user uploads files, their paths will be available as global string variables: `input_image_path`, `input_audio_path`, `input_video_path`, `input_3d_model_path`, `input_file_path`. Before using these variables, check if they exist and are not None (e.g., `if 'input_image_path' in globals() and input_image_path:`). Pass these file paths as arguments to tools that require them. `Tool.from_space` handles file uploads for compatible Spaces when you pass the filepath string.
-    * **Chaining Tools:** If the task requires multiple steps, chain the tools together, passing the output of one tool as the input to the next.
 6.  **Output Management:**
-    * If a tool generates a file (image, audio, etc.), save it to the current working directory using a unique filename (e.g., `output_filename = os.path.join(os.getcwd(), f"{uuid.uuid4()}.png")`).
-    * **Return the RESULT:** Your final response should be either:
-        * A string containing the direct text answer.
-        * The string path to the generated output file (e.g., `return output_filename`).
-7.  **Clarity and Error Handling:** If you encounter issues (e.g., a Space tool fails, required inputs are missing), clearly explain the problem in your response. If a Space doesn't work, try to explain why or suggest an alternative if possible.
 Example of dynamically using a Space after searching:
 ```python
-# This is an example of how I, the agent, would think and act.
-# User's actual prompt would follow these instructions.
-# Example user prompt: "Find a space that can make an image of a cat and then use it."
-#
-# My thought process:
-# 1. The user wants an image of a cat, and wants me to find a Space for it.
-# 2. I'll use `huggingface_space_searcher`.
 # search_results = huggingface_space_searcher(query="text to image cat")
-# print(search_results) # This would show me some options. Let's say 'user/cat-generator' is found.
 # try:
 #     cat_image_tool = Tool.from_space(repo_id="user/cat-generator", name="cat_generator_tool")
-#     # The arguments for cat_image_tool depend on the Space. I'll assume it takes a 'prompt' argument.
-#     image_path = cat_image_tool(prompt="A fluffy siamese cat")
-#     # image_path should be a path to the generated image file
 #     return image_path
 # except Exception as e:
 #     return f"Failed to use the cat generator Space: {e}"
 ```
-Always ensure your generated Python code is complete and directly callable. Use `print()` for debugging if necessary, but the final returned value should be the result or file path.
 You have access to `os`, `uuid`, `PIL.Image`.
 """
@@ -203,34 +177,22 @@ You have access to `os`, `uuid`, `PIL.Image`.
 def gradio_interface(user_prompt, input_image_path, input_audio_path, input_video_path, input_3d_model_path, input_file_path, progress=gr.Progress(track_tqdm=True)):
     try:
         progress(0, desc="Initializing Agent...")
-        # Combine instructions with the user's prompt
         full_prompt_with_instructions = f"{AGENT_INSTRUCTIONS}\n\nUSER PROMPT: {user_prompt}"
-        # Prepare a dictionary of potential inputs for the agent's execution scope
         agent_kwargs = {}
-        if input_image_path:
-            agent_kwargs["input_image_path"] = str(input_image_path)
-        if input_audio_path:
-            agent_kwargs["input_audio_path"] = str(input_audio_path)
-        if input_video_path:
-            agent_kwargs["input_video_path"] = str(input_video_path)
-        if input_3d_model_path:
-            agent_kwargs["input_3d_model_path"] = str(input_3d_model_path)
-        if input_file_path:
-            agent_kwargs["input_file_path"] = str(input_file_path)
         progress(0.2, desc="Agent processing request...")
         result = agent.run(full_prompt_with_instructions, **agent_kwargs)
         progress(0.8, desc="Processing result...")
         outputs = {
-            "image": gr.update(value=None, visible=False),
-            "file": gr.update(value=None, visible=False),
-            "path": gr.update(value=None, visible=False),
-            "audio": gr.update(value=None, visible=False),
-            "model3d": gr.update(value=None, visible=False),
-            "text": gr.update(value=None, visible=True),
         }
         if isinstance(result, str):
@@ -239,59 +201,40 @@ def gradio_interface(user_prompt, input_image_path, input_audio_path, input_vide
                 outputs["file"] = gr.update(value=file_path, visible=True)
                 outputs["path"] = gr.update(value=file_path, visible=True)
                 ext = os.path.splitext(file_path.lower())[1]
-                if ext in ('.png', '.jpg', '.jpeg', '.gif', '.webp'):
-                    outputs["image"] = gr.update(value=file_path, visible=True)
-                elif ext in ('.mp3', '.wav', '.ogg', '.flac'):
-                    outputs["audio"] = gr.update(value=file_path, visible=True)
-                elif ext == '.glb':
-                    outputs["model3d"] = gr.update(value=file_path, visible=True)
-                else:
-                    outputs["text"] = gr.update(value=f"Output is a file: {os.path.basename(file_path)}. Download it using the 'Download File Output' component.", visible=True)
-            else:
-                outputs["text"] = gr.update(value=result, visible=True)
-        elif result is None:
-            outputs["text"] = gr.update(value="Agent returned no result (None). This might indicate an issue or that the task didn't produce a specific output string/file.", visible=True)
-        else:
-            outputs["text"] = gr.update(value=f"Unexpected result type from agent: {type(result)}. Content: {str(result)}", visible=True)
         progress(1, desc="Done!")
-        return (
-            outputs["image"], outputs["file"], outputs["path"],
-            outputs["audio"], outputs["model3d"], outputs["text"]
-        )
     except Exception as e:
-        error_msg = f"An error occurred in the Gradio interface or agent execution: {str(e)}"
         print(error_msg)
         traceback.print_exc()
-        return (
-            gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False),
-            gr.update(value=None, visible=False), gr.update(value=None, visible=False),
-            gr.update(value=error_msg, visible=True)
-        )
 # Create the Gradio app
 with gr.Blocks(theme=gr.themes.Soft()) as app:
     gr.Markdown("## 🤖 Smolagent: Multi-Modal Agent with Hugging Face Space Discovery")
-    gr.Markdown("Ask the agent to perform tasks. It will try to use its tools or find Hugging Face Spaces to help. You can provide optional file inputs below if your task requires them (e.g., 'Make this image Ghibli style', 'Summarize this PDF').")
     with gr.Row():
-        prompt_input = gr.Textbox(
-            label="Enter your prompt for the agent",
-            placeholder="e.g., 'Generate an image of a futuristic city', 'Convert this text to speech: Hello world', or 'Search for a space that translates English to French and use it for: Good morning'",
-            lines=3,
-            elem_id="user_prompt_textbox"
-        )
-    with gr.Accordion("Optional File Inputs (for tasks requiring them)", open=False):
         with gr.Row():
             input_image = gr.Image(label="Image Input", type="filepath", sources=["upload", "clipboard"], elem_id="input_image_upload")
             input_audio = gr.Audio(label="Audio Input", type="filepath", sources=["upload", "microphone"], elem_id="input_audio_upload")
         with gr.Row():
             input_video = gr.Video(label="Video Input", type="filepath", sources=["upload"], elem_id="input_video_upload")
-            input_model3d = gr.Model3D(label="3D Model Input (.glb, .obj, etc.)", type="filepath", elem_id="input_model3d_upload")
         with gr.Row():
-            input_file = gr.File(label="Generic File Input (PDF, TXT, etc.)", type="filepath", elem_id="input_file_upload")
     submit_button = gr.Button("🚀 Generate", variant="primary", elem_id="submit_button_generate")
@@ -304,14 +247,14 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
         text_output = gr.Textbox(label="Text / Log Output", interactive=False, visible=True, lines=5, max_lines=20, elem_id="output_text_log")
     with gr.Row():
         file_output = gr.File(label="Download File Output", interactive=False, visible=False, elem_id="output_file_download")
-        path_output = gr.Textbox(label="Output File Path (Copyable)", interactive=False, visible=False, elem_id="output_file_path_text")
     submit_button.click(
         fn=gradio_interface,
         inputs=[prompt_input, input_image, input_audio, input_video, input_model3d, input_file],
         outputs=[image_output, file_output, path_output, audio_output, model3d_output, text_output]
     )
     gr.Examples(
         examples=[
             ["Generate an image of a happy robot coding on a laptop, cyberpunk style.", None, None, None, None, None],
@@ -320,8 +263,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
             ["I have an image of a cat. Find a space that can make it look like a painting and apply it. You will need to use the 'input_image_path' variable which will contain the path to the uploaded cat image.", "path/to/your/cat_image.png", None, None, None, None],
         ],
         inputs=[prompt_input, input_image, input_audio, input_video, input_model3d, input_file],
-        label="Example Prompts (Note: For examples with file inputs, you'll need to upload a relevant file first using the 'Optional File Inputs' section)"
     )
 if __name__ == "__main__":
-    app.launch(debug=True)

 import shutil
 from gradio_client import Client, handle_file # handle_file might be used by the agent
 # Use InferenceClientModel instead of HfApiModel
+from smolagents import Tool, CodeAgent, InferenceClientModel, ToolCollection # Tool is needed for subclassing
 import uuid
 import httpx # Often a dependency for HTTP clients, good to have
 from tenacity import retry, stop_after_attempt, wait_exponential
 import traceback # For more detailed error logging if needed
 # Define initial tools from Spaces
 spaces = [
     {"repo_id": "black-forest-labs/FLUX.1-schnell",
      "name": "image_generator_flux_schnell",
      "name": "pdf_text_extraction_mineru",
      "description": "Extracts the text of a PDF up to 20 pages long using MinerU. Expects a PDF file.",
      "api_name": "/to_pdf"},
 ]
 # Create tools from predefined Spaces with retry logic
 tools = []
 for space_info in spaces:
     repo_id = space_info['repo_id']
+    name = space_info.get('name', repo_id.split('/')[-1].replace('-', '_'))
     description = space_info.get('description', f'A tool to interact with the Hugging Face Space: {repo_id}')
+    api_name = space_info.get('api_name')
     @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
     def create_tool_with_retry(repo_id, name, description, api_name):
         print(f"Attempting to create tool: '{name}' from space: {repo_id} with api_name: {api_name}")
         new_tool = Tool.from_space(repo_id, name=name, description=description, api_name=api_name)
         if not hasattr(new_tool, 'name') or new_tool.name != name:
             print(f"WARNING: Tool '{name}' from space {repo_id} might have a name mismatch or missing name attribute after creation. Actual name: {getattr(new_tool, 'name', 'MISSING')}")
         return new_tool
     try:
+        tool_instance = create_tool_with_retry(repo_id, name, description, api_name) # Renamed to avoid conflict
+        tools.append(tool_instance)
         print(f"Successfully loaded predefined tool: {name} from {repo_id}")
     except Exception as e:
         print(f"Failed to load predefined tool from {repo_id}. Error: {str(e)}. Continuing with available tools.")
+# --- Refactored HuggingFaceSpaceSearcherTool ---
+class HuggingFaceSpaceSearcherTool(Tool):
+    """Agent tool that searches Hugging Face Spaces for a free-text query.
+
+    smolagents validates Tool subclasses when they are instantiated and expects
+    `name`, `description`, `inputs` and `output_type` to be set, with the keys of
+    `inputs` matching the parameters of `forward`. All four are therefore defined
+    here as class attributes.
+    """
+    name = "huggingface_space_searcher"
+    description = "Searches for Hugging Face Spaces that can perform a specific task. Input is a search query string (e.g., 'text to image', 'speech recognition'). Returns a list of Space IDs, their descriptions, and instructions on how to try using them."
+    # Input schema: one entry per `forward` parameter. `top_k` has a default value,
+    # so it is declared nullable to let the agent omit it.
+    inputs = {
+        "query": {"type": "string", "description": "The search query for Hugging Face Spaces."},
+        "top_k": {"type": "integer", "description": "Maximum number of Spaces to return (defaults to 3).", "nullable": True},
+    }
+    output_type = "string"
+
+    def forward(self, query: str, top_k: int = 3) -> str:
+        """
+        Searches Hugging Face Spaces for a given query and returns the top_k results.
+        Provides repo_id, description, likes, and last modified date for each space found.
+
+        Args:
+            query: Search terms passed to the Hub.
+            top_k: Maximum number of Spaces to list; None falls back to 3.
+
+        Returns:
+            A human-readable listing of the matching Spaces followed by usage hints,
+            or a message describing why nothing could be returned. Errors are
+            reported in the returned string rather than raised, so the agent can
+            read them and react.
+        """
+        # A nullable input may arrive as None when the agent omits it.
+        if top_k is None:
+            top_k = 3
+        try:
+            print(f"Searching spaces with query: {query}, top_k: {top_k}")
+            # NOTE(review): `list_spaces` is expected to be imported from huggingface_hub
+            # at the top of app.py (those lines are outside this hunk) - confirm.
+            spaces_found = list(list_spaces(search=query, full=True, limit=top_k, sort="likes", direction=-1))
+            if not spaces_found:
+                return "No Spaces found for your query."
+            parts = ["Found the following Spaces (sorted by likes):\n"]
+            for i, space_data in enumerate(spaces_found, start=1):
+                # Prefer the snake_case attribute names and fall back to the legacy
+                # camelCase ones, so the listing works across huggingface_hub versions.
+                card = getattr(space_data, 'card_data', None) or getattr(space_data, 'cardData', None)
+                title = getattr(space_data, 'title', None)
+                description = "No description provided."
+                if card and 'description' in card:
+                    description = card['description']
+                elif title:
+                    description = title
+                last_modified = getattr(space_data, 'last_modified', None)
+                if last_modified is None:
+                    last_modified = getattr(space_data, 'lastModified', 'N/A')
+                parts.append(
+                    f"{i}. ID: {space_data.id}\n"
+                    f"   Description: {description}\n"
+                    f"   Likes: {getattr(space_data, 'likes', 'N/A')}\n"
+                    f"   Last Modified: {last_modified}\n\n"
+                )
+            parts.append("\nTo use one of these, you can try creating a tool in the code like this: "
+                         "my_new_tool = Tool.from_space(repo_id='SPACE_ID_HERE', name='custom_tool_name'). "
+                         "Then you can call it: result = my_new_tool(argument_name=value). "
+                         "The arguments depend on the specific Space. If Tool.from_space fails or the tool doesn't work, "
+                         "the Space might not have a compatible public API or may require a specific api_name.")
+            return "".join(parts)
+        except Exception as e:
+            # Deliberate best-effort boundary: report the failure to the agent as text.
+            print(f"Error searching Spaces: {str(e)}")
+            return f"Error searching Spaces: {str(e)}"
+# Instantiate the custom tool
+space_search_tool = HuggingFaceSpaceSearcherTool()
+# ---- Debug print for the refactored tool ----
+try:
+    print(f"\nDEBUG: 'space_search_tool' (refactored class) immediately after creation.")
+    print(f"DEBUG: Name: {space_search_tool.name}") # Should now correctly access the class attribute
+    print(f"DEBUG: Type: {type(space_search_tool)}")
+    print(f"DEBUG: All attributes: {dir(space_search_tool)}\n")
+except AttributeError as e:
+    print(f"\nDEBUG: 'space_search_tool' (refactored class) immediately after creation.")
+    print(f"DEBUG: Name attribute STILL MISSING. Error: {e}")
+    print(f"DEBUG: Type: {type(space_search_tool)}")
+    print(f"DEBUG: All attributes: {dir(space_search_tool)}\n")
+# ---- END Debug print ----
 tools.append(space_search_tool)
 # --- Debugging: Inspect tools before CodeAgent initialization ---
 print("\n--- Inspecting tools before CodeAgent initialization ---")
 for i, t in enumerate(tools):
     if t is None:
         print(f"Tool at index {i} is None!")
         continue
     try:
         tool_name = t.name
         print(f"Tool {i}: Name='{tool_name}', Type={type(t)}")
     except AttributeError:
 # Initialize the model - Use InferenceClientModel
+model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
+# Create the agent
 agent = CodeAgent(
     tools=tools,
     model=model,
     additional_authorized_imports=['PIL', 'Pillow', 'os', 'sys', 'numpy', 'huggingface_hub', 'gradio_client', 'uuid'],
+    add_base_tools=True,
 )
 AGENT_INSTRUCTIONS = """You are a highly capable AI assistant. Your primary goal is to accomplish tasks using a variety of tools, prioritizing Hugging Face Spaces.
 Follow these steps:
 1.  **Understand the Request:** Carefully analyze the user's prompt (which will follow these instructions). Identify the core task and any specific requirements or inputs.
+2.  **Check Predefined Tools:** Review your list of available tools. If a predefined tool can directly address the request, use it. For the 'huggingface_space_searcher' tool, the input should be a dictionary like `{"query": "your search term"}` if you defined an inputs schema, or directly as arguments like `huggingface_space_searcher(query="your search term")` if using type hints in the forward method. The refactored `HuggingFaceSpaceSearcherTool` uses type hints in its `forward(self, query: str, top_k: int = 3)` method, so call it like `huggingface_space_searcher(query="your search term")`.
 3.  **Search for Spaces (If Needed):** If no predefined tool is suitable, use the `huggingface_space_searcher` tool. Provide a concise search query related to the task (e.g., "image classification", "voice cloning", "document question answering").
+4.  **Select and Instantiate a Space Tool:** From the search results, choose the most promising Space. Attempt to create a tool from it using `Tool.from_space(repo_id='SELECTED_SPACE_ID', name='a_unique_tool_name')`. You might need to give it a unique name. If `Tool.from_space` fails, the Space might not be compatible, or you could try another one from the search results.
 5.  **Execute the Tool:** Call the tool (either predefined or dynamically created) with the necessary arguments.
+    * **File Inputs:** If the user uploads files, their paths will be available as global string variables: `input_image_path`, `input_audio_path`, `input_video_path`, `input_3d_model_path`, `input_file_path`. Before using these variables, check if they exist and are not None. Pass these file paths as arguments to tools that require them.
+    * **Chaining Tools:** If the task requires multiple steps, chain the tools together.
 6.  **Output Management:**
+    * If a tool generates a file, save it to the current working directory using a unique filename (e.g., `output_filename = os.path.join(os.getcwd(), f"{uuid.uuid4()}.png")`).
+    * **Return the RESULT:** Your final response should be either a string text answer or the string path to the generated output file.
+7.  **Clarity and Error Handling:** If you encounter issues, explain the problem.
 Example of dynamically using a Space after searching:
 ```python
 # search_results = huggingface_space_searcher(query="text to image cat")
+# print(search_results)
 # try:
 #     cat_image_tool = Tool.from_space(repo_id="user/cat-generator", name="cat_generator_tool")
+#     image_path = cat_image_tool(prompt="A fluffy siamese cat") # Arguments depend on the Space
 #     return image_path
 # except Exception as e:
 #     return f"Failed to use the cat generator Space: {e}"
 ```
+Always ensure your generated Python code is complete and directly callable.
 You have access to `os`, `uuid`, `PIL.Image`.
 """
 def gradio_interface(user_prompt, input_image_path, input_audio_path, input_video_path, input_3d_model_path, input_file_path, progress=gr.Progress(track_tqdm=True)):
     try:
         progress(0, desc="Initializing Agent...")
         full_prompt_with_instructions = f"{AGENT_INSTRUCTIONS}\n\nUSER PROMPT: {user_prompt}"
         agent_kwargs = {}
+        if input_image_path: agent_kwargs["input_image_path"] = str(input_image_path)
+        if input_audio_path: agent_kwargs["input_audio_path"] = str(input_audio_path)
+        if input_video_path: agent_kwargs["input_video_path"] = str(input_video_path)
+        if input_3d_model_path: agent_kwargs["input_3d_model_path"] = str(input_3d_model_path)
+        if input_file_path: agent_kwargs["input_file_path"] = str(input_file_path)
         progress(0.2, desc="Agent processing request...")
         result = agent.run(full_prompt_with_instructions, **agent_kwargs)
         progress(0.8, desc="Processing result...")
         outputs = {
+            "image": gr.update(value=None, visible=False), "file": gr.update(value=None, visible=False),
+            "path": gr.update(value=None, visible=False), "audio": gr.update(value=None, visible=False),
+            "model3d": gr.update(value=None, visible=False), "text": gr.update(value=None, visible=True),
         }
         if isinstance(result, str):
                 outputs["file"] = gr.update(value=file_path, visible=True)
                 outputs["path"] = gr.update(value=file_path, visible=True)
                 ext = os.path.splitext(file_path.lower())[1]
+                if ext in ('.png', '.jpg', '.jpeg', '.gif', '.webp'): outputs["image"] = gr.update(value=file_path, visible=True)
+                elif ext in ('.mp3', '.wav', '.ogg', '.flac'): outputs["audio"] = gr.update(value=file_path, visible=True)
+                elif ext == '.glb': outputs["model3d"] = gr.update(value=file_path, visible=True)
+                else: outputs["text"] = gr.update(value=f"Output is a file: {os.path.basename(file_path)}. Download it.", visible=True)
+            else: outputs["text"] = gr.update(value=result, visible=True)
+        elif result is None: outputs["text"] = gr.update(value="Agent returned no result (None).", visible=True)
+        else: outputs["text"] = gr.update(value=f"Unexpected result type: {type(result)}. Content: {str(result)}", visible=True)
         progress(1, desc="Done!")
+        return (outputs["image"], outputs["file"], outputs["path"], outputs["audio"], outputs["model3d"], outputs["text"])
     except Exception as e:
+        error_msg = f"An error occurred: {str(e)}"
         print(error_msg)
         traceback.print_exc()
+        return (None, None, None, None, None, gr.update(value=error_msg, visible=True))
 # Create the Gradio app
 with gr.Blocks(theme=gr.themes.Soft()) as app:
     gr.Markdown("## 🤖 Smolagent: Multi-Modal Agent with Hugging Face Space Discovery")
+    gr.Markdown("Ask the agent to perform tasks...")
     with gr.Row():
+        prompt_input = gr.Textbox(label="Enter your prompt", placeholder="e.g., 'Generate an image of a futuristic city'", lines=3, elem_id="user_prompt_textbox")
+    with gr.Accordion("Optional File Inputs", open=False):
         with gr.Row():
             input_image = gr.Image(label="Image Input", type="filepath", sources=["upload", "clipboard"], elem_id="input_image_upload")
             input_audio = gr.Audio(label="Audio Input", type="filepath", sources=["upload", "microphone"], elem_id="input_audio_upload")
         with gr.Row():
             input_video = gr.Video(label="Video Input", type="filepath", sources=["upload"], elem_id="input_video_upload")
+            input_model3d = gr.Model3D(label="3D Model Input", type="filepath", elem_id="input_model3d_upload")
         with gr.Row():
+            input_file = gr.File(label="Generic File Input", type="filepath", elem_id="input_file_upload")
     submit_button = gr.Button("🚀 Generate", variant="primary", elem_id="submit_button_generate")
         text_output = gr.Textbox(label="Text / Log Output", interactive=False, visible=True, lines=5, max_lines=20, elem_id="output_text_log")
     with gr.Row():
         file_output = gr.File(label="Download File Output", interactive=False, visible=False, elem_id="output_file_download")
+        path_output = gr.Textbox(label="Output File Path", interactive=False, visible=False, elem_id="output_file_path_text")
     submit_button.click(
         fn=gradio_interface,
         inputs=[prompt_input, input_image, input_audio, input_video, input_model3d, input_file],
         outputs=[image_output, file_output, path_output, audio_output, model3d_output, text_output]
     )
     gr.Examples(
         examples=[
             ["Generate an image of a happy robot coding on a laptop, cyberpunk style.", None, None, None, None, None],
             ["I have an image of a cat. Find a space that can make it look like a painting and apply it. You will need to use the 'input_image_path' variable which will contain the path to the uploaded cat image.", "path/to/your/cat_image.png", None, None, None, None],
         ],
         inputs=[prompt_input, input_image, input_audio, input_video, input_model3d, input_file],
+        label="Example Prompts (Note: For examples with file inputs, you'll need to upload a relevant file first)"
     )
 if __name__ == "__main__":
+    app.launch(debug=True)