jkorstad commited on
Commit
cb57dca
Β·
verified Β·
1 Parent(s): c78470e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +144 -117
app.py CHANGED
@@ -1,49 +1,55 @@
1
  import gradio as gr
2
  import os
3
  import shutil
4
- from gradio_client import Client, handle_file # handle_file might be used by the agent if it constructs client calls manually
5
- from smolagents import Tool, CodeAgent, HfApiModel, ToolCollection
 
6
  import uuid
7
- import httpx
8
  from tenacity import retry, stop_after_attempt, wait_exponential
9
- from huggingface_hub import list_spaces # For the new search tool
10
  from PIL import Image # For potential image manipulation by the agent
 
11
 
12
- # Define initial tools from Spaces (your existing list)
 
 
 
 
13
  spaces = [
14
- {"repo_id": "black-forest-labs/FLUX.1-schnell",
15
- "name": "image_generator_flux_schnell", # Renamed for clarity if multiple image generators exist
16
  "description": "Generate an image from a prompt using FLUX.1-schnell. Expects a text prompt.",
17
  "api_name": "/infer"},
18
- {"repo_id": "Remsky/Kokoro-TTS-Zero",
19
- "name": "text_to_speech_kokoro",
20
  "description": "Generates speech (audio) from input text using Kokoro TTS Zero. Expects text input.",
21
  "api_name": "/generate_speech_from_ui"},
22
- {"repo_id": "jamesliu1217/EasyControl_Ghibli",
23
- "name": "ghibli_style_image_control", # Renamed for clarity
24
  "description": "Create Ghibli style image from an input image using EasyControl_Ghibli. Expects an image and a prompt/control parameters.",
25
  "api_name": "/single_condition_generate_image"},
26
- {"repo_id": "opendatalab/MinerU",
27
- "name": "pdf_text_extraction_mineru", # Renamed for clarity
28
  "description": "Extracts the text of a PDF up to 20 pages long using MinerU. Expects a PDF file.",
29
  "api_name": "/to_pdf"},
30
- {"repo_id": "InstantX/InstantCharacter",
31
- "name": "instant_character_customization", # Renamed for clarity
32
- "description": "Personalize Any Characters with a Scalable Diffusion Transformer Framework to any style or pose using InstantCharacter. Expects an input image and potentially pose/style images or prompts.",
33
- "api_name": "/predict"}, # Common API name, verify for this space
34
- {"repo_id": "fotographerai/Zen-Style-Shape",
35
- "name": "img_to_img_style_transfer_zen_shape", # Renamed for clarity
36
- "description": "Flux[dev] Redux + Flux[dev] Canny. Implements a custom image-to-image style transfer pipeline blending style from Image A to structure of Image B. Expects two images.",
37
- "api_name": "/predict"}, # Common API name, verify for this space
38
- {"repo_id": "moonshotai/Kimi-VL-A3B-Thinking",
39
- "name": "multimodal_vlm_llm_kimi", # Renamed for clarity
40
- "description": "Kimi-VL-A3B-Thinking is a multi-modal LLM that can understand text and images, and generate text with thinking processes. Ask any question about an image. Expects text and optionally an image.",
41
- "api_name": "/chat"}, # Verify this api_name for Kimi spaces
42
  ]
43
 
44
  # Create tools from predefined Spaces with retry logic
45
  tools = []
46
- for space_info in spaces: # Renamed 'space' to 'space_info' to avoid conflict
47
  repo_id = space_info['repo_id']
48
  name = space_info.get('name', repo_id.split('/')[-1].replace('-', '_')) # Default name from repo_id
49
  description = space_info.get('description', f'A tool to interact with the Hugging Face Space: {repo_id}')
@@ -62,16 +68,17 @@ for space_info in spaces: # Renamed 'space' to 'space_info' to avoid conflict
62
  print(f"Failed to load predefined tool from {repo_id}. Error: {str(e)}. Continuing with available tools.")
63
 
64
  # Load tools from a Hugging Face Collection
65
- collection_slug = "jkorstad/tools-680127d17eed47e759549ff4"
66
  try:
67
- collection = ToolCollection.from_hub(collection_slug=collection_slug)
 
68
  tools.extend(collection.tools)
69
  print(f"Successfully loaded tools from collection: {collection_slug}")
70
  except Exception as e:
71
  print(f"Warning: Failed to load collection {collection_slug}. Error: {str(e)}")
72
 
73
 
74
- # NEW: Tool for searching Hugging Face Spaces
75
  def search_hf_spaces(query: str, top_k: int = 3) -> str:
76
  """
77
  Searches Hugging Face Spaces for a given query and returns the top_k results.
@@ -82,23 +89,26 @@ def search_hf_spaces(query: str, top_k: int = 3) -> str:
82
  """
83
  try:
84
  print(f"Searching spaces with query: {query}, top_k: {top_k}")
 
 
85
  spaces_found = list(list_spaces(search=query, full=True, limit=top_k, sort="likes", direction=-1))
86
  if not spaces_found:
87
  return "No Spaces found for your query."
88
-
89
  results = "Found the following Spaces (sorted by likes):\n"
90
  for i, space_data in enumerate(spaces_found):
91
- description = "No description."
92
- if space_data.cardData and 'description' in space_data.cardData:
 
93
  description = space_data.cardData['description']
94
- elif space_data.title: # Fallback to title if description missing
95
  description = space_data.title
96
 
97
  results += (
98
  f"{i+1}. ID: {space_data.id}\n"
99
  f" Description: {description}\n"
100
- f" Likes: {space_data.likes}\n"
101
- f" Last Modified: {space_data.lastModified}\n\n"
102
  )
103
  results += ("\nTo use one of these, you can try creating a tool in the code like this: "
104
  "my_new_tool = Tool.from_space(repo_id='SPACE_ID_HERE', name='custom_tool_name'). "
@@ -108,85 +118,97 @@ def search_hf_spaces(query: str, top_k: int = 3) -> str:
108
  return results
109
  except Exception as e:
110
  print(f"Error searching Spaces: {str(e)}")
 
111
  return f"Error searching Spaces: {str(e)}"
112
 
113
  space_search_tool = Tool(
114
  name="huggingface_space_searcher",
115
  description="Searches for Hugging Face Spaces that can perform a specific task. Input is a search query string (e.g., 'text to image', 'speech recognition'). Returns a list of Space IDs, their descriptions, and instructions on how to try using them.",
116
  func=search_hf_spaces,
117
- # args_schema can be defined if you want Pydantic validation for args, e.g., using a class Query(BaseModel): query: str; top_k: int = 3
118
  )
119
  tools.append(space_search_tool)
120
 
121
 
122
- # Initialize the model
123
- model = HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct") # Or your preferred model
124
 
125
- # Create the agent with extended imports and a more detailed system prompt
126
  agent = CodeAgent(
127
  tools=tools,
128
  model=model,
129
  additional_authorized_imports=['PIL', 'Pillow', 'os', 'sys', 'numpy', 'huggingface_hub', 'gradio_client', 'uuid'],
130
  add_base_tools=True, # Includes web search, python interpreter
131
- system_prompt="""You are a highly capable AI assistant. Your primary goal is to accomplish tasks using a variety of tools, prioritizing Hugging Face Spaces.
132
-
133
- Follow these steps:
134
- 1. **Understand the Request:** Carefully analyze the user's prompt. Identify the core task and any specific requirements or inputs.
135
- 2. **Check Predefined Tools:** Review your list of available tools. If a predefined tool can directly address the request, use it.
136
- 3. **Search for Spaces (If Needed):** If no predefined tool is suitable, use the `huggingface_space_searcher` tool. Provide a concise search query related to the task (e.g., "image classification", "voice cloning", "document question answering").
137
- 4. **Select and Instantiate a Space Tool:** From the search results, choose the most promising Space. Attempt to create a tool from it using `Tool.from_space(repo_id='SELECTED_SPACE_ID', name='a_unique_tool_name')`. You might need to give it a unique name. If `Tool.from_space` fails, the Space might not be compatible, or you could try another one from the search results. Note that some Spaces might not have a public API or may require a specific `api_name` that `Tool.from_space` cannot infer; in such cases, you might not be able to use them.
138
- 5. **Execute the Tool:** Call the tool (either predefined or dynamically created) with the necessary arguments.
139
- * **File Inputs:** If the user uploads files, their paths will be available as global string variables: `input_image_path`, `input_audio_path`, `input_video_path`, `input_3d_model_path`, `input_file_path`. Before using these variables, check if they exist and are not None (e.g., `if 'input_image_path' in globals() and input_image_path:`). Pass these file paths as arguments to tools that require them. `Tool.from_space` handles file uploads for compatible Spaces when you pass the filepath string.
140
- * **Chaining Tools:** If the task requires multiple steps, chain the tools together, passing the output of one tool as the input to the next.
141
- 6. **Output Management:**
142
- * If a tool generates a file (image, audio, etc.), save it to the current working directory using a unique filename (e.g., `output_filename = os.path.join(os.getcwd(), f"{uuid.uuid4()}.png")`).
143
- * **Return the RESULT:** Your final response should be either:
144
- * A string containing the direct text answer.
145
- * The string path to the generated output file (e.g., `return output_filename`).
146
- 7. **Clarity and Error Handling:** If you encounter issues (e.g., a Space tool fails, required inputs are missing), clearly explain the problem in your response. If a Space doesn't work, try to explain why or suggest an alternative if possible.
147
-
148
- Example of dynamically using a Space after searching:
149
- ```python
150
- # user_prompt = "Find a space that can make an image of a cat and then use it."
151
- # First, I would use huggingface_space_searcher to find relevant spaces.
152
- # search_results = huggingface_space_searcher(query="text to image cat")
153
- # print(search_results) # This would show me some options. Let's say 'user/cat-generator' is found.
154
- # try:
155
- # cat_image_tool = Tool.from_space(repo_id="user/cat-generator", name="cat_generator_tool")
156
- # # The arguments for cat_image_tool depend on the Space. I'll assume it takes a 'prompt' argument.
157
- # image_path = cat_image_tool(prompt="A fluffy siamese cat")
158
- # # image_path should be a path to the generated image file
159
- # return image_path
160
- # except Exception as e:
161
- # return f"Failed to use the cat generator Space: {e}"
162
- ```
163
- Always ensure your generated Python code is complete and directly callable. Use `print()` for debugging if necessary, but the final returned value should be the result or file path.
164
- You have access to `os`, `uuid`, `PIL.Image`.
165
- """
166
  )
167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  # Gradio interface function
169
- def gradio_interface(prompt, input_image_path, input_audio_path, input_video_path, input_3d_model_path, input_file_path, progress=gr.Progress(track_tqdm=True)):
170
  try:
171
  progress(0, desc="Initializing Agent...")
 
 
 
 
172
  # Prepare a dictionary of potential inputs for the agent's execution scope
173
- agent_context_inputs = {"prompt": prompt}
174
  # These will be available as global variables in the agent's Python execution environment
 
 
175
  if input_image_path:
176
- agent_context_inputs["input_image_path"] = str(input_image_path) # Ensure it's a string path
177
  if input_audio_path:
178
- agent_context_inputs["input_audio_path"] = str(input_audio_path)
179
  if input_video_path:
180
- agent_context_inputs["input_video_path"] = str(input_video_path)
181
  if input_3d_model_path:
182
- agent_context_inputs["input_3d_model_path"] = str(input_3d_model_path) # Path to .glb or similar
183
  if input_file_path:
184
- agent_context_inputs["input_file_path"] = str(input_file_path) # Path to PDF, TXT etc.
185
-
186
- # The agent will use these global variables based on the system prompt's guidance
187
- # The `prompt` variable is the main user query.
188
  progress(0.2, desc="Agent processing request...")
189
- result = agent.run(**agent_context_inputs) # Pass main prompt and other inputs to be set in global scope
 
 
190
 
191
  progress(0.8, desc="Processing result...")
192
  # Default all outputs to invisible and None
@@ -196,7 +218,7 @@ def gradio_interface(prompt, input_image_path, input_audio_path, input_video_pat
196
  "path": gr.update(value=None, visible=False),
197
  "audio": gr.update(value=None, visible=False),
198
  "model3d": gr.update(value=None, visible=False),
199
- "text": gr.update(value=None, visible=False),
200
  }
201
 
202
  if isinstance(result, str):
@@ -204,23 +226,23 @@ def gradio_interface(prompt, input_image_path, input_audio_path, input_video_pat
204
  file_path = result
205
  outputs["file"] = gr.update(value=file_path, visible=True)
206
  outputs["path"] = gr.update(value=file_path, visible=True)
207
- ext = file_path.lower().split('.')[-1]
208
- if ext in ('png', 'jpg', 'jpeg', 'gif', 'webp'):
209
  outputs["image"] = gr.update(value=file_path, visible=True)
210
- elif ext in ('mp3', 'wav', 'ogg', 'flac'):
211
  outputs["audio"] = gr.update(value=file_path, visible=True)
212
- elif ext == 'glb': # Common format for Model3D
213
  outputs["model3d"] = gr.update(value=file_path, visible=True)
214
  else: # Other file types like PDF, TXT - user can download via file component
215
- outputs["text"] = gr.update(value=f"Output is a file (e.g., PDF, TXT): {os.path.basename(file_path)}. Download it above.", visible=True)
216
  else:
217
- # Result is a string (e.g., text output from a tool)
218
  outputs["text"] = gr.update(value=result, visible=True)
219
  elif result is None:
220
- outputs["text"] = gr.update(value="Agent returned no result (None). Check logs if available.", visible=True)
221
  else: # Other types (e.g. if agent returns a dict or list by mistake)
222
- outputs["text"] = gr.update(value=f"Unexpected result type: {type(result)}. Content: {str(result)}", visible=True)
223
-
224
  progress(1, desc="Done!")
225
  return (
226
  outputs["image"], outputs["file"], outputs["path"],
@@ -230,11 +252,11 @@ def gradio_interface(prompt, input_image_path, input_audio_path, input_video_pat
230
  except Exception as e:
231
  error_msg = f"An error occurred in the Gradio interface or agent execution: {str(e)}"
232
  print(error_msg) # Also print to console for server-side logs
233
- # traceback.print_exc() # For more detailed debugging
234
  return (
235
- gr.update(visible=False), gr.update(visible=False), gr.update(visible=False),
236
- gr.update(visible=False), gr.update(visible=False),
237
- gr.update(value=error_msg, visible=True)
238
  )
239
 
240
  # Create the Gradio app
@@ -246,31 +268,32 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
246
  prompt_input = gr.Textbox(
247
  label="Enter your prompt for the agent",
248
  placeholder="e.g., 'Generate an image of a futuristic city', 'Convert this text to speech: Hello world', or 'Search for a space that translates English to French and use it for: Good morning'",
249
- lines=3
 
250
  )
251
-
252
  with gr.Accordion("Optional File Inputs (for tasks requiring them)", open=False):
253
  with gr.Row():
254
- input_image = gr.Image(label="Image Input", type="filepath", sources=["upload", "clipboard"], elem_id="input_image")
255
- input_audio = gr.Audio(label="Audio Input", type="filepath", sources=["upload", "microphone"], elem_id="input_audio")
256
  with gr.Row():
257
- input_video = gr.Video(label="Video Input", type="filepath", sources=["upload"], elem_id="input_video")
258
- input_model3d = gr.Model3D(label="3D Model Input (.glb)", type="filepath", elem_id="input_model3d") # Gradio Model3D component expects .glb usually
259
  with gr.Row():
260
- input_file = gr.File(label="Generic File Input (PDF, TXT, etc.)", type="filepath", elem_id="input_file")
261
 
262
- submit_button = gr.Button("πŸš€ Generate", variant="primary")
263
 
264
  gr.Markdown("### Outputs:")
265
  with gr.Row():
266
- image_output = gr.Image(label="Image Output", interactive=False, visible=False, show_download_button=True)
267
- audio_output = gr.Audio(label="Audio Output", interactive=False, visible=False, show_download_button=True)
268
  with gr.Row():
269
- model3d_output = gr.Model3D(label="3D Model Output", interactive=False, visible=False, show_download_button=True)
270
- text_output = gr.Textbox(label="Text / Log Output", interactive=False, visible=True, lines=5, max_lines=10) # Start visible for logs/text
271
  with gr.Row():
272
- file_output = gr.File(label="Download File Output", interactive=False, visible=False)
273
- path_output = gr.Textbox(label="Output File Path (Copyable)", interactive=True, visible=False) # Keep for copying if needed
274
 
275
  # Link button click to the interface function
276
  submit_button.click(
@@ -278,18 +301,22 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
278
  inputs=[prompt_input, input_image, input_audio, input_video, input_model3d, input_file],
279
  outputs=[image_output, file_output, path_output, audio_output, model3d_output, text_output]
280
  )
281
-
282
  gr.Examples(
283
  examples=[
284
  ["Generate an image of a happy robot coding on a laptop, cyberpunk style.", None, None, None, None, None],
285
  ["Convert the following text to speech: 'Smolagents are amazing for building AI applications.'", None, None, None, None, None],
286
  ["Search for a Hugging Face Space that can perform image captioning. Describe the first result.", None, None, None, None, None],
287
- ["I have an image of a cat (you'll need to upload one). Find a space that can make it look like a painting and apply it.", "path/to/your/cat_image.png", None, None, None, None], # User would replace path or upload
 
 
288
  ],
289
  inputs=[prompt_input, input_image, input_audio, input_video, input_model3d, input_file],
290
- label="Example Prompts (Note: For examples with file inputs, you'll need to upload a relevant file)"
291
  )
292
 
293
  # Launch the app
294
  if __name__ == "__main__":
295
- app.launch(debug=True) # Enable debug for more detailed logs during development
 
 
 
1
  import gradio as gr
2
  import os
3
  import shutil
4
+ from gradio_client import Client, handle_file # handle_file might be used by the agent
5
+ # Use InferenceClientModel instead of HfApiModel
6
+ from smolagents import Tool, CodeAgent, InferenceClientModel, ToolCollection
7
  import uuid
8
+ import httpx # Often a dependency for HTTP clients, good to have
9
  from tenacity import retry, stop_after_attempt, wait_exponential
10
+ from huggingface_hub import list_spaces
11
  from PIL import Image # For potential image manipulation by the agent
12
+ import traceback # For more detailed error logging if needed
13
 
14
+ # Define initial tools from Spaces
15
+ # Commenting out problematic spaces for now.
16
+ # You'll need to verify their api_name or compatibility if you re-enable them.
17
+ # Ensure the api_name is correct if you uncomment these.
18
+ # Visit the HF Space page and look for "API - via gradio_client" for hints.
19
  spaces = [
20
+ {"repo_id": "black-forest-labs/FLUX.1-schnell",
21
+ "name": "image_generator_flux_schnell",
22
  "description": "Generate an image from a prompt using FLUX.1-schnell. Expects a text prompt.",
23
  "api_name": "/infer"},
24
+ {"repo_id": "Remsky/Kokoro-TTS-Zero",
25
+ "name": "text_to_speech_kokoro",
26
  "description": "Generates speech (audio) from input text using Kokoro TTS Zero. Expects text input.",
27
  "api_name": "/generate_speech_from_ui"},
28
+ {"repo_id": "jamesliu1217/EasyControl_Ghibli",
29
+ "name": "ghibli_style_image_control",
30
  "description": "Create Ghibli style image from an input image using EasyControl_Ghibli. Expects an image and a prompt/control parameters.",
31
  "api_name": "/single_condition_generate_image"},
32
+ {"repo_id": "opendatalab/MinerU",
33
+ "name": "pdf_text_extraction_mineru",
34
  "description": "Extracts the text of a PDF up to 20 pages long using MinerU. Expects a PDF file.",
35
  "api_name": "/to_pdf"},
36
+ # {"repo_id": "InstantX/InstantCharacter",
37
+ # "name": "instant_character_customization",
38
+ # "description": "Personalize Any Characters with a Scalable Diffusion Transformer Framework to any style or pose using InstantCharacter. Expects an input image and potentially pose/style images or prompts.",
39
+ # "api_name": "/predict"}, # Example: Verify this api_name if re-enabling
40
+ # {"repo_id": "fotographerai/Zen-Style-Shape",
41
+ # "name": "img_to_img_style_transfer_zen_shape",
42
+ # "description": "Flux[dev] Redux + Flux[dev] Canny. Implements a custom image-to-image style transfer pipeline blending style from Image A to structure of Image B. Expects two images.",
43
+ # "api_name": "/predict"}, # Example: Verify this api_name if re-enabling
44
+ # {"repo_id": "moonshotai/Kimi-VL-A3B-Thinking",
45
+ # "name": "multimodal_vlm_llm_kimi",
46
+ # "description": "Kimi-VL-A3B-Thinking is a multi-modal LLM that can understand text and images, and generate text with thinking processes. Ask any question about an image. Expects text and optionally an image.",
47
+ # "api_name": "/chat"}, # Example: Verify this api_name if re-enabling
48
  ]
49
 
50
  # Create tools from predefined Spaces with retry logic
51
  tools = []
52
+ for space_info in spaces:
53
  repo_id = space_info['repo_id']
54
  name = space_info.get('name', repo_id.split('/')[-1].replace('-', '_')) # Default name from repo_id
55
  description = space_info.get('description', f'A tool to interact with the Hugging Face Space: {repo_id}')
 
68
  print(f"Failed to load predefined tool from {repo_id}. Error: {str(e)}. Continuing with available tools.")
69
 
70
  # Load tools from a Hugging Face Collection
71
+ collection_slug = "jkorstad/tools-680127d17eed47e759549ff4"
72
  try:
73
+ # Added trust_remote_code=True
74
+ collection = ToolCollection.from_hub(collection_slug=collection_slug, trust_remote_code=True)
75
  tools.extend(collection.tools)
76
  print(f"Successfully loaded tools from collection: {collection_slug}")
77
  except Exception as e:
78
  print(f"Warning: Failed to load collection {collection_slug}. Error: {str(e)}")
79
 
80
 
81
+ # Tool for searching Hugging Face Spaces
82
  def search_hf_spaces(query: str, top_k: int = 3) -> str:
83
  """
84
  Searches Hugging Face Spaces for a given query and returns the top_k results.
 
89
  """
90
  try:
91
  print(f"Searching spaces with query: {query}, top_k: {top_k}")
92
+ # Using list_spaces, ensure it's imported: from huggingface_hub import list_spaces
93
+ # full=True gives more metadata, sort by likes, direction=-1 for descending
94
  spaces_found = list(list_spaces(search=query, full=True, limit=top_k, sort="likes", direction=-1))
95
  if not spaces_found:
96
  return "No Spaces found for your query."
97
+
98
  results = "Found the following Spaces (sorted by likes):\n"
99
  for i, space_data in enumerate(spaces_found):
100
+ # Safely access attributes, as they might not always be present
101
+ description = "No description provided."
102
+ if hasattr(space_data, 'cardData') and space_data.cardData and 'description' in space_data.cardData:
103
  description = space_data.cardData['description']
104
+ elif hasattr(space_data, 'title') and space_data.title: # Fallback to title
105
  description = space_data.title
106
 
107
  results += (
108
  f"{i+1}. ID: {space_data.id}\n"
109
  f" Description: {description}\n"
110
+ f" Likes: {space_data.likes if hasattr(space_data, 'likes') else 'N/A'}\n"
111
+ f" Last Modified: {space_data.lastModified if hasattr(space_data, 'lastModified') else 'N/A'}\n\n"
112
  )
113
  results += ("\nTo use one of these, you can try creating a tool in the code like this: "
114
  "my_new_tool = Tool.from_space(repo_id='SPACE_ID_HERE', name='custom_tool_name'). "
 
118
  return results
119
  except Exception as e:
120
  print(f"Error searching Spaces: {str(e)}")
121
+ # traceback.print_exc() # Uncomment for detailed search error debugging
122
  return f"Error searching Spaces: {str(e)}"
123
 
124
  space_search_tool = Tool(
125
  name="huggingface_space_searcher",
126
  description="Searches for Hugging Face Spaces that can perform a specific task. Input is a search query string (e.g., 'text to image', 'speech recognition'). Returns a list of Space IDs, their descriptions, and instructions on how to try using them.",
127
  func=search_hf_spaces,
 
128
  )
129
  tools.append(space_search_tool)
130
 
131
 
132
+ # Initialize the model - Use InferenceClientModel
133
+ model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct") # Or your preferred model
134
 
135
+ # Create the agent - Removed system_prompt from constructor
136
  agent = CodeAgent(
137
  tools=tools,
138
  model=model,
139
  additional_authorized_imports=['PIL', 'Pillow', 'os', 'sys', 'numpy', 'huggingface_hub', 'gradio_client', 'uuid'],
140
  add_base_tools=True, # Includes web search, python interpreter
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  )
142
 
143
+ # This is the detailed instruction set that was previously in system_prompt
144
+ AGENT_INSTRUCTIONS = """You are a highly capable AI assistant. Your primary goal is to accomplish tasks using a variety of tools, prioritizing Hugging Face Spaces.
145
+
146
+ Follow these steps:
147
+ 1. **Understand the Request:** Carefully analyze the user's prompt (which will follow these instructions). Identify the core task and any specific requirements or inputs.
148
+ 2. **Check Predefined Tools:** Review your list of available tools. If a predefined tool can directly address the request, use it.
149
+ 3. **Search for Spaces (If Needed):** If no predefined tool is suitable, use the `huggingface_space_searcher` tool. Provide a concise search query related to the task (e.g., "image classification", "voice cloning", "document question answering").
150
+ 4. **Select and Instantiate a Space Tool:** From the search results, choose the most promising Space. Attempt to create a tool from it using `Tool.from_space(repo_id='SELECTED_SPACE_ID', name='a_unique_tool_name')`. You might need to give it a unique name. If `Tool.from_space` fails, the Space might not be compatible, or you could try another one from the search results. Note that some Spaces might not have a public API or may require a specific `api_name` that `Tool.from_space` cannot infer; in such cases, you might not be able to use them.
151
+ 5. **Execute the Tool:** Call the tool (either predefined or dynamically created) with the necessary arguments.
152
+ * **File Inputs:** If the user uploads files, their paths will be available as global string variables: `input_image_path`, `input_audio_path`, `input_video_path`, `input_3d_model_path`, `input_file_path`. Before using these variables, check if they exist and are not None (e.g., `if 'input_image_path' in globals() and input_image_path:`). Pass these file paths as arguments to tools that require them. `Tool.from_space` handles file uploads for compatible Spaces when you pass the filepath string.
153
+ * **Chaining Tools:** If the task requires multiple steps, chain the tools together, passing the output of one tool as the input to the next.
154
+ 6. **Output Management:**
155
+ * If a tool generates a file (image, audio, etc.), save it to the current working directory using a unique filename (e.g., `output_filename = os.path.join(os.getcwd(), f"{uuid.uuid4()}.png")`).
156
+ * **Return the RESULT:** Your final response should be either:
157
+ * A string containing the direct text answer.
158
+ * The string path to the generated output file (e.g., `return output_filename`).
159
+ 7. **Clarity and Error Handling:** If you encounter issues (e.g., a Space tool fails, required inputs are missing), clearly explain the problem in your response. If a Space doesn't work, try to explain why or suggest an alternative if possible.
160
+
161
+ Example of dynamically using a Space after searching:
162
+ ```python
163
+ # This is an example of how I, the agent, would think and act.
164
+ # User's actual prompt would follow these instructions.
165
+ # Example user prompt: "Find a space that can make an image of a cat and then use it."
166
+ #
167
+ # My thought process:
168
+ # 1. The user wants an image of a cat, and wants me to find a Space for it.
169
+ # 2. I'll use `huggingface_space_searcher`.
170
+ # search_results = huggingface_space_searcher(query="text to image cat")
171
+ # print(search_results) # This would show me some options. Let's say 'user/cat-generator' is found.
172
+ # try:
173
+ # cat_image_tool = Tool.from_space(repo_id="user/cat-generator", name="cat_generator_tool")
174
+ # # The arguments for cat_image_tool depend on the Space. I'll assume it takes a 'prompt' argument.
175
+ # image_path = cat_image_tool(prompt="A fluffy siamese cat")
176
+ # # image_path should be a path to the generated image file
177
+ # return image_path
178
+ # except Exception as e:
179
+ # return f"Failed to use the cat generator Space: {e}"
180
+ ```
181
+ Always ensure your generated Python code is complete and directly callable. Use `print()` for debugging if necessary, but the final returned value should be the result or file path.
182
+ You have access to `os`, `uuid`, `PIL.Image`.
183
+ """
184
+
185
  # Gradio interface function
186
+ def gradio_interface(user_prompt, input_image_path, input_audio_path, input_video_path, input_3d_model_path, input_file_path, progress=gr.Progress(track_tqdm=True)):
187
  try:
188
  progress(0, desc="Initializing Agent...")
189
+
190
+ # Combine instructions with the user's prompt
191
+ full_prompt_with_instructions = f"{AGENT_INSTRUCTIONS}\n\nUSER PROMPT: {user_prompt}"
192
+
193
  # Prepare a dictionary of potential inputs for the agent's execution scope
 
194
  # These will be available as global variables in the agent's Python execution environment
195
+ # when agent.run is called with keyword arguments.
196
+ agent_kwargs = {}
197
  if input_image_path:
198
+ agent_kwargs["input_image_path"] = str(input_image_path) # Ensure it's a string path
199
  if input_audio_path:
200
+ agent_kwargs["input_audio_path"] = str(input_audio_path)
201
  if input_video_path:
202
+ agent_kwargs["input_video_path"] = str(input_video_path)
203
  if input_3d_model_path:
204
+ agent_kwargs["input_3d_model_path"] = str(input_3d_model_path)
205
  if input_file_path:
206
+ agent_kwargs["input_file_path"] = str(input_file_path)
207
+
 
 
208
  progress(0.2, desc="Agent processing request...")
209
+ # The first argument to agent.run is the main prompt.
210
+ # Other kwargs are set as global variables in the agent's execution context.
211
+ result = agent.run(full_prompt_with_instructions, **agent_kwargs)
212
 
213
  progress(0.8, desc="Processing result...")
214
  # Default all outputs to invisible and None
 
218
  "path": gr.update(value=None, visible=False),
219
  "audio": gr.update(value=None, visible=False),
220
  "model3d": gr.update(value=None, visible=False),
221
+ "text": gr.update(value=None, visible=True), # Text output is often default
222
  }
223
 
224
  if isinstance(result, str):
 
226
  file_path = result
227
  outputs["file"] = gr.update(value=file_path, visible=True)
228
  outputs["path"] = gr.update(value=file_path, visible=True)
229
+ ext = os.path.splitext(file_path.lower())[1] # Get extension like .png
230
+ if ext in ('.png', '.jpg', '.jpeg', '.gif', '.webp'):
231
  outputs["image"] = gr.update(value=file_path, visible=True)
232
+ elif ext in ('.mp3', '.wav', '.ogg', '.flac'):
233
  outputs["audio"] = gr.update(value=file_path, visible=True)
234
+ elif ext == '.glb': # Common format for Model3D
235
  outputs["model3d"] = gr.update(value=file_path, visible=True)
236
  else: # Other file types like PDF, TXT - user can download via file component
237
+ outputs["text"] = gr.update(value=f"Output is a file: {os.path.basename(file_path)}. Download it using the 'Download File Output' component.", visible=True)
238
  else:
239
+ # Result is a string (e.g., text output from a tool or an error message from the agent)
240
  outputs["text"] = gr.update(value=result, visible=True)
241
  elif result is None:
242
+ outputs["text"] = gr.update(value="Agent returned no result (None). This might indicate an issue or that the task didn't produce a specific output string/file.", visible=True)
243
  else: # Other types (e.g. if agent returns a dict or list by mistake)
244
+ outputs["text"] = gr.update(value=f"Unexpected result type from agent: {type(result)}. Content: {str(result)}", visible=True)
245
+
246
  progress(1, desc="Done!")
247
  return (
248
  outputs["image"], outputs["file"], outputs["path"],
 
252
  except Exception as e:
253
  error_msg = f"An error occurred in the Gradio interface or agent execution: {str(e)}"
254
  print(error_msg) # Also print to console for server-side logs
255
+ traceback.print_exc() # For more detailed debugging
256
  return (
257
+ gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False),
258
+ gr.update(value=None, visible=False), gr.update(value=None, visible=False),
259
+ gr.update(value=error_msg, visible=True) # Show error in the text output
260
  )
261
 
262
  # Create the Gradio app
 
268
  prompt_input = gr.Textbox(
269
  label="Enter your prompt for the agent",
270
  placeholder="e.g., 'Generate an image of a futuristic city', 'Convert this text to speech: Hello world', or 'Search for a space that translates English to French and use it for: Good morning'",
271
+ lines=3,
272
+ elem_id="user_prompt_textbox"
273
  )
274
+
275
  with gr.Accordion("Optional File Inputs (for tasks requiring them)", open=False):
276
  with gr.Row():
277
+ input_image = gr.Image(label="Image Input", type="filepath", sources=["upload", "clipboard"], elem_id="input_image_upload")
278
+ input_audio = gr.Audio(label="Audio Input", type="filepath", sources=["upload", "microphone"], elem_id="input_audio_upload")
279
  with gr.Row():
280
+ input_video = gr.Video(label="Video Input", type="filepath", sources=["upload"], elem_id="input_video_upload") # Gradio Video component might have limitations
281
+ input_model3d = gr.Model3D(label="3D Model Input (.glb, .obj, etc.)", type="filepath", elem_id="input_model3d_upload") # Check Gradio docs for supported Model3D types
282
  with gr.Row():
283
+ input_file = gr.File(label="Generic File Input (PDF, TXT, etc.)", type="filepath", elem_id="input_file_upload")
284
 
285
+ submit_button = gr.Button("πŸš€ Generate", variant="primary", elem_id="submit_button_generate")
286
 
287
  gr.Markdown("### Outputs:")
288
  with gr.Row():
289
+ image_output = gr.Image(label="Image Output", interactive=False, visible=False, show_download_button=True, elem_id="output_image_display")
290
+ audio_output = gr.Audio(label="Audio Output", interactive=False, visible=False, show_download_button=True, elem_id="output_audio_display")
291
  with gr.Row():
292
+ model3d_output = gr.Model3D(label="3D Model Output", interactive=False, visible=False, show_download_button=True, elem_id="output_model3d_display")
293
+ text_output = gr.Textbox(label="Text / Log Output", interactive=False, visible=True, lines=5, max_lines=20, elem_id="output_text_log") # Start visible for logs/text
294
  with gr.Row():
295
+ file_output = gr.File(label="Download File Output", interactive=False, visible=False, elem_id="output_file_download")
296
+ path_output = gr.Textbox(label="Output File Path (Copyable)", interactive=False, visible=False, elem_id="output_file_path_text") # Keep for copying if needed
297
 
298
  # Link button click to the interface function
299
  submit_button.click(
 
301
  inputs=[prompt_input, input_image, input_audio, input_video, input_model3d, input_file],
302
  outputs=[image_output, file_output, path_output, audio_output, model3d_output, text_output]
303
  )
304
+
305
  gr.Examples(
306
  examples=[
307
  ["Generate an image of a happy robot coding on a laptop, cyberpunk style.", None, None, None, None, None],
308
  ["Convert the following text to speech: 'Smolagents are amazing for building AI applications.'", None, None, None, None, None],
309
  ["Search for a Hugging Face Space that can perform image captioning. Describe the first result.", None, None, None, None, None],
310
+ # For examples with file inputs, the user needs to upload a file manually.
311
+ # The string path here is just a placeholder for the example text.
312
+ ["I have an image of a cat. Find a space that can make it look like a painting and apply it. You will need to use the 'input_image_path' variable which will contain the path to the uploaded cat image.", "path/to/your/cat_image.png", None, None, None, None],
313
  ],
314
  inputs=[prompt_input, input_image, input_audio, input_video, input_model3d, input_file],
315
+ label="Example Prompts (Note: For examples with file inputs, you'll need to upload a relevant file first using the 'Optional File Inputs' section)"
316
  )
317
 
318
  # Launch the app
319
if __name__ == "__main__":
    # Entry point when run as a script: start the Gradio server.
    # debug=True turns on verbose Gradio logging; add share=True here to
    # expose a temporary public URL when testing from another device.
    app.launch(debug=True)