Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,49 +1,55 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import os
|
| 3 |
import shutil
|
| 4 |
-
from gradio_client import Client, handle_file # handle_file might be used by the agent
|
| 5 |
-
|
|
|
|
| 6 |
import uuid
|
| 7 |
-
import httpx
|
| 8 |
from tenacity import retry, stop_after_attempt, wait_exponential
|
| 9 |
-
from huggingface_hub import list_spaces
|
| 10 |
from PIL import Image # For potential image manipulation by the agent
|
|
|
|
| 11 |
|
| 12 |
-
# Define initial tools from Spaces
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
spaces = [
|
| 14 |
-
{"repo_id": "black-forest-labs/FLUX.1-schnell",
|
| 15 |
-
"name": "image_generator_flux_schnell",
|
| 16 |
"description": "Generate an image from a prompt using FLUX.1-schnell. Expects a text prompt.",
|
| 17 |
"api_name": "/infer"},
|
| 18 |
-
{"repo_id": "Remsky/Kokoro-TTS-Zero",
|
| 19 |
-
"name": "text_to_speech_kokoro",
|
| 20 |
"description": "Generates speech (audio) from input text using Kokoro TTS Zero. Expects text input.",
|
| 21 |
"api_name": "/generate_speech_from_ui"},
|
| 22 |
-
{"repo_id": "jamesliu1217/EasyControl_Ghibli",
|
| 23 |
-
"name": "ghibli_style_image_control",
|
| 24 |
"description": "Create Ghibli style image from an input image using EasyControl_Ghibli. Expects an image and a prompt/control parameters.",
|
| 25 |
"api_name": "/single_condition_generate_image"},
|
| 26 |
-
{"repo_id": "opendatalab/MinerU",
|
| 27 |
-
"name": "pdf_text_extraction_mineru",
|
| 28 |
"description": "Extracts the text of a PDF up to 20 pages long using MinerU. Expects a PDF file.",
|
| 29 |
"api_name": "/to_pdf"},
|
| 30 |
-
{"repo_id": "InstantX/InstantCharacter",
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
{"repo_id": "fotographerai/Zen-Style-Shape",
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
{"repo_id": "moonshotai/Kimi-VL-A3B-Thinking",
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
]
|
| 43 |
|
| 44 |
# Create tools from predefined Spaces with retry logic
|
| 45 |
tools = []
|
| 46 |
-
for space_info in spaces:
|
| 47 |
repo_id = space_info['repo_id']
|
| 48 |
name = space_info.get('name', repo_id.split('/')[-1].replace('-', '_')) # Default name from repo_id
|
| 49 |
description = space_info.get('description', f'A tool to interact with the Hugging Face Space: {repo_id}')
|
|
@@ -62,16 +68,17 @@ for space_info in spaces: # Renamed 'space' to 'space_info' to avoid conflict
|
|
| 62 |
print(f"Failed to load predefined tool from {repo_id}. Error: {str(e)}. Continuing with available tools.")
|
| 63 |
|
| 64 |
# Load tools from a Hugging Face Collection
|
| 65 |
-
collection_slug = "jkorstad/tools-680127d17eed47e759549ff4"
|
| 66 |
try:
|
| 67 |
-
|
|
|
|
| 68 |
tools.extend(collection.tools)
|
| 69 |
print(f"Successfully loaded tools from collection: {collection_slug}")
|
| 70 |
except Exception as e:
|
| 71 |
print(f"Warning: Failed to load collection {collection_slug}. Error: {str(e)}")
|
| 72 |
|
| 73 |
|
| 74 |
-
#
|
| 75 |
def search_hf_spaces(query: str, top_k: int = 3) -> str:
|
| 76 |
"""
|
| 77 |
Searches Hugging Face Spaces for a given query and returns the top_k results.
|
|
@@ -82,23 +89,26 @@ def search_hf_spaces(query: str, top_k: int = 3) -> str:
|
|
| 82 |
"""
|
| 83 |
try:
|
| 84 |
print(f"Searching spaces with query: {query}, top_k: {top_k}")
|
|
|
|
|
|
|
| 85 |
spaces_found = list(list_spaces(search=query, full=True, limit=top_k, sort="likes", direction=-1))
|
| 86 |
if not spaces_found:
|
| 87 |
return "No Spaces found for your query."
|
| 88 |
-
|
| 89 |
results = "Found the following Spaces (sorted by likes):\n"
|
| 90 |
for i, space_data in enumerate(spaces_found):
|
| 91 |
-
|
| 92 |
-
|
|
|
|
| 93 |
description = space_data.cardData['description']
|
| 94 |
-
elif space_data.title: # Fallback to title
|
| 95 |
description = space_data.title
|
| 96 |
|
| 97 |
results += (
|
| 98 |
f"{i+1}. ID: {space_data.id}\n"
|
| 99 |
f" Description: {description}\n"
|
| 100 |
-
f" Likes: {space_data.likes}\n"
|
| 101 |
-
f" Last Modified: {space_data.lastModified}\n\n"
|
| 102 |
)
|
| 103 |
results += ("\nTo use one of these, you can try creating a tool in the code like this: "
|
| 104 |
"my_new_tool = Tool.from_space(repo_id='SPACE_ID_HERE', name='custom_tool_name'). "
|
|
@@ -108,85 +118,97 @@ def search_hf_spaces(query: str, top_k: int = 3) -> str:
|
|
| 108 |
return results
|
| 109 |
except Exception as e:
|
| 110 |
print(f"Error searching Spaces: {str(e)}")
|
|
|
|
| 111 |
return f"Error searching Spaces: {str(e)}"
|
| 112 |
|
| 113 |
space_search_tool = Tool(
|
| 114 |
name="huggingface_space_searcher",
|
| 115 |
description="Searches for Hugging Face Spaces that can perform a specific task. Input is a search query string (e.g., 'text to image', 'speech recognition'). Returns a list of Space IDs, their descriptions, and instructions on how to try using them.",
|
| 116 |
func=search_hf_spaces,
|
| 117 |
-
# args_schema can be defined if you want Pydantic validation for args, e.g., using a class Query(BaseModel): query: str; top_k: int = 3
|
| 118 |
)
|
| 119 |
tools.append(space_search_tool)
|
| 120 |
|
| 121 |
|
| 122 |
-
# Initialize the model
|
| 123 |
-
model =
|
| 124 |
|
| 125 |
-
# Create the agent
|
| 126 |
agent = CodeAgent(
|
| 127 |
tools=tools,
|
| 128 |
model=model,
|
| 129 |
additional_authorized_imports=['PIL', 'Pillow', 'os', 'sys', 'numpy', 'huggingface_hub', 'gradio_client', 'uuid'],
|
| 130 |
add_base_tools=True, # Includes web search, python interpreter
|
| 131 |
-
system_prompt="""You are a highly capable AI assistant. Your primary goal is to accomplish tasks using a variety of tools, prioritizing Hugging Face Spaces.
|
| 132 |
-
|
| 133 |
-
Follow these steps:
|
| 134 |
-
1. **Understand the Request:** Carefully analyze the user's prompt. Identify the core task and any specific requirements or inputs.
|
| 135 |
-
2. **Check Predefined Tools:** Review your list of available tools. If a predefined tool can directly address the request, use it.
|
| 136 |
-
3. **Search for Spaces (If Needed):** If no predefined tool is suitable, use the `huggingface_space_searcher` tool. Provide a concise search query related to the task (e.g., "image classification", "voice cloning", "document question answering").
|
| 137 |
-
4. **Select and Instantiate a Space Tool:** From the search results, choose the most promising Space. Attempt to create a tool from it using `Tool.from_space(repo_id='SELECTED_SPACE_ID', name='a_unique_tool_name')`. You might need to give it a unique name. If `Tool.from_space` fails, the Space might not be compatible, or you could try another one from the search results. Note that some Spaces might not have a public API or may require a specific `api_name` that `Tool.from_space` cannot infer; in such cases, you might not be able to use them.
|
| 138 |
-
5. **Execute the Tool:** Call the tool (either predefined or dynamically created) with the necessary arguments.
|
| 139 |
-
* **File Inputs:** If the user uploads files, their paths will be available as global string variables: `input_image_path`, `input_audio_path`, `input_video_path`, `input_3d_model_path`, `input_file_path`. Before using these variables, check if they exist and are not None (e.g., `if 'input_image_path' in globals() and input_image_path:`). Pass these file paths as arguments to tools that require them. `Tool.from_space` handles file uploads for compatible Spaces when you pass the filepath string.
|
| 140 |
-
* **Chaining Tools:** If the task requires multiple steps, chain the tools together, passing the output of one tool as the input to the next.
|
| 141 |
-
6. **Output Management:**
|
| 142 |
-
* If a tool generates a file (image, audio, etc.), save it to the current working directory using a unique filename (e.g., `output_filename = os.path.join(os.getcwd(), f"{uuid.uuid4()}.png")`).
|
| 143 |
-
* **Return the RESULT:** Your final response should be either:
|
| 144 |
-
* A string containing the direct text answer.
|
| 145 |
-
* The string path to the generated output file (e.g., `return output_filename`).
|
| 146 |
-
7. **Clarity and Error Handling:** If you encounter issues (e.g., a Space tool fails, required inputs are missing), clearly explain the problem in your response. If a Space doesn't work, try to explain why or suggest an alternative if possible.
|
| 147 |
-
|
| 148 |
-
Example of dynamically using a Space after searching:
|
| 149 |
-
```python
|
| 150 |
-
# user_prompt = "Find a space that can make an image of a cat and then use it."
|
| 151 |
-
# First, I would use huggingface_space_searcher to find relevant spaces.
|
| 152 |
-
# search_results = huggingface_space_searcher(query="text to image cat")
|
| 153 |
-
# print(search_results) # This would show me some options. Let's say 'user/cat-generator' is found.
|
| 154 |
-
# try:
|
| 155 |
-
# cat_image_tool = Tool.from_space(repo_id="user/cat-generator", name="cat_generator_tool")
|
| 156 |
-
# # The arguments for cat_image_tool depend on the Space. I'll assume it takes a 'prompt' argument.
|
| 157 |
-
# image_path = cat_image_tool(prompt="A fluffy siamese cat")
|
| 158 |
-
# # image_path should be a path to the generated image file
|
| 159 |
-
# return image_path
|
| 160 |
-
# except Exception as e:
|
| 161 |
-
# return f"Failed to use the cat generator Space: {e}"
|
| 162 |
-
```
|
| 163 |
-
Always ensure your generated Python code is complete and directly callable. Use `print()` for debugging if necessary, but the final returned value should be the result or file path.
|
| 164 |
-
You have access to `os`, `uuid`, `PIL.Image`.
|
| 165 |
-
"""
|
| 166 |
)
|
| 167 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
# Gradio interface function
|
| 169 |
-
def gradio_interface(
|
| 170 |
try:
|
| 171 |
progress(0, desc="Initializing Agent...")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
# Prepare a dictionary of potential inputs for the agent's execution scope
|
| 173 |
-
agent_context_inputs = {"prompt": prompt}
|
| 174 |
# These will be available as global variables in the agent's Python execution environment
|
|
|
|
|
|
|
| 175 |
if input_image_path:
|
| 176 |
-
|
| 177 |
if input_audio_path:
|
| 178 |
-
|
| 179 |
if input_video_path:
|
| 180 |
-
|
| 181 |
if input_3d_model_path:
|
| 182 |
-
|
| 183 |
if input_file_path:
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
# The agent will use these global variables based on the system prompt's guidance
|
| 187 |
-
# The `prompt` variable is the main user query.
|
| 188 |
progress(0.2, desc="Agent processing request...")
|
| 189 |
-
|
|
|
|
|
|
|
| 190 |
|
| 191 |
progress(0.8, desc="Processing result...")
|
| 192 |
# Default all outputs to invisible and None
|
|
@@ -196,7 +218,7 @@ def gradio_interface(prompt, input_image_path, input_audio_path, input_video_pat
|
|
| 196 |
"path": gr.update(value=None, visible=False),
|
| 197 |
"audio": gr.update(value=None, visible=False),
|
| 198 |
"model3d": gr.update(value=None, visible=False),
|
| 199 |
-
"text": gr.update(value=None, visible=
|
| 200 |
}
|
| 201 |
|
| 202 |
if isinstance(result, str):
|
|
@@ -204,23 +226,23 @@ def gradio_interface(prompt, input_image_path, input_audio_path, input_video_pat
|
|
| 204 |
file_path = result
|
| 205 |
outputs["file"] = gr.update(value=file_path, visible=True)
|
| 206 |
outputs["path"] = gr.update(value=file_path, visible=True)
|
| 207 |
-
ext = file_path.lower()
|
| 208 |
-
if ext in ('png', 'jpg', 'jpeg', 'gif', 'webp'):
|
| 209 |
outputs["image"] = gr.update(value=file_path, visible=True)
|
| 210 |
-
elif ext in ('mp3', 'wav', 'ogg', 'flac'):
|
| 211 |
outputs["audio"] = gr.update(value=file_path, visible=True)
|
| 212 |
-
elif ext == 'glb': # Common format for Model3D
|
| 213 |
outputs["model3d"] = gr.update(value=file_path, visible=True)
|
| 214 |
else: # Other file types like PDF, TXT - user can download via file component
|
| 215 |
-
outputs["text"] = gr.update(value=f"Output is a file
|
| 216 |
else:
|
| 217 |
-
# Result is a string (e.g., text output from a tool)
|
| 218 |
outputs["text"] = gr.update(value=result, visible=True)
|
| 219 |
elif result is None:
|
| 220 |
-
outputs["text"] = gr.update(value="Agent returned no result (None).
|
| 221 |
else: # Other types (e.g. if agent returns a dict or list by mistake)
|
| 222 |
-
outputs["text"] = gr.update(value=f"Unexpected result type: {type(result)}. Content: {str(result)}", visible=True)
|
| 223 |
-
|
| 224 |
progress(1, desc="Done!")
|
| 225 |
return (
|
| 226 |
outputs["image"], outputs["file"], outputs["path"],
|
|
@@ -230,11 +252,11 @@ def gradio_interface(prompt, input_image_path, input_audio_path, input_video_pat
|
|
| 230 |
except Exception as e:
|
| 231 |
error_msg = f"An error occurred in the Gradio interface or agent execution: {str(e)}"
|
| 232 |
print(error_msg) # Also print to console for server-side logs
|
| 233 |
-
|
| 234 |
return (
|
| 235 |
-
gr.update(visible=False), gr.update(visible=False), gr.update(visible=False),
|
| 236 |
-
gr.update(visible=False), gr.update(visible=False),
|
| 237 |
-
gr.update(value=error_msg, visible=True)
|
| 238 |
)
|
| 239 |
|
| 240 |
# Create the Gradio app
|
|
@@ -246,31 +268,32 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
|
|
| 246 |
prompt_input = gr.Textbox(
|
| 247 |
label="Enter your prompt for the agent",
|
| 248 |
placeholder="e.g., 'Generate an image of a futuristic city', 'Convert this text to speech: Hello world', or 'Search for a space that translates English to French and use it for: Good morning'",
|
| 249 |
-
lines=3
|
|
|
|
| 250 |
)
|
| 251 |
-
|
| 252 |
with gr.Accordion("Optional File Inputs (for tasks requiring them)", open=False):
|
| 253 |
with gr.Row():
|
| 254 |
-
input_image = gr.Image(label="Image Input", type="filepath", sources=["upload", "clipboard"], elem_id="
|
| 255 |
-
input_audio = gr.Audio(label="Audio Input", type="filepath", sources=["upload", "microphone"], elem_id="
|
| 256 |
with gr.Row():
|
| 257 |
-
input_video = gr.Video(label="Video Input", type="filepath", sources=["upload"], elem_id="
|
| 258 |
-
input_model3d = gr.Model3D(label="3D Model Input (.glb)", type="filepath", elem_id="
|
| 259 |
with gr.Row():
|
| 260 |
-
input_file = gr.File(label="Generic File Input (PDF, TXT, etc.)", type="filepath", elem_id="
|
| 261 |
|
| 262 |
-
submit_button = gr.Button("π Generate", variant="primary")
|
| 263 |
|
| 264 |
gr.Markdown("### Outputs:")
|
| 265 |
with gr.Row():
|
| 266 |
-
image_output = gr.Image(label="Image Output", interactive=False, visible=False, show_download_button=True)
|
| 267 |
-
audio_output = gr.Audio(label="Audio Output", interactive=False, visible=False, show_download_button=True)
|
| 268 |
with gr.Row():
|
| 269 |
-
model3d_output = gr.Model3D(label="3D Model Output", interactive=False, visible=False, show_download_button=True)
|
| 270 |
-
text_output = gr.Textbox(label="Text / Log Output", interactive=False, visible=True, lines=5, max_lines=
|
| 271 |
with gr.Row():
|
| 272 |
-
file_output = gr.File(label="Download File Output", interactive=False, visible=False)
|
| 273 |
-
path_output = gr.Textbox(label="Output File Path (Copyable)", interactive=
|
| 274 |
|
| 275 |
# Link button click to the interface function
|
| 276 |
submit_button.click(
|
|
@@ -278,18 +301,22 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
|
|
| 278 |
inputs=[prompt_input, input_image, input_audio, input_video, input_model3d, input_file],
|
| 279 |
outputs=[image_output, file_output, path_output, audio_output, model3d_output, text_output]
|
| 280 |
)
|
| 281 |
-
|
| 282 |
gr.Examples(
|
| 283 |
examples=[
|
| 284 |
["Generate an image of a happy robot coding on a laptop, cyberpunk style.", None, None, None, None, None],
|
| 285 |
["Convert the following text to speech: 'Smolagents are amazing for building AI applications.'", None, None, None, None, None],
|
| 286 |
["Search for a Hugging Face Space that can perform image captioning. Describe the first result.", None, None, None, None, None],
|
| 287 |
-
|
|
|
|
|
|
|
| 288 |
],
|
| 289 |
inputs=[prompt_input, input_image, input_audio, input_video, input_model3d, input_file],
|
| 290 |
-
label="Example Prompts (Note: For examples with file inputs, you'll need to upload a relevant file)"
|
| 291 |
)
|
| 292 |
|
| 293 |
# Launch the app
|
| 294 |
if __name__ == "__main__":
|
| 295 |
-
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import os
|
| 3 |
import shutil
|
| 4 |
+
from gradio_client import Client, handle_file # handle_file might be used by the agent
|
| 5 |
+
# Use InferenceClientModel instead of HfApiModel
|
| 6 |
+
from smolagents import Tool, CodeAgent, InferenceClientModel, ToolCollection
|
| 7 |
import uuid
|
| 8 |
+
import httpx # Often a dependency for HTTP clients, good to have
|
| 9 |
from tenacity import retry, stop_after_attempt, wait_exponential
|
| 10 |
+
from huggingface_hub import list_spaces
|
| 11 |
from PIL import Image # For potential image manipulation by the agent
|
| 12 |
+
import traceback # For more detailed error logging if needed
|
| 13 |
|
| 14 |
+
# Define initial tools from Spaces
|
| 15 |
+
# Commenting out problematic spaces for now.
|
| 16 |
+
# You'll need to verify their api_name or compatibility if you re-enable them.
|
| 17 |
+
# Ensure the api_name is correct if you uncomment these.
|
| 18 |
+
# Open the Space's page and click "Use via API" (in the page footer) to list its endpoints and their api_name values.
|
| 19 |
spaces = [
|
| 20 |
+
{"repo_id": "black-forest-labs/FLUX.1-schnell",
|
| 21 |
+
"name": "image_generator_flux_schnell",
|
| 22 |
"description": "Generate an image from a prompt using FLUX.1-schnell. Expects a text prompt.",
|
| 23 |
"api_name": "/infer"},
|
| 24 |
+
{"repo_id": "Remsky/Kokoro-TTS-Zero",
|
| 25 |
+
"name": "text_to_speech_kokoro",
|
| 26 |
"description": "Generates speech (audio) from input text using Kokoro TTS Zero. Expects text input.",
|
| 27 |
"api_name": "/generate_speech_from_ui"},
|
| 28 |
+
{"repo_id": "jamesliu1217/EasyControl_Ghibli",
|
| 29 |
+
"name": "ghibli_style_image_control",
|
| 30 |
"description": "Create Ghibli style image from an input image using EasyControl_Ghibli. Expects an image and a prompt/control parameters.",
|
| 31 |
"api_name": "/single_condition_generate_image"},
|
| 32 |
+
{"repo_id": "opendatalab/MinerU",
|
| 33 |
+
"name": "pdf_text_extraction_mineru",
|
| 34 |
"description": "Extracts the text of a PDF up to 20 pages long using MinerU. Expects a PDF file.",
|
| 35 |
"api_name": "/to_pdf"},
|
| 36 |
+
# {"repo_id": "InstantX/InstantCharacter",
|
| 37 |
+
# "name": "instant_character_customization",
|
| 38 |
+
# "description": "Personalize Any Characters with a Scalable Diffusion Transformer Framework to any style or pose using InstantCharacter. Expects an input image and potentially pose/style images or prompts.",
|
| 39 |
+
# "api_name": "/predict"}, # Example: Verify this api_name if re-enabling
|
| 40 |
+
# {"repo_id": "fotographerai/Zen-Style-Shape",
|
| 41 |
+
# "name": "img_to_img_style_transfer_zen_shape",
|
| 42 |
+
# "description": "Flux[dev] Redux + Flux[dev] Canny. Implements a custom image-to-image style transfer pipeline blending style from Image A to structure of Image B. Expects two images.",
|
| 43 |
+
# "api_name": "/predict"}, # Example: Verify this api_name if re-enabling
|
| 44 |
+
# {"repo_id": "moonshotai/Kimi-VL-A3B-Thinking",
|
| 45 |
+
# "name": "multimodal_vlm_llm_kimi",
|
| 46 |
+
# "description": "Kimi-VL-A3B-Thinking is a multi-modal LLM that can understand text and images, and generate text with thinking processes. Ask any question about an image. Expects text and optionally an image.",
|
| 47 |
+
# "api_name": "/chat"}, # Example: Verify this api_name if re-enabling
|
| 48 |
]
|
| 49 |
|
| 50 |
# Create tools from predefined Spaces with retry logic
|
| 51 |
tools = []
|
| 52 |
+
for space_info in spaces:
|
| 53 |
repo_id = space_info['repo_id']
|
| 54 |
name = space_info.get('name', repo_id.split('/')[-1].replace('-', '_')) # Default name from repo_id
|
| 55 |
description = space_info.get('description', f'A tool to interact with the Hugging Face Space: {repo_id}')
|
|
|
|
| 68 |
print(f"Failed to load predefined tool from {repo_id}. Error: {str(e)}. Continuing with available tools.")
|
| 69 |
|
| 70 |
# Load tools from a Hugging Face Collection
|
| 71 |
+
collection_slug = "jkorstad/tools-680127d17eed47e759549ff4"
|
| 72 |
try:
|
| 73 |
+
# trust_remote_code=True is needed because loading tools from a Hub collection executes code from the tools' repositories.
|
| 74 |
+
collection = ToolCollection.from_hub(collection_slug=collection_slug, trust_remote_code=True)
|
| 75 |
tools.extend(collection.tools)
|
| 76 |
print(f"Successfully loaded tools from collection: {collection_slug}")
|
| 77 |
except Exception as e:
|
| 78 |
print(f"Warning: Failed to load collection {collection_slug}. Error: {str(e)}")
|
| 79 |
|
| 80 |
|
| 81 |
+
# Tool for searching Hugging Face Spaces
|
| 82 |
def search_hf_spaces(query: str, top_k: int = 3) -> str:
|
| 83 |
"""
|
| 84 |
Searches Hugging Face Spaces for a given query and returns the top_k results.
|
|
|
|
| 89 |
"""
|
| 90 |
try:
|
| 91 |
print(f"Searching spaces with query: {query}, top_k: {top_k}")
|
| 92 |
+
# list_spaces comes from huggingface_hub (imported at the top of the file).
|
| 93 |
+
# full=True gives more metadata, sort by likes, direction=-1 for descending
|
| 94 |
spaces_found = list(list_spaces(search=query, full=True, limit=top_k, sort="likes", direction=-1))
|
| 95 |
if not spaces_found:
|
| 96 |
return "No Spaces found for your query."
|
| 97 |
+
|
| 98 |
results = "Found the following Spaces (sorted by likes):\n"
|
| 99 |
for i, space_data in enumerate(spaces_found):
|
| 100 |
+
# Safely access attributes, as they might not always be present
|
| 101 |
+
description = "No description provided."
|
| 102 |
+
if hasattr(space_data, 'cardData') and space_data.cardData and 'description' in space_data.cardData:
|
| 103 |
description = space_data.cardData['description']
|
| 104 |
+
elif hasattr(space_data, 'title') and space_data.title: # Fallback to title
|
| 105 |
description = space_data.title
|
| 106 |
|
| 107 |
results += (
|
| 108 |
f"{i+1}. ID: {space_data.id}\n"
|
| 109 |
f" Description: {description}\n"
|
| 110 |
+
f" Likes: {space_data.likes if hasattr(space_data, 'likes') else 'N/A'}\n"
|
| 111 |
+
f" Last Modified: {space_data.lastModified if hasattr(space_data, 'lastModified') else 'N/A'}\n\n"
|
| 112 |
)
|
| 113 |
results += ("\nTo use one of these, you can try creating a tool in the code like this: "
|
| 114 |
"my_new_tool = Tool.from_space(repo_id='SPACE_ID_HERE', name='custom_tool_name'). "
|
|
|
|
| 118 |
return results
|
| 119 |
except Exception as e:
|
| 120 |
print(f"Error searching Spaces: {str(e)}")
|
| 121 |
+
# traceback.print_exc() # Uncomment for detailed search error debugging
|
| 122 |
return f"Error searching Spaces: {str(e)}"
|
| 123 |
|
| 124 |
space_search_tool = Tool(
|
| 125 |
name="huggingface_space_searcher",
|
| 126 |
description="Searches for Hugging Face Spaces that can perform a specific task. Input is a search query string (e.g., 'text to image', 'speech recognition'). Returns a list of Space IDs, their descriptions, and instructions on how to try using them.",
|
| 127 |
func=search_hf_spaces,
|
|
|
|
| 128 |
)
|
| 129 |
tools.append(space_search_tool)
|
| 130 |
|
| 131 |
|
| 132 |
+
# Initialize the model - Use InferenceClientModel
|
| 133 |
+
model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct") # Or your preferred model
|
| 134 |
|
| 135 |
+
# Create the agent. The instructions are not passed as system_prompt; they are prepended to each user prompt instead (see AGENT_INSTRUCTIONS).
|
| 136 |
agent = CodeAgent(
|
| 137 |
tools=tools,
|
| 138 |
model=model,
|
| 139 |
additional_authorized_imports=['PIL', 'Pillow', 'os', 'sys', 'numpy', 'huggingface_hub', 'gradio_client', 'uuid'],
|
| 140 |
add_base_tools=True, # Includes web search, python interpreter
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
)
|
| 142 |
|
| 143 |
+
# This is the detailed instruction set that was previously in system_prompt
|
| 144 |
+
AGENT_INSTRUCTIONS = """You are a highly capable AI assistant. Your primary goal is to accomplish tasks using a variety of tools, prioritizing Hugging Face Spaces.
|
| 145 |
+
|
| 146 |
+
Follow these steps:
|
| 147 |
+
1. **Understand the Request:** Carefully analyze the user's prompt (which will follow these instructions). Identify the core task and any specific requirements or inputs.
|
| 148 |
+
2. **Check Predefined Tools:** Review your list of available tools. If a predefined tool can directly address the request, use it.
|
| 149 |
+
3. **Search for Spaces (If Needed):** If no predefined tool is suitable, use the `huggingface_space_searcher` tool. Provide a concise search query related to the task (e.g., "image classification", "voice cloning", "document question answering").
|
| 150 |
+
4. **Select and Instantiate a Space Tool:** From the search results, choose the most promising Space. Attempt to create a tool from it using `Tool.from_space(repo_id='SELECTED_SPACE_ID', name='a_unique_tool_name')`. You might need to give it a unique name. If `Tool.from_space` fails, the Space might not be compatible, or you could try another one from the search results. Note that some Spaces might not have a public API or may require a specific `api_name` that `Tool.from_space` cannot infer; in such cases, you might not be able to use them.
|
| 151 |
+
5. **Execute the Tool:** Call the tool (either predefined or dynamically created) with the necessary arguments.
|
| 152 |
+
* **File Inputs:** If the user uploads files, their paths will be available as global string variables: `input_image_path`, `input_audio_path`, `input_video_path`, `input_3d_model_path`, `input_file_path`. Before using these variables, check if they exist and are not None (e.g., `if 'input_image_path' in globals() and input_image_path:`). Pass these file paths as arguments to tools that require them. `Tool.from_space` handles file uploads for compatible Spaces when you pass the filepath string.
|
| 153 |
+
* **Chaining Tools:** If the task requires multiple steps, chain the tools together, passing the output of one tool as the input to the next.
|
| 154 |
+
6. **Output Management:**
|
| 155 |
+
* If a tool generates a file (image, audio, etc.), save it to the current working directory using a unique filename (e.g., `output_filename = os.path.join(os.getcwd(), f"{uuid.uuid4()}.png")`).
|
| 156 |
+
* **Return the RESULT:** Your final response should be either:
|
| 157 |
+
* A string containing the direct text answer.
|
| 158 |
+
* The string path to the generated output file (e.g., `return output_filename`).
|
| 159 |
+
7. **Clarity and Error Handling:** If you encounter issues (e.g., a Space tool fails, required inputs are missing), clearly explain the problem in your response. If a Space doesn't work, try to explain why or suggest an alternative if possible.
|
| 160 |
+
|
| 161 |
+
Example of dynamically using a Space after searching:
|
| 162 |
+
```python
|
| 163 |
+
# This is an example of how I, the agent, would think and act.
|
| 164 |
+
# User's actual prompt would follow these instructions.
|
| 165 |
+
# Example user prompt: "Find a space that can make an image of a cat and then use it."
|
| 166 |
+
#
|
| 167 |
+
# My thought process:
|
| 168 |
+
# 1. The user wants an image of a cat, and wants me to find a Space for it.
|
| 169 |
+
# 2. I'll use `huggingface_space_searcher`.
|
| 170 |
+
# search_results = huggingface_space_searcher(query="text to image cat")
|
| 171 |
+
# print(search_results) # This would show me some options. Let's say 'user/cat-generator' is found.
|
| 172 |
+
# try:
|
| 173 |
+
# cat_image_tool = Tool.from_space(repo_id="user/cat-generator", name="cat_generator_tool")
|
| 174 |
+
# # The arguments for cat_image_tool depend on the Space. I'll assume it takes a 'prompt' argument.
|
| 175 |
+
# image_path = cat_image_tool(prompt="A fluffy siamese cat")
|
| 176 |
+
# # image_path should be a path to the generated image file
|
| 177 |
+
# return image_path
|
| 178 |
+
# except Exception as e:
|
| 179 |
+
# return f"Failed to use the cat generator Space: {e}"
|
| 180 |
+
```
|
| 181 |
+
Always ensure your generated Python code is complete and directly callable. Use `print()` for debugging if necessary, but the final returned value should be the result or file path.
|
| 182 |
+
You have access to `os`, `uuid`, `PIL.Image`.
|
| 183 |
+
"""
|
| 184 |
+
|
| 185 |
# Gradio interface function
|
| 186 |
+
def gradio_interface(user_prompt, input_image_path, input_audio_path, input_video_path, input_3d_model_path, input_file_path, progress=gr.Progress(track_tqdm=True)):
|
| 187 |
try:
|
| 188 |
progress(0, desc="Initializing Agent...")
|
| 189 |
+
|
| 190 |
+
# Combine instructions with the user's prompt
|
| 191 |
+
full_prompt_with_instructions = f"{AGENT_INSTRUCTIONS}\n\nUSER PROMPT: {user_prompt}"
|
| 192 |
+
|
| 193 |
# Prepare a dictionary of potential inputs for the agent's execution scope
|
|
|
|
| 194 |
# These will be available as global variables in the agent's Python execution environment
|
| 195 |
+
# when agent.run is called with keyword arguments.
|
| 196 |
+
agent_kwargs = {}
|
| 197 |
if input_image_path:
|
| 198 |
+
agent_kwargs["input_image_path"] = str(input_image_path) # Ensure it's a string path
|
| 199 |
if input_audio_path:
|
| 200 |
+
agent_kwargs["input_audio_path"] = str(input_audio_path)
|
| 201 |
if input_video_path:
|
| 202 |
+
agent_kwargs["input_video_path"] = str(input_video_path)
|
| 203 |
if input_3d_model_path:
|
| 204 |
+
agent_kwargs["input_3d_model_path"] = str(input_3d_model_path)
|
| 205 |
if input_file_path:
|
| 206 |
+
agent_kwargs["input_file_path"] = str(input_file_path)
|
| 207 |
+
|
|
|
|
|
|
|
| 208 |
progress(0.2, desc="Agent processing request...")
|
| 209 |
+
# The first argument to agent.run is the main prompt.
|
| 210 |
+
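# Other kwargs are intended to become variables in the agent's execution context. NOTE(review): if `agent` is a smolagents agent, run() takes such variables through its `additional_args` dict rather than free-form keyword arguments; confirm against the installed version.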
|
| 211 |
+
result = agent.run(full_prompt_with_instructions, **agent_kwargs)
|
| 212 |
|
| 213 |
progress(0.8, desc="Processing result...")
|
| 214 |
# Default the outputs to hidden with no value; only the text output stays visible
|
|
|
|
| 218 |
"path": gr.update(value=None, visible=False),
|
| 219 |
"audio": gr.update(value=None, visible=False),
|
| 220 |
"model3d": gr.update(value=None, visible=False),
|
| 221 |
+
"text": gr.update(value=None, visible=True), # Text output is often default
|
| 222 |
}
|
| 223 |
|
| 224 |
if isinstance(result, str):
|
|
|
|
| 226 |
file_path = result
|
| 227 |
outputs["file"] = gr.update(value=file_path, visible=True)
|
| 228 |
outputs["path"] = gr.update(value=file_path, visible=True)
|
| 229 |
+
ext = os.path.splitext(file_path.lower())[1] # Get extension like .png
|
| 230 |
+
if ext in ('.png', '.jpg', '.jpeg', '.gif', '.webp'):
|
| 231 |
outputs["image"] = gr.update(value=file_path, visible=True)
|
| 232 |
+
elif ext in ('.mp3', '.wav', '.ogg', '.flac'):
|
| 233 |
outputs["audio"] = gr.update(value=file_path, visible=True)
|
| 234 |
+
elif ext == '.glb': # Common format for Model3D
|
| 235 |
outputs["model3d"] = gr.update(value=file_path, visible=True)
|
| 236 |
else: # Other file types like PDF, TXT - user can download via file component
|
| 237 |
+
outputs["text"] = gr.update(value=f"Output is a file: {os.path.basename(file_path)}. Download it using the 'Download File Output' component.", visible=True)
|
| 238 |
else:
|
| 239 |
+
# Result is a string (e.g., text output from a tool or an error message from the agent)
|
| 240 |
outputs["text"] = gr.update(value=result, visible=True)
|
| 241 |
elif result is None:
|
| 242 |
+
outputs["text"] = gr.update(value="Agent returned no result (None). This might indicate an issue or that the task didn't produce a specific output string/file.", visible=True)
|
| 243 |
else: # Other types (e.g. if agent returns a dict or list by mistake)
|
| 244 |
+
outputs["text"] = gr.update(value=f"Unexpected result type from agent: {type(result)}. Content: {str(result)}", visible=True)
|
| 245 |
+
|
| 246 |
progress(1, desc="Done!")
|
| 247 |
return (
|
| 248 |
outputs["image"], outputs["file"], outputs["path"],
|
|
|
|
| 252 |
except Exception as e:
|
| 253 |
error_msg = f"An error occurred in the Gradio interface or agent execution: {str(e)}"
|
| 254 |
print(error_msg) # Also print to console for server-side logs
|
| 255 |
+
traceback.print_exc() # For more detailed debugging
|
| 256 |
return (
|
| 257 |
+
gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False),
|
| 258 |
+
gr.update(value=None, visible=False), gr.update(value=None, visible=False),
|
| 259 |
+
gr.update(value=error_msg, visible=True) # Show error in the text output
|
| 260 |
)
|
| 261 |
|
| 262 |
# Create the Gradio app
|
|
|
|
| 268 |
prompt_input = gr.Textbox(
|
| 269 |
label="Enter your prompt for the agent",
|
| 270 |
placeholder="e.g., 'Generate an image of a futuristic city', 'Convert this text to speech: Hello world', or 'Search for a space that translates English to French and use it for: Good morning'",
|
| 271 |
+
lines=3,
|
| 272 |
+
elem_id="user_prompt_textbox"
|
| 273 |
)
|
| 274 |
+
|
| 275 |
with gr.Accordion("Optional File Inputs (for tasks requiring them)", open=False):
|
| 276 |
with gr.Row():
|
| 277 |
+
input_image = gr.Image(label="Image Input", type="filepath", sources=["upload", "clipboard"], elem_id="input_image_upload")
|
| 278 |
+
input_audio = gr.Audio(label="Audio Input", type="filepath", sources=["upload", "microphone"], elem_id="input_audio_upload")
|
| 279 |
with gr.Row():
|
| 280 |
+
input_video = gr.Video(label="Video Input", type="filepath", sources=["upload"], elem_id="input_video_upload") # NOTE(review): `type` is not among gr.Video's documented parameters; confirm the installed Gradio version accepts it
|
| 281 |
+
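input_model3d = gr.Model3D(label="3D Model Input (.glb, .obj, etc.)", type="filepath", elem_id="input_model3d_upload") # NOTE(review): `type` is not among gr.Model3D's documented parameters; confirm the installed Gradio version accepts it, and check the docs for supported file formats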
|
| 282 |
with gr.Row():
|
| 283 |
+
input_file = gr.File(label="Generic File Input (PDF, TXT, etc.)", type="filepath", elem_id="input_file_upload")
|
| 284 |
|
| 285 |
+
submit_button = gr.Button("π Generate", variant="primary", elem_id="submit_button_generate")
|
| 286 |
|
| 287 |
gr.Markdown("### Outputs:")
|
| 288 |
with gr.Row():
|
| 289 |
+
image_output = gr.Image(label="Image Output", interactive=False, visible=False, show_download_button=True, elem_id="output_image_display")
|
| 290 |
+
audio_output = gr.Audio(label="Audio Output", interactive=False, visible=False, show_download_button=True, elem_id="output_audio_display")
|
| 291 |
with gr.Row():
|
| 292 |
+
model3d_output = gr.Model3D(label="3D Model Output", interactive=False, visible=False, show_download_button=True, elem_id="output_model3d_display")
|
| 293 |
+
text_output = gr.Textbox(label="Text / Log Output", interactive=False, visible=True, lines=5, max_lines=20, elem_id="output_text_log") # Start visible for logs/text
|
| 294 |
with gr.Row():
|
| 295 |
+
file_output = gr.File(label="Download File Output", interactive=False, visible=False, elem_id="output_file_download")
|
| 296 |
+
path_output = gr.Textbox(label="Output File Path (Copyable)", interactive=False, visible=False, elem_id="output_file_path_text") # Keep for copying if needed
|
| 297 |
|
| 298 |
# Link button click to the interface function
|
| 299 |
submit_button.click(
|
|
|
|
| 301 |
inputs=[prompt_input, input_image, input_audio, input_video, input_model3d, input_file],
|
| 302 |
outputs=[image_output, file_output, path_output, audio_output, model3d_output, text_output]
|
| 303 |
)
|
| 304 |
+
|
| 305 |
gr.Examples(
|
| 306 |
examples=[
|
| 307 |
["Generate an image of a happy robot coding on a laptop, cyberpunk style.", None, None, None, None, None],
|
| 308 |
["Convert the following text to speech: 'Smolagents are amazing for building AI applications.'", None, None, None, None, None],
|
| 309 |
["Search for a Hugging Face Space that can perform image captioning. Describe the first result.", None, None, None, None, None],
|
| 310 |
+
# For examples with file inputs, the user needs to upload a file manually.
|
| 311 |
+
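# NOTE(review): the path in the row below is a placeholder, not a real file, yet it sits in the image-input column; confirm gr.Examples tolerates a missing file or replace it with None.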
|
| 312 |
+
["I have an image of a cat. Find a space that can make it look like a painting and apply it. You will need to use the 'input_image_path' variable which will contain the path to the uploaded cat image.", "path/to/your/cat_image.png", None, None, None, None],
|
| 313 |
],
|
| 314 |
inputs=[prompt_input, input_image, input_audio, input_video, input_model3d, input_file],
|
| 315 |
+
label="Example Prompts (Note: For examples with file inputs, you'll need to upload a relevant file first using the 'Optional File Inputs' section)"
|
| 316 |
)
|
| 317 |
|
| 318 |
# Launch the app
|
| 319 |
if __name__ == "__main__":
|
| 320 |
+
# share=True can be used to create a public link if you're running this locally and want to test from another device.
|
| 321 |
+
# debug=True provides more detailed Gradio logs.
|
| 322 |
+
app.launch(debug=True)
|