Spaces:

jkorstad
/

Easy-Spaces

Runtime error

App Files Files Community

Easy-Spaces / app.py

jkorstad

Update app.py

b593116 verified 7 months ago

raw

history blame

16.9 kB

	import gradio as gr
	import os
	import shutil
	from gradio_client import Client, handle_file # handle_file might be used by the agent
	# Use InferenceClientModel instead of HfApiModel
	from smolagents import Tool, CodeAgent, InferenceClientModel, ToolCollection # Tool is needed for subclassing
	import uuid
	import httpx # Often a dependency for HTTP clients, good to have
	from tenacity import retry, stop_after_attempt, wait_exponential
	from huggingface_hub import list_spaces
	from PIL import Image # For potential image manipulation by the agent
	import traceback # For more detailed error logging if needed

	# Predefined Hugging Face Spaces to expose to the agent as tools.
	# Every entry carries the Space repo id, the tool name and description the
	# agent will see, and the `api_name` handed to `Tool.from_space`.
	spaces = [
	    {
	        "repo_id": "black-forest-labs/FLUX.1-schnell",
	        "name": "image_generator_flux_schnell",
	        "description": "Generate an image from a prompt using FLUX.1-schnell. Expects a text prompt.",
	        "api_name": "/infer",
	    },
	    {
	        "repo_id": "Remsky/Kokoro-TTS-Zero",
	        "name": "text_to_speech_kokoro",
	        "description": "Generates speech (audio) from input text using Kokoro TTS Zero. Expects text input.",
	        "api_name": "/generate_speech_from_ui",
	    },
	    {
	        "repo_id": "jamesliu1217/EasyControl_Ghibli",
	        "name": "ghibli_style_image_control",
	        "description": "Create Ghibli style image from an input image using EasyControl_Ghibli. Expects an image and a prompt/control parameters.",
	        "api_name": "/single_condition_generate_image",
	    },
	    {
	        "repo_id": "opendatalab/MinerU",
	        "name": "pdf_text_extraction_mineru",
	        "description": "Extracts the text of a PDF up to 20 pages long using MinerU. Expects a PDF file.",
	        "api_name": "/to_pdf",
	    },
	]

	# Create tools from predefined Spaces with retry logic.
	#
	# The retrying factory is defined once here rather than being re-created on
	# every loop iteration. `reraise=True` makes tenacity re-raise the exception
	# from the final attempt instead of wrapping it in a RetryError, so the
	# failure message printed by the loop below names the actual cause.
	@retry(
	    stop=stop_after_attempt(3),
	    wait=wait_exponential(multiplier=1, min=4, max=10),
	    reraise=True,
	)
	def create_tool_with_retry(repo_id, name, description, api_name):
	    """Build a tool from a Hugging Face Space, retrying up to three times.

	    Args:
	        repo_id: Space repository id passed to `Tool.from_space`.
	        name: Name to give the resulting tool.
	        description: Description to give the resulting tool.
	        api_name: API endpoint name passed to `Tool.from_space` (may be None).

	    Returns:
	        The object returned by `Tool.from_space`.

	    Raises:
	        Whatever `Tool.from_space` raised on the last of the three attempts.
	    """
	    print(f"Attempting to create tool: '{name}' from space: {repo_id} with api_name: {api_name}")
	    new_tool = Tool.from_space(repo_id, name=name, description=description, api_name=api_name)
	    # Sanity check only: a mismatch is logged, never treated as a failure.
	    if not hasattr(new_tool, 'name') or new_tool.name != name:
	        print(f"WARNING: Tool '{name}' from space {repo_id} might have a name mismatch or missing name attribute after creation. Actual name: {getattr(new_tool, 'name', 'MISSING')}")
	    return new_tool


	tools = []
	for space_info in spaces:
	    repo_id = space_info['repo_id']
	    # Fall back to a name/description derived from the repo id when absent.
	    name = space_info.get('name', repo_id.split('/')[-1].replace('-', '_'))
	    description = space_info.get('description', f'A tool to interact with the Hugging Face Space: {repo_id}')
	    api_name = space_info.get('api_name')

	    try:
	        tool_instance = create_tool_with_retry(repo_id, name, description, api_name)
	    except Exception as e:
	        # Best effort: one unavailable Space must not prevent the app from starting.
	        print(f"Failed to load predefined tool from {repo_id}. Error: {str(e)}. Continuing with available tools.")
	    else:
	        tools.append(tool_instance)
	        print(f"Successfully loaded predefined tool: {name} from {repo_id}")

	# --- Refactored HuggingFaceSpaceSearcherTool ---
	class HuggingFaceSpaceSearcherTool(Tool):
	    """Tool that searches the Hugging Face Hub for Spaces matching a query.

	    The class attributes (`name`, `description`, `inputs`, `output_type`)
	    are the tool metadata; `forward` runs the search and formats the hits
	    as a single human-readable string.
	    """

	    name = "huggingface_space_searcher"
	    description = "Searches for Hugging Face Spaces that can perform a specific task. Input is a search query string (e.g., 'text to image', 'speech recognition'). Returns a list of Space IDs, their descriptions, and instructions on how to try using them."
	    inputs = {
	        "query": {
	            "type": "string",
	            "description": "The search query for Hugging Face Spaces."
	        },
	        "top_k": {
	            "type": "integer",
	            "description": "The number of top results to return (default is 3).",
	            "nullable": True
	        }
	    }
	    output_type = "string"

	    def forward(self, query: str, top_k: int = 3) -> str:
	        """Search Spaces by `query` and describe the top results.

	        Args:
	            query: Search string forwarded to `list_spaces`.
	            top_k: Maximum number of results; `None` is treated as 3.

	        Returns:
	            A formatted listing of the Spaces found (sorted by likes) followed
	            by usage instructions, "No Spaces found for your query." when the
	            search is empty, or an "Error searching Spaces: ..." message if
	            anything raised. This method never raises.
	        """
	        try:
	            actual_top_k = top_k if top_k is not None else 3
	            print(f"Searching spaces with query: {query}, top_k: {actual_top_k}")
	            spaces_found = list(list_spaces(search=query, full=True, limit=actual_top_k, sort="likes", direction=-1))
	            if not spaces_found:
	                return "No Spaces found for your query."

	            parts = ["Found the following Spaces (sorted by likes):\n"]
	            for i, space_data in enumerate(spaces_found, start=1):
	                # Accept both the snake_case attribute names and the older
	                # camelCase ones, so descriptions and dates are not silently
	                # lost depending on the installed huggingface_hub version.
	                card = getattr(space_data, 'card_data', None) or getattr(space_data, 'cardData', None)
	                last_modified = (
	                    getattr(space_data, 'last_modified', None)
	                    or getattr(space_data, 'lastModified', None)
	                    or 'N/A'
	                )
	                likes = getattr(space_data, 'likes', 'N/A')

	                description = None
	                if card:
	                    # Use the first non-empty descriptive field on the card.
	                    for key in ('description', 'short_description', 'title'):
	                        if key in card and card[key]:
	                            description = card[key]
	                            break
	                if not description:
	                    description = getattr(space_data, 'title', None) or "No description provided."

	                parts.append(
	                    f"{i}. ID: {space_data.id}\n"
	                    f" Description: {description}\n"
	                    f" Likes: {likes}\n"
	                    f" Last Modified: {last_modified}\n\n"
	                )

	            parts.append(
	                "\nTo use one of these, you should first try creating a tool using "
	                "`Tool.from_space(repo_id='SPACE_ID_HERE', name='custom_tool_name')`. "
	                "Then call that new tool: `result = custom_tool_name(argument_name=value)`. "
	                "The arguments depend on the specific Space. If `Tool.from_space` fails, "
	                "the Space might not have a compatible public API."
	            )
	            return "".join(parts)
	        except Exception as e:
	            # Report the failure as text so the agent can read it and react.
	            print(f"Error searching Spaces: {str(e)}")
	            return f"Error searching Spaces: {str(e)}"

	# Register the Space search tool next to the predefined Space tools.
	space_search_tool = HuggingFaceSpaceSearcherTool()
	tools.append(space_search_tool)

	# Language model that drives the agent.
	model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")

	# Code-writing agent wired to the model and every tool collected above.
	# NOTE(review): 'Pillow' is a distribution name rather than an importable
	# module, and the prompt asks the agent to call `Tool.from_space` although
	# 'smolagents' is not in this allow-list - confirm both are intentional.
	agent = CodeAgent(
	    model=model,
	    tools=tools,
	    add_base_tools=True,
	    additional_authorized_imports=[
	        'PIL',
	        'Pillow',
	        'os',
	        'sys',
	        'numpy',
	        'huggingface_hub',
	        'gradio_client',
	        'uuid',
	    ],
	)

	# Instruction text prepended to every user prompt before it is given to the
	# agent. The example snippets finish with `final_answer(...)` rather than a
	# top-level `return`, which is not valid in the code the agent executes.
	AGENT_INSTRUCTIONS = """You are a highly capable AI assistant. Your primary goal is to accomplish tasks using a variety of tools, prioritizing Hugging Face Spaces.

	Follow these steps:
	1. Understand the Request: Carefully analyze the user's prompt. Identify the core task and any specific requirements or inputs.
	2. Check Predefined Tools: Review your list of available tools. If a predefined tool can directly address the request, use it.
	* For the 'huggingface_space_searcher' tool, call it with direct keyword arguments like: `huggingface_space_searcher(query="your search term", top_k=3)`. The `query` is mandatory. `top_k` is optional and defaults to 3 if not provided.
	3. Search for Spaces (If Needed): If no predefined tool is suitable, use the `huggingface_space_searcher` tool as described above.
	4. Select and Instantiate a Space Tool (PRIORITY): From the search results, choose the most promising Space. Your primary method to use this Space is to attempt to create a tool from it using `Tool.from_space(repo_id='SELECTED_SPACE_ID', name='a_unique_tool_name')`. You must give it a unique and descriptive name.
	* If `Tool.from_space()` succeeds, you now have a new tool. Call this new tool with the appropriate arguments for that Space.
	* If `Tool.from_space()` fails (e.g., raises an exception), the Space might not have a compatible public API for this method. In this case, you can then consider if direct `gradio_client.Client` usage is feasible, but this is a fallback and often more complex.
	5. Execute the Tool: Call the tool (predefined, or dynamically created via `Tool.from_space()`) with the necessary arguments.
	* File Inputs: If the user uploads files, their paths will be available as global string variables: `input_image_path`, `input_audio_path`, `input_video_path`, `input_3d_model_path`, `input_file_path`. Before using these variables, check if they exist and are not None. Pass these file paths as arguments to tools that require them.
	* Imports in Generated Code: If your code block for execution uses modules like `os` or `uuid`, you must include the import statements (e.g., `import os`, `import uuid`) within that specific code block.
	6. Output Management:
	* If a tool returns a filepath string (e.g., to an image, audio, or other file), your final answer for this step should usually be that direct filepath string. Do NOT attempt to re-save the file using `os.path.join` or `image.save()` unless you are performing an explicit transformation on the file content that requires loading and then saving. The system is designed to handle these returned filepaths.
	* If a tool returns text, return that text.
	7. Clarity and Error Handling: If you encounter issues (e.g., a Space tool fails, required inputs are missing), clearly explain the problem in your response. If a Space doesn't work, try to explain why or suggest an alternative if possible.

	Example of the PREFERRED way to use a discovered Space:
	```python
	# User prompt: "Find a space that can make an image of a cat and use it."
	#
	# Step 1: Search for the space
	# search_results = huggingface_space_searcher(query="text to image cat", top_k=1)
	# print(search_results) # Assume 'someuser/cat-image-generator' is found.
	#
	# Step 2: Try to create a tool from the discovered space
	# try:
	# cat_tool = Tool.from_space(repo_id="someuser/cat-image-generator", name="cat_image_generator_tool")
	# # Now use the newly created tool. Arguments depend on the Space's API.
	# # Let's assume it takes a 'prompt'.
	# image_filepath = cat_tool(prompt="A fluffy siamese cat, cyberpunk style")
	# final_answer(image_filepath) # Return the filepath directly
	# except Exception as e:
	# print(f"Failed to create or use tool from Space 'someuser/cat-image-generator': {e}")
	# # Optionally, try another space or a predefined tool if appropriate.
	# # final_answer("Could not use the discovered space.") (or try another step before giving up)
	```

	Example of using a predefined tool that returns a filepath:
	```python
	# User prompt: "Generate an image of a happy robot."
	# (Assuming 'image_generator_flux_schnell' is a predefined tool)
	#
	# image_filepath = image_generator_flux_schnell(prompt="A happy robot coding on a laptop, cyberpunk style")
	# final_answer(image_filepath) # Return the filepath string directly.
	```
	Always ensure your generated Python code is complete and directly callable. Deliver your result by calling `final_answer(...)`; do not use a top-level `return`.
	You have access to `PIL.Image` (as `Image`), `os`, `sys`, `numpy`, `huggingface_hub`, `gradio_client`, `uuid`. Remember to import them if you use them in a code block.
	"""

	# Gradio interface function
	def gradio_interface(user_prompt, input_image_path, input_audio_path, input_video_path, input_3d_model_path, input_file_path, progress=gr.Progress(track_tqdm=True)):
	    """Run the agent on the user's prompt and map its result onto the UI outputs.

	    Args:
	        user_prompt: Free-text request typed by the user.
	        input_image_path, input_audio_path, input_video_path,
	        input_3d_model_path, input_file_path: Optional uploaded-file values;
	            falsy values are ignored, the rest are passed to the agent as
	            strings under these same names.
	        progress: Gradio progress tracker used for status updates.

	    Returns:
	        A 6-tuple of updates in the order
	        (image, file, path, audio, model3d, text). If the agent returns a
	        string naming an existing file, the file and path outputs are shown
	        plus the preview matching its extension; any other string is shown
	        as text. On any exception the media outputs are hidden and the text
	        output carries the error message.
	    """

	    def _hidden():
	        # A fresh update object per output; they are not shared between outputs.
	        return gr.update(value=None, visible=False)

	    try:
	        progress(0, desc="Initializing Agent...")
	        full_prompt_with_instructions = f"{AGENT_INSTRUCTIONS}\n\nUSER PROMPT: {user_prompt}"

	        uploads = {
	            "input_image_path": input_image_path,
	            "input_audio_path": input_audio_path,
	            "input_video_path": input_video_path,
	            "input_3d_model_path": input_3d_model_path,
	            "input_file_path": input_file_path,
	        }
	        agent_kwargs = {key: str(value) for key, value in uploads.items() if value}

	        progress(0.2, desc="Agent processing request...")
	        # Uploaded paths must go through `additional_args`: `run()` does not
	        # accept arbitrary keyword arguments, so unpacking them as **kwargs
	        # raised TypeError whenever a file was supplied.
	        result = agent.run(full_prompt_with_instructions, additional_args=agent_kwargs or None)

	        progress(0.8, desc="Processing result...")
	        outputs = {key: _hidden() for key in ("image", "file", "path", "audio", "model3d")}
	        outputs["text"] = gr.update(value=None, visible=True)

	        if isinstance(result, str):
	            if os.path.isfile(result):
	                file_path = result
	                outputs["file"] = gr.update(value=file_path, visible=True)
	                outputs["path"] = gr.update(value=file_path, visible=True)
	                # Choose the preview widget from the (case-insensitive) extension.
	                ext = os.path.splitext(file_path.lower())[1]
	                if ext in ('.png', '.jpg', '.jpeg', '.gif', '.webp'):
	                    outputs["image"] = gr.update(value=file_path, visible=True)
	                elif ext in ('.mp3', '.wav', '.ogg', '.flac'):
	                    outputs["audio"] = gr.update(value=file_path, visible=True)
	                elif ext == '.glb':
	                    outputs["model3d"] = gr.update(value=file_path, visible=True)
	                else:
	                    outputs["text"] = gr.update(value=f"Output is a file: {os.path.basename(file_path)}. Download it.", visible=True)
	            else:
	                outputs["text"] = gr.update(value=result, visible=True)
	        elif result is None:
	            outputs["text"] = gr.update(value="Agent returned no result (None).", visible=True)
	        else:
	            outputs["text"] = gr.update(value=f"Unexpected result type: {type(result)}. Content: {str(result)}", visible=True)

	        progress(1, desc="Done!")
	        return (outputs["image"], outputs["file"], outputs["path"], outputs["audio"], outputs["model3d"], outputs["text"])

	    except Exception as e:
	        error_msg = f"An error occurred: {str(e)}"
	        print(error_msg)
	        traceback.print_exc()
	        # Hide media outputs left over from a previous successful run.
	        return (_hidden(), _hidden(), _hidden(), _hidden(), _hidden(), gr.update(value=error_msg, visible=True))

	# Create the Gradio app: prompt box, optional file inputs, output widgets,
	# the submit handler and a set of example prompts.
	with gr.Blocks(theme=gr.themes.Soft()) as app:
	    gr.Markdown("## 🤖 Smolagent: Multi-Modal Agent with Hugging Face Space Discovery")
	    gr.Markdown("Ask the agent to perform tasks...")

	    with gr.Row():
	        prompt_input = gr.Textbox(label="Enter your prompt", placeholder="e.g., 'Generate an image of a futuristic city'", lines=3, elem_id="user_prompt_textbox")

	    with gr.Accordion("Optional File Inputs", open=False):
	        with gr.Row():
	            input_image = gr.Image(label="Image Input", type="filepath", sources=["upload", "clipboard"], elem_id="input_image_upload")
	            input_audio = gr.Audio(label="Audio Input", type="filepath", sources=["upload", "microphone"], elem_id="input_audio_upload")
	        with gr.Row():
	            input_video = gr.Video(label="Video Input", sources=["upload"], elem_id="input_video_upload")
	            input_model3d = gr.Model3D(label="3D Model Input", elem_id="input_model3d_upload")
	        with gr.Row():
	            input_file = gr.File(label="Generic File Input", type="filepath", elem_id="input_file_upload")

	    submit_button = gr.Button("🚀 Generate", variant="primary", elem_id="submit_button_generate")

	    gr.Markdown("### Outputs:")
	    # Media outputs start hidden; gradio_interface reveals the one that fits the result.
	    with gr.Row():
	        image_output = gr.Image(label="Image Output", interactive=False, visible=False, show_download_button=True, elem_id="output_image_display")
	        audio_output = gr.Audio(label="Audio Output", interactive=False, visible=False, show_download_button=True, elem_id="output_audio_display")
	    with gr.Row():
	        model3d_output = gr.Model3D(label="3D Model Output", interactive=False, visible=False, elem_id="output_model3d_display")
	        text_output = gr.Textbox(label="Text / Log Output", interactive=False, visible=True, lines=5, max_lines=20, elem_id="output_text_log")
	    with gr.Row():
	        file_output = gr.File(label="Download File Output", interactive=False, visible=False, elem_id="output_file_download")
	        path_output = gr.Textbox(label="Output File Path", interactive=False, visible=False, elem_id="output_file_path_text")

	    # The outputs order must match the tuple returned by gradio_interface.
	    submit_button.click(
	        fn=gradio_interface,
	        inputs=[prompt_input, input_image, input_audio, input_video, input_model3d, input_file],
	        outputs=[image_output, file_output, path_output, audio_output, model3d_output, text_output]
	    )

	    gr.Examples(
	        examples=[
	            ["Generate an image of a happy robot coding on a laptop, cyberpunk style.", None, None, None, None, None],
	            ["Convert the following text to speech: 'Smolagents are amazing for building AI applications.'", None, None, None, None, None],
	            ["Search for a Hugging Face Space that can perform image captioning. Describe the first result.", None, None, None, None, None],
	            # No image value here: a placeholder path to a file that does not
	            # exist cannot be loaded as an example image. The user uploads
	            # their own picture, as the label below says.
	            ["I have an image of a cat. Find a space that can make it look like a painting and apply it. You will need to use the 'input_image_path' variable which will contain the path to the uploaded cat image.", None, None, None, None, None],
	        ],
	        inputs=[prompt_input, input_image, input_audio, input_video, input_model3d, input_file],
	        label="Example Prompts (Note: For examples with file inputs, you'll need to upload a relevant file first)"
	    )

	# Start the Gradio server only when this file is executed as a script.
	if __name__ == "__main__":
	    app.launch(debug=True)