Spaces:

Nymbo
/

Serverless-TextGen-Hub

Running

App Files Files Community

Nymbo committed on May 18

Commit

6f66243

verified ·

1 Parent(s): 13fb3b8

Update app.py

Browse files

Files changed (1) hide show

app.py +516 -730

app.py CHANGED Viewed

@@ -5,40 +5,47 @@ import json
 import base64
 from PIL import Image
 import io
-import requests
-from smolagents.mcp_client import MCPClient
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
-print("Access token loaded.")
 # Function to encode image to base64
-def encode_image(image_path):
-    if not image_path:
-        print("No image path provided")
         return None
     try:
-        print(f"Encoding image from path: {image_path}")
-        # If it's already a PIL Image
-        if isinstance(image_path, Image.Image):
-            image = image_path
         else:
-            # Try to open the image file
-            image = Image.open(image_path)
-        # Convert to RGB if image has an alpha channel (RGBA)
         if image.mode == 'RGBA':
             image = image.convert('RGB')
-        # Encode to base64
         buffered = io.BytesIO()
-        image.save(buffered, format="JPEG")
         img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
-        print("Image encoded successfully")
         return img_str
     except Exception as e:
         print(f"Error encoding image: {e}")
         return None
 # Dictionary to store active MCP connections
@@ -47,827 +54,606 @@ mcp_connections = {}
 def connect_to_mcp_server(server_url, server_name=None):
     """Connect to an MCP server and return available tools"""
     if not server_url:
-        return None, "No server URL provided"
     try:
-        # Create an MCP client and connect to the server
-        client = MCPClient({"url": server_url})
-        # Get available tools
-        tools = client.get_tools()
-        # Store the connection for later use
-        name = server_name or f"Server_{len(mcp_connections)}"
         mcp_connections[name] = {"client": client, "tools": tools, "url": server_url}
-        return name, f"Successfully connected to {name} with {len(tools)} available tools"
     except Exception as e:
-        print(f"Error connecting to MCP server: {e}")
-        return None, f"Error connecting to MCP server: {str(e)}"
 def list_mcp_tools(server_name):
     """List available tools for a connected MCP server"""
     if server_name not in mcp_connections:
-        return "Server not connected"
     tools = mcp_connections[server_name]["tools"]
     tool_info = []
     for tool in tools:
-        tool_info.append(f"- {tool.name}: {tool.description}")
     if not tool_info:
-        return "No tools available for this server"
     return "\n".join(tool_info)
 def call_mcp_tool(server_name, tool_name, **kwargs):
-    """Call a specific tool from an MCP server"""
     if server_name not in mcp_connections:
-        return f"Server '{server_name}' not connected"
-    client = mcp_connections[server_name]["client"]
-    tools = mcp_connections[server_name]["tools"]
-    # Find the requested tool
-    tool = next((t for t in tools if t.name == tool_name), None)
-    if not tool:
-        return f"Tool '{tool_name}' not found on server '{server_name}'"
     try:
-        # Call the tool with provided arguments
-        result = client.call_tool(tool_name, kwargs)
-        return result
     except Exception as e:
-        print(f"Error calling MCP tool: {e}")
-        return f"Error calling MCP tool: {str(e)}"
-def analyze_message_for_tool_call(message, active_mcp_servers, client, model_to_use, system_message):
     """Analyze a message to determine if an MCP tool should be called"""
-    # Skip analysis if message is empty
-    if not message or not message.strip():
         return None, None
-    # Get information about available tools
-    tool_info = []
-    for server_name in active_mcp_servers:
-        if server_name in mcp_connections:
-            server_tools = mcp_connections[server_name]["tools"]
             for tool in server_tools:
-                tool_info.append({
-                    "server_name": server_name,
-                    "tool_name": tool.name,
-                    "description": tool.description
-                })
-    if not tool_info:
         return None, None
-    # Create a structured query for the LLM to analyze if a tool call is needed
-    tools_desc = []
-    for info in tool_info:
-        tools_desc.append(f"{info['server_name']}.{info['tool_name']}: {info['description']}")
-    tools_string = "\n".join(tools_desc)
-    analysis_system_prompt = f"""You are an assistant that helps determine if a user message requires using an external tool.
-Available tools:
-{tools_string}
-Your job is to:
-1. Analyze the user's message
-2. Determine if they're asking to use one of the tools
-3. If yes, respond with a JSON object with the server_name, tool_name, and parameters
-4. If no, respond with "NO_TOOL_NEEDED"
-Example 1:
-User: "Please turn this text into speech: Hello world"
-Response: {{"server_name": "kokoroTTS", "tool_name": "text_to_audio", "parameters": {{"text": "Hello world", "speed": 1.0}}}}
-Example 2:
-User: "What is the capital of France?"
-Response: NO_TOOL_NEEDED"""
     try:
-        # Call the LLM to analyze the message
-        response = client.chat_completion(
-            model=model_to_use,
             messages=[
-                {"role": "system", "content": analysis_system_prompt},
-                {"role": "user", "content": message}
             ],
-            temperature=0.2,  # Low temperature for more deterministic responses
-            max_tokens=300
         )
-        analysis = response.choices[0].message.content
-        print(f"Tool analysis: {analysis}")
-        if "NO_TOOL_NEEDED" in analysis:
             return None, None
-        # Try to extract JSON from the response
-        json_start = analysis.find("{")
-        json_end = analysis.rfind("}") + 1
-        if json_start < 0 or json_end <= 0:
             return None, None
-        json_str = analysis[json_start:json_end]
         try:
-            tool_call = json.loads(json_str)
-            return tool_call.get("server_name"), {
-                "tool_name": tool_call.get("tool_name"),
-                "parameters": tool_call.get("parameters", {})
-            }
-        except json.JSONDecodeError:
-            print(f"Failed to parse tool call JSON: {json_str}")
             return None, None
     except Exception as e:
-        print(f"Error analyzing message for tool calls: {str(e)}")
         return None, None
 def respond(
-    message,
-    image_files,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-    frequency_penalty,
-    seed,
-    provider,
-    custom_api_key,
-    custom_model,
-    model_search_term,
-    selected_model,
-    mcp_enabled=False,
-    active_mcp_servers=None,
-    mcp_interaction_mode="Natural Language"
 ):
-    print(f"Received message: {message}")
-    print(f"Received {len(image_files) if image_files else 0} images")
-    print(f"History: {history}")
-    print(f"System message: {system_message}")
-    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
-    print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
-    print(f"Selected provider: {provider}")
-    print(f"Custom API Key provided: {bool(custom_api_key.strip())}")
-    print(f"Selected model (custom_model): {custom_model}")
-    print(f"Model search term: {model_search_term}")
-    print(f"Selected model from radio: {selected_model}")
-    print(f"MCP enabled: {mcp_enabled}")
-    print(f"Active MCP servers: {active_mcp_servers}")
-    print(f"MCP interaction mode: {mcp_interaction_mode}")
-    # Determine which token to use
-    token_to_use = custom_api_key if custom_api_key.strip() != "" else ACCESS_TOKEN
-    if custom_api_key.strip() != "":
-        print("USING CUSTOM API KEY: BYOK token provided by user is being used for authentication")
-    else:
-        print("USING DEFAULT API KEY: Environment variable HF_TOKEN is being used for authentication")
-    # Initialize the Inference Client with the provider and appropriate token
-    client = InferenceClient(token=token_to_use, provider=provider)
-    print(f"Hugging Face Inference Client initialized with {provider} provider.")
-    # Convert seed to None if -1 (meaning random)
-    if seed == -1:
-        seed = None
-    # Determine which model to use
-    model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model
-    print(f"Model selected for inference: {model_to_use}")
-    # Process MCP commands in command mode
-    if mcp_enabled and message:
-        if message.startswith("/mcp"):  # Always handle explicit commands
-            # Handle MCP command
-            command_parts = message.split(" ", 3)
             if len(command_parts) < 3:
-                return "Invalid MCP command. Format: /mcp <server_name> <tool_name> [arguments]"
-            _, server_name, tool_name = command_parts[:3]
-            args_json = "{}" if len(command_parts) < 4 else command_parts[3]
             try:
-                args_dict = json.loads(args_json)
-                result = call_mcp_tool(server_name, tool_name, **args_dict)
-                if isinstance(result, dict):
-                    return json.dumps(result, indent=2)
-                return str(result)
             except json.JSONDecodeError:
-                return f"Invalid JSON arguments: {args_json}"
-            except Exception as e:
-                return f"Error executing MCP command: {str(e)}"
-        elif mcp_interaction_mode == "Natural Language" and active_mcp_servers:
-            # Use natural language processing to detect tool calls
-            server_name, tool_info = analyze_message_for_tool_call(
-                message,
-                active_mcp_servers,
-                client,
-                model_to_use,
-                system_message
             )
-            if server_name and tool_info:
-                try:
-                    # Call the detected tool
-                    print(f"Calling tool via natural language: {server_name}.{tool_info['tool_name']} with parameters: {tool_info['parameters']}")
-                    result = call_mcp_tool(server_name, tool_info['tool_name'], **tool_info['parameters'])
-                    # Format the response to include what was done
-                    if isinstance(result, dict):
-                        result_str = json.dumps(result, indent=2)
-                    else:
-                        result_str = str(result)
-                    return f"I used the {tool_info['tool_name']} tool from {server_name} with your request.\n\nResult:\n{result_str}"
-                except Exception as e:
-                    print(f"Error executing MCP tool via natural language: {str(e)}")
-                    # Continue with normal response if tool call fails
-    # Create multimodal content if images are present
-    if image_files and len(image_files) > 0:
-        # Process the user message to include images
-        user_content = []
-        # Add text part if there is any
-        if message and message.strip():
-            user_content.append({
-                "type": "text",
-                "text": message
-            })
-        # Add image parts
-        for img in image_files:
-            if img is not None:
-                # Get raw image data from path
-                try:
-                    encoded_image = encode_image(img)
-                    if encoded_image:
-                        user_content.append({
-                            "type": "image_url",
-                            "image_url": {
-                                "url": f"data:image/jpeg;base64,{encoded_image}"
-                            }
-                        })
-                except Exception as e:
-                    print(f"Error encoding image: {e}")
-    else:
-        # Text-only message
-        user_content = message
-    # Add information about available MCP tools to the system message if MCP is enabled
-    augmented_system_message = system_message
-    if mcp_enabled and active_mcp_servers:
-        tool_info = []
-        for server_name in active_mcp_servers:
-            if server_name in mcp_connections:
-                server_tools = list_mcp_tools(server_name).split("\n")
-                tool_info.extend([f"{server_name}: {tool}" for tool in server_tools])
-        if tool_info:
-            mcp_tools_description = "\n".join(tool_info)
-            if mcp_interaction_mode == "Command Mode":
-                augmented_system_message += f"\n\nYou have access to the following MCP tools:\n{mcp_tools_description}\n\nTo use these tools, the user can type a command in the format: /mcp <server_name> <tool_name> <arguments_json>"
-            else:
-                augmented_system_message += f"\n\nYou have access to the following MCP tools:\n{mcp_tools_description}\n\nThe user can use these tools by describing what they want in natural language, and the system will automatically detect when to use a tool based on their request."
-    # Prepare messages in the format expected by the API
-    messages = [{"role": "system", "content": augmented_system_message}]
-    print("Initial messages array constructed.")
-    # Add conversation history to the context
-    for val in history:
-        user_part = val[0]
-        assistant_part = val[1]
-        if user_part:
-            # Handle both text-only and multimodal messages in history
-            if isinstance(user_part, tuple) and len(user_part) == 2:
-                # This is a multimodal message with text and images
-                history_content = []
-                if user_part[0]:  # Text
-                    history_content.append({
-                        "type": "text",
-                        "text": user_part[0]
                     })
-                for img in user_part[1]:  # Images
-                    if img:
-                        try:
-                            encoded_img = encode_image(img)
-                            if encoded_img:
-                                history_content.append({
-                                    "type": "image_url",
-                                    "image_url": {
-                                        "url": f"data:image/jpeg;base64,{encoded_img}"
-                                    }
-                                })
-                        except Exception as e:
-                            print(f"Error encoding history image: {e}")
-                messages.append({"role": "user", "content": history_content})
-            else:
-                # Regular text message
-                messages.append({"role": "user", "content": user_part})
-            print(f"Added user message to context (type: {type(user_part)})")
-        if assistant_part:
-            messages.append({"role": "assistant", "content": assistant_part})
-            print(f"Added assistant message to context: {assistant_part}")
-    # Append the latest user message
-    messages.append({"role": "user", "content": user_content})
-    print(f"Latest user message appended (content type: {type(user_content)})")
-    # Determine which model to use, prioritizing custom_model if provided
-    model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model
-    print(f"Model selected for inference: {model_to_use}")
-    # Start with an empty string to build the response as tokens stream in
-    response = ""
-    print(f"Sending request to {provider} provider.")
-    # Prepare parameters for the chat completion request
-    parameters = {
-        "max_tokens": max_tokens,
-        "temperature": temperature,
-        "top_p": top_p,
-        "frequency_penalty": frequency_penalty,
     }
-    if seed is not None:
-        parameters["seed"] = seed
-    # Use the InferenceClient for making the request
     try:
-        # Create a generator for the streaming response
-        stream = client.chat_completion(
-            model=model_to_use,
-            messages=messages,
             stream=True,
-            **parameters
         )
-        print("Received tokens: ", end="", flush=True)
-        # Process the streaming response
-        for chunk in stream:
             if hasattr(chunk, 'choices') and len(chunk.choices) > 0:
-                # Extract the content from the response
-                if hasattr(chunk.choices[0], 'delta') and hasattr(chunk.choices[0].delta, 'content'):
-                    token_text = chunk.choices[0].delta.content
-                    if token_text:
-                        print(token_text, end="", flush=True)
-                        response += token_text
-                        yield response
-        print()
-    except Exception as e:
-        print(f"Error during inference: {e}")
-        response += f"\nError: {str(e)}"
-        yield response
-    print("Completed response generation.")
 # GRADIO UI
-with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
-    # Create the chatbot component
     chatbot = gr.Chatbot(
         height=600,
         show_copy_button=True,
-        placeholder="Select a model and begin chatting. Now supports multiple inference providers, multimodal inputs, and MCP tools",
-        layout="panel"
-    )
-    print("Chatbot interface created.")
-    # Multimodal textbox for messages (combines text and file uploads)
-    msg = gr.MultimodalTextbox(
-        placeholder="Type a message or upload images...",
-        show_label=False,
-        container=False,
-        scale=12,
-        file_types=["image"],
-        file_count="multiple",
-        sources=["upload"]
     )
-    # Create accordion for settings
-    with gr.Accordion("Settings", open=False):
-        # System message
-        system_message_box = gr.Textbox(
-            value="You are a helpful AI assistant that can understand images and text.",
-            placeholder="You are a helpful assistant.",
-            label="System Prompt"
         )
-        # Generation parameters
         with gr.Row():
-            with gr.Column():
-                max_tokens_slider = gr.Slider(
-                    minimum=1,
-                    maximum=4096,
-                    value=512,
-                    step=1,
-                    label="Max tokens"
-                )
-                temperature_slider = gr.Slider(
-                    minimum=0.1,
-                    maximum=4.0,
-                    value=0.7,
-                    step=0.1,
-                    label="Temperature"
-                )
-                top_p_slider = gr.Slider(
-                    minimum=0.1,
-                    maximum=1.0,
-                    value=0.95,
-                    step=0.05,
-                    label="Top-P"
-                )
-            with gr.Column():
-                frequency_penalty_slider = gr.Slider(
-                    minimum=-2.0,
-                    maximum=2.0,
-                    value=0.0,
-                    step=0.1,
-                    label="Frequency Penalty"
-                )
-                seed_slider = gr.Slider(
-                    minimum=-1,
-                    maximum=65535,
-                    value=-1,
-                    step=1,
-                    label="Seed (-1 for random)"
-                )
-        # Provider selection
-        providers_list = [
-            "hf-inference",  # Default Hugging Face Inference
-            "cerebras",      # Cerebras provider
-            "together",      # Together AI
-            "sambanova",     # SambaNova
-            "novita",        # Novita AI
-            "cohere",        # Cohere
-            "fireworks-ai",  # Fireworks AI
-            "hyperbolic",    # Hyperbolic
-            "nebius",        # Nebius
         ]
-        provider_radio = gr.Radio(
-            choices=providers_list,
-            value="hf-inference",
-            label="Inference Provider",
-        )
-        # New BYOK textbox
-        byok_textbox = gr.Textbox(
-            value="",
-            label="BYOK (Bring Your Own Key)",
-            info="Enter a custom Hugging Face API key here. When empty, only 'hf-inference' provider can be used.",
-            placeholder="Enter your Hugging Face API token",
-            type="password"  # Hide the API key for security
-        )
-        # Custom model box
-        custom_model_box = gr.Textbox(
-            value="",
-            label="Custom Model",
-            info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
-            placeholder="meta-llama/Llama-3.3-70B-Instruct"
-        )
-        # Model search
-        model_search_box = gr.Textbox(
-            label="Filter Models",
-            placeholder="Search for a featured model...",
-            lines=1
-        )
-        # Featured models list
-        # Updated to include multimodal models
-        models_list = [
-            "meta-llama/Llama-3.2-11B-Vision-Instruct",
-            "meta-llama/Llama-3.3-70B-Instruct",
-            "meta-llama/Llama-3.1-70B-Instruct",
-            "meta-llama/Llama-3.0-70B-Instruct",
-            "meta-llama/Llama-3.2-3B-Instruct",
-            "meta-llama/Llama-3.2-1B-Instruct",
-            "meta-llama/Llama-3.1-8B-Instruct",
-            "NousResearch/Hermes-3-Llama-3.1-8B",
-            "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
             "mistralai/Mistral-Nemo-Instruct-2407",
-            "mistralai/Mixtral-8x7B-Instruct-v0.1",
-            "mistralai/Mistral-7B-Instruct-v0.3",
-            "mistralai/Mistral-7B-Instruct-v0.2",
-            "Qwen/Qwen3-235B-A22B",
-            "Qwen/Qwen3-32B",
-            "Qwen/Qwen2.5-72B-Instruct",
-            "Qwen/Qwen2.5-3B-Instruct",
-            "Qwen/Qwen2.5-0.5B-Instruct",
-            "Qwen/QwQ-32B",
-            "Qwen/Qwen2.5-Coder-32B-Instruct",
-            "microsoft/Phi-3.5-mini-instruct",
-            "microsoft/Phi-3-mini-128k-instruct",
-            "microsoft/Phi-3-mini-4k-instruct",
         ]
-        featured_model_radio = gr.Radio(
-            label="Select a model below",
-            choices=models_list,
-            value="meta-llama/Llama-3.2-11B-Vision-Instruct",  # Default to a multimodal model
-            interactive=True
-        )
-        gr.Markdown("[View all Text-to-Text models](https://huggingface.co/models?inference_provider=all&pipeline_tag=text-generation&sort=trending) | [View all multimodal models](https://huggingface.co/models?inference_provider=all&pipeline_tag=image-text-to-text&sort=trending)")
-    # Create accordion for MCP settings
-    with gr.Accordion("MCP Settings", open=False):
-        mcp_enabled_checkbox = gr.Checkbox(
-            label="Enable MCP Support",
-            value=False,
-            info="Enable Model Context Protocol support to connect to external tools and services"
-        )
         with gr.Row():
-            mcp_server_url = gr.Textbox(
-                label="MCP Server URL",
-                placeholder="https://example-mcp-server.hf.space/gradio_api/mcp/sse",
-                info="URL of the MCP server to connect to"
-            )
-            mcp_server_name = gr.Textbox(
-                label="Server Name",
-                placeholder="Optional name for this server",
-                info="A friendly name to identify this server"
-            )
-            mcp_connect_button = gr.Button("Connect to MCP Server")
-        mcp_status = gr.Textbox(
-            label="MCP Connection Status",
-            placeholder="No MCP servers connected",
-            interactive=False
-        )
-        active_mcp_servers = gr.Dropdown(
-            label="Active MCP Servers",
-            choices=[],
-            multiselect=True,
-            info="Select which MCP servers to use in chat"
         )
-        mcp_mode = gr.Radio(
-            label="MCP Interaction Mode",
-            choices=["Natural Language", "Command Mode"],
-            value="Natural Language",
-            info="Choose how to interact with MCP tools"
         )
-        gr.Markdown("""
-        ### MCP Interaction Modes
-        **Natural Language Mode**: Simply describe what you want in plain English. Examples:
-        ```
-        Please convert the text "Hello world" to speech
-        Can you read this text aloud: "Welcome to MCP integration"
-        ```
-        **Command Mode**: Use structured commands (for advanced users)
-        ```
-        /mcp <server_name> <tool_name> {"param1": "value1", "param2": "value2"}
-        ```
-        Example:
-        ```
-        /mcp kokoroTTS text_to_audio {"text": "Hello world", "speed": 1.0}
-        ```
-        """)
-    # Chat history state
-    chat_history = gr.State([])
-    # Function to filter models
-    def filter_models(search_term):
-        print(f"Filtering models with search term: {search_term}")
-        filtered = [m for m in models_list if search_term.lower() in m.lower()]
-        print(f"Filtered models: {filtered}")
-        return gr.update(choices=filtered)
-    # Function to set custom model from radio
-    def set_custom_model_from_radio(selected):
-        print(f"Featured model selected: {selected}")
-        return selected
-    # Function to connect to MCP server
-    def connect_mcp_server(url, name):
-        server_name, status = connect_to_mcp_server(url, name)
-        # Update the active servers dropdown
-        servers = list(mcp_connections.keys())
-        # Return the status message and updated server list
-        return status, gr.update(choices=servers)
-    # Function for the chat interface
-    def user(user_message, history):
-        # Debug logging for troubleshooting
-        print(f"User message received: {user_message}")
-        # Skip if message is empty (no text and no files)
-        if not user_message or (not user_message.get("text") and not user_message.get("files")):
-            print("Empty message, skipping")
-            return history
-        # Prepare multimodal message format
-        text_content = user_message.get("text", "").strip()
-        files = user_message.get("files", [])
-        print(f"Text content: {text_content}")
-        print(f"Files: {files}")
-        # If both text and files are empty, skip
-        if not text_content and not files:
-            print("No content to display")
-            return history
-        # Add message with images to history
-        if files and len(files) > 0:
-            # Add text message first if it exists
-            if text_content:
-                # Add a separate text message
-                print(f"Adding text message: {text_content}")
-                history.append([text_content, None])
-            # Then add each image file separately
-            for file_path in files:
-                if file_path and isinstance(file_path, str):
-                    print(f"Adding image: {file_path}")
-                    # Add image as a separate message with no text
-                    history.append([f"![Image]({file_path})", None])
-            return history
-        else:
-            # For text-only messages
-            print(f"Adding text-only message: {text_content}")
-            history.append([text_content, None])
-            return history
-    # Define bot response function
-    def bot(history, system_msg, max_tokens, temperature, top_p, freq_penalty, seed, provider, api_key, custom_model, search_term, selected_model, mcp_enabled, selected_servers, mcp_interaction_mode):
-        # Check if history is valid
-        if not history or len(history) == 0:
-            print("No history to process")
-            return history
-        # Get the most recent message and detect if it's an image
-        user_message = history[-1][0]
-        print(f"Processing user message: {user_message}")
-        is_image = False
-        image_path = None
-        text_content = user_message
-        # Check if this is an image message (marked with ![Image])
-        if isinstance(user_message, str) and user_message.startswith("![Image]("):
-            is_image = True
-            # Extract image path from markdown format ![Image](path)
-            image_path = user_message.replace("![Image](", "").replace(")", "")
-            print(f"Image detected: {image_path}")
-            text_content = ""  # No text for image-only messages
-        # Look back for text context if this is an image
-        text_context = ""
-        if is_image and len(history) > 1:
-            # Use the previous message as context if it's text
-            prev_message = history[-2][0]
-            if isinstance(prev_message, str) and not prev_message.startswith("![Image]("):
-                text_context = prev_message
-                print(f"Using text context from previous message: {text_context}")
-        # Process message through respond function
-        history[-1][1] = ""
-        # Use either the image or text for the API
-        if is_image:
-            # For image messages
-            for response in respond(
-                text_context,           # Text context from previous message if any
-                [image_path],           # Current image
-                history[:-1],           # Previous history
-                system_msg,
-                max_tokens,
-                temperature,
-                top_p,
-                freq_penalty,
-                seed,
-                provider,
-                api_key,
-                custom_model,
-                search_term,
-                selected_model,
-                mcp_enabled,
-                selected_servers,
-                mcp_interaction_mode
-            ):
-                history[-1][1] = response
-                yield history
-        else:
-            # For text-only messages
-            for response in respond(
-                text_content,           # Text message
-                None,                   # No image
-                history[:-1],           # Previous history
-                system_msg,
-                max_tokens,
-                temperature,
-                top_p,
-                freq_penalty,
-                seed,
-                provider,
-                api_key,
-                custom_model,
-                search_term,
-                selected_model,
-                mcp_enabled,
-                selected_servers,
-                mcp_interaction_mode
-            ):
-                history[-1][1] = response
-                yield history
-    # Update function for provider validation based on BYOK
-    def validate_provider(api_key, provider):
-        if not api_key.strip() and provider != "hf-inference":
-            return gr.update(value="hf-inference")
-        return gr.update(value=provider)
-    # Event handlers
-    msg.submit(
-        user,
-        [msg, chatbot],
-        [chatbot],
-        queue=False
-    ).then(
-        bot,
-        [chatbot, system_message_box, max_tokens_slider, temperature_slider, top_p_slider,
-         frequency_penalty_slider, seed_slider, provider_radio, byok_textbox, custom_model_box,
-         model_search_box, featured_model_radio, mcp_enabled_checkbox, active_mcp_servers, mcp_mode],
-        [chatbot]
-    ).then(
-        lambda: {"text": "", "files": []},  # Clear inputs after submission
-        None,
-        [msg]
-    )
-    # Connect MCP connect button
-    mcp_connect_button.click(
-        connect_mcp_server,
-        [mcp_server_url, mcp_server_name],
-        [mcp_status, active_mcp_servers]
-    )
-    # Connect the model filter to update the radio choices
-    model_search_box.change(
-        fn=filter_models,
-        inputs=model_search_box,
-        outputs=featured_model_radio
     )
-    print("Model search box change event linked.")
-    # Connect the featured model radio to update the custom model box
-    featured_model_radio.change(
-        fn=set_custom_model_from_radio,
-        inputs=featured_model_radio,
-        outputs=custom_model_box
-    )
-    print("Featured model radio button change event linked.")
-    # Connect the BYOK textbox to validate provider selection
-    byok_textbox.change(
-        fn=validate_provider,
-        inputs=[byok_textbox, provider_radio],
-        outputs=provider_radio
     )
-    print("BYOK textbox change event linked.")
-    # Also validate provider when the radio changes to ensure consistency
-    provider_radio.change(
-        fn=validate_provider,
-        inputs=[byok_textbox, provider_radio],
-        outputs=provider_radio
-    )
-    print("Provider radio button change event linked.")
-print("Gradio interface initialized.")
 if __name__ == "__main__":
-    print("Launching the demo application.")
-    demo.launch(show_api=True, mcp_server=False)  # Not launching as MCP server as we're the client

 import base64
 from PIL import Image
 import io
+import requests # Keep for potential future use, though not directly used in core logic now
+from smolagents.mcp_client import MCPClient # Ensure this is correctly installed and importable
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
+if ACCESS_TOKEN:
+    print("Access token loaded from HF_TOKEN environment variable.")
+else:
+    print("Warning: HF_TOKEN environment variable not set. Some operations might fail.")
 # Function to encode image to base64
def encode_image(image_path_or_pil):
    """Encode an image as a base64 JPEG string.

    Args:
        image_path_or_pil: A PIL ``Image.Image`` instance, or a filesystem
            path (``str`` or ``os.PathLike``) pointing at an image file.

    Returns:
        The JPEG bytes encoded as a base64 ``str``, or ``None`` when the input
        is empty, the file does not exist, the input type is unsupported, or
        encoding fails for any reason (the error is printed, never raised).
    """
    if not image_path_or_pil:
        print("No image path or PIL Image provided")
        return None

    def _to_base64_jpeg(image):
        # JPEG cannot store alpha, palette or multi-band modes other than
        # RGB/L, so normalise those instead of letting save() fail.
        if image.mode not in ("RGB", "L"):
            image = image.convert("RGB")
        buffered = io.BytesIO()
        image.save(buffered, format="JPEG")
        return base64.b64encode(buffered.getvalue()).decode("utf-8")

    try:
        if isinstance(image_path_or_pil, (str, os.PathLike)):
            print(f"Encoding image from path: {image_path_or_pil}")
            if not os.path.exists(image_path_or_pil):
                print(f"Error: Image file not found at {image_path_or_pil}")
                return None
            # The context manager releases the underlying file handle.
            with Image.open(image_path_or_pil) as opened_image:
                img_str = _to_base64_jpeg(opened_image)
        elif isinstance(image_path_or_pil, Image.Image):
            print("Encoding PIL Image object.")
            img_str = _to_base64_jpeg(image_path_or_pil)
        else:
            print(f"Error: Unsupported image input type: {type(image_path_or_pil)}")
            return None
        print("Image encoded successfully to base64.")
        return img_str
    except Exception as e:
        # Best effort by design: callers treat None as "skip this image".
        print(f"Error encoding image: {e}")
        import traceback
        traceback.print_exc()
        return None
 # Dictionary to store active MCP connections
 def connect_to_mcp_server(server_url, server_name=None):
     """Connect to an MCP server and return available tools"""
     if not server_url:
+        return None, "No server URL provided. Please enter a valid URL."
     try:
+        print(f"Attempting to connect to MCP server at URL: {server_url}")
+        client = MCPClient({"url": server_url}) # This might block or raise if connection fails
+        tools = client.get_tools() # This should also be a blocking call until tools are fetched
+        name = server_name.strip() if server_name and server_name.strip() else f"Server_{len(mcp_connections) + 1}"
         mcp_connections[name] = {"client": client, "tools": tools, "url": server_url}
+        print(f"Successfully connected to MCP server: {name} with {len(tools)} tools.")
+        return name, f"Successfully connected to '{name}' ({server_url}). Found {len(tools)} tool(s)."
     except Exception as e:
+        print(f"Error connecting to MCP server at {server_url}: {e}")
+        import traceback
+        traceback.print_exc()
+        return None, f"Error connecting to MCP server '{server_url}': {str(e)}"
 def list_mcp_tools(server_name):
     """List available tools for a connected MCP server"""
     if server_name not in mcp_connections:
+        return "Server not connected or name not found."
     tools = mcp_connections[server_name]["tools"]
     tool_info = []
     for tool in tools:
+        tool_info.append(f"- **{tool.name}**: {tool.description}")
     if not tool_info:
+        return "No tools available for this server."
     return "\n".join(tool_info)
 def call_mcp_tool(server_name, tool_name, **kwargs):
+    """Call a specific tool from an MCP server and process its result."""
     if server_name not in mcp_connections:
+        return {"type": "error", "message": f"Server '{server_name}' not connected."}
+    mcp_client_instance = mcp_connections[server_name]["client"]
     try:
+        print(f"Calling MCP tool: {server_name}.{tool_name} with args: {kwargs}")
+        # Assuming mcp_client_instance.call_tool returns an mcp.client.tool.ToolResult object
+        tool_result = mcp_client_instance.call_tool(tool_name, kwargs)
+        if tool_result and tool_result.content:
+            # Process multiple blocks if present, concatenating text or prioritizing audio
+            audio_block_found = None
+            text_parts = []
+            json_parts = []
+            other_parts = []
+            for block in tool_result.content:
+                if hasattr(block, 'uri') and isinstance(block.uri, str) and block.uri.startswith('data:audio/'):
+                    audio_block_found = {
+                        "type": "audio",
+                        "data_uri": block.uri,
+                        "name": getattr(block, 'name', 'audio_output.wav')
+                    }
+                    break # Prioritize first audio block
+                elif hasattr(block, 'text') and block.text is not None:
+                    text_parts.append(str(block.text))
+                elif hasattr(block, 'json_data') and block.json_data is not None:
+                    try:
+                        json_parts.append(json.dumps(block.json_data, indent=2))
+                    except TypeError:
+                        json_parts.append(str(block.json_data)) # Fallback
+                else:
+                    other_parts.append(str(block))
+            if audio_block_found:
+                print(f"MCP tool returned audio: {audio_block_found['name']}")
+                return audio_block_found
+            elif text_parts:
+                full_text = "\n".join(text_parts)
+                print(f"MCP tool returned text: {full_text[:100]}...")
+                return {"type": "text", "value": full_text}
+            elif json_parts:
+                full_json_str = "\n".join(json_parts)
+                print(f"MCP tool returned JSON string.")
+                return {"type": "json_string", "value": full_json_str} # Treat as string for display
+            elif other_parts:
+                print(f"MCP tool returned other content types.")
+                return {"type": "text", "value": "\n".join(other_parts)}
+            else:
+                print("MCP tool executed but returned no interpretable primary content blocks.")
+                return {"type": "text", "value": "Tool executed, but returned no standard content (audio/text/json)."}
+        print("MCP tool executed, but ToolResult or its content was empty.")
+        return {"type": "text", "value": "Tool executed, but returned no content."}
     except Exception as e:
+        print(f"Error calling MCP tool '{tool_name}' or processing its result: {e}")
+        import traceback
+        traceback.print_exc()
+        return {"type": "error", "message": f"Error during MCP tool '{tool_name}' execution: {str(e)}"}
def analyze_message_for_tool_call(message, active_mcp_servers, llm_client, llm_model_to_use, base_system_message):
    """Ask the LLM whether the user's message should trigger an MCP tool call.

    Args:
        message: The user's text message.
        active_mcp_servers: Names of the servers whose tools may be used.
        llm_client: Client exposing ``chat_completion(model=..., messages=..., ...)``.
        llm_model_to_use: Model identifier passed to ``chat_completion``.
        base_system_message: The chat's system prompt; accepted for signature
            compatibility and currently not sent to the LLM.

    Returns:
        ``(server_name, {"tool_name": ..., "parameters": {...}})`` when the LLM
        selects a tool that exists on one of the active servers, otherwise
        ``(None, None)``. ``parameters`` is always a dict so callers can
        unpack it with ``**`` safely.
    """
    if not message or not message.strip() or not active_mcp_servers:
        return None, None

    tool_info_for_llm = []
    known_tools = set()  # (server, tool) pairs the LLM is allowed to pick
    for server_name_iter in active_mcp_servers:
        if server_name_iter not in mcp_connections:
            continue
        for tool in mcp_connections[server_name_iter]["tools"]:
            known_tools.add((server_name_iter, tool.name))
            tool_info_for_llm.append(
                f"- Server: '{server_name_iter}', Tool: '{tool.name}', Description: '{tool.description}'"
            )
    if not tool_info_for_llm:
        print("No active MCP tools found for analysis.")
        return None, None
    tools_string_for_llm = "\n".join(tool_info_for_llm)

    analysis_system_prompt = f"""You are an expert assistant that determines if a user's request requires an external tool.
You have access to the following tools:
{tools_string_for_llm}
Based on the user's message, decide if any of these tools are appropriate.
If a tool is needed, respond ONLY with a JSON object containing:
"server_name": The name of the server providing the tool.
"tool_name": The name of the tool to be called.
"parameters": A dictionary of parameters for the tool, inferred from the user's message. Ensure parameter names match what the tool expects (often 'text', 'query', 'speed', etc.).
If NO tool is needed, respond ONLY with the exact string: NO_TOOL_NEEDED
Example 1 (TTS tool):
User: "Can you say 'hello world' for me at a slightly faster speed?"
Response: {{"server_name": "kokoroTTS", "tool_name": "text_to_audio", "parameters": {{"text": "hello world", "speed": 1.2}}}}
Example 2 (File tool):
User: "Read the content of my_document.txt"
Response: {{"server_name": "FileSystemServer", "tool_name": "readFile", "parameters": {{"path": "my_document.txt"}}}}
Example 3 (No tool):
User: "What's the weather like today?" (Assuming no weather tool is listed)
Response: NO_TOOL_NEEDED
User's current message is: "{message}"
Now, provide your decision:"""

    try:
        print(f"Sending tool analysis request to LLM model: {llm_model_to_use}")
        response = llm_client.chat_completion(
            model=llm_model_to_use,
            # The analysis prompt is sent as the user message.
            messages=[{"role": "user", "content": analysis_system_prompt}],
            temperature=0.1,  # Low temperature for deterministic tool selection
            max_tokens=300,
            stop=["\n\n"],  # Stop early if the LLM adds extra verbiage
        )
        analysis_text = (response.choices[0].message.content or "").strip()
        print(f"LLM tool analysis response: '{analysis_text}'")

        if "NO_TOOL_NEEDED" in analysis_text:
            print("LLM determined no tool needed.")
            return None, None

        # Drop Markdown code fences, then take the outermost {...} span.
        if analysis_text.startswith("```"):
            analysis_text = analysis_text.replace("```json", "").replace("```", "").strip()
        json_start = analysis_text.find("{")
        json_end = analysis_text.rfind("}") + 1
        if json_start == -1 or json_end <= json_start:
            print(f"Could not find valid JSON object in LLM response: '{analysis_text}'")
            return None, None
        json_str = analysis_text[json_start:json_end]

        try:
            tool_call_data = json.loads(json_str)
        except json.JSONDecodeError as e:
            print(f"Failed to parse tool call JSON from LLM response: '{json_str}'. Error: {e}")
            return None, None

        if (
            not isinstance(tool_call_data, dict)
            or "server_name" not in tool_call_data
            or "tool_name" not in tool_call_data
        ):
            print(f"LLM response parsed as JSON but missing server_name or tool_name: {json_str}")
            return None, None

        server_name = tool_call_data["server_name"]
        tool_name = tool_call_data["tool_name"]
        try:
            is_known = (server_name, tool_name) in known_tools
        except TypeError:
            # The LLM returned a non-string (unhashable) name.
            is_known = False
        if not is_known:
            # Guards against names copied from the prompt examples or invented.
            print(f"LLM suggested a tool that is not available: {server_name}.{tool_name}")
            return None, None

        parameters = tool_call_data.get("parameters", {})
        if not isinstance(parameters, dict):
            # Callers unpack this with **, which requires a mapping.
            print(f"Ignoring non-dict tool parameters from LLM: {parameters!r}")
            parameters = {}

        print(f"LLM suggested tool call: {tool_call_data}")
        return server_name, {"tool_name": tool_name, "parameters": parameters}
    except Exception as e:
        print(f"Error during LLM analysis for tool calls: {str(e)}")
        import traceback
        traceback.print_exc()
        return None, None
 def respond(
+    message_text_input, # From user function, this is just the text part
+    message_files_input, # From user function, this is the list of file paths
+    history_tuples: list[tuple[tuple[str, list], str]], # History: list of ((user_text, [user_files]), assistant_response)
+    system_message_prompt,
+    max_tokens_val,
+    temperature_val,
+    top_p_val,
+    frequency_penalty_val,
+    seed_val,
+    provider_choice,
+    custom_api_key_val,
+    custom_model_id,
+    # model_search_term_val, # Not directly used in respond, but kept for signature consistency if UI passes it
+    selected_hf_model_id,
+    mcp_is_enabled,
+    active_mcp_server_names, # List of selected server names
+    mcp_interaction_mode_choice
 ):
+    print(f"\n--- RESPOND FUNCTION CALLED ---")
+    print(f"Message Text: '{message_text_input}'")
+    print(f"Message Files: {message_files_input}")
+    # print(f"History (first item type if exists): {type(history_tuples) if history_tuples else 'No history'}")
+    print(f"System Prompt: '{system_message_prompt}'")
+    print(f"Provider: {provider_choice}, MCP Enabled: {mcp_is_enabled}, MCP Mode: {mcp_interaction_mode_choice}")
+    print(f"Active MCP Servers: {active_mcp_server_names}")
+    token_to_use_for_llm = custom_api_key_val if custom_api_key_val.strip() else ACCESS_TOKEN
+    if not token_to_use_for_llm and provider_choice != "hf-inference": # Basic check
+        yield "Error: API Key required for non-hf-inference providers."
+        return
+    llm_client_instance = InferenceClient(token=token_to_use_for_llm, provider=provider_choice)
+    current_seed = None if seed_val == -1 else seed_val
+    model_id_for_llm = custom_model_id.strip() if custom_model_id.strip() else selected_hf_model_id
+    print(f"Using LLM model: {model_id_for_llm} via {provider_choice}")
+    # --- MCP Tool Call Logic ---
+    if mcp_is_enabled and (message_text_input or message_files_input) and active_mcp_server_names:
+        tool_call_output_dict = None
+        invoked_tool_display_name = "a tool"
+        invoked_server_display_name = "an MCP server"
+        if message_text_input and message_text_input.startswith("/mcp"):
+            print("Processing explicit MCP command...")
+            command_parts = message_text_input.split(" ", 3)
             if len(command_parts) < 3:
+                yield "Invalid MCP command. Format: /mcp <server_name> <tool_name> [arguments_json]"
+                return
+            _, server_name_cmd, tool_name_cmd = command_parts[:3]
+            invoked_server_display_name = server_name_cmd
+            invoked_tool_display_name = tool_name_cmd
+            args_json_str = "{}" if len(command_parts) < 4 else command_parts
             try:
+                args_dict_cmd = json.loads(args_json_str)
+                tool_call_output_dict = call_mcp_tool(invoked_server_display_name, invoked_tool_display_name, **args_dict_cmd)
             except json.JSONDecodeError:
+                yield f"Invalid JSON arguments for MCP command: {args_json_str}"
+                return
+            except Exception as e_cmd:
+                yield f"Error preparing MCP command: {str(e_cmd)}"
+                return
+        elif mcp_interaction_mode_choice == "Natural Language":
+            print("Analyzing message for natural language tool call...")
+            # For natural language, primarily use message_text_input. Files could be context later.
+            detected_server_nl, tool_info_nl = analyze_message_for_tool_call(
+                message_text_input,
+                active_mcp_server_names,
+                llm_client_instance,
+                model_id_for_llm,
+                system_message_prompt
             )
+            if detected_server_nl and tool_info_nl and tool_info_nl.get("tool_name"):
+                invoked_server_display_name = detected_server_nl
+                invoked_tool_display_name = tool_info_nl['tool_name']
+                tool_params_nl = tool_info_nl.get("parameters", {})
+                tool_call_output_dict = call_mcp_tool(invoked_server_display_name, invoked_tool_display_name, **tool_params_nl)
+        # --- Handle MCP Tool Result (if a tool was called) ---
+        if tool_call_output_dict:
+            response_message_parts = [f"I attempted to use the **{invoked_tool_display_name}** tool from **{invoked_server_display_name}**."]
+            if tool_call_output_dict.get("type") == "audio":
+                audio_data_uri = tool_call_output_dict["data_uri"]
+                audio_html_tag = f"<audio controls src='{audio_data_uri}' title='{tool_call_output_dict.get('name', 'Audio Output')}'></audio>"
+                response_message_parts.append(f"Here's the audio output:\n{audio_html_tag}")
+            elif tool_call_output_dict.get("type") == "text":
+                response_message_parts.append(f"\nResult:\n```\n{tool_call_output_dict['value']}\n```")
+            elif tool_call_output_dict.get("type") == "json_string": # Changed from "json" to avoid confusion with dict
+                response_message_parts.append(f"\nResult (JSON):\n```json\n{tool_call_output_dict['value']}\n```")
+            elif tool_call_output_dict.get("type") == "error":
+                response_message_parts.append(f"\nUnfortunately, there was an error: {tool_call_output_dict['message']}")
+            else: # Fallback for unexpected result structure
+                response_message_parts.append(f"\nThe tool returned: {str(tool_call_output_dict)}")
+            yield "\n".join(response_message_parts)
+            return # End here if a tool was called and processed
+    # --- Regular LLM Response Logic (if no MCP tool was successfully called and returned primary content) ---
+    print("Proceeding with standard LLM response generation.")
+    # Prepare current user message for LLM (multimodal if files exist)
+    current_user_llm_content = []
+    if message_text_input and message_text_input.strip():
+        current_user_llm_content.append({"type": "text", "text": message_text_input})
+    if message_files_input:
+        for file_path in message_files_input:
+            if file_path: # file_path is already the actual temp path from gr.File or gr.Image
+                encoded_img_str = encode_image(file_path)
+                if encoded_img_str:
+                    current_user_llm_content.append({
+                        "type": "image_url",
+                        "image_url": {"url": f"data:image/jpeg;base64,{encoded_img_str}"}
                     })
+                else:
+                    print(f"Warning: Failed to encode image {file_path} for LLM.")
+    if not current_user_llm_content:
+        print("No content (text or valid files) in current user message for LLM.")
+        yield "" # Or some indicator of no action
+        return
+    # Augment system message with MCP tool info if enabled
+    augmented_sys_msg = system_message_prompt
+    if mcp_is_enabled and active_mcp_server_names:
+        mcp_tool_descriptions_for_llm = []
+        for server_name_iter in active_mcp_server_names:
+            if server_name_iter in mcp_connections:
+                # Use the more detailed list_mcp_tools output for the system prompt if desired
+                tools_list_str = list_mcp_tools(server_name_iter) # This returns markdown
+                mcp_tool_descriptions_for_llm.append(f"From server '{server_name_iter}':\n{tools_list_str}")
+        if mcp_tool_descriptions_for_llm:
+            full_tools_info_str = "\n\n".join(mcp_tool_descriptions_for_llm)
+            interaction_advice = ""
+            if mcp_interaction_mode_choice == "Command Mode":
+                interaction_advice = "The user can invoke these tools using '/mcp <server_name> <tool_name> <json_args>'."
+            # For Natural Language mode, the LLM doesn't need explicit instruction in system prompt
+            # as `analyze_message_for_tool_call` handles that part.
+            augmented_sys_msg += f"\n\nYou also have access to the following external tools via Model Context Protocol (MCP):\n{full_tools_info_str}\n{interaction_advice}"
+    # Prepare messages list for LLM
+    messages_for_llm_api = [{"role": "system", "content": augmented_sys_msg}]
+    for hist_user_turn, hist_assist_response in history_tuples:
+        hist_user_text, hist_user_files = hist_user_turn # Unpack ((text, [files]))
+        history_user_llm_content = []
+        if hist_user_text and hist_user_text.strip():
+            history_user_llm_content.append({"type": "text", "text": hist_user_text})
+        if hist_user_files:
+            for hist_file_path in hist_user_files:
+                encoded_hist_img = encode_image(hist_file_path)
+                if encoded_hist_img:
+                    history_user_llm_content.append({
+                        "type": "image_url",
+                        "image_url": {"url": f"data:image/jpeg;base64,{encoded_hist_img}"}
+                    })
+        if history_user_llm_content: # Only add if there's actual content
+            messages_for_llm_api.append({"role": "user", "content": history_user_llm_content})
+        if hist_assist_response and hist_assist_response.strip():
+            messages_for_llm_api.append({"role": "assistant", "content": hist_assist_response})
+    messages_for_llm_api.append({"role": "user", "content": current_user_llm_content})
+    # print(f"Final messages for LLM API: {json.dumps(messages_for_llm_api, indent=2)}")
+    llm_parameters = {
+        "max_tokens": max_tokens_val, "temperature": temperature_val, "top_p": top_p_val,
+        "frequency_penalty": frequency_penalty_val,
     }
+    if current_seed is not None:
+        llm_parameters["seed"] = current_seed
+    print(f"Sending request to LLM: Model={model_id_for_llm}, Params={llm_parameters}")
+    streamed_response_text = ""
     try:
+        llm_stream = llm_client_instance.chat_completion(
+            model=model_id_for_llm,
+            messages=messages_for_llm_api,
             stream=True,
+            **llm_parameters
         )
+        # print("Streaming LLM response: ", end="", flush=True)
+        for chunk in llm_stream:
             if hasattr(chunk, 'choices') and len(chunk.choices) > 0:
+                delta = chunk.choices.delta
+                if hasattr(delta, 'content') and delta.content:
+                    token = delta.content
+                    # print(token, end="", flush=True)
+                    streamed_response_text += token
+                    yield streamed_response_text
+        # print("\nLLM Stream finished.")
+    except Exception as e_llm:
+        error_msg = f"Error during LLM inference: {str(e_llm)}"
+        print(error_msg)
+        import traceback
+        traceback.print_exc()
+        streamed_response_text += f"\n{error_msg}" # Append error to existing stream if any
+        yield streamed_response_text
+    print(f"--- RESPOND FUNCTION COMPLETED ---")
 # GRADIO UI
+with gr.Blocks(theme="Nymbo/Nymbo_Theme", title="Serverless TextGen Hub + MCP") as demo:
+    gr.Markdown("# Serverless TextGen Hub with MCP Client")
     chatbot = gr.Chatbot(
+        label="Chat",
         height=600,
         show_copy_button=True,
+        placeholder="Select a model, connect MCP servers (optional), and start chatting!",
+        bubble_full_width=False,
+        avatar_images=(None, "https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo-square.png")
     )
+    with gr.Row():
+        msg_textbox = gr.MultimodalTextbox( # Changed from gr.Textbox to gr.MultimodalTextbox
+            placeholder="Type a message or upload images... (Use /mcp for commands)",
+            show_label=False,
+            container=False,
+            scale=12,
+            file_types=["image"], # Can add more types like "audio", "video" if supported by models
+            file_count="multiple" # Allow multiple image uploads
+        )
+        # submit_button = gr.Button("Send", variant="primary", scale=1, min_width=100) # Optional explicit send button
+    with gr.Accordion("LLM Settings", open=False):
+        system_message_prompt_box = gr.Textbox(
+            value="You are a helpful and versatile AI assistant. You can understand text and images. If you have access to MCP tools, you can use them when appropriate or when the user asks.",
+            label="System Prompt", lines=3
         )
         with gr.Row():
+            with gr.Column(scale=1):
+                max_tokens_slider_ui = gr.Slider(minimum=128, maximum=8192, value=1024, step=128, label="Max New Tokens")
+                temperature_slider_ui = gr.Slider(minimum=0.0, maximum=2.0, value=0.7, step=0.05, label="Temperature")
+                top_p_slider_ui = gr.Slider(minimum=0.0, maximum=1.0, value=0.95, step=0.05, label="Top-P (Nucleus Sampling)")
+            with gr.Column(scale=1):
+                frequency_penalty_slider_ui = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
+                seed_slider_ui = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")
+        providers_list_ui = [
+            "hf-inference", "cerebras", "together", "sambanova", "novita",
+            "cohere", "fireworks-ai", "hyperbolic", "nebius",
         ]
+        provider_radio_ui = gr.Radio(choices=providers_list_ui, value="hf-inference", label="Inference Provider")
+        byok_textbox_ui = gr.Textbox(label="Your Hugging Face API Key (Optional)", placeholder="Enter HF Token if using non-hf-inference providers or private models", type="password")
+        custom_model_id_box = gr.Textbox(label="Custom Model ID (Overrides selection below)", placeholder="e.g., meta-llama/Llama-3-8B-Instruct")
+        model_search_box_ui = gr.Textbox(label="Filter Featured Models", placeholder="Search...", lines=1)
+        # More diverse model list, including some known multimodal ones
+        featured_models_list_data = [
+            "meta-llama/Meta-Llama-3.1-8B-Instruct", # Good default
+            "meta-llama/Meta-Llama-3.1-70B-Instruct",
             "mistralai/Mistral-Nemo-Instruct-2407",
+            "mistralai/Mixtral-8x22B-Instruct-v0.1",
+            "Qwen/Qwen2-7B-Instruct",
+            "microsoft/Phi-3-medium-128k-instruct",
+            # Multimodal
+            "Salesforce/blip-image-captioning-large", # Example, might not be chat
+            "llava-hf/llava-1.5-7b-hf", # LLaVA example
+            "microsoft/kosmos-2-patch14-224", # Kosmos-2
+            "google/paligemma-3b-mix-448", # PaliGemma
         ]
+        featured_model_radio_ui = gr.Radio(label="Select a Featured Model", choices=featured_models_list_data, value="meta-llama/Meta-Llama-3.1-8B-Instruct", interactive=True)
+        gr.Markdown("Tip: For multimodal chat, ensure selected model supports image inputs (e.g., LLaVA, PaliGemma, Kosmos-2).")
+    with gr.Accordion("MCP Client Settings", open=False):
+        mcp_enabled_checkbox_ui = gr.Checkbox(label="Enable MCP Support", value=False, info="Connect to external tools and services via MCP.")
         with gr.Row():
+            mcp_server_url_textbox = gr.Textbox(label="MCP Server URL", placeholder="e.g., https://your-mcp-server.hf.space/gradio_api/mcp/sse")
+            mcp_server_name_textbox = gr.Textbox(label="Friendly Server Name (Optional)", placeholder="MyTTS_Server")
+            mcp_connect_button_ui = gr.Button("Connect", variant="secondary")
+        mcp_connection_status_textbox = gr.Textbox(label="MCP Connection Status", placeholder="No MCP servers connected.", interactive=False, lines=2)
+        active_mcp_servers_dropdown = gr.Dropdown(
+            label="Use Tools From (Select Active MCP Servers)", choices=[], multiselect=True,
+            info="Choose which connected servers the LLM can use tools from."
         )
+        mcp_interaction_mode_radio = gr.Radio(
+            label="MCP Interaction Mode", choices=["Natural Language", "Command Mode"], value="Natural Language",
+            info="Natural Language: AI tries to detect tool use. Command Mode: Use '/mcp ...' syntax."
         )
+        gr.Markdown("Example MCP Command: `/mcp MyTTS text_to_audio {\"text\": \"Hello world!\"}`")
+    # --- Event Handlers ---
+    # Store history as list of tuples: [ ((user_text, [user_files]), assistant_response), ... ]
+    chat_history_state = gr.State([])
+    def user_interaction(user_multimodal_input, current_chat_history):
+        user_text = user_multimodal_input["text"] if user_multimodal_input and "text" in user_multimodal_input else ""
+        user_files = user_multimodal_input["files"] if user_multimodal_input and "files" in user_multimodal_input else []
+        # Only add to history if there's text or files
+        if user_text or user_files:
+            current_chat_history.append( ((user_text, user_files), None) ) # Append user turn, assistant response is None initially
+        return current_chat_history, gr.update(value={"text": "", "files": []}) # Clear input textbox
+    def bot_response_generator(
+        current_chat_history, system_prompt, max_tokens, temp, top_p_val, freq_penalty, seed_val,
+        provider_val, api_key_val, custom_model_val, selected_model_val, # Removed search_term as it's not directly used by respond
+        mcp_enabled_val, active_servers_val, mcp_mode_val
+    ):
+        if not current_chat_history or current_chat_history[-1] is not None: # If no user message or last message already has bot response
+            yield current_chat_history # Or simply `return current_chat_history` if not streaming
+            return
+        user_turn_content, _ = current_chat_history[-1] # Get the latest user turn: (text, [files])
+        message_text, message_files = user_turn_content
+        # The history passed to `respond` should be all turns *before* the current one
+        history_for_respond = current_chat_history[:-1]
+        response_stream = respond(
+            message_text, message_files, history_for_respond,
+            system_prompt, max_tokens, temp, top_p_val, freq_penalty, seed_val,
+            provider_val, api_key_val, custom_model_val, selected_model_val,
+            mcp_enabled_val, active_servers_val, mcp_mode_val
+        )
+        full_bot_message = ""
+        for chunk in response_stream:
+            full_bot_message = chunk
+            current_chat_history[-1] = (user_turn_content, full_bot_message) # Update last item's assistant part
+            yield current_chat_history
+    # Link UI components to functions
+    msg_textbox.submit(
+        user_interaction,
+        inputs=[msg_textbox, chat_history_state],
+        outputs=[chat_history_state, msg_textbox] # Update history and clear input
+    ).then(
+        bot_response_generator,
+        inputs=[
+            chat_history_state, system_message_prompt_box, max_tokens_slider_ui, temperature_slider_ui,
+            top_p_slider_ui, frequency_penalty_slider_ui, seed_slider_ui, provider_radio_ui,
+            byok_textbox_ui, custom_model_id_box, featured_model_radio_ui,
+            mcp_enabled_checkbox_ui, active_mcp_servers_dropdown, mcp_interaction_mode_radio
+        ],
+        outputs=[chatbot] # Stream to chatbot
     )
+    # MCP Connection
+    def handle_mcp_connect(url, name_suggestion):
+        """Connect to an MCP server and refresh the server dropdown choices.
+
+        Args:
+            url: Server URL typed by the user; surrounding whitespace is ignored.
+            name_suggestion: Optional server name, forwarded unchanged to
+                `connect_to_mcp_server`.
+
+        Returns:
+            A `(status_message, dropdown_update)` pair. The update lists every
+            key currently present in `mcp_connections`.
+        """
+        cleaned_url = url.strip() if url else ""
+        if not cleaned_url:
+            return "MCP Server URL cannot be empty.", gr.update(choices=list(mcp_connections.keys()))
+        # Connect with the trimmed URL: the emptiness check above already
+        # ignores padding, so the connection attempt should not see it either.
+        _, status_msg = connect_to_mcp_server(cleaned_url, name_suggestion)
+        # Only `choices` is refreshed; no `value` is passed in the update.
+        new_choices = list(mcp_connections.keys())
+        return status_msg, gr.update(choices=new_choices)
+    # Clicking the connect button calls `handle_mcp_connect` with the URL and
+    # name textboxes; its two return values go to the status textbox and the
+    # active-servers dropdown, in that order.
+    mcp_connect_button_ui.click(
+        handle_mcp_connect,
+        inputs=[mcp_server_url_textbox, mcp_server_name_textbox],
+        outputs=[mcp_connection_status_textbox, active_mcp_servers_dropdown]
     )
+    # Model Filtering
+    def filter_featured_models(search_query):
+        """Filter the featured-model choices by a case-insensitive substring.
+
+        Args:
+            search_query: Text from the model search box. `None`, empty, or
+                whitespace-only input restores the full list.
+
+        Returns:
+            A `gr.update` whose `choices` are the matching entries of
+            `featured_models_list_data`, or a single placeholder entry when
+            nothing matches.
+        """
+        # Normalise once: trim padding and lower-case outside the loop.
+        needle = search_query.strip().lower() if search_query else ""
+        if not needle:
+            return gr.update(choices=featured_models_list_data)
+        filtered = [m for m in featured_models_list_data if needle in m.lower()]
+        return gr.update(choices=filtered if filtered else ["No models match your search"])
+    # Re-filter the featured-model radio choices whenever the search box changes.
+    model_search_box_ui.change(filter_featured_models, inputs=model_search_box_ui, outputs=featured_model_radio_ui)
+    # Auto-select hf-inference if BYOK is empty and other provider is chosen
+    def validate_api_key_for_provider(api_key_text, current_provider):
+        """Fall back to "hf-inference" when another provider is chosen without a key.
+
+        Args:
+            api_key_text: Contents of the BYOK textbox; `None`, empty, or
+                whitespace-only all count as "no key".
+            current_provider: Currently selected provider name.
+
+        Returns:
+            `current_provider` unchanged when a key is present or the provider
+            is already "hf-inference"; otherwise a `gr.update` that selects
+            "hf-inference" (a `gr.Warning` is issued first).
+        """
+        # Guard against None so a cleared textbox cannot raise AttributeError.
+        has_key = bool(api_key_text and api_key_text.strip())
+        if has_key or current_provider == "hf-inference":
+            return current_provider # No change if key provided or hf-inference selected
+        gr.Warning("API Key needed for non-hf-inference providers. Defaulting to hf-inference.")
+        return gr.update(value="hf-inference")
+    # Run the same validation when either the API-key textbox or the provider
+    # radio changes; the result is written back to the provider radio.
+    # NOTE(review): the handler's output is the same component whose change
+    # event triggers it -- confirm this cannot re-trigger in a loop.
+    byok_textbox_ui.change(validate_api_key_for_provider, inputs=[byok_textbox_ui, provider_radio_ui], outputs=provider_radio_ui)
+    provider_radio_ui.change(validate_api_key_for_provider, inputs=[byok_textbox_ui, provider_radio_ui], outputs=provider_radio_ui)
 if __name__ == "__main__":
+    # Script entry point: announce startup, then enable queuing and launch.
+    print("Launching Gradio demo...")
+    # mcp_server=False as this is a client app
+    launch_options = {"debug": True, "show_api": False}
+    demo.queue().launch(**launch_options)