Spaces:

Nymbo
/

Serverless-TextGen-Hub

Running

App Files Files Community

Nymbo committed on May 18

Commit

02ec239

verified ·

1 Parent(s): 63903e4

Update app.py

Browse files

Files changed (1) hide show

app.py +249 -116

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ from huggingface_hub import InferenceClient
 import os
 import json
 import base64
 from PIL import Image
 import io
@@ -15,7 +16,7 @@ def encode_image(image_path):
         print("No image path provided")
         return None
-    try
         print(f"Encoding image from path: {image_path}")
         # If it's already a PIL Image
@@ -39,9 +40,61 @@ def encode_image(image_path):
         print(f"Error encoding image: {e}")
         return None
 def respond(
     message,
-    image_files,  # Changed parameter name and structure
     history: list[tuple[str, str]],
     system_message,
     max_tokens,
@@ -53,7 +106,9 @@ def respond(
     custom_api_key,
     custom_model,
     model_search_term,
-    selected_model
 ):
     print(f"Received message: {message}")
     print(f"Received {len(image_files) if image_files else 0} images")
@@ -66,6 +121,8 @@ def respond(
     print(f"Selected model (custom_model): {custom_model}")
     print(f"Model search term: {model_search_term}")
     print(f"Selected model from radio: {selected_model}")
     # Determine which token to use
     token_to_use = custom_api_key if custom_api_key.strip() != "" else ACCESS_TOKEN
@@ -209,6 +266,19 @@ def respond(
         response += f"\nError: {str(e)}"
         yield response
     print("Completed response generation.")
 # Function to validate provider selection based on BYOK
@@ -217,13 +287,30 @@ def validate_provider(api_key, provider):
         return gr.update(value="hf-inference")
     return gr.update(value=provider)
 # GRADIO UI
 with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
     # Create the chatbot component
     chatbot = gr.Chatbot(
         height=600,
         show_copy_button=True,
-        placeholder="Select a model and begin chatting. Now supports multiple inference providers and multimodal inputs",
         layout="panel"
     )
     print("Chatbot interface created.")
@@ -336,6 +423,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
         )
         # Featured models list
         models_list = [
             "meta-llama/Llama-3.2-11B-Vision-Instruct",
             "meta-llama/Llama-3.3-70B-Instruct",
@@ -370,42 +458,40 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
         )
         gr.Markdown("[View all Text-to-Text models](https://huggingface.co/models?inference_provider=all&pipeline_tag=text-generation&sort=trending) | [View all multimodal models](https://huggingface.co/models?inference_provider=all&pipeline_tag=image-text-to-text&sort=trending)")
-    # MCP Support Information Accordion
-    with gr.Accordion("MCP Support (for LLMs)", open=False):
-        gr.Markdown("""
-        ### Model Context Protocol (MCP) Support
-        This application can function as an MCP Server, allowing compatible AI models and agents (like Claude Desktop or custom MCP clients) to use its text and image generation capabilities as a tool.
-        When MCP is enabled, Gradio automatically exposes the relevant functions (likely based on the `bot` function in this app) as MCP tools.
-        **To connect an MCP client to this server:**
-        1. Ensure this Gradio application is running.
-        2. Use the following URL for the MCP server in your client configuration:
-           - If running locally: `http://127.0.0.1:7860/gradio_api/mcp/sse`
-           - If deployed on Hugging Face Spaces: `https://YOUR_USERNAME-YOUR_SPACENAME.hf.space/gradio_api/mcp/sse` (replace with your actual Space URL)
-        **Example MCP Client Configuration (`mcp.json` or similar):**
-        ```json
-        {
-          "mcpServers": {
-            "serverlessTextgenHub": {
-              "url": "http://127.0.0.1:7860/gradio_api/mcp/sse"
-            }
-          }
-        }
-        ```
-        **Tool Parameters:**
-        The exposed MCP tool will likely have parameters corresponding to the inputs of the `bot` function (e.g., `history`, `system_msg`, `max_tokens`, `temperature`, `model`, etc.).
-        *   **Important for `history` parameter:** For image inputs, the MCP client might need to format the `history` to include image references in a way the `bot` function can parse (e.g., markdown links `![Image](URL_or_base64_data_uri)` within the history's message part).
-        *   It's highly recommended to inspect the MCP schema for this server to understand the exact tool names, descriptions, and input/output schemas. You can usually find this at: `http://127.0.0.1:7860/gradio_api/mcp/schema` (or the equivalent URL for your deployed Space).
-        This allows for powerful integrations where an AI agent can programmatically request text or multimodal generations from this Serverless-TextGen-Hub.
-        """)
     # Chat history state
     chat_history = gr.State([])
@@ -424,99 +510,134 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
     # Function for the chat interface
     def user(user_message, history):
         print(f"User message received: {user_message}")
         if not user_message or (not user_message.get("text") and not user_message.get("files")):
             print("Empty message, skipping")
-            return history # Return immediately if message is empty
         text_content = user_message.get("text", "").strip()
         files = user_message.get("files", [])
         print(f"Text content: {text_content}")
         print(f"Files: {files}")
-        if not text_content and not files: # Check again after stripping text
             print("No content to display")
             return history
-        # Append text message first if it exists and is not empty
-        if text_content:
-            print(f"Adding text message: {text_content}")
-            history.append([text_content, None])
-        # Then append each image file as a separate message
-        if files:
             for file_path in files:
-                if file_path and isinstance(file_path, str): # Ensure file_path is valid
                     print(f"Adding image: {file_path}")
-                    history.append([f"![Image]({file_path})", None]) # Image as a new message
-        return history
     # Define bot response function
-    def bot(history, system_msg, max_tokens, temperature, top_p, freq_penalty, seed, provider, api_key, custom_model, search_term, selected_model):
-        if not history or not history[-1][0]: # Check if history or last message is empty
-            print("No history or empty last message to process for bot")
-            # Yield an empty update or the history itself to avoid errors
-            # depending on how Gradio handles empty yields.
-            # For safety, just return the history if it's in a bad state.
-            yield history
-            return
-        user_message_content = history[-1][0] # This is the user's latest message (text or image markdown)
-        print(f"Bot processing user message content: {user_message_content}")
-        # Determine if the current turn is primarily about an image or text
-        # This logic assumes images are added as separate history entries like "![Image](path)"
-        # and text prompts might precede them or be separate.
-        current_message_text_for_api = ""
-        current_image_files_for_api = []
-        # Check if the last entry is an image
-        if isinstance(user_message_content, str) and user_message_content.startswith("![Image]("):
-            image_path = user_message_content.replace("![Image](", "").replace(")", "")
-            current_image_files_for_api.append(image_path)
-            print(f"Bot identified image in last history entry: {image_path}")
-            # If it's an image, check the second to last entry for a text prompt
-            if len(history) > 1:
-                prev_content = history[-2][0]
-                if isinstance(prev_content, str) and not prev_content.startswith("![Image]("):
-                    current_message_text_for_api = prev_content
-                    print(f"Bot identified preceding text for image: {current_message_text_for_api}")
-        else: # Last entry is text
-            current_message_text_for_api = user_message_content
-            print(f"Bot identified text in last history entry: {current_message_text_for_api}")
-        # The history sent to `respond` should not include the current turn's input,
-        # as `respond` will add `message` (current_message_text_for_api) to its internal `messages` list.
-        # If an image is present, it's passed via `image_files`.
-        history_for_respond_func = history[:-1] # Pass history *before* the current turn
-        history[-1][1] = "" # Initialize assistant's response for the current turn
-        for response_chunk in respond(
-            message=current_message_text_for_api,
-            image_files=current_image_files_for_api,
-            history=history_for_respond_func, # Pass prior history
-            system_message=system_msg,
-            max_tokens=max_tokens,
-            temperature=temperature,
-            top_p=top_p,
-            frequency_penalty=freq_penalty,
-            seed=seed,
-            provider=provider,
-            custom_api_key=api_key,
-            custom_model=custom_model,
-            model_search_term=search_term, # Though these two might not be directly used by respond if model is fixed
-            selected_model=selected_model
-        ):
-            history[-1][1] = response_chunk
-            yield history
-    # Event handlers
     msg.submit(
         user,
         [msg, chatbot],
@@ -526,7 +647,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
         bot,
         [chatbot, system_message_box, max_tokens_slider, temperature_slider, top_p_slider,
          frequency_penalty_slider, seed_slider, provider_radio, byok_textbox, custom_model_box,
-         model_search_box, featured_model_radio],
         [chatbot]
     ).then(
         lambda: {"text": "", "files": []},  # Clear inputs after submission
@@ -534,6 +655,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
         [msg]
     )
     model_search_box.change(
         fn=filter_models,
         inputs=model_search_box,
@@ -541,6 +663,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
     )
     print("Model search box change event linked.")
     featured_model_radio.change(
         fn=set_custom_model_from_radio,
         inputs=featured_model_radio,
@@ -548,6 +671,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
     )
     print("Featured model radio button change event linked.")
     byok_textbox.change(
         fn=validate_provider,
         inputs=[byok_textbox, provider_radio],
@@ -555,15 +679,24 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
     )
     print("BYOK textbox change event linked.")
     provider_radio.change(
         fn=validate_provider,
         inputs=[byok_textbox, provider_radio],
         outputs=provider_radio
     )
     print("Provider radio button change event linked.")
 print("Gradio interface initialized.")
 if __name__ == "__main__":
     print("Launching the demo application.")
-    demo.launch(show_api=True, mcp_server=True) # MCP SERVER ENABLED HERE

 import os
 import json
 import base64
+import requests
 from PIL import Image
 import io
         print("No image path provided")
         return None
+    try:
         print(f"Encoding image from path: {image_path}")
         # If it's already a PIL Image
         print(f"Error encoding image: {e}")
         return None
+# Function to convert text to speech using Kokoro MCP server
+def text_to_speech(text, mcp_server_url, speed=1.0):
+    """
+    Convert text to speech using an MCP server with TTS capabilities.
+    Args:
+        text (str): The text to convert to speech
+        mcp_server_url (str): URL of the MCP server
+        speed (float): Speech speed multiplier
+    Returns:
+        str: Base64 encoded audio data
+    """
+    if not text or not mcp_server_url:
+        return None
+    try:
+        # Create JSON-RPC request for the TTS tool
+        payload = {
+            "jsonrpc": "2.0",
+            "id": 1,
+            "method": "tools/call",
+            "params": {
+                "name": "text_to_audio_b64",
+                "arguments": {
+                    "text": text,
+                    "speed": speed
+                }
+            }
+        }
+        # Send request to MCP server
+        response = requests.post(
+            mcp_server_url,
+            json=payload,
+            headers={"Content-Type": "application/json"}
+        )
+        if response.status_code == 200:
+            result = response.json()
+            if "result" in result:
+                return result["result"]
+            else:
+                print(f"Error in MCP server response: {result.get('error', 'Unknown error')}")
+        else:
+            print(f"Error calling MCP server: {response.status_code}")
+    except Exception as e:
+        print(f"Error in text_to_speech: {e}")
+    return None
 def respond(
     message,
+    image_files,
     history: list[tuple[str, str]],
     system_message,
     max_tokens,
     custom_api_key,
     custom_model,
     model_search_term,
+    selected_model,
+    mcp_server_url,
+    enable_tts
 ):
     print(f"Received message: {message}")
     print(f"Received {len(image_files) if image_files else 0} images")
     print(f"Selected model (custom_model): {custom_model}")
     print(f"Model search term: {model_search_term}")
     print(f"Selected model from radio: {selected_model}")
+    print(f"MCP Server URL: {mcp_server_url}")
+    print(f"TTS Enabled: {enable_tts}")
     # Determine which token to use
     token_to_use = custom_api_key if custom_api_key.strip() != "" else ACCESS_TOKEN
         response += f"\nError: {str(e)}"
         yield response
+    # If TTS is enabled and MCP server URL is provided, generate speech from the response
+    if enable_tts and mcp_server_url and response:
+        try:
+            print("Generating speech from response using MCP server...")
+            audio_b64 = text_to_speech(response, mcp_server_url)
+            if audio_b64:
+                # Add a hidden audio tag with the audio data
+                audio_html = f'<audio id="tts-audio" autoplay style="display:none"><source src="data:audio/wav;base64,{audio_b64}" type="audio/wav"></audio>'
+                response += f"\n\n{audio_html}"
+                yield response
+        except Exception as e:
+            print(f"Error generating speech: {e}")
     print("Completed response generation.")
 # Function to validate provider selection based on BYOK
         return gr.update(value="hf-inference")
     return gr.update(value=provider)
+# Function to validate MCP Server URL
+def validate_mcp_url(url):
+    if not url:
+        return gr.update(value="")
+    if not url.startswith(("http://", "https://")):
+        url = "https://" + url
+    # If gradio_api/mcp/sse is not in the URL, add it
+    if not url.endswith("/gradio_api/mcp/sse"):
+        if url.endswith("/"):
+            url = url + "gradio_api/mcp/sse"
+        else:
+            url = url + "/gradio_api/mcp/sse"
+    return gr.update(value=url)
 # GRADIO UI
 with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
     # Create the chatbot component
     chatbot = gr.Chatbot(
         height=600,
         show_copy_button=True,
+        placeholder="Select a model and begin chatting. Now supports multiple inference providers, multimodal inputs, and MCP servers",
         layout="panel"
     )
     print("Chatbot interface created.")
         )
         # Featured models list
+        # Updated to include multimodal models
         models_list = [
             "meta-llama/Llama-3.2-11B-Vision-Instruct",
             "meta-llama/Llama-3.3-70B-Instruct",
         )
         gr.Markdown("[View all Text-to-Text models](https://huggingface.co/models?inference_provider=all&pipeline_tag=text-generation&sort=trending) | [View all multimodal models](https://huggingface.co/models?inference_provider=all&pipeline_tag=image-text-to-text&sort=trending)")
+        # Add MCP Server configuration
+        with gr.Accordion("MCP Server Settings", open=False):
+            gr.Markdown("""
+            # Model Context Protocol (MCP) Integration
+            Connect to MCP servers to add advanced capabilities like Text-to-Speech to your chat.
+            [Learn more about MCP](https://modelcontextprotocol.io/)
+            """)
+            mcp_server_url = gr.Textbox(
+                label="MCP Server URL",
+                placeholder="https://your-tts-server.hf.space/gradio_api/mcp/sse",
+                info="URL to an MCP-compatible server (e.g., Kokoro TTS)"
+            )
+            enable_tts = gr.Checkbox(
+                label="Enable Text-to-Speech",
+                value=False,
+                info="When enabled, AI responses will be read aloud using the MCP server"
+            )
+            # Example servers
+            gr.Markdown("""
+            ### Example MCP Servers
+            Try these MCP servers for additional capabilities:
+            - **Kokoro TTS**: `https://fdaudens-kokoro-mcp.hf.space/gradio_api/mcp/sse`
+            - **More examples coming soon**
+            To use, copy the URL above and paste it into the MCP Server URL field.
+            """)
     # Chat history state
     chat_history = gr.State([])
     # Function for the chat interface
     def user(user_message, history):
+        """
+        Append the submitted message to the chat history and return the history.
+        Args:
+            user_message: Mapping read with .get("text") and .get("files");
+                presumably the MultimodalTextbox value - confirm against the
+                msg.submit wiring.
+            history: List of [user_content, assistant_content] pairs; it is
+                mutated in place.
+        Returns:
+            The same history list. Text (if any) is appended as [text, None],
+            then every non-empty string file path is appended as its own
+            ["![Image](path)", None] entry. Nothing is appended when the
+            message has neither text nor files.
+        """
+        # Debug logging for troubleshooting
         print(f"User message received: {user_message}")
+        # Skip if message is empty (no text and no files)
         if not user_message or (not user_message.get("text") and not user_message.get("files")):
             print("Empty message, skipping")
+            return history
+        # Prepare multimodal message format
         text_content = user_message.get("text", "").strip()
         files = user_message.get("files", [])
         print(f"Text content: {text_content}")
         print(f"Files: {files}")
+        # If both text and files are empty, skip
+        # (catches whitespace-only text, which passed the first check before stripping)
+        if not text_content and not files:
             print("No content to display")
             return history
+        # Add message with images to history
+        if files and len(files) > 0:
+            # Add text message first if it exists
+            if text_content:
+                # Add a separate text message
+                print(f"Adding text message: {text_content}")
+                history.append([text_content, None])
+            # Then add each image file separately
             for file_path in files:
+                # Entries that are empty or not plain strings are skipped silently
+                if file_path and isinstance(file_path, str):
                     print(f"Adding image: {file_path}")
+                    # Add image as a separate message with no text
+                    history.append([f"![Image]({file_path})", None])
+            return history
+        else:
+            # For text-only messages
+            print(f"Adding text-only message: {text_content}")
+            history.append([text_content, None])
+            return history
     # Define bot response function
+    def bot(history, system_msg, max_tokens, temperature, top_p, freq_penalty, seed, provider, api_key, custom_model, search_term, selected_model, mcp_url, tts_enabled):
+        # Check if history is valid
+        if not history or len(history) == 0:
+            print("No history to process")
+            return history
+        # Get the most recent message and detect if it's an image
+        user_message = history[-1][0]
+        print(f"Processing user message: {user_message}")
+        is_image = False
+        image_path = None
+        text_content = user_message
+        # Check if this is an image message (marked with ![Image])
+        if isinstance(user_message, str) and user_message.startswith("![Image]("):
+            is_image = True
+            # Extract image path from markdown format ![Image](path)
+            image_path = user_message.replace("![Image](", "").replace(")", "")
+            print(f"Image detected: {image_path}")
+            text_content = ""  # No text for image-only messages
+        # Look back for text context if this is an image
+        text_context = ""
+        if is_image and len(history) > 1:
+            # Use the previous message as context if it's text
+            prev_message = history[-2][0]
+            if isinstance(prev_message, str) and not prev_message.startswith("![Image]("):
+                text_context = prev_message
+                print(f"Using text context from previous message: {text_context}")
+        # Process message through respond function
+        history[-1][1] = ""
+        # Validate and format MCP server URL
+        if mcp_url:
+            mcp_url = validate_mcp_url(mcp_url)["value"]
+        # Use either the image or text for the API
+        if is_image:
+            # For image messages
+            for response in respond(
+                text_context,           # Text context from previous message if any
+                [image_path],           # Current image
+                history[:-1],           # Previous history
+                system_msg,
+                max_tokens,
+                temperature,
+                top_p,
+                freq_penalty,
+                seed,
+                provider,
+                api_key,
+                custom_model,
+                search_term,
+                selected_model,
+                mcp_url,
+                tts_enabled
+            ):
+                history[-1][1] = response
+                yield history
+        else:
+            # For text-only messages
+            for response in respond(
+                text_content,           # Text message
+                None,                   # No image
+                history[:-1],           # Previous history
+                system_msg,
+                max_tokens,
+                temperature,
+                top_p,
+                freq_penalty,
+                seed,
+                provider,
+                api_key,
+                custom_model,
+                search_term,
+                selected_model,
+                mcp_url,
+                tts_enabled
+            ):
+                history[-1][1] = response
+                yield history
+    # Event handlers - only using the MultimodalTextbox's built-in submit functionality
     msg.submit(
         user,
         [msg, chatbot],
         bot,
         [chatbot, system_message_box, max_tokens_slider, temperature_slider, top_p_slider,
          frequency_penalty_slider, seed_slider, provider_radio, byok_textbox, custom_model_box,
+         model_search_box, featured_model_radio, mcp_server_url, enable_tts],
         [chatbot]
     ).then(
         lambda: {"text": "", "files": []},  # Clear inputs after submission
         [msg]
     )
+    # Connect the model filter to update the radio choices
     model_search_box.change(
         fn=filter_models,
         inputs=model_search_box,
     )
     print("Model search box change event linked.")
+    # Connect the featured model radio to update the custom model box
     featured_model_radio.change(
         fn=set_custom_model_from_radio,
         inputs=featured_model_radio,
     )
     print("Featured model radio button change event linked.")
+    # Connect the BYOK textbox to validate provider selection
     byok_textbox.change(
         fn=validate_provider,
         inputs=[byok_textbox, provider_radio],
     )
     print("BYOK textbox change event linked.")
+    # Also validate provider when the radio changes to ensure consistency
     provider_radio.change(
         fn=validate_provider,
         inputs=[byok_textbox, provider_radio],
         outputs=provider_radio
     )
     print("Provider radio button change event linked.")
+    # Connect the MCP server URL field to validate URL
+    mcp_server_url.change(
+        fn=validate_mcp_url,
+        inputs=mcp_server_url,
+        outputs=mcp_server_url
+    )
+    print("MCP server URL change event linked.")
 print("Gradio interface initialized.")
 if __name__ == "__main__":
     print("Launching the demo application.")
+    demo.launch(show_api=True, mcp_server=True)  # Enable MCP server for this app too