Spaces: Running

Update app.py

app.py CHANGED
@@ -5,6 +5,10 @@ import json
 import base64
 from PIL import Image
 import io
+import requests
+from mcp.client.sse import SSEServerParameters
+from mcp.jsonrpc.client import JsonRpcClient
+from mcp.client.base import ServerCapabilities

 ACCESS_TOKEN = os.getenv("HF_TOKEN")
 print("Access token loaded.")
@@ -39,69 +43,107 @@ def encode_image(image_path):
         print(f"Error encoding image: {e}")
         return None

-
-
-
-
-
-
-
-
-
-
-
-
-
+# MCP Client class for handling MCP server connections
+class MCPClient:
+    def __init__(self, url):
+        self.url = url
+        self.client = None
+        self.capabilities = None
+        self.tools = None
+
+    def connect(self):
+        try:
+            # Connect to the MCP server using SSE
+            server_params = SSEServerParameters(url=self.url)
+            self.client = JsonRpcClient(server_params)
+            self.client.connect()
+
+            # Get server capabilities
+            self.capabilities = ServerCapabilities(self.client)
+
+            # List available tools
+            self.tools = self.capabilities.list_tools()
+            print(f"Connected to MCP Server. Available tools: {[tool.name for tool in self.tools]}")
+            return True
+        except Exception as e:
+            print(f"Error connecting to MCP server: {e}")
+            return False
+
+    def call_tool(self, tool_name, **kwargs):
+        if not self.client or not self.tools:
+            print("MCP client not initialized or no tools available")
+            return None
+
+        # Find the tool with the given name
+        tool = next((t for t in self.tools if t.name == tool_name), None)
+        if not tool:
+            print(f"Tool '{tool_name}' not found")
+            return None
+
+        try:
+            # Call the tool with the given arguments
+            result = self.client.call_method("tools/call", {"name": tool_name, "arguments": kwargs})
+            return result
+        except Exception as e:
+            print(f"Error calling tool '{tool_name}': {e}")
+            return None
+
+    def close(self):
+        if self.client:
+            try:
+                self.client.close()
+                print("MCP client connection closed")
+            except Exception as e:
+                print(f"Error closing MCP client connection: {e}")
+
+# Function to convert text to audio using Kokoro MCP server
+def text_to_audio(text, speed=1.0, mcp_url=None):
+    """Convert text to audio using Kokoro MCP server if available.
+
     Args:
-
-
-
-
-        top_p (float): Top-p sampling parameter.
-        frequency_penalty (float): Penalty for frequent tokens.
-        provider (str): Inference provider (e.g., 'hf-inference').
-        model (str): Model identifier (e.g., 'meta-llama/Llama-3.2-11B-Vision-Instruct').
-
+        text (str): Text to convert to speech
+        speed (float): Speed multiplier for speech
+        mcp_url (str): URL of the Kokoro MCP server
+
     Returns:
-
+        tuple: (sample_rate, audio_array) or None if conversion fails
     """
-
-
-    # Initialize the Inference Client
-    client = InferenceClient(token=ACCESS_TOKEN, provider=provider)
-    print(f"Inference Client initialized with {provider} provider.")
-
-    # Prepare messages
-    messages = [
-        {"role": "system", "content": system_message},
-        {"role": "user", "content": message}
-    ]
+    if not text or not mcp_url:
+        return None

-    # Prepare parameters
-    parameters = {
-        "max_tokens": max_tokens,
-        "temperature": temperature,
-        "top_p": top_p,
-        "frequency_penalty": frequency_penalty,
-    }
-
     try:
-        #
-
-
-
-
-
-        )
-
-
-
-        return
-
-
+        # Connect to MCP server
+        mcp_client = MCPClient(mcp_url)
+        if not mcp_client.connect():
+            return None
+
+        # Call the text_to_audio tool
+        result = mcp_client.call_tool("text_to_audio", text=text, speed=speed)
+        mcp_client.close()
+
+        if not result:
+            return None
+
+        # Process the result - convert base64 audio to numpy array
+        import numpy as np
+        import base64
+
+        # Assuming the result contains base64-encoded WAV data
+        audio_b64 = result
+        audio_data = base64.b64decode(audio_b64)
+
+        # Convert to numpy array - this is simplified and may need adjustment
+        # based on the actual output format from the Kokoro MCP server
+        import io
+        import soundfile as sf
+
+        audio_io = io.BytesIO(audio_data)
+        audio_array, sample_rate = sf.read(audio_io)
+
+        return (sample_rate, audio_array)
     except Exception as e:
-        print(f"Error
-        return
+        print(f"Error converting text to audio: {e}")
+        return None

 def respond(
     message,
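A minimal usage sketch of the helpers added above, for review. It assumes the `mcp` imports in this commit resolve as used, and the URL is a placeholder rather than a known live server:

```python
# Hypothetical usage inside this app.py; MCP_URL is a placeholder endpoint.
MCP_URL = "https://example-kokoro-mcp.hf.space/gradio_api/mcp/sse"

client = MCPClient(MCP_URL)
if client.connect():  # prints the tools the server exposes on success
    raw = client.call_tool("text_to_audio", text="Hello!", speed=1.0)
    client.close()

# Or via the convenience wrapper, which returns a (sample_rate, audio_array)
# tuple on success and None on any failure:
audio = text_to_audio("Hello from the Space!", speed=1.0, mcp_url=MCP_URL)
```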
@@ -117,7 +159,10 @@ def respond(
     custom_api_key,
     custom_model,
     model_search_term,
-    selected_model
+    selected_model,
+    mcp_server_url=None,
+    tts_enabled=False,
+    tts_speed=1.0
 ):
     print(f"Received message: {message}")
     print(f"Received {len(image_files) if image_files else 0} images")
@@ -130,6 +175,8 @@ def respond(
     print(f"Selected model (custom_model): {custom_model}")
     print(f"Model search term: {model_search_term}")
     print(f"Selected model from radio: {selected_model}")
+    print(f"MCP Server URL: {mcp_server_url}")
+    print(f"TTS Enabled: {tts_enabled}")

     # Determine which token to use
     token_to_use = custom_api_key if custom_api_key.strip() != "" else ACCESS_TOKEN
@@ -274,6 +321,19 @@ def respond(
         yield response

     print("Completed response generation.")
+
+    # If TTS is enabled and we have a valid MCP server URL, convert response to audio
+    if tts_enabled and mcp_server_url and response:
+        try:
+            print(f"Converting response to audio using MCP server: {mcp_server_url}")
+            audio_data = text_to_audio(response, tts_speed, mcp_server_url)
+            if audio_data:
+                # Here we would need to handle returning both text and audio
+                # This would require modifying the Gradio interface to support this
+                print("Successfully converted text to audio")
+            # For now, we'll just return the text response
+        except Exception as e:
+            print(f"Error converting text to audio: {e}")

 # Function to validate provider selection based on BYOK
 def validate_provider(api_key, provider):
@@ -281,6 +341,22 @@ def validate_provider(api_key, provider):
         return gr.update(value="hf-inference")
     return gr.update(value=provider)

+# Function to test MCP server connection
+def test_mcp_connection(mcp_url):
+    if not mcp_url or not mcp_url.strip():
+        return "Please enter an MCP server URL"
+
+    try:
+        mcp_client = MCPClient(mcp_url)
+        if mcp_client.connect():
+            tools = [tool.name for tool in mcp_client.tools]
+            mcp_client.close()
+            return f"Successfully connected to MCP server. Available tools: {', '.join(tools)}"
+        else:
+            return "Failed to connect to MCP server"
+    except Exception as e:
+        return f"Error connecting to MCP server: {str(e)}"
+
 # GRADIO UI
 with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
     # Create the chatbot component
@@ -303,8 +379,6 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
         sources=["upload"]
     )

-    # Note: We're removing the separate submit button since MultimodalTextbox has its own
-
     # Create accordion for settings
     with gr.Accordion("Settings", open=False):
         # System message
@@ -402,6 +476,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
     )

     # Featured models list
+    # Updated to include multimodal models
     models_list = [
         "meta-llama/Llama-3.2-11B-Vision-Instruct",
         "meta-llama/Llama-3.3-70B-Instruct",
@@ -431,37 +506,67 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
         featured_model_radio = gr.Radio(
             label="Select a model below",
             choices=models_list,
-            value="meta-llama/Llama-3.2-11B-Vision-Instruct",
+            value="meta-llama/Llama-3.2-11B-Vision-Instruct",  # Default to a multimodal model
             interactive=True
         )

         gr.Markdown("[View all Text-to-Text models](https://huggingface.co/models?inference_provider=all&pipeline_tag=text-generation&sort=trending) | [View all multimodal models](https://huggingface.co/models?inference_provider=all&pipeline_tag=image-text-to-text&sort=trending)")
-
-        #
-        with gr.Accordion("MCP
+
+        # New Accordion for MCP Settings
+        with gr.Accordion("MCP Server Settings", open=False):
+            mcp_server_url = gr.Textbox(
+                value="",
+                label="MCP Server URL",
+                info="Enter the URL of an MCP server to connect to (e.g., https://example-kokoro-mcp.hf.space/gradio_api/mcp/sse)",
+                placeholder="https://fdaudens-kokoro-mcp.hf.space/gradio_api/mcp/sse"
+            )
+
+            test_connection_btn = gr.Button("Test Connection")
+            connection_status = gr.Textbox(
+                label="Connection Status",
+                interactive=False
+            )
+
+            tts_enabled = gr.Checkbox(
+                label="Enable Text-to-Speech",
+                value=False,
+                info="Convert AI responses to speech using the Kokoro TTS service"
+            )
+
+            tts_speed = gr.Slider(
+                minimum=0.5,
+                maximum=2.0,
+                value=1.0,
+                step=0.1,
+                label="Speech Speed"
+            )
+
             gr.Markdown("""
-            ### MCP Support
+            ### About MCP Support

-            This app
+            This app can connect to Model Context Protocol (MCP) servers to extend its capabilities.

-
+            For example, connecting to a Kokoro MCP server allows for text-to-speech conversion.

-
-
-
-
-
-            }
-            }
-            }
-            ```
+            To use this feature:
+            1. Enter the MCP server URL
+            2. Test the connection
+            3. Enable the desired features (e.g., TTS)
+            4. Chat normally with the AI

-
+            Note: TTS functionality requires an active connection to a Kokoro MCP server.
             """)

     # Chat history state
     chat_history = gr.State([])

+    # Connect the test connection button
+    test_connection_btn.click(
+        fn=test_mcp_connection,
+        inputs=[mcp_server_url],
+        outputs=[connection_status]
+    )
+
     # Function to filter models
     def filter_models(search_term):
         print(f"Filtering models with search term: {search_term}")
@@ -476,6 +581,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:

     # Function for the chat interface
     def user(user_message, history):
+        # Debug logging for troubleshooting
         print(f"User message received: {user_message}")

         # Skip if message is empty (no text and no files)
@@ -499,6 +605,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
         if files and len(files) > 0:
             # Add text message first if it exists
             if text_content:
+                # Add a separate text message
                 print(f"Adding text message: {text_content}")
                 history.append([text_content, None])

@@ -506,6 +613,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
             for file_path in files:
                 if file_path and isinstance(file_path, str):
                     print(f"Adding image: {file_path}")
+                    # Add image as a separate message with no text
                     history.append([f"![Image]({file_path})", None])

             return history
@@ -516,11 +624,13 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
             return history

     # Define bot response function
-    def bot(history, system_msg, max_tokens, temperature, top_p, freq_penalty, seed, provider, api_key, custom_model, search_term, selected_model):
+    def bot(history, system_msg, max_tokens, temperature, top_p, freq_penalty, seed, provider, api_key, custom_model, search_term, selected_model, mcp_url, tts_on, tts_spd):
+        # Check if history is valid
         if not history or len(history) == 0:
             print("No history to process")
             return history

+        # Get the most recent message and detect if it's an image
         user_message = history[-1][0]
         print(f"Processing user message: {user_message}")

@@ -528,26 +638,33 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
         image_path = None
         text_content = user_message

+        # Check if this is an image message (marked with ![Image])
         if isinstance(user_message, str) and user_message.startswith("![Image]("):
             is_image = True
+            # Extract image path from markdown format ![Image](path)
             image_path = user_message.replace("![Image](", "").replace(")", "")
             print(f"Image detected: {image_path}")
-            text_content = ""
+            text_content = ""  # No text for image-only messages

+        # Look back for text context if this is an image
         text_context = ""
         if is_image and len(history) > 1:
+            # Use the previous message as context if it's text
             prev_message = history[-2][0]
             if isinstance(prev_message, str) and not prev_message.startswith("![Image]("):
                 text_context = prev_message
                 print(f"Using text context from previous message: {text_context}")

+        # Process message through respond function
         history[-1][1] = ""

+        # Use either the image or text for the API
         if is_image:
+            # For image messages
             for response in respond(
-                text_context,
-                [image_path],
-                history[:-1],
+                text_context,   # Text context from previous message if any
+                [image_path],   # Current image
+                history[:-1],   # Previous history
                 system_msg,
                 max_tokens,
                 temperature,
@@ -558,15 +675,19 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
                 api_key,
                 custom_model,
                 search_term,
-                selected_model
+                selected_model,
+                mcp_url,
+                tts_on,
+                tts_spd
             ):
                 history[-1][1] = response
                 yield history
         else:
+            # For text-only messages
             for response in respond(
-                text_content,
-                None,
-                history[:-1],
+                text_content,   # Text message
+                None,           # No image
+                history[:-1],   # Previous history
                 system_msg,
                 max_tokens,
                 temperature,
@@ -577,12 +698,15 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
                 api_key,
                 custom_model,
                 search_term,
-                selected_model
+                selected_model,
+                mcp_url,
+                tts_on,
+                tts_spd
             ):
                 history[-1][1] = response
                 yield history

-    # Event handlers
+    # Event handlers - only using the MultimodalTextbox's built-in submit functionality
     msg.submit(
         user,
         [msg, chatbot],
@@ -592,14 +716,15 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
         bot,
         [chatbot, system_message_box, max_tokens_slider, temperature_slider, top_p_slider,
          frequency_penalty_slider, seed_slider, provider_radio, byok_textbox, custom_model_box,
-         model_search_box, featured_model_radio],
+         model_search_box, featured_model_radio, mcp_server_url, tts_enabled, tts_speed],
         [chatbot]
     ).then(
-        lambda: {"text": "", "files": []},
+        lambda: {"text": "", "files": []},  # Clear inputs after submission
         None,
         [msg]
     )

+    # Connect the model filter to update the radio choices
     model_search_box.change(
         fn=filter_models,
         inputs=model_search_box,
@@ -607,13 +732,15 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
     )
     print("Model search box change event linked.")

+    # Connect the featured model radio to update the custom model box
     featured_model_radio.change(
         fn=set_custom_model_from_radio,
         inputs=featured_model_radio,
         outputs=custom_model_box
     )
     print("Featured model radio button change event linked.")
-
+
+    # Connect the BYOK textbox to validate provider selection
     byok_textbox.change(
         fn=validate_provider,
         inputs=[byok_textbox, provider_radio],
@@ -621,6 +748,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
     )
     print("BYOK textbox change event linked.")

+    # Also validate provider when the radio changes to ensure consistency
     provider_radio.change(
         fn=validate_provider,
         inputs=[byok_textbox, provider_radio],
@@ -632,4 +760,4 @@ print("Gradio interface initialized.")

 if __name__ == "__main__":
     print("Launching the demo application.")
-    demo.launch(show_api=True
+    demo.launch(show_api=True)
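As the inline comments in `respond` note, the TTS branch currently only logs success because the function yields plain text. One possible follow-up is sketched below; it is not part of this commit and assumes a `gr.Audio` output component is added to the UI (Gradio's `Audio` accepts a `(sample_rate, numpy_array)` tuple, which is what `text_to_audio` returns):

```python
# Hypothetical follow-up wiring; tts_audio and speak_last_reply do not exist in this diff.
tts_audio = gr.Audio(label="Spoken response", interactive=False)

def speak_last_reply(history, mcp_url, tts_on, tts_spd):
    # Convert the most recent assistant reply to audio once streaming finishes.
    if not (tts_on and mcp_url and history and history[-1][1]):
        return None
    return text_to_audio(history[-1][1], tts_spd, mcp_url)

# Chained after the existing bot step:
# msg.submit(user, ...).then(bot, ...).then(
#     speak_last_reply,
#     [chatbot, mcp_server_url, tts_enabled, tts_speed],
#     [tts_audio],
# )
```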