Update app.py

app.py (CHANGED)
@@ -2,12 +2,39 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 import os
 import json
+import base64
+from PIL import Image
+import io
 
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
 print("Access token loaded.")
 
+# Function to encode image to base64
+def encode_image(image):
+    if image is None:
+        return None
+
+    # Convert to PIL Image if needed
+    if not isinstance(image, Image.Image):
+        try:
+            image = Image.open(image)
+        except Exception as e:
+            print(f"Error opening image: {e}")
+            return None
+
+    # Convert to RGB if image has an alpha channel (RGBA)
+    if image.mode == 'RGBA':
+        image = image.convert('RGB')
+
+    # Encode to base64
+    buffered = io.BytesIO()
+    image.save(buffered, format="JPEG")
+    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
+    return img_str
+
 def respond(
     message,
+    images,  # New parameter for uploaded images
     history: list[tuple[str, str]],
     system_message,
     max_tokens,
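The new encode_image helper returns the raw base64 string and leaves the data-URL wrapping to the caller; a later hunk wraps it as data:image/jpeg;base64,.... A minimal sketch of that round trip, assuming the helper above and a hypothetical local file:

# Sketch only: "photo.jpg" is a hypothetical file, not part of the Space.
encoded = encode_image("photo.jpg")  # base64-encoded JPEG string, or None on failure
if encoded:
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{encoded}"},
    }

Note that the helper re-encodes everything as JPEG, which is why it flattens alpha channels with the RGBA-to-RGB conversion first.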
@@ -16,26 +43,26 @@ def respond(
     frequency_penalty,
     seed,
     provider,
-    custom_api_key,
+    custom_api_key,
     custom_model,
     model_search_term,
     selected_model
 ):
     print(f"Received message: {message}")
+    print(f"Received {len(images) if images else 0} images")
     print(f"History: {history}")
     print(f"System message: {system_message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Selected provider: {provider}")
-    print(f"Custom API Key provided: {bool(custom_api_key.strip())}")
+    print(f"Custom API Key provided: {bool(custom_api_key.strip())}")
     print(f"Selected model (custom_model): {custom_model}")
     print(f"Model search term: {model_search_term}")
     print(f"Selected model from radio: {selected_model}")
 
-    # Determine which token to use
+    # Determine which token to use
     token_to_use = custom_api_key if custom_api_key.strip() != "" else ACCESS_TOKEN
 
-    # Log which token source we're using (without printing the actual token)
     if custom_api_key.strip() != "":
        print("USING CUSTOM API KEY: BYOK token provided by user is being used for authentication")
    else:
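The construction of `client` itself sits in unchanged lines and is not shown in this diff. Assuming the standard huggingface_hub API, where recent releases accept a provider name alongside the token, it presumably looks something like:

# Assumed shape of the unchanged client setup (not part of this diff):
from huggingface_hub import InferenceClient

client = InferenceClient(provider=provider, token=token_to_use)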
@@ -49,6 +76,33 @@ def respond(
     if seed == -1:
         seed = None
 
+    # Create multimodal content if images are present
+    if images and any(images):
+        # Process the user message to include images
+        user_content = []
+
+        # Add text part if there is any
+        if message and message.strip():
+            user_content.append({
+                "type": "text",
+                "text": message
+            })
+
+        # Add image parts
+        for img in images:
+            if img is not None:
+                encoded_image = encode_image(img)
+                if encoded_image:
+                    user_content.append({
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:image/jpeg;base64,{encoded_image}"
+                        }
+                    })
+    else:
+        # Text-only message
+        user_content = message
+
     # Prepare messages in the format expected by the API
     messages = [{"role": "system", "content": system_message}]
     print("Initial messages array constructed.")
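With at least one image attached, user_content becomes a list of typed parts in the OpenAI-style multimodal format; with text only, it stays a plain string. For example, the message "Describe this" plus one uploaded image produces roughly:

# Illustrative payload (base64 data truncated):
user_content = [
    {"type": "text", "text": "Describe this"},
    {
        "type": "image_url",
        "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg..."},
    },
]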
@@ -59,14 +113,14 @@
         assistant_part = val[1]
         if user_part:
             messages.append({"role": "user", "content": user_part})
-            print(f"Added user message to context: {user_part}")
+            print(f"Added user message to context (type: {type(user_part)})")
         if assistant_part:
             messages.append({"role": "assistant", "content": assistant_part})
             print(f"Added assistant message to context: {assistant_part}")
 
     # Append the latest user message
-    messages.append({"role": "user", "content": message})
-    print("Latest user message appended.")
+    messages.append({"role": "user", "content": user_content})
+    print(f"Latest user message appended (content type: {type(user_content)})")
 
     # Determine which model to use, prioritizing custom_model if provided
     model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model
@@ -90,15 +144,13 @@
     # Use the InferenceClient for making the request
     try:
         # Create a generator for the streaming response
-        # The provider is already set when initializing the client
         stream = client.chat_completion(
             model=model_to_use,
             messages=messages,
             stream=True,
-            **parameters
+            **parameters
        )
 
-        # Print a starting message for token streaming
         print("Received tokens: ", end="", flush=True)
 
         # Process the streaming response
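The `parameters` dict unpacked into chat_completion is defined in unchanged lines. Judging by the sliders wired into respond() further down, it presumably collects the sampling settings, roughly:

# Assumed shape of the unchanged `parameters` dict, inferred from the
# slider names below; the actual code may differ:
parameters = {
    "max_tokens": max_tokens,
    "temperature": temperature,
    "top_p": top_p,
    "frequency_penalty": frequency_penalty,
    "seed": seed,
}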
@@ -108,12 +160,10 @@
            if hasattr(chunk.choices[0], 'delta') and hasattr(chunk.choices[0].delta, 'content'):
                token_text = chunk.choices[0].delta.content
                if token_text:
-                    # Print tokens inline without newlines
                    print(token_text, end="", flush=True)
                    response += token_text
                    yield response
 
-        # Print a newline at the end of all tokens
        print()
    except Exception as e:
        print(f"Error during inference: {e}")
@@ -124,174 +174,284 @@ def respond(
 
 # Function to validate provider selection based on BYOK
 def validate_provider(api_key, provider):
-    # If no custom API key is provided, only "hf-inference" can be used
     if not api_key.strip() and provider != "hf-inference":
         return gr.update(value="hf-inference")
     return gr.update(value=provider)
 
 # GRADIO UI
-# (old lines 133-294: previous UI definition, removed; its content is not preserved in this render)
+with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
+    # Create the chatbot component
+    chatbot = gr.Chatbot(
+        height=600,
+        show_copy_button=True,
+        placeholder="Select a model and begin chatting",
+        layout="panel"
+    )
+    print("Chatbot interface created.")
+
+    with gr.Row():
+        # Text input for messages
+        msg = gr.Textbox(
+            placeholder="Type a message...",
+            show_label=False,
+            container=False,
+            scale=9
+        )
+
+        # Image upload button
+        image_upload = gr.Image(
+            type="filepath",
+            label="Upload Image",
+            scale=1
+        )
+
+    # Send button for messages
+    submit_btn = gr.Button("Send", variant="primary")
+
+    # Create tabs for different settings
+    with gr.Accordion("Settings", open=False):
+        # Tab for general settings
+        with gr.Tab("General Settings"):
+            # System message
+            system_message_box = gr.Textbox(
+                value="You are a helpful AI assistant that can understand images and text.",
+                placeholder="You are a helpful assistant.",
+                label="System Prompt"
+            )
+
+            # Generation parameters
+            with gr.Row():
+                with gr.Column():
+                    max_tokens_slider = gr.Slider(
+                        minimum=1,
+                        maximum=4096,
+                        value=512,
+                        step=1,
+                        label="Max tokens"
+                    )
+
+                    temperature_slider = gr.Slider(
+                        minimum=0.1,
+                        maximum=4.0,
+                        value=0.7,
+                        step=0.1,
+                        label="Temperature"
+                    )
+
+                with gr.Column():
+                    top_p_slider = gr.Slider(
+                        minimum=0.1,
+                        maximum=1.0,
+                        value=0.95,
+                        step=0.05,
+                        label="Top-P"
+                    )
+
+                    frequency_penalty_slider = gr.Slider(
+                        minimum=-2.0,
+                        maximum=2.0,
+                        value=0.0,
+                        step=0.1,
+                        label="Frequency Penalty"
+                    )
+
+                    seed_slider = gr.Slider(
+                        minimum=-1,
+                        maximum=65535,
+                        value=-1,
+                        step=1,
+                        label="Seed (-1 for random)"
+                    )
+
+        # Tab for provider and model selection
+        with gr.Tab("Provider & Model"):
+            with gr.Row():
+                with gr.Column():
+                    # Provider selection
+                    providers_list = [
+                        "hf-inference",  # Default Hugging Face Inference
+                        "cerebras",  # Cerebras provider
+                        "together",  # Together AI
+                        "sambanova",  # SambaNova
+                        "novita",  # Novita AI
+                        "cohere",  # Cohere
+                        "fireworks-ai",  # Fireworks AI
+                        "hyperbolic",  # Hyperbolic
+                        "nebius",  # Nebius
+                    ]
+
+                    provider_radio = gr.Radio(
+                        choices=providers_list,
+                        value="hf-inference",
+                        label="Inference Provider",
+                        info="[View all models here](https://huggingface.co/models?inference_provider=all&sort=trending)"
+                    )
+
+                    # New BYOK textbox
+                    byok_textbox = gr.Textbox(
+                        value="",
+                        label="BYOK (Bring Your Own Key)",
+                        info="Enter a custom Hugging Face API key here. When empty, only 'hf-inference' provider can be used.",
+                        placeholder="Enter your Hugging Face API token",
+                        type="password"  # Hide the API key for security
+                    )
+
+                with gr.Column():
+                    # Custom model box
+                    custom_model_box = gr.Textbox(
+                        value="",
+                        label="Custom Model",
+                        info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
+                        placeholder="meta-llama/Llama-3.3-70B-Instruct"
+                    )
+
+                    # Model search
+                    model_search_box = gr.Textbox(
+                        label="Filter Models",
+                        placeholder="Search for a featured model...",
+                        lines=1
+                    )
+
+                    # Featured models list
+                    # Updated to include multimodal models
+                    models_list = [
+                        # Multimodal models
+                        "meta-llama/Llama-3.3-70B-Vision",
+                        "Alibaba-NLP/NephilaV-16B-Chat",
+                        "mistralai/Mistral-Large-Vision-2407",
+                        "OpenGVLab/InternVL-Chat-V1-5",
+                        "microsoft/Phi-3.5-vision-instruct",
+                        "Qwen/Qwen2.5-VL-7B-Instruct",
+                        "liuhaotian/llava-v1.6-mistral-7b",
+
+                        # Standard text models
+                        "meta-llama/Llama-3.3-70B-Instruct",
+                        "meta-llama/Llama-3.1-70B-Instruct",
+                        "meta-llama/Llama-3.0-70B-Instruct",
+                        "meta-llama/Llama-3.2-3B-Instruct",
+                        "meta-llama/Llama-3.2-1B-Instruct",
+                        "meta-llama/Llama-3.1-8B-Instruct",
+                        "NousResearch/Hermes-3-Llama-3.1-8B",
+                        "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
+                        "mistralai/Mistral-Nemo-Instruct-2407",
+                        "mistralai/Mixtral-8x7B-Instruct-v0.1",
+                        "mistralai/Mistral-7B-Instruct-v0.3",
+                        "mistralai/Mistral-7B-Instruct-v0.2",
+                        "Qwen/Qwen3-235B-A22B",
+                        "Qwen/Qwen3-32B",
+                        "Qwen/Qwen2.5-72B-Instruct",
+                        "Qwen/Qwen2.5-3B-Instruct",
+                        "Qwen/Qwen2.5-0.5B-Instruct",
+                        "Qwen/QwQ-32B",
+                        "Qwen/Qwen2.5-Coder-32B-Instruct",
+                        "microsoft/Phi-3.5-mini-instruct",
+                        "microsoft/Phi-3-mini-128k-instruct",
+                        "microsoft/Phi-3-mini-4k-instruct",
+                    ]
+
+                    featured_model_radio = gr.Radio(
+                        label="Select a model below",
+                        choices=models_list,
+                        value="meta-llama/Llama-3.3-70B-Vision",  # Default to a multimodal model
+                        interactive=True
+                    )
+
+            gr.Markdown("[View all multimodal models](https://huggingface.co/models?pipeline_tag=image-to-text&sort=trending)")
+
+    # Chat history state
+    chat_history = gr.State([])
+
+    # Function to filter models
+    def filter_models(search_term):
+        print(f"Filtering models with search term: {search_term}")
+        filtered = [m for m in models_list if search_term.lower() in m.lower()]
+        print(f"Filtered models: {filtered}")
+        return gr.update(choices=filtered)
+
+    # Function to set custom model from radio
+    def set_custom_model_from_radio(selected):
+        print(f"Featured model selected: {selected}")
+        return selected
+
+    # Function for the chat interface
+    def user(user_message, image, history):
+        if user_message == "" and image is None:
+            return history
+
+        # Format image reference for display
+        img_placeholder = ""
+        if image is not None:
+            img_placeholder = f"![Image]({image})"
+
+        # Combine text and image reference for display
+        display_message = f"{user_message}\n{img_placeholder}" if img_placeholder else user_message
+
+        # Return updated history
+        return history + [[display_message, None]]
+
+    # Define chat interface
+    def bot(history, images, system_msg, max_tokens, temperature, top_p, freq_penalty, seed, provider, api_key, custom_model, search_term, selected_model):
+        # Extract the last user message
+        user_message = history[-1][0] if history and len(history) > 0 else ""
+
+        # Clean up the user message to remove image reference
+        if "![Image]" in user_message:
+            text_parts = user_message.split("![Image]")[0].strip()
+        else:
+            text_parts = user_message
+
+        # Process message through respond function
+        history[-1][1] = ""
+        for response in respond(
+            text_parts,  # Send only the text part
+            [images],  # Send images separately
+            history[:-1],
+            system_msg,
+            max_tokens,
+            temperature,
+            top_p,
+            freq_penalty,
+            seed,
+            provider,
+            api_key,
+            custom_model,
+            search_term,
+            selected_model
+        ):
+            history[-1][1] = response
+            yield history
+
+    # Event handlers
+    msg.submit(
+        user,
+        [msg, image_upload, chatbot],
+        [chatbot],
+        queue=False
+    ).then(
+        bot,
+        [chatbot, image_upload, system_message_box, max_tokens_slider, temperature_slider, top_p_slider,
+         frequency_penalty_slider, seed_slider, provider_radio, byok_textbox, custom_model_box,
+         model_search_box, featured_model_radio],
+        [chatbot]
+    )
+
+    submit_btn.click(
+        user,
+        [msg, image_upload, chatbot],
+        [chatbot],
+        queue=False
+    ).then(
+        bot,
+        [chatbot, image_upload, system_message_box, max_tokens_slider, temperature_slider, top_p_slider,
+         frequency_penalty_slider, seed_slider, provider_radio, byok_textbox, custom_model_box,
+         model_search_box, featured_model_radio],
+        [chatbot]
+    ).then(
+        lambda: (None, "", None),  # Clear inputs after submission
+        None,
+        [msg, msg, image_upload]
+    )
+
     # Connect the model filter to update the radio choices
     model_search_box.change(
         fn=filter_models,
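The event wiring above follows Gradio's chained-listener pattern: an unqueued user step appends the new message to the chat history immediately, a queued bot step streams the reply into the last history slot, and a final lambda clears the inputs. A self-contained sketch of the same pattern, with illustrative component names rather than the Space's actual ones:

import gradio as gr

with gr.Blocks() as demo:
    chat = gr.Chatbot()
    box = gr.Textbox()

    def add_user(text, history):
        # Append the user turn with an empty assistant slot
        return history + [[text, None]]

    def add_bot(history):
        # Stand-in for the streaming respond() call
        history[-1][1] = f"Echo: {history[-1][0]}"
        return history

    box.submit(
        add_user, [box, chat], [chat], queue=False
    ).then(
        add_bot, [chat], [chat]
    ).then(
        lambda: "", None, [box]  # clear the textbox last
    )

demo.launch()

One quirk in the diff itself: the clearing lambda returns three values while its output list names msg twice ([msg, msg, image_upload]), so the textbox is assigned None and then "" in the same step.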