Update app.py
app.py
CHANGED
@@ -2,8 +2,19 @@ import gradio as gr
 from openai import OpenAI
 import os
 
-# …
-…
+# --------------------------------------------------------------------------------
+# Serverless-TextGen-Hub
+# This application is a Gradio-based UI for text generation using
+# Hugging Face's serverless Inference API. We also incorporate features
+# inspired by the ImgGen-Hub, such as:
+# - A "Featured Models" accordion with text filtering.
+# - A "Custom Model" textbox for specifying a non-featured model.
+# - An "Information" tab with accordions for "Featured Models" and
+#   "Parameters Overview" containing helpful user guides.
+# --------------------------------------------------------------------------------
+
+# Retrieve the access token from environment variables
+ACCESS_TOKEN = os.getenv("HF_TOKEN")  # HF_TOKEN is your Hugging Face Inference API key
 print("Access token loaded.")
 
 # Initialize the OpenAI client with the Hugging Face Inference API endpoint

@@ -21,104 +32,265 @@ def respond(
     temperature,
     top_p,
     frequency_penalty,
-    seed
+    seed,
+    # NEW inputs for model selection
+    model_search,
+    selected_model,
+    custom_model
 ):
     """
-    This function handles the chatbot response.
-    … (old docstring lines 28-35, truncated in the page capture)
+    This function handles the chatbot response.
+
+    Parameters:
+    - message: The user's newest message (string).
+    - history: The list of previous messages in the conversation, each as a tuple (user_msg, assistant_msg).
+    - system_message: The system prompt provided.
+    - max_tokens: The maximum number of tokens to generate in the response.
+    - temperature: Sampling temperature (float).
+    - top_p: Top-p (nucleus) sampling (float).
+    - frequency_penalty: Penalize repeated tokens in the output (float).
+    - seed: A fixed seed for reproducibility; -1 means 'random'.
+    - model_search: The text used to filter the "Featured Models" Radio button list (unused here directly, but updated by the UI).
+    - selected_model: The model selected via the "Featured Models" Radio button.
+    - custom_model: If not empty, overrides selected_model with this custom path.
     """
 
+    # DEBUG LOGGING
     print(f"Received message: {message}")
     print(f"History: {history}")
     print(f"System message: {system_message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
+    print(f"Model search text: {model_search}")
+    print(f"Selected featured model: {selected_model}")
+    print(f"Custom model (overrides if not empty): {custom_model}")
 
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
         seed = None
 
-    # …
-    …
+    # Determine the final model name to use
+    # If the custom_model textbox is non-empty, we use that.
+    # Otherwise, we use the selected model from the Radio buttons.
+    if custom_model.strip():
+        model_to_use = custom_model.strip()
+    else:
+        model_to_use = selected_model
 
-    # …
+    # Construct the messages array required by the OpenAI-like HF API
+    messages = [{"role": "system", "content": system_message}]  # System prompt
+    # Add conversation history to context
     for val in history:
         user_part = val[0]
         assistant_part = val[1]
         if user_part:
             messages.append({"role": "user", "content": user_part})
-            print(f"Added user message to context: {user_part}")
         if assistant_part:
             messages.append({"role": "assistant", "content": assistant_part})
-            print(f"Added assistant message to context: {assistant_part}")
 
     # Append the latest user message
     messages.append({"role": "user", "content": message})
 
     # Start with an empty string to build the response as tokens stream in
     response = ""
-    print("…
+    print(f"Using model: {model_to_use}")
+    print("Sending request to OpenAI API...")
 
     # Make the streaming request to the HF Inference API via openai-like client
+    # Below, we pass 'model_to_use' instead of a hard-coded model
     for message_chunk in client.chat.completions.create(
-        model=…
+        model=model_to_use,  # <-- model is now dynamically selected
         max_tokens=max_tokens,
-        stream=True,
+        stream=True,  # Stream the response
         temperature=temperature,
         top_p=top_p,
-        frequency_penalty=frequency_penalty,
-        seed=seed,
+        frequency_penalty=frequency_penalty,
+        seed=seed,
         messages=messages,
     ):
-        # Extract …
+        # Extract token text from the response chunk
         token_text = message_chunk.choices[0].delta.content
-        print(f"Received token: {token_text}")
         response += token_text
+        # As we get new tokens, we stream them back to the user
         yield response
 
     print("Completed response generation.")
 
 # Create a Chatbot component with a specified height
 chatbot = gr.Chatbot(height=600)
-
-
-# …
-# We …
-… (old lines 94-119: 26 more deleted lines, truncated in the page capture)
+
+# ------------------------------------------------------------
+# Below: We define the UI with additional features integrated.
+# We'll replicate some of the style from the ImgGen-Hub code:
+# - A "Featured Models" accordion with the ability to filter
+# - A "Custom Model" text box
+# - An "Information" tab with "Featured Models" table and
+#   "Parameters Overview" containing markdown descriptions.
+# ------------------------------------------------------------
+
+# List of placeholder "Featured Models" for demonstration
+featured_models_list = [
+    "meta-llama/Llama-3.3-70B-Instruct",
+    "meta-llama/Llama-2-70B-chat-hf",
+    "meta-llama/Llama-2-13B-chat-hf",
+    "bigscience/bloom",
+    "google/flan-t5-xxl",
+]
+
+# This function filters the models in featured_models_list based on user input
+def filter_models(search_term):
+    """
+    Filters featured_models_list based on the text in 'search_term'.
+    """
+    filtered = [m for m in featured_models_list if search_term.lower() in m.lower()]
+    return gr.update(choices=filtered)
+
+print("Initializing Gradio interface...")  # Debug log
+
+# We build a custom Blocks layout to incorporate tabs and advanced UI elements
+with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
+
+    # Top-level heading for clarity
+    gr.Markdown("# Serverless-TextGen-Hub\nA Comprehensive UI for Text Generation")
+
+    with gr.Tab("Chat"):
+        # We'll place the ChatInterface within this tab
+
+        # Create the additional UI elements in a collapsible or visible layout
+        with gr.Accordion("Featured Models", open=False):
+            with gr.Row():
+                model_search = gr.Textbox(
+                    label="Filter Models",
+                    placeholder="Search for a featured model...",
+                    lines=1,
+                )
+            with gr.Row():
+                model_radio = gr.Radio(
+                    label="Select a featured model below",
+                    choices=featured_models_list,
+                    value="meta-llama/Llama-3.3-70B-Instruct",
+                    interactive=True,
+                )
+            # On change of model_search, we update the radio choices
+            model_search.change(
+                filter_models,
+                inputs=model_search,
+                outputs=model_radio
+            )
+
+        # Textbox for specifying a custom model that overrides the featured selection if not empty
+        custom_model = gr.Textbox(
+            label="Custom Model Path (overrides Featured Models if not empty)",
+            placeholder="e.g. meta-llama/Llama-2-13B-chat-hf",
+            lines=1
+        )
+
+        # Build the chat interface itself
+        # We'll pass "model_search", "model_radio", and "custom_model" as additional inputs
+        # so that the 'respond' function can see them and decide which model to use
+        chatbot_interface = gr.ChatInterface(
+            fn=respond,  # The function that generates the text
+            additional_inputs=[
+                gr.Textbox(
+                    value="You are a helpful AI assistant.",
+                    label="System message",
+                    lines=2
+                ),  # system_message
+                gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens"),  # max_tokens
+                gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),  # temperature
+                gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),  # top_p
+                gr.Slider(
+                    minimum=-2.0,
+                    maximum=2.0,
+                    value=0.0,
+                    step=0.1,
+                    label="Frequency Penalty"
+                ),  # frequency_penalty
+                gr.Slider(
+                    minimum=-1,
+                    maximum=65535,
+                    value=-1,
+                    step=1,
+                    label="Seed (-1 for random)"
+                ),  # seed
+                model_search,  # Exposed but won't be typed into during conversation
+                model_radio,
+                custom_model
+            ],
+            chatbot=chatbot,
+            title="Serverless-TextGen-Hub",
+            # The fill_height ensures the chat area expands
+            fill_height=True
+        )
+
+    # A new tab for "Information" about Featured Models and Parameters
+    with gr.Tab("Information"):
+        gr.Markdown("## Learn More About the Parameters and Models")
+
+        # Accordion for "Featured Models"
+        with gr.Accordion("Featured Models (WiP)", open=False):
+            gr.HTML(
+                """
+                <p>Below is a small table of example models. In practice, you can pick from
+                thousands of available text generation models on Hugging Face.
+                <br>
+                Use the <b>Filter Models</b> box under the <b>Featured Models</b> accordion
+                in the Chat tab to search by name, or enter a <b>Custom Model</b> path.</p>
+                <table style="width:100%; text-align:center; margin:auto;">
+                    <tr>
+                        <th>Model Name</th>
+                        <th>Is It Large?</th>
+                        <th>Notes</th>
+                    </tr>
+                    <tr>
+                        <td>meta-llama/Llama-3.3-70B-Instruct</td>
+                        <td>Yes</td>
+                        <td>Placeholder example</td>
+                    </tr>
+                    <tr>
+                        <td>meta-llama/Llama-2-13B-chat-hf</td>
+                        <td>Medium</td>
+                        <td>Placeholder example</td>
+                    </tr>
+                    <tr>
+                        <td>google/flan-t5-xxl</td>
+                        <td>Yes</td>
+                        <td>Placeholder example</td>
+                    </tr>
+                </table>
+                """
+            )
+
+        # Accordion for "Parameters Overview"
+        with gr.Accordion("Parameters Overview", open=False):
+            gr.Markdown(
+                """
+                ### Max New Tokens
+                Controls how many tokens can be generated in the response. A token is roughly a word or a piece of a word. If you need longer answers, increase this.
+
+                ### Temperature
+                A higher temperature makes the AI more 'creative' and random in its responses. Lower temperature keeps it more focused and deterministic.
+
+                ### Top-P
+                This is 'nucleus sampling.' It dictates the proportion of probability mass the model considers. At 1.0, it considers all words. Lower it to focus on the most likely words.
+
+                ### Frequency Penalty
+                Penalizes repeated tokens in the output. If you see a lot of repetition, increase this slightly to reduce it.
+
+                ### Seed
+                If set to -1, the randomness is different each time. Setting a specific number ensures the same result each run, making responses reproducible.
+
+                ### Custom Model
+                If this field is filled, it overrides the selection from Featured Models. This way, you can try out any model on the HF Hub, e.g.
+                <code>meta-llama/Llama-2-70B-chat-hf</code> or <code>bigscience/bloom</code>.
+                """
+            )
+
 print("Gradio interface initialized.")
 
+# ------------------------------------------------------------
+# Finally, we launch the app if the script is run directly.
+# ------------------------------------------------------------
 if __name__ == "__main__":
-    print("Launching the demo application…
+    print("Launching the demo application...")
     demo.launch()
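Note for readers following the diff: the `client` used in respond() is created in unchanged code that the hunks only reference (the context line "# Initialize the OpenAI client with the Hugging Face Inference API endpoint"). A minimal sketch of what that initialization plausibly looks like; the base_url shown is an assumption, not taken from this diff:

from openai import OpenAI
import os

ACCESS_TOKEN = os.getenv("HF_TOKEN")

# Assumed setup: an OpenAI-compatible client pointed at Hugging Face's
# serverless Inference API. The URL below is an assumption; use whatever
# endpoint the unchanged part of app.py actually configures.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)

With a client like this, the client.chat.completions.create(..., stream=True) call in the second hunk yields chunks incrementally, which is what the loop in respond() consumes.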
|