Updating to new Gradio chat interface
app.py CHANGED
@@ -1,123 +1,64 @@
 import gradio as gr
-from transformers import …
-import torch
-from langchain.memory import ConversationBufferMemory
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
-# …
-…
+# Load the shared tokenizer (using a tokenizer from DialoGPT models)
+tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
 
-# …
-models = {
-    "small": AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small").to(device),
-    "medium": AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium").to(device),
-    "large": AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-large").to(device)
+# Define the model names
+model_names = {
+    "DialoGPT-small": "microsoft/DialoGPT-small",
+    "DialoGPT-medium": "microsoft/DialoGPT-medium"
 }
 
-# …
-…
+# Pre-load the models
+loaded_models = {
+    model_name: AutoModelForCausalLM.from_pretrained(model_path)
+    for model_name, model_path in model_names.items()
+}
 
-…
-…
+def respond(
+    message,
+    history: list[tuple[str, str]],
+    model_choice,
+    max_tokens,
+    temperature,
+    top_p,
+):
+    # Select the pre-loaded model based on user's choice
+    model = loaded_models[model_choice]
 
-# …
-…
-…
-…
-…
+    # Prepare the input by concatenating the history into a dialogue format
+    input_text = ""
+    for user_msg, bot_msg in history:
+        input_text += f"User: {user_msg}\nAssistant: {bot_msg}\n"
+    input_text += f"User: {message}\nAssistant:"
 
-# …
-def chat_with_dialogpt(input_text, temperature, top_p, top_k, model_size):
-    global conversation_history
+    # Tokenize the input text using the shared tokenizer
+    inputs = tokenizer(input_text, return_tensors="pt", truncation=True)
 
-    # …
-    …
-    …
-    …
-    conversation_history.append(new_user_input_ids)
-
-    # Concatenate conversation history
-    bot_input_ids = torch.cat(conversation_history, dim=-1)
-
-    # Truncate input_ids to the last 100 tokens if necessary
-    max_length = 100
-    if bot_input_ids.size(-1) > max_length:
-        bot_input_ids = bot_input_ids[:, -max_length:]
-
-    # Get the model corresponding to the selected size
-    model = models[model_size]
-
-    # Generate a response
-    response_ids = model.generate(
-        bot_input_ids,
-        max_length=bot_input_ids.shape[-1] + 50,
-        pad_token_id=tokenizer.eos_token_id,
-        eos_token_id=tokenizer.eos_token_id,
+    # Generate the response using the selected DialoGPT model
+    output_tokens = model.generate(
+        inputs["input_ids"],
+        max_length=len(inputs["input_ids"][0]) + max_tokens,
         temperature=temperature,
         top_p=top_p,
-        top_k=top_k,
-        no_repeat_ngram_size=3,
-        repetition_penalty=1.2,
-        early_stopping=True,
+        do_sample=True,
     )
-
-    # Extract only the new tokens generated
-    new_response_ids = response_ids[:, bot_input_ids.shape[-1]:]
-
-    # Decode the response
-    response = tokenizer.decode(new_response_ids[0], skip_special_tokens=True)
-
-    # Append the bot response to the conversation history
-    conversation_history.append(new_response_ids)
-
-    # Format the chat history for display
-    # For display purposes, reconstruct the conversation
-    display_conversation = ""
-    for i in range(0, len(conversation_history), 2):
-        user_input = tokenizer.decode(conversation_history[i], skip_special_tokens=True)
-        display_conversation += f"You: {user_input}\n"
-        if i+1 < len(conversation_history):
-            bot_response = tokenizer.decode(conversation_history[i+1], skip_special_tokens=True)
-            display_conversation += f"Bot: {bot_response}\n"
-
-    return display_conversation
 
-# …
-… (old lines 85-100: UI setup defining chatbot_output, user_input, model_selector, clear_button; truncated)
-# Sliders for temperature, top_p, and top_k
-temperature_slider = gr.Slider(0.1, 1.0, step=0.1, value=1.0, label="Temperature")
-top_p_slider = gr.Slider(0.0, 1.0, step=0.1, value=1.0, label="Top-p")
-top_k_slider = gr.Slider(1, 100, step=1, value=50, label="Top-k")
-
-# Define the function to update the chat
-def update_chat(input_text, temperature, top_p, top_k, model_size):
-    updated_history = chat_with_dialogpt(input_text, temperature, top_p, top_k, model_size)
-    return updated_history, ""
-
-# Submit when pressing Shift + Enter
-user_input.submit(update_chat,
-                  inputs=[user_input, temperature_slider, top_p_slider, top_k_slider, model_selector],
-                  outputs=[chatbot_output, user_input])
-
-# Layout for sliders and chatbot UI
-gr.Row([temperature_slider, top_p_slider, top_k_slider])
-
-# Layout for model selector and clear button in a row
-gr.Row([model_selector, clear_button])
-
-# Launch the Gradio app
-interface.launch()
+    # Decode and return the assistant's response
+    response = tokenizer.decode(output_tokens[0][inputs['input_ids'].shape[-1]:], skip_special_tokens=True)
+    yield response
+
+# Define the Gradio interface
+demo = gr.ChatInterface(
+    respond,
+    additional_inputs=[
+        gr.Dropdown(choices=["DialoGPT-small", "DialoGPT-medium"], value="DialoGPT-small", label="Model"),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
+    ],
+)
+
+if __name__ == "__main__":
+    demo.launch()
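With gr.ChatInterface, each submission reaches the handler as respond(message, history, *additional_inputs), where the additional inputs arrive in the order their components are declared: model choice, max tokens, temperature, then top-p. Because respond is a generator, the wiring can be sanity-checked outside the UI; a minimal sketch, assuming the DialoGPT checkpoints load successfully (the test values here are illustrative, not part of the commit):

# Hypothetical smoke test: call respond() the same way gr.ChatInterface
# does, passing the additional inputs in declaration order
# (model choice, max tokens, temperature, top-p).
reply = next(respond("Hello there!", [], "DialoGPT-small", 64, 0.7, 0.95))
print(f"Assistant: {reply}")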
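A side note on the new generate call: deriving max_length from the prompt length is what the max_new_tokens argument expresses directly, and DialoGPT's GPT-2 tokenizer defines no pad token, so generate warns unless pad_token_id is set explicitly. A hedged equivalent of the call, assuming a transformers release recent enough to support max_new_tokens:

# Sketch of an equivalent call, not the committed code. max_new_tokens
# replaces the manual max_length arithmetic; the explicit attention_mask
# and pad_token_id avoid the warnings DialoGPT otherwise triggers because
# its GPT-2 tokenizer has no pad token.
output_tokens = model.generate(
    inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=int(max_tokens),
    temperature=temperature,
    top_p=top_p,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
)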