Fixing memory issue
app.py CHANGED
@@ -1,89 +1,87 @@
 import gradio as gr
-from transformers import
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 from langchain.memory import ConversationBufferMemory

 # Move model to device (GPU if available)
-device = torch.device("cuda"
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

 # Load all three DialoGPT models (small, medium, large)
 models = {
-    "small":
-    "medium":
-    "large":
+    "small": AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small").to(device),
+    "medium": AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium").to(device),
+    "large": AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-large").to(device)
 }

 # Load the tokenizer (same tokenizer for all models)
-tokenizer =
+tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")

-#
-
+# Initialize conversation history
+conversation_history = []

-# Function to
-def
-
-
-
-    # Truncate to the last 100 tokens if necessary
-    if len(tokenized_history) > max_tokens:
-        tokenized_history = tokenized_history[-max_tokens:]
-
-    return tokenized_history
+# Function to clear the chat history
+def clear_history():
+    global conversation_history
+    conversation_history = []
+    return ""

 # Define the chatbot function with memory and additional parameters
 def chat_with_dialogpt(input_text, temperature, top_p, top_k, model_size):
-
-    conversation_history = memory.load_memory_variables({})['history']
-
-    # Combine the (possibly summarized) history with the current user input
-    full_history = conversation_history + f">> User: {input_text}"
-
-    # Truncate history to the most recent 100 tokens
-    truncated_input_ids = truncate_history_to_100_tokens(full_history, tokenizer)
-
-    # Tokenize the user input and append to truncated history
-    input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)
-    truncated_input_ids_tensor = torch.tensor([truncated_input_ids]).to(device)
-
-    # Concatenate truncated history with the new input
-    final_input_ids = torch.cat((truncated_input_ids_tensor, input_ids), dim=1)
+    global conversation_history
+
+    # Encode the user input and append the end-of-text token
+    new_user_input_ids = tokenizer.encode(input_text + tokenizer.eos_token, return_tensors='pt').to(device)
+
+    # Append the user input to the conversation history
+    conversation_history.append(new_user_input_ids)
+
+    # Concatenate conversation history
+    bot_input_ids = torch.cat(conversation_history, dim=-1)
+
+    # Truncate input_ids to the last 100 tokens if necessary
+    max_length = 100
+    if bot_input_ids.size(-1) > max_length:
+        bot_input_ids = bot_input_ids[:, -max_length:]

     # Get the model corresponding to the selected size
     model = models[model_size]
-
-    # Generate
-
-
-        max_length=
-
-
+
+    # Generate a response
+    response_ids = model.generate(
+        bot_input_ids,
+        max_length=bot_input_ids.shape[-1] + 50,
+        pad_token_id=tokenizer.eos_token_id,
+        eos_token_id=tokenizer.eos_token_id,
+        temperature=temperature,
+        top_p=top_p,
+        top_k=top_k,
         no_repeat_ngram_size=3,
         repetition_penalty=1.2,
         early_stopping=True,
-        pad_token_id=tokenizer.eos_token_id,
-        eos_token_id=tokenizer.eos_token_id,
-        temperature=temperature, # Add temperature from slider
-        top_p=top_p, # Add top_p from slider
-        top_k=top_k # Add top_k from slider
     )

-    #
-
+    # Extract only the new tokens generated
+    new_response_ids = response_ids[:, bot_input_ids.shape[-1]:]
+
+    # Decode the response
+    response = tokenizer.decode(new_response_ids[0], skip_special_tokens=True)

-    #
-
+    # Append the bot response to the conversation history
+    conversation_history.append(new_response_ids)

     # Format the chat history for display
-
+    # For display purposes, reconstruct the conversation
+    display_conversation = ""
+    for i in range(0, len(conversation_history), 2):
+        user_input = tokenizer.decode(conversation_history[i][0], skip_special_tokens=True)
+        display_conversation += f"You: {user_input}\n"
+        if i+1 < len(conversation_history):
+            bot_response = tokenizer.decode(conversation_history[i+1][0], skip_special_tokens=True)
+            display_conversation += f"Bot: {bot_response}\n"

-    return
+    return display_conversation

-#
-def clear_history():
-    memory.clear() # Clear the memory object
-    return "" # Return empty string to reset the chat display
-
-# Set up the Gradio interface with the input box below the output box
+# Set up the Gradio interface
 with gr.Blocks() as interface:
     chatbot_output = gr.Textbox(label="Conversation", lines=15, placeholder="Chat history will appear here...", interactive=False)

@@ -92,34 +90,34 @@ with gr.Blocks() as interface:

     # Add a dropdown for selecting the model size (small, medium, large)
     model_selector = gr.Dropdown(choices=["small", "medium", "large"], value="medium", label="Select Model Size")

     # Add a clear history button
-    clear_button = gr.Button("Clear History"
+    clear_button = gr.Button("Clear History")
     clear_button.click(fn=clear_history, outputs=[chatbot_output])

     # Input box for the user
     user_input = gr.Textbox(label="Your Input", placeholder="Type your message here...", lines=2, show_label=True)

     # Sliders for temperature, top_p, and top_k
-    temperature_slider = gr.Slider(0.1, 1.0, step=0.1, value=1.0, label="Temperature"
-    top_p_slider = gr.Slider(0.0, 1.0, step=0.1, value=1.0, label="Top-p"
-    top_k_slider = gr.Slider(1, 100, step=1, value=50, label="Top-k"
+    temperature_slider = gr.Slider(0.1, 1.0, step=0.1, value=1.0, label="Temperature")
+    top_p_slider = gr.Slider(0.0, 1.0, step=0.1, value=1.0, label="Top-p")
+    top_k_slider = gr.Slider(1, 100, step=1, value=50, label="Top-k")

     # Define the function to update the chat
-    def update_chat(input_text,
+    def update_chat(input_text, temperature, top_p, top_k, model_size):
         updated_history = chat_with_dialogpt(input_text, temperature, top_p, top_k, model_size)
         return updated_history, ""

     # Submit when pressing Shift + Enter
     user_input.submit(update_chat,
-                      inputs=[user_input,
+                      inputs=[user_input, temperature_slider, top_p_slider, top_k_slider, model_selector],
                       outputs=[chatbot_output, user_input])

     # Layout for sliders and chatbot UI
     gr.Row([temperature_slider, top_p_slider, top_k_slider])

     # Layout for model selector and clear button in a row
     gr.Row([model_selector, clear_button])

     # Launch the Gradio app
     interface.launch()
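
One caveat when reading the generation call above: in transformers, temperature, top_p, and top_k only take effect when sampling is enabled, so the sliders are ignored unless do_sample=True is also passed to model.generate. Below is a minimal standalone sketch of the rolling token-window memory this commit introduces, with sampling switched on; the chat helper, the MAX_CONTEXT name, and the fixed slider values are illustrative assumptions, not part of app.py.

# Minimal sketch of the rolling token-window memory used in this fix.
# Assumes the same microsoft/DialoGPT-medium checkpoint; do_sample=True
# is an addition here, not something the commit passes.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")

history = []       # alternating user/bot token-id tensors, each of shape (1, n)
MAX_CONTEXT = 100  # same cap as the truncation step in chat_with_dialogpt

def chat(user_text):
    # Encode the turn with the end-of-text separator, as in the commit
    ids = tokenizer.encode(user_text + tokenizer.eos_token, return_tensors="pt")
    history.append(ids)
    # Concatenate all turns, then keep only the most recent MAX_CONTEXT tokens
    context = torch.cat(history, dim=-1)[:, -MAX_CONTEXT:]
    out = model.generate(
        context,
        max_length=context.shape[-1] + 50,
        do_sample=True,  # without this, temperature/top_p/top_k are ignored
        temperature=0.7,
        top_p=0.9,
        top_k=50,
        pad_token_id=tokenizer.eos_token_id,
    )
    reply_ids = out[:, context.shape[-1]:]  # keep only the newly generated tokens
    history.append(reply_ids)
    return tokenizer.decode(reply_ids[0], skip_special_tokens=True)

print(chat("Hello! How are you today?"))

Since DialoGPT separates turns with eos_token, a gentler variant of the same idea would drop whole turns from the front of history until the concatenation fits, so the window never cuts a turn in half.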