TestDistilGPT2-FT

Sleeping

App Files Community

kdevoe committed on Oct 12, 2024

Commit

a47f900

verified ·

1 Parent(s): fa7af89

Reverting back to single model hosted. Comparison with baseline taking too long.

Browse files

Files changed (1) hide show

app.py +32 -54

app.py CHANGED Viewed

@@ -6,41 +6,41 @@ from langchain.memory import ConversationBufferMemory
 # Move model to device (GPU if available)
 device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
-# Load the tokenizer (same tokenizer for both models since both are GPT-2 based)
 tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
-# Load the baseline model (pre-trained DistilGPT2)
-baseline_model = GPT2LMHeadModel.from_pretrained("distilgpt2").to(device)
-# Load the fine-tuned model using its configuration and state dictionary
-# You should have a local fine-tuned model file for this (pytorch_model_100.bin)
-fine_tuned_config = GPT2Config.from_pretrained("distilgpt2")
-fine_tuned_model = GPT2LMHeadModel(fine_tuned_config)
-# Load the fine-tuned weights
-model_path = "./pytorch_model_100.bin"  # Path to your fine-tuned model file
-state_dict = torch.load(model_path, map_location=device)
-fine_tuned_model.load_state_dict(state_dict)
-fine_tuned_model.to(device)
 # Set up conversational memory using LangChain's ConversationBufferMemory
 memory = ConversationBufferMemory()
-# Define the chatbot function with both baseline and fine-tuned models
-def chat_with_both_models(input_text, temperature, top_p, top_k):
     # Retrieve conversation history
     conversation_history = memory.load_memory_variables({})['history']
-    # Combine the conversation history with the user input (or just use input directly)
     no_memory_input = f"Question: {input_text}\nAnswer:"
     # Tokenize the input and convert to tensor
     input_ids = tokenizer.encode(no_memory_input, return_tensors="pt").to(device)
-    # Generate response from baseline DistilGPT2
-    baseline_outputs = baseline_model.generate(
         input_ids,
-        max_length=input_ids.shape[1] + 50,
         max_new_tokens=15,
         num_return_sequences=1,
         no_repeat_ngram_size=3,
@@ -48,57 +48,35 @@ def chat_with_both_models(input_text, temperature, top_p, top_k):
         early_stopping=True,
         pad_token_id=tokenizer.eos_token_id,
         eos_token_id=tokenizer.eos_token_id,
-        temperature=temperature,
-        top_p=top_p,
-        top_k=top_k
     )
-    # Decode the baseline model output
-    baseline_response = tokenizer.decode(baseline_outputs[0], skip_special_tokens=True)
-    # Generate response from the fine-tuned DistilGPT2
-    fine_tuned_outputs = fine_tuned_model.generate(
-        input_ids,
-        max_length=input_ids.shape[1] + 50,
-        max_new_tokens=15,
-        num_return_sequences=1,
-        no_repeat_ngram_size=3,
-        repetition_penalty=1.2,
-        early_stopping=True,
-        pad_token_id=tokenizer.eos_token_id,
-        eos_token_id=tokenizer.eos_token_id,
-        temperature=temperature,
-        top_p=top_p,
-        top_k=top_k
-    )
-    # Decode the fine-tuned model output
-    fine_tuned_response = tokenizer.decode(fine_tuned_outputs[0], skip_special_tokens=True)
-    # Update the memory with the user input and responses from both models
-    memory.save_context({"input": input_text}, {"baseline_output": baseline_response, "fine_tuned_output": fine_tuned_response})
-    # Return both responses
-    return baseline_response, fine_tuned_response
 # Set up the Gradio interface with additional sliders
 interface = gr.Interface(
-    fn=chat_with_both_models,
     inputs=[
         gr.Textbox(label="Chat with DistilGPT-2"),  # User input text
         gr.Slider(0.1, 1.0, step=0.1, value=1.0, label="Temperature"),  # Slider for temperature
         gr.Slider(0.0, 1.0, step=0.1, value=1.0, label="Top-p"),       # Slider for top-p
         gr.Slider(1, 100, step=1, value=50, label="Top-k")             # Slider for top-k
     ],
-    outputs=[
-        gr.Textbox(label="Baseline DistilGPT-2's Response"),  # Baseline model response
-        gr.Textbox(label="Fine-tuned DistilGPT-2's Response")  # Fine-tuned model response
-    ],
-    title="DistilGPT-2 Chatbot: Baseline vs Fine-tuned",
-    description="This app compares the responses of a baseline DistilGPT-2 and a fine-tuned version for each input prompt. You can adjust temperature, top-p, and top-k using the sliders.",
 )
 # Launch the Gradio app
 interface.launch()

 # Move model to device (GPU if available)
 device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+# Load the tokenizer (you can use the pre-trained tokenizer for GPT-2 family)
 tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
+# Manually create a configuration for the model (since we don't have config.json)
+config = GPT2Config.from_pretrained("distilgpt2")
+# Initialize the model using the manually created configuration
+model = GPT2LMHeadModel(config)
+# Load the weights from the pytorch_model.bin file
+model_path = "./pytorch_model_100.bin"  # Path to local model file
+state_dict = torch.load(model_path, map_location=device)  # Load the state_dict
+model.load_state_dict(state_dict)  # Load the state dict into the model
+# Move model to the device (GPU or CPU)
+model.to(device)
 # Set up conversational memory using LangChain's ConversationBufferMemory
 memory = ConversationBufferMemory()
+# Define the chatbot function with memory and additional parameters
+def chat_with_distilgpt2(input_text, temperature, top_p, top_k):
     # Retrieve conversation history
     conversation_history = memory.load_memory_variables({})['history']
+    # Build the prompt from the current user input only (the retrieved history is not included)
     no_memory_input = f"Question: {input_text}\nAnswer:"
     # Tokenize the input and convert to tensor
     input_ids = tokenizer.encode(no_memory_input, return_tensors="pt").to(device)
+    # Generate the response using the model with adjusted parameters
+    outputs = model.generate(
         input_ids,
+        max_length=input_ids.shape[1] + 50,  # Limit total length
         max_new_tokens=15,
         num_return_sequences=1,
         no_repeat_ngram_size=3,
         early_stopping=True,
         pad_token_id=tokenizer.eos_token_id,
         eos_token_id=tokenizer.eos_token_id,
+        temperature=temperature,  # Add temperature from slider
+        top_p=top_p,              # Add top_p from slider
+        top_k=top_k               # Add top_k from slider
     )
+    # Decode the model output
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    # Update the memory with the user input and model response
+    memory.save_context({"input": input_text}, {"output": response})
+    return response
 # Set up the Gradio interface with additional sliders
 interface = gr.Interface(
+    fn=chat_with_distilgpt2,
     inputs=[
         gr.Textbox(label="Chat with DistilGPT-2"),  # User input text
         gr.Slider(0.1, 1.0, step=0.1, value=1.0, label="Temperature"),  # Slider for temperature
         gr.Slider(0.0, 1.0, step=0.1, value=1.0, label="Top-p"),       # Slider for top-p
         gr.Slider(1, 100, step=1, value=50, label="Top-k")             # Slider for top-k
     ],
+    outputs=gr.Textbox(label="DistilGPT-2's Response"),  # Model response
+    title="DistilGPT-2 Chatbot with Memory and Adjustable Parameters",
+    description="This is a simple chatbot powered by the DistilGPT-2 model with conversational memory, using LangChain. You can adjust temperature, top-p, and top-k using the sliders.",
 )
 # Launch the Gradio app
 interface.launch()
+How can this be modified to give the results from both a baseline DistilGPT2 and the fine-tuned version for each input prompt?