Update app.py

app.py CHANGED
@@ -6,41 +6,60 @@ from langchain.memory import ConversationBufferMemory
 # Move model to device (GPU if available)
 device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
 
-# Load the tokenizer (GPT-2 tokenizer)
+# Load the tokenizer (same tokenizer for both models since both are GPT-2 based)
 tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
 
-# Load the model configuration
-config = GPT2Config.from_pretrained("distilgpt2")
+# Load the baseline model (pre-trained DistilGPT2)
+baseline_model = GPT2LMHeadModel.from_pretrained("distilgpt2").to(device)
 
-# Create the model from the configuration
-model = GPT2LMHeadModel(config)
+# Load the fine-tuned model using its configuration and state dictionary
+# You should have a local fine-tuned model file for this (pytorch_model_100.bin)
+fine_tuned_config = GPT2Config.from_pretrained("distilgpt2")
+fine_tuned_model = GPT2LMHeadModel(fine_tuned_config)
 
-# Load the weights
-model_path = "./pytorch_model_100.bin"  # Path to your fine-tuned model file
-state_dict = torch.load(model_path, map_location=device)
-model.load_state_dict(state_dict)
-
-# Move model to the device (GPU or CPU)
-model.to(device)
+# Load the fine-tuned weights
+model_path = "./pytorch_model_100.bin"  # Path to your fine-tuned model file
+state_dict = torch.load(model_path, map_location=device)
+fine_tuned_model.load_state_dict(state_dict)
+fine_tuned_model.to(device)
 
 # Set up conversational memory using LangChain's ConversationBufferMemory
 memory = ConversationBufferMemory()
 
-# Define the chatbot function with memory
-def chat_with_distilgpt2(input_text, temperature, top_p, top_k):
+# Define the chatbot function with both baseline and fine-tuned models
+def chat_with_both_models(input_text, temperature, top_p, top_k):
     # Retrieve conversation history
     conversation_history = memory.load_memory_variables({})['history']
 
-    # Combine the conversation history with the user input
+    # Combine the conversation history with the user input (or just use input directly)
     no_memory_input = f"Question: {input_text}\nAnswer:"
 
     # Tokenize the input and convert to tensor
     input_ids = tokenizer.encode(no_memory_input, return_tensors="pt").to(device)
 
-    # Generate response from DistilGPT2
-    outputs = model.generate(
+    # Generate response from baseline DistilGPT2
+    baseline_outputs = baseline_model.generate(
+        input_ids,
+        max_length=input_ids.shape[1] + 50,
+        max_new_tokens=15,
+        num_return_sequences=1,
+        no_repeat_ngram_size=3,
+        repetition_penalty=1.2,
+        early_stopping=True,
+        pad_token_id=tokenizer.eos_token_id,
+        eos_token_id=tokenizer.eos_token_id,
+        temperature=temperature,
+        top_p=top_p,
+        top_k=top_k
+    )
+
+    # Decode the baseline model output
+    baseline_response = tokenizer.decode(baseline_outputs[0], skip_special_tokens=True)
+
+    # Generate response from the fine-tuned DistilGPT2
+    fine_tuned_outputs = fine_tuned_model.generate(
         input_ids,
         max_length=input_ids.shape[1] + 50,
         max_new_tokens=15,
         num_return_sequences=1,
         no_repeat_ngram_size=3,
@@ -48,33 +67,38 @@ def chat_with_distilgpt2(input_text, temperature, top_p, top_k):
         early_stopping=True,
         pad_token_id=tokenizer.eos_token_id,
         eos_token_id=tokenizer.eos_token_id,
         temperature=temperature,
         top_p=top_p,
         top_k=top_k
     )
 
-    # Decode the model output
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    # Decode the fine-tuned model output
+    fine_tuned_response = tokenizer.decode(fine_tuned_outputs[0], skip_special_tokens=True)
 
-    # Update the memory with the user input and model response
-    memory.save_context({"input": input_text}, {"output": response})
+    # Update the memory with the user input and responses from both models
+    memory.save_context({"input": input_text}, {"baseline_output": baseline_response, "fine_tuned_output": fine_tuned_response})
 
-    return response
+    # Return both responses
+    return baseline_response, fine_tuned_response
 
 # Set up the Gradio interface with additional sliders
 interface = gr.Interface(
-    fn=chat_with_distilgpt2,
+    fn=chat_with_both_models,
     inputs=[
         gr.Textbox(label="Chat with DistilGPT-2"),  # User input text
         gr.Slider(0.1, 1.0, step=0.1, value=1.0, label="Temperature"),  # Slider for temperature
         gr.Slider(0.0, 1.0, step=0.1, value=1.0, label="Top-p"),  # Slider for top-p
         gr.Slider(1, 100, step=1, value=50, label="Top-k")  # Slider for top-k
     ],
-    outputs=gr.Textbox(label="DistilGPT-2's Response"),
-    title="DistilGPT-2 Chatbot",
-    description="Chat with DistilGPT-2. You can adjust temperature, top-p, and top-k using the sliders.",
+    outputs=[
+        gr.Textbox(label="Baseline DistilGPT-2's Response"),  # Baseline model response
+        gr.Textbox(label="Fine-tuned DistilGPT-2's Response")  # Fine-tuned model response
+    ],
+    title="DistilGPT-2 Chatbot: Baseline vs Fine-tuned",
+    description="This app compares the responses of a baseline DistilGPT-2 and a fine-tuned version for each input prompt. You can adjust temperature, top-p, and top-k using the sliders.",
 )
 
 # Launch the Gradio app
 interface.launch()
+
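For context on the pytorch_model_100.bin file this commit expects: the fine-tuning script is not part of the Space, so the following is only a hypothetical sketch of how such a state-dict checkpoint is typically produced and reloaded (the _100 suffix presumably refers to a training step or epoch count; all names here are illustrative).

# Hypothetical sketch: producing a checkpoint like pytorch_model_100.bin.
# The actual fine-tuning code is not in this commit.
import torch
from transformers import GPT2LMHeadModel

model = GPT2LMHeadModel.from_pretrained("distilgpt2")

# ... fine-tuning loop over your dataset would go here ...

# Save only the weights (a state dict), which is what app.py reloads
torch.save(model.state_dict(), "pytorch_model_100.bin")

# Reloading mirrors the app.py side of this change
state_dict = torch.load("pytorch_model_100.bin", map_location="cpu")
model.load_state_dict(state_dict)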
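A caveat on both generate calls in this change: in transformers, temperature, top_p, and top_k only affect decoding when sampling is enabled, and do_sample defaults to False, so as committed both models decode greedily and the three sliders have no effect (early_stopping is likewise only meaningful for beam search). A minimal sampled variant, assuming the same objects defined in app.py above:

# Sampled version of the baseline call; baseline_model, tokenizer, input_ids
# and the slider values are assumed to be the objects from app.py above.
baseline_outputs = baseline_model.generate(
    input_ids,
    max_new_tokens=15,   # makes max_length redundant
    do_sample=True,      # required for temperature/top_p/top_k to apply
    temperature=temperature,
    top_p=top_p,
    top_k=top_k,
    num_return_sequences=1,
    no_repeat_ngram_size=3,
    repetition_penalty=1.2,
    pad_token_id=tokenizer.eos_token_id,
    eos_token_id=tokenizer.eos_token_id
)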
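One more thing to verify at runtime: in the LangChain releases I'm aware of, ConversationBufferMemory.save_context expects exactly one output key unless output_key is set, so passing both baseline_output and fine_tuned_output may raise a ValueError ("One output key expected"). A possible workaround, sketched with the variable names from app.py, is to store a single combined string per turn:

# Store one combined output per turn so the default memory accepts it.
# memory, input_text, baseline_response, fine_tuned_response as in app.py.
memory.save_context(
    {"input": input_text},
    {"output": f"[baseline] {baseline_response}\n[fine-tuned] {fine_tuned_response}"}
)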