Using locally saved fine tuned model
app.py
CHANGED
@@ -6,23 +6,23 @@ from langchain.memory import ConversationBufferMemory
 # Move model to device (GPU if available)
 device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
 
-# Load the tokenizer
+# Load the tokenizer (use pre-trained tokenizer for GPT-2 family)
 tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
-model = GPT2LMHeadModel.from_pretrained("distilgpt2")
-model.to(device)
 
-#
-
-
+# Load the fine-tuned model from the local safetensors file
+model_path = "./model.safetensors"  # Path to your local model file
+model = GPT2LMHeadModel.from_pretrained(
+    pretrained_model_name_or_path=None,  # None because it's not from a model name
+    config="distilgpt2",  # Specify the config for distilgpt2
+    local_files_only=True,  # Only look for local files
+    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
+)
+
+# Load the safetensors weights
+model.load_state_dict(torch.load(model_path, map_location=device))
 
-#
-
-# "summarize: " + history,
-# return_tensors="pt"
-# ).to(device)
-# summary_ids = summarizer_model.generate(input_ids, max_length=50, min_length=25, length_penalty=5., num_beams=2)
-# summary = summarizer_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
-# return summary
+# Move model to the device (GPU or CPU)
+model.to(device)
 
 # Set up conversational memory using LangChain's ConversationBufferMemory
 memory = ConversationBufferMemory()
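A note on the added loading code: torch.load() reads pickle checkpoints and will fail on a .safetensors file, and recent transformers releases do not accept from_pretrained(pretrained_model_name_or_path=None, config=...) as a loading path. A minimal alternative sketch, assuming the weights in ./model.safetensors (the path from the diff) were produced by save_pretrained() on a distilgpt2-sized model; the strict=False handling of the tied lm_head.weight is an assumption about how the file was saved:

import torch
from safetensors.torch import load_file
from transformers import GPT2Config, GPT2LMHeadModel

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build a distilgpt2-shaped model from its config alone (no weight download)
config = GPT2Config.from_pretrained("distilgpt2")
model = GPT2LMHeadModel(config)

# safetensors files are read with safetensors' own loader, not torch.load()
state_dict = load_file("./model.safetensors")  # path taken from the diff

# strict=False because save_pretrained() omits the tied lm_head.weight;
# it shares storage with transformer.wte.weight, which is loaded here
model.load_state_dict(state_dict, strict=False)

model.to(device)
model.eval()

If the Space repo also keeps a config.json next to the weights, GPT2LMHeadModel.from_pretrained(".") would load both in a single call.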
@@ -32,10 +32,6 @@ def chat_with_distilgpt2(input_text):
     # Retrieve conversation history
     conversation_history = memory.load_memory_variables({})['history']
 
-    # # Summarize if history exceeds certain length
-    # if len(conversation_history.split()) > 200:
-    # conversation_history = summarize_history(conversation_history)
-
     # Combine the (possibly summarized) history with the current user input
     full_input = f"{conversation_history}\nUser: {input_text}\nAssistant:"
 
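For context on the memory round-trip used above, a minimal sketch, assuming a classic langchain release that still exports ConversationBufferMemory (the save_context call that would populate the history is not visible in this hunk):

from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory()

# After each turn the app can store the exchange:
memory.save_context({"input": "Hi"}, {"output": "Hello! How can I help?"})

# 'history' comes back as one formatted string of "Human:"/"AI:" turns
print(memory.load_memory_variables({})["history"])
# Human: Hi
# AI: Hello! How can I help?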
@@ -50,9 +46,6 @@ def chat_with_distilgpt2(input_text):
         num_return_sequences=1,
         no_repeat_ngram_size=3,
         repetition_penalty=1.2,
-        # temperature=0.9,
-        # top_k=20,
-        # top_p=0.8,
         early_stopping=True,
         pad_token_id=tokenizer.eos_token_id,
         eos_token_id=tokenizer.eos_token_id
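The deleted temperature/top_k/top_p lines were already commented out, so this hunk is pure cleanup; those knobs only influence generate() when sampling is enabled. A hypothetical sketch of re-enabling them (the prompt text and length budget are assumptions; only the three values come from the deleted comments):

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
model = GPT2LMHeadModel.from_pretrained("distilgpt2")

input_ids = tokenizer.encode("User: Hi there!\nAssistant:", return_tensors="pt")
output_ids = model.generate(
    input_ids,
    max_length=60,    # assumed length budget, not from the diff
    do_sample=True,   # without this, temperature/top_k/top_p are ignored
    temperature=0.9,  # values taken from the deleted comments
    top_k=20,
    top_p=0.8,
    pad_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))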
@@ -79,3 +72,4 @@ interface = gr.Interface(
 interface.launch()
 
 
+