TestDistilGPT2-FT

Sleeping

kdevoe committed on Oct 5, 2024

Commit

a1f6cc4

verified ·

1 Parent(s): 684c258

Load the model weights manually from the saved safetensors file to avoid the issue encountered when using from_pretrained

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import gradio as gr
-from transformers import GPT2Tokenizer, GPT2LMHeadModel, AutoModelForSeq2SeqLM, AutoTokenizer
 import torch
 from langchain.memory import ConversationBufferMemory
 # Move model to device (GPU if available)
@@ -9,17 +10,16 @@ device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cp
 # Load the tokenizer (use pre-trained tokenizer for GPT-2 family)
 tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
-# Load the fine-tuned model from the local safetensors file
-model_path = "./model.safetensors"  # Path to your local model file
-model = GPT2LMHeadModel.from_pretrained(
-    pretrained_model_name_or_path=None,  # None because it's not from a model name
-    config="distilgpt2",                 # Specify the config for distilgpt2
-    local_files_only=True,               # Only look for local files
-    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
-)
-# Load the safetensors weights
-model.load_state_dict(torch.load(model_path, map_location=device))
 # Move model to the device (GPU or CPU)
 model.to(device)
@@ -73,3 +73,4 @@ interface.launch()

 import gradio as gr
+from transformers import GPT2Tokenizer, GPT2LMHeadModel, AutoModelForSeq2SeqLM, AutoTokenizer, GPT2Config
 import torch
+from safetensors.torch import load_file as safetensors_load_file  # Import safetensors loading function
 from langchain.memory import ConversationBufferMemory
 # Move model to device (GPU if available)
 # Load the tokenizer (use pre-trained tokenizer for GPT-2 family)
 tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
+# Load the configuration for the model (DistilGPT2 is a smaller GPT-2)
+config = GPT2Config.from_pretrained("distilgpt2")
+# Initialize the model using the configuration
+model = GPT2LMHeadModel(config)
+# Load the weights from the safetensors file
+model_path = "./model.safetensors"  # Path to your local model file
+state_dict = safetensors_load_file(model_path)  # Use safetensors loader
+model.load_state_dict(state_dict)  # Load the state dict into the model
 # Move model to the device (GPU or CPU)
 model.to(device)