# ChatGPT-Tune / model_loader.py
# prelington's picture
# Update model_loader.py
# e71c280 verified
# model_loader.py
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from safetensors.torch import load_file
from config import DEVICE, MODEL_LIST
def load_model(model_name):
    """
    Load a causal-LM model and tokenizer with memory optimization.

    Supports:
        - Hugging Face repo IDs (e.g. ``"gpt2"``)
        - Local ``.safetensors`` weight files (loaded into a GPT-2 skeleton)

    Optimizations:
        - half precision (bf16 on CUDA when supported, else fp16; fp32 on CPU,
          where fp16 kernels are slow or unsupported)
        - automatic layer placement via ``device_map="auto"``
        - CPU-offload fallback when GPU memory is insufficient

    Args:
        model_name: Hugging Face repo ID, or a path ending in ``.safetensors``.

    Returns:
        tuple: ``(tokenizer, model)``.

    Raises:
        OSError: if the repo/file cannot be found or downloaded.
    """
    is_safetensors = model_name.endswith(".safetensors")
    # Local .safetensors files carry no tokenizer/config of their own;
    # fall back to the GPT-2 assets in that case (matches the load path below).
    hub_src = "gpt2" if is_safetensors else model_name

    # BUG FIX: dtype was hard-coded to float16 even on CPU. Pick bf16 on
    # capable GPUs, fp16 otherwise, and plain fp32 when no CUDA is present.
    if torch.cuda.is_available():
        dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
    else:
        dtype = torch.float32

    try:
        tokenizer = AutoTokenizer.from_pretrained(hub_src)
        if is_safetensors:
            print(f"[INFO] Loading safetensor model: {model_name}")
            model = AutoModelForCausalLM.from_pretrained(
                "gpt2",
                state_dict=load_file(model_name),
                device_map="auto",  # automatically places layers on GPU/CPU
                torch_dtype=dtype,
            )
        else:
            print(f"[INFO] Loading Hugging Face model: {model_name}")
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                device_map="auto",
                torch_dtype=dtype,
            )
    except RuntimeError as e:
        print(f"[WARN] GPU memory insufficient, switching to CPU offload. {e}")
        # Deferred imports: accelerate is only needed on this fallback path.
        from accelerate import init_empty_weights, load_checkpoint_and_dispatch
        from transformers import AutoConfig

        # BUG FIX: the fallback previously used `model_name` as the
        # config/tokenizer source even for local .safetensors paths,
        # which would fail to resolve on the Hub.
        config = AutoConfig.from_pretrained(hub_src)
        with init_empty_weights():
            model = AutoModelForCausalLM.from_config(config)
        model = load_checkpoint_and_dispatch(
            model,
            model_name,
            device_map={"": "cpu"},
            no_split_module_classes=["GPT2Block"],
        )
        tokenizer = AutoTokenizer.from_pretrained(hub_src)
    else:
        # BUG FIX: `.to(DEVICE)` was called unconditionally. Calling `.to()`
        # on an accelerate-dispatched model raises, and on the CPU-offload
        # fallback it would move the model back to GPU, defeating the
        # fallback. Only move models that were NOT dispatched across devices.
        if not getattr(model, "hf_device_map", None):
            model.to(DEVICE)
    return tokenizer, model