import torch
from safetensors.torch import load_file
from transformers import AutoModelForCausalLM, AutoTokenizer

from config import DEVICE, MODEL_LIST

|
def load_model(model_name):
    """
    Load a model and tokenizer efficiently with memory optimization.

    Supports:
    - Hugging Face Hub repo IDs
    - local .safetensors weight files (loaded into the GPT-2 architecture)

    Optimizations:
    - FP16 weights on load
    - CPU offloading if GPU memory is low
    """
    try:
        if model_name.endswith(".safetensors"):
            print(f"[INFO] Loading safetensor model: {model_name}")
            # A bare weight file carries no tokenizer or config, so reuse GPT-2's.
            tokenizer = AutoTokenizer.from_pretrained("gpt2")
            model = AutoModelForCausalLM.from_pretrained(
                "gpt2",
                state_dict=load_file(model_name),
                device_map="auto",
                torch_dtype=torch.float16,
            )
        else:
            print(f"[INFO] Loading Hugging Face model: {model_name}")
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                device_map="auto",
                torch_dtype=torch.float16,
            )
    except RuntimeError as e:
        print(f"[WARN] GPU memory insufficient, switching to CPU offload. {e}")

        from accelerate import init_empty_weights, load_checkpoint_and_dispatch
        from huggingface_hub import snapshot_download
        from transformers import AutoConfig

        # A local .safetensors file has no config of its own, so fall back to
        # GPT-2's config and tokenizer, matching the branch above. Hub repos
        # must be materialized locally first, because
        # load_checkpoint_and_dispatch expects a path, not a repo ID.
        if model_name.endswith(".safetensors"):
            base, checkpoint = "gpt2", model_name
        else:
            base, checkpoint = model_name, snapshot_download(model_name)

        config = AutoConfig.from_pretrained(base)
        # Build the model skeleton without allocating weight memory, then
        # stream the checkpoint weights directly onto the CPU.
        with init_empty_weights():
            model = AutoModelForCausalLM.from_config(config)
        model = load_checkpoint_and_dispatch(
            model,
            checkpoint,
            device_map={"": "cpu"},
            no_split_module_classes=model._no_split_modules,
        )
        tokenizer = AutoTokenizer.from_pretrained(base)

    # Models loaded with a device_map are already placed by accelerate;
    # calling .to() on a dispatched model raises an error in recent
    # transformers versions, so only move models that were not dispatched.
    if not hasattr(model, "hf_device_map"):
        model.to(DEVICE)
    return tokenizer, model
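

# Minimal usage sketch. The model name below is illustrative; any causal-LM
# repo ID or local .safetensors path accepted by load_model() works here.
# The prompt and generation settings are assumptions, not project defaults.
if __name__ == "__main__":
    tokenizer, model = load_model("gpt2")
    inputs = tokenizer("Hello, world", return_tensors="pt").to(model.device)
    output_ids = model.generate(**inputs, max_new_tokens=20)
    print(tokenizer.decode(output_ids[0], skip_special_tokens=True))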