Spaces:

alon-albalak
/

collaborative-decoding

Sleeping

collaborative-decoding / src /utils /model_loader.py

Alon Albalak

major update: all data saved on HF (prompts, results), unified utilities

57be184 3 months ago

1.29 kB

	"""Unified model loading and device management"""

	import os
	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM

	# Set TOKENIZERS_PARALLELISM to "false" in this process's environment as
	# soon as the module is imported.
	# NOTE(review): presumably this disables the Hugging Face tokenizers
	# library's internal parallelism (and its fork-related warning) — confirm.
	os.environ["TOKENIZERS_PARALLELISM"] = "false"

	class ModelLoader:
	    """Detect the compute device and load a causal LM with its tokenizer.

	    Both helpers are static methods, so the class acts as a namespace and
	    never needs to be instantiated.
	    """

	    @staticmethod
	    def get_device_and_dtype():
	        """Pick the device name and torch dtype to load the model with.

	        Returns:
	            A ``(device, dtype)`` tuple: ``("cuda", torch.float16)`` when
	            CUDA is available, otherwise ``("mps", torch.float16)`` when the
	            MPS backend is available, otherwise ``("cpu", torch.float32)``.
	        """
	        if torch.cuda.is_available():
	            return "cuda", torch.float16

	        # Look the MPS backend up defensively so that a torch build which
	        # does not expose ``torch.backends.mps`` falls through to the CPU
	        # branch instead of raising AttributeError.
	        mps_backend = getattr(torch.backends, "mps", None)
	        if mps_backend is not None and mps_backend.is_available():
	            return "mps", torch.float16

	        return "cpu", torch.float32

	    @staticmethod
	    def load_model_and_tokenizer(model_name="meta-llama/Llama-3.2-1B-Instruct"):
	        """Load a causal language model and its tokenizer.

	        The device and dtype come from ``get_device_and_dtype``; the model is
	        loaded with that dtype and then moved to that device.

	        Args:
	            model_name: Identifier handed to both
	                ``AutoTokenizer.from_pretrained`` and
	                ``AutoModelForCausalLM.from_pretrained``.

	        Returns:
	            A ``(model, tokenizer, device, dtype)`` tuple.
	        """
	        device, dtype = ModelLoader.get_device_and_dtype()

	        tokenizer = AutoTokenizer.from_pretrained(model_name)
	        # Fall back to the EOS token for padding when the tokenizer does not
	        # define a pad token of its own.
	        if tokenizer.pad_token is None:
	            tokenizer.pad_token = tokenizer.eos_token

	        model = AutoModelForCausalLM.from_pretrained(
	            model_name,
	            dtype=dtype,
	            low_cpu_mem_usage=True,
	        )
	        model = model.to(device)

	        return model, tokenizer, device, dtype