Spaces:

davebulaval
/

cole

Running

cole / src /language_model /huggingface_language_model_factory.py

8fa3acc 2 months ago

1.89 kB

	import torch
	from transformers import (
	AutoTokenizer,
	BitsAndBytesConfig,
	AutoModelForCausalLM,
	)
	from unsloth import FastLanguageModel


	def hugging_face_language_model_tokenizer_factory(
	model_name,
	huggingface_token: str,
	):
	if (
	"chocolatine" in model_name.lower()
	or "lucie" in model_name.lower()
	or "mixtral" in model_name.lower()
	):

	compute_dtype = getattr(torch, "bfloat16")
	bnb_configs = BitsAndBytesConfig(
	load_in_4bit=True,
	bnb_4bit_quant_type="nf4",
	bnb_4bit_compute_dtype=compute_dtype,
	bnb_4bit_use_double_quant=True,
	)
	if "chocolatine" in model_name.lower() or "mixtral" in model_name.lower():
	attn_implementation = "flash_attention_2"
	else:
	attn_implementation = "sdpa"
	model = AutoModelForCausalLM.from_pretrained(
	model_name,
	token=huggingface_token,
	quantization_config=bnb_configs,
	load_in_8bit=False, # Since we use 4bits
	trust_remote_code=True,
	attn_implementation=attn_implementation,
	torch_dtype=torch.float16,
	)
	if "chocolatine" in model_name.lower():
	extra_args = {"padding_side": "left"}
	else:
	extra_args = {}
	tokenizer = AutoTokenizer.from_pretrained(
	model_name, token=huggingface_token, **extra_args
	)

	if tokenizer.pad_token is None:
	tokenizer.pad_token_id = model.config.eos_token_id
	else:
	model, tokenizer = FastLanguageModel.from_pretrained(
	model_name,
	max_seq_length=4096,
	device_map="sequential",
	dtype=None,
	load_in_4bit=True,
	token=huggingface_token,
	)

	model.eval()
	return model, tokenizer