Spaces:

sunil448832
/

retrieval-augment-generation

Runtime error

App Files Files Community

retrieval-augment-generation / models /llms.py

sunil448832

Initial Commit

eccde2c over 2 years ago

raw

history blame contribute delete

2.24 kB

	from transformers import AutoModelForCausalLM, AutoTokenizer
	import torch

	# Define a Language Model class
	class LLM:
	    """Wrap a Hugging Face causal language model and its tokenizer.

	    Attributes:
	        model: The model returned by ``AutoModelForCausalLM.from_pretrained``.
	        tokenizer: The tokenizer loaded for the same checkpoint, configured
	            to pad on the left with the "[PAD]" token.
	    """

	    def __init__(self, model_name):
	        """Load the model and tokenizer identified by ``model_name``.

	        Args:
	            model_name: Checkpoint name or path passed unchanged to
	                ``from_pretrained`` for both the model and the tokenizer.
	        """
	        if torch.cuda.is_available():
	            # GPU path: 8-bit weights with automatic device placement.
	            # NOTE(review): casting an 8-bit model with .bfloat16() may not be
	            # supported by every transformers version -- confirm.
	            self.model = AutoModelForCausalLM.from_pretrained(
	                model_name,
	                torch_dtype=torch.float16,
	                load_in_8bit=True,
	                device_map='auto',
	            ).bfloat16()
	        else:
	            # CPU fallback: 8-bit loading depends on a CUDA device, so load
	            # unquantized weights directly in bfloat16 instead.
	            self.model = AutoModelForCausalLM.from_pretrained(
	                model_name,
	                torch_dtype=torch.bfloat16,
	            )

	        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

	        # Left padding keeps the prompt adjacent to the generated tokens.
	        # NOTE(review): "[PAD]" may not exist in the checkpoint's vocabulary;
	        # verify that pad_token_id resolves to the intended id.
	        self.tokenizer.pad_token = "[PAD]"
	        self.tokenizer.padding_side = "left"

	    def generate_response(self, messages, max_tokens=100, do_sample=True):
	        """Generate a continuation and return the first decoded sequence.

	        Args:
	            messages: Text (or batch of texts) handed to the tokenizer.
	                Inputs are padded and truncated to at most 512 tokens.
	            max_tokens: Maximum number of new tokens to generate.
	            do_sample: Sample when True (temperature 0.3); otherwise the
	                temperature setting is not sent to ``generate``.

	        Returns:
	            The decoded text of the first generated sequence, with special
	            tokens kept (``skip_special_tokens=False``).
	        """
	        encoded = self.tokenizer(
	            messages,
	            max_length=512,
	            padding=True,
	            truncation=True,
	            return_tensors='pt',
	        )

	        # Follow the model's own device instead of assuming CUDA, so the
	        # same code path works on CPU-only hosts.
	        target_device = self.model.device
	        input_ids = encoded.input_ids.to(target_device)
	        attention_mask = encoded.attention_mask.to(target_device)

	        generation_kwargs = {
	            # Explicit mask so padded positions are ignored during generation.
	            'attention_mask': attention_mask,
	            'pad_token_id': self.tokenizer.pad_token_id,
	            'max_new_tokens': max_tokens,
	            'do_sample': do_sample,
	        }
	        if do_sample:
	            # Temperature only applies when sampling.
	            generation_kwargs['temperature'] = 0.3

	        with torch.no_grad():
	            generated_ids = self.model.generate(input_ids, **generation_kwargs)

	        decoded = self.tokenizer.batch_decode(
	            generated_ids, skip_special_tokens=False
	        )
	        return decoded[0]

	# Script entry point: runs only when this file is executed directly.
	if __name__ == '__main__':
	    # Checkpoint to load for the model and its tokenizer.
	    model_name = "mistralai/Mistral-7B-Instruct-v0.1"

	    # Build the wrapper; this loads the model weights and the tokenizer.
	    llm = LLM(model_name=model_name)