# inference.py
import json

import torch

from model import GPTModel, ScratchTokenizer

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize tokenizer.
tokenizer = ScratchTokenizer()

# The vocabulary must be rebuilt manually or loaded from a saved copy. On
# Hugging Face Spaces it is easiest to hardcode it or, as here, load a
# vocab that was saved earlier as JSON.
with open("vocab.json", "r") as f:
    vocab = json.load(f)
tokenizer.word2idx = vocab["word2idx"]
# JSON stores dict keys as strings, so restore the integer indices.
tokenizer.idx2word = {int(k): v for k, v in vocab["idx2word"].items()}
tokenizer.vocab_size = vocab["vocab_size"]
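
# For reference, a minimal sketch of how such a vocab.json could have been
# written at the end of training (assuming the tokenizer exposes the same
# word2idx / idx2word / vocab_size attributes used above; json.dump turns
# the integer idx2word keys into strings, hence the int(k) restoration on
# load):
#
#     with open("vocab.json", "w") as f:
#         json.dump(
#             {
#                 "word2idx": tokenizer.word2idx,
#                 "idx2word": tokenizer.idx2word,
#                 "vocab_size": tokenizer.vocab_size,
#             },
#             f,
#         )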

# Load model weights; map_location keeps CPU-only environments working.
model = GPTModel(vocab_size=tokenizer.vocab_size)
model.load_state_dict(torch.load("gpt_model.pth", map_location=device))
model.to(device)
model.eval()

# Generation function: greedy decoding, one token at a time.
def generate_response(query, max_length=200):
    src = torch.tensor(tokenizer.encode(query)).unsqueeze(0).to(device)
    tgt = torch.tensor([[1]], device=device)  # start with the <SOS> token (id 1)
    with torch.no_grad():  # no gradients needed at inference time
        for _ in range(max_length):
            output = model(src, tgt)
            # Pick the most likely token at the last position.
            next_word = output.argmax(-1)[:, -1].unsqueeze(1)
            tgt = torch.cat([tgt, next_word], dim=1)
            if next_word.item() == 2:  # stop at <EOS> (id 2)
                break
    return tokenizer.decode(tgt.squeeze(0).tolist())
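
# Example usage: a quick smoke test when this file is run directly. The
# prompt below is a hypothetical example; vocab.json and gpt_model.pth must
# sit next to this script.
if __name__ == "__main__":
    print(generate_response("Hello, how are you?"))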