# inference.py
import json

import torch

from model import GPTModel, ScratchTokenizer

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize tokenizer.
tokenizer = ScratchTokenizer()

# The vocabulary must be rebuilt manually or loaded from a saved copy. On
# Hugging Face Spaces it is easiest to hardcode it or, as here, load a
# vocab that was saved earlier as JSON.
with open("vocab.json", "r") as f:
    vocab = json.load(f)
tokenizer.word2idx = vocab["word2idx"]
# JSON stores dict keys as strings, so restore the integer indices.
tokenizer.idx2word = {int(k): v for k, v in vocab["idx2word"].items()}
tokenizer.vocab_size = vocab["vocab_size"]
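
# For reference, a minimal sketch of how such a vocab.json could have been
# written at the end of training (assuming the tokenizer exposes the same
# word2idx / idx2word / vocab_size attributes used above; json.dump turns
# the integer idx2word keys into strings, hence the int(k) restoration on
# load):
#
#     with open("vocab.json", "w") as f:
#         json.dump(
#             {
#                 "word2idx": tokenizer.word2idx,
#                 "idx2word": tokenizer.idx2word,
#                 "vocab_size": tokenizer.vocab_size,
#             },
#             f,
#         )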

# Load model weights; map_location keeps CPU-only environments working.
model = GPTModel(vocab_size=tokenizer.vocab_size)
model.load_state_dict(torch.load("gpt_model.pth", map_location=device))
model.to(device)
model.eval()

# Generation function: greedy decoding, one token at a time.
def generate_response(query, max_length=200):
    src = torch.tensor(tokenizer.encode(query)).unsqueeze(0).to(device)
    tgt = torch.tensor([[1]], device=device)  # start with the <SOS> token (id 1)
    with torch.no_grad():  # no gradients needed at inference time
        for _ in range(max_length):
            output = model(src, tgt)
            # Pick the most likely token at the last position.
            next_word = output.argmax(-1)[:, -1].unsqueeze(1)
            tgt = torch.cat([tgt, next_word], dim=1)
            if next_word.item() == 2:  # stop at <EOS> (id 2)
                break
    return tokenizer.decode(tgt.squeeze(0).tolist())
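
# Example usage: a quick smoke test when this file is run directly. The
# prompt below is a hypothetical example; vocab.json and gpt_model.pth must
# sit next to this script.
if __name__ == "__main__":
    print(generate_response("Hello, how are you?"))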