import os

import gradio as gr
import tiktoken
import torch
from torch.nn import functional as F

from model import GPTConfig, GPT
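# GPTConfig and GPT come from the local model.py, assumed here to be the
# nanoGPT-style model definition that this checkpoint was trained with.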
# Pick the best available device: CUDA, then Apple MPS, then CPU.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'
elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
    device = "mps"
print(f"using device: {device}")
# Paths and generation defaults.
modelpath = '.'
max_length = 250  # fallback number of tokens to generate

# GPT-2 BPE tokenizer from tiktoken.
enc = tiktoken.get_encoding('gpt2')

# Load the trained checkpoint onto the selected device.
ckpt_path = os.path.join(modelpath, 'model.pt')
checkpoint = torch.load(ckpt_path, map_location=device)
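# The checkpoint is expected to follow the nanoGPT training-script layout:
# 'model_args' holds the GPTConfig keyword arguments and 'model' holds the
# state dict (an assumption based on how the keys are used below).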
# Rebuild the model from the saved config and weights.
gptconf = GPTConfig(**checkpoint['model_args'])
model = GPT(gptconf)
state_dict = checkpoint['model']
# Checkpoints saved from a torch.compile()'d model prefix every key with
# '_orig_mod.'; strip it so the keys match the plain module.
unwanted_prefix = '_orig_mod.'
for k, v in list(state_dict.items()):
    if k.startswith(unwanted_prefix):
        state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
model.load_state_dict(state_dict)
model.eval()  # inference only: disable dropout
model.to(device)
model = torch.compile(model)
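# Note: torch.compile requires PyTorch 2.0+. If the runtime were older, a
# guarded call would be a safer sketch (an assumption about the environment,
# not part of the original script):
#     if hasattr(torch, "compile"):
#         model = torch.compile(model)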
def generateText(inputText, num_tokens=500):
    # Encode the prompt with the GPT-2 tokenizer and shape it as a (1, T) batch.
    start_tokens = enc.encode(inputText)
    start_tokens = torch.tensor(start_tokens, dtype=torch.long)
    x = start_tokens.view(1, len(start_tokens))
    x = x.to(device)
    # The slider value (num_tokens) sets the total sequence length to generate;
    # fall back to max_length if nothing sensible was passed. Requests longer
    # than the model's context window may fail in the forward pass.
    total_length = int(num_tokens) if num_tokens else max_length
    while x.size(1) < total_length:
        # forward the model to get the logits
        with torch.no_grad():
            logits = model(x)[0]  # (B, T, vocab_size)
        # take the logits at the last position
        logits = logits[:, -1, :]  # (B, vocab_size)
        # get the probabilities
        probs = F.softmax(logits, dim=-1)
        # do top-k sampling of 50 (huggingface pipeline default)
        # topk_probs and topk_indices are both (B, 50)
        topk_probs, topk_indices = torch.topk(probs, 50, dim=-1)
        # select a token from the top-k probabilities
        # note: multinomial does not demand the input to sum to 1
        ix = torch.multinomial(topk_probs, 1)  # (B, 1)
        # gather the corresponding indices
        xcol = torch.gather(topk_indices, -1, ix)  # (B, 1)
        # append to the sequence
        x = torch.cat((x, xcol), dim=1)
    tokens = x[0, :total_length].tolist()
    decoded = enc.decode(tokens)
    return decoded
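# A minimal local smoke test (a sketch, not part of the Space's normal flow):
# the prompt string and the SMOKE_TEST environment variable are illustrative
# choices, and the Gradio app below is unaffected because this block only runs
# when the variable is set.
if os.environ.get("SMOKE_TEST") == "1":
    print(generateText("ROMEO:", num_tokens=120))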
| title = "Training GPT-2 from scratch on TinyShakespeare dataset" | |
| demo = gr.Interface( | |
| generateText, | |
| inputs = [ | |
| gr.Textbox(label="Enter intital text"), | |
| gr.Slider(100, 2000, value = 500, step=100, label="Maximum number od characters"), | |
| ], | |
| outputs = [ | |
| gr.Text(), | |
| ], | |
| title = title | |
| ) | |
| demo.launch() |
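# When running locally, demo.launch(share=True) would also expose a temporary
# public URL (optional; not needed on Hugging Face Spaces).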