import math

import yaml
import gradio as gr
import huggingface_hub
import torch
import torch.nn as nn
import torch.nn.functional as F
mlp_config_path = huggingface_hub.hf_hub_download(
    "jefsnacker/surname_generator",
    "torch_mlp_config.yaml")
mlp_weights_path = huggingface_hub.hf_hub_download(
    "jefsnacker/surname_generator",
    "mlp_weights.pt")

wavenet_config_path = huggingface_hub.hf_hub_download(
    "jefsnacker/surname_generator",
    "wavenet_config.yaml")
wavenet_weights_path = huggingface_hub.hf_hub_download(
    "jefsnacker/surname_generator",
    "wavenet_weights.pt")

gpt_micro_config_path = huggingface_hub.hf_hub_download(
    "jefsnacker/surname_generator",
    "micro_gpt_config.yaml")
gpt_micro_weights_path = huggingface_hub.hf_hub_download(
    "jefsnacker/surname_generator",
    "micro_gpt_weights.pt")

gpt_rev_config_path = huggingface_hub.hf_hub_download(
    "jefsnacker/surname_generator",
    "rev_gpt_config.yaml")
gpt_rev_weights_path = huggingface_hub.hf_hub_download(
    "jefsnacker/surname_generator",
    "rev_gpt_weights.pt")

gpt_first_rev_config_path = huggingface_hub.hf_hub_download(
    "jefsnacker/surname_generator",
    "first_name_gpt_config.yaml")
gpt_first_rev_weights_path = huggingface_hub.hf_hub_download(
    "jefsnacker/surname_generator",
    "first_name_gpt_weights.pt")
with open(mlp_config_path, 'r') as file:
    mlp_config = yaml.safe_load(file)

with open(wavenet_config_path, 'r') as file:
    wavenet_config = yaml.safe_load(file)

with open(gpt_micro_config_path, 'r') as file:
    gpt_micro_config = yaml.safe_load(file)

with open(gpt_rev_config_path, 'r') as file:
    gpt_rev_config = yaml.safe_load(file)

with open(gpt_first_rev_config_path, 'r') as file:
    gpt_first_rev_config = yaml.safe_load(file)
##################################################################################
## MLP
##################################################################################
class MLP(nn.Module):
    def __init__(self, num_char, hidden_nodes, embeddings, window, num_layers):
        super(MLP, self).__init__()
        self.window = window
        self.hidden_nodes = hidden_nodes
        self.embeddings = embeddings

        # Character embedding table: one row of `embeddings` dims per character.
        self.C = nn.Parameter(torch.randn((num_char, embeddings)) * 0.1, requires_grad=True)
        self.first = nn.Linear(embeddings*window, hidden_nodes)

        self.layers = nn.Sequential()
        for i in range(num_layers):
            self.layers.extend(nn.Sequential(
                nn.Linear(hidden_nodes, hidden_nodes, bias=False),
                nn.BatchNorm1d(hidden_nodes),
                nn.Tanh()))

        self.final = nn.Linear(hidden_nodes, num_char)

    def forward(self, x):
        # x: (B, window) character indices -> (B, window, embeddings),
        # flattened into one vector per example for the first Linear.
        x = self.C[x]
        x = self.first(x.view(-1, self.window*self.embeddings))
        x = self.layers(x)
        x = self.final(x)
        return x

    def sample_char(self, x):
        logits = self(x)
        probs = F.softmax(logits, dim=1)
        return torch.multinomial(probs, num_samples=1).item()
mlp = MLP(mlp_config['num_char'],
          mlp_config['hidden_nodes'],
          mlp_config['embeddings'],
          mlp_config['window'],
          mlp_config['num_layers'])

mlp.load_state_dict(torch.load(mlp_weights_path))
mlp.eval()
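# Illustrative only (not used by the app): sample_char expects a (1, window)
# tensor of character indices and returns the sampled next-character index.
# Index 0 is the '.' terminator/padding token, as in generate_names() below.
#
#   context = torch.zeros((1, mlp_config['window']), dtype=torch.long)
#   next_ix = mlp.sample_char(context)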
##################################################################################
## WaveNet
##################################################################################
class WaveNet(nn.Module):
    def __init__(self, num_char, hidden_nodes, embeddings, window, num_layers):
        super(WaveNet, self).__init__()
        self.window = window
        self.hidden_nodes = hidden_nodes
        self.embeddings = embeddings

        self.layers = nn.Sequential(
            nn.Embedding(num_char, embeddings)
        )

        # The embedding output is (B, window, embeddings), so Conv1d treats the
        # window positions as channels and convolves along the embedding axis.
        # Each kernel_size=2 conv shrinks that axis by one, which is why the
        # final Linear sees hidden_nodes*(embeddings - num_layers) features.
        for i in range(num_layers):
            if i == 0:
                nodes = window
            else:
                nodes = hidden_nodes

            self.layers.extend(nn.Sequential(
                nn.Conv1d(nodes, hidden_nodes, kernel_size=2, stride=1, bias=False),
                nn.BatchNorm1d(hidden_nodes),
                nn.Tanh()))

        self.layers.extend(nn.Sequential(
            nn.Flatten(),
            nn.Linear(hidden_nodes*(embeddings-num_layers), num_char)
        ))

    def forward(self, x):
        return self.layers(x)

    def sample_char(self, x):
        logits = self(x)
        probs = F.softmax(logits, dim=1)
        return torch.multinomial(probs, num_samples=1).item()
wavenet = WaveNet(wavenet_config['num_char'],
                  wavenet_config['hidden_nodes'],
                  wavenet_config['embeddings'],
                  wavenet_config['window'],
                  wavenet_config['num_layers'])

wavenet.load_state_dict(torch.load(wavenet_weights_path))
wavenet.eval()
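# eval() matters for these models: it switches the BatchNorm layers (and any
# dropout) to inference mode, so sampling uses the running statistics gathered
# during training rather than per-batch statistics.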
##################################################################################
## Transformer
##################################################################################
class NewGELU(nn.Module):
    """
    Gaussian Error Linear Units (GELU) paper: https://arxiv.org/abs/1606.08415
    """
    def forward(self, x):
        return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))))
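# Note: this is the tanh approximation of GELU; in recent PyTorch the same
# curve is available directly as F.gelu(x, approximate='tanh').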
class GptAttention(nn.Module):
    """
    For this attention module the queries, keys, and values are all computed
    from the same input (self-attention). It's for decoder-only (causally
    masked) transformers.
    """
    def __init__(self, config):
        super(GptAttention, self).__init__()
        self.config = config
        assert self.config["d_model"] % self.config["heads"] == 0
        self.heads = self.config["heads"]

        self.w_attn = nn.Linear(self.config["d_model"], 3*self.config["d_model"])
        self.head = nn.Linear(self.config["d_model"], self.config["d_model"])

        self.attn_dropout = nn.Dropout(config["attn_pdrop"])
        self.resid_dropout = nn.Dropout(config["resid_pdrop"])

        # causal mask to ensure that attention is only applied to the left in the input sequence
        self.register_buffer(
            "bias",
            torch.tril(
                torch.ones(
                    self.config["window"],
                    self.config["window"])
            ).view(1, 1, self.config["window"], self.config["window"])
        )

    def forward(self, x):
        B, window, embs = x.shape

        # Note the q, v, k assignment order: it must match how the checkpoint was trained.
        q, v, k = self.w_attn(x).split(self.config["d_model"], dim=2)

        # reshape each to (B, heads, window, head_dim)
        q = q.view(
            B,
            window,
            self.config["heads"],
            embs // self.config["heads"]
        ).transpose(1, 2)

        k = k.view(
            B,
            window,
            self.config["heads"],
            embs // self.config["heads"]
        ).transpose(1, 2)

        v = v.view(
            B,
            window,
            self.config["heads"],
            embs // self.config["heads"]
        ).transpose(1, 2)

        # Self-attend: (B, heads, window, head_dim) x (B, heads, head_dim, window) -> (B, heads, window, window)
        scores = q @ k.transpose(-2, -1) / math.sqrt(k.size(-1))
        masked = scores.masked_fill(self.bias[:,:,:window,:window] == 0, float('-inf'))
        probs = F.softmax(masked, dim=-1)
        probs = self.attn_dropout(probs)

        attn = probs @ v
        attn = attn.transpose(1, 2).contiguous().view(B, window, embs)
        return self.resid_dropout(self.head(attn))
class FeedForward(nn.Module):
    def __init__(self, config):
        super(FeedForward, self).__init__()
        self.l1 = nn.Linear(config["d_model"], 4*config["d_model"])
        self.l2 = nn.Linear(4*config["d_model"], config["d_model"])
        self.gelu = NewGELU()
        self.dropout = nn.Dropout(config["resid_pdrop"])

    def forward(self, x):
        x = self.gelu(self.l1(x))
        return self.dropout(self.l2(x))
class Block(nn.Module):
    def __init__(self, config):
        super(Block, self).__init__()
        self.attn = GptAttention(config)
        self.norm1 = nn.LayerNorm(config["d_model"])
        self.ff = FeedForward(config)
        self.norm2 = nn.LayerNorm(config["d_model"])

    def forward(self, x):
        x = self.norm1(x + self.attn(x))
        x = self.norm2(x + self.ff(x))
        return x
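# Note: each LayerNorm runs after the residual add ("post-norm", as in the
# original Transformer paper), not before each sublayer as in GPT-2's
# "pre-norm" blocks.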
class GPT(nn.Module):
    def __init__(self, config):
        super(GPT, self).__init__()
        self.config = config

        self.vocab_emb = nn.Embedding(self.config["vocab"], self.config["d_model"])
        self.pos_emb = nn.Embedding(self.config["window"], self.config["d_model"])
        self.emb_dropout = nn.Dropout(config["embd_pdrop"])

        self.blocks = nn.ModuleList([Block(self.config) for _ in range(self.config["blocks"])])

        self.head_layer_norm = nn.LayerNorm(config["d_model"])
        self.head = nn.Linear(self.config["d_model"], self.config["vocab"])

    def forward(self, x):
        vocab_emb = self.vocab_emb(x)
        pos_emb = self.pos_emb(torch.arange(0, x.shape[1], dtype=torch.long, device=x.device))
        x = self.emb_dropout(vocab_emb + pos_emb)

        for b in self.blocks:
            x = b(x)

        x = self.head_layer_norm(x)
        x = self.head(x)
        return x

    def configure_opt(self):
        p_decay = set()
        p_no_decay = set()
        whitelist_weight_modules = (torch.nn.Linear, )
        blacklist_weight_modules = (torch.nn.LayerNorm, torch.nn.Embedding)
        for mn, m in self.named_modules():
            for pn, p in m.named_parameters():
                fpn = '%s.%s' % (mn, pn) if mn else pn # full param name
                # random note: because named_modules and named_parameters are recursive
                # we will see the same tensors p many many times. but doing it this way
                # allows us to know which parent module any tensor p belongs to...
                if pn.endswith('bias'):
                    # all biases will not be decayed
                    p_no_decay.add(fpn)
                elif pn.endswith('weight') and isinstance(m, whitelist_weight_modules):
                    # weights of whitelist modules will be weight decayed
                    p_decay.add(fpn)
                elif pn.endswith('weight') and isinstance(m, blacklist_weight_modules):
                    # weights of blacklist modules will NOT be weight decayed
                    p_no_decay.add(fpn)

        # validate that we considered every parameter
        param_dict = {pn: p for pn, p in self.named_parameters()}
        inter_params = p_decay & p_no_decay
        union_params = p_decay | p_no_decay
        assert len(inter_params) == 0, "parameters %s made it into both decay/no_decay sets!" % (str(inter_params), )
        assert len(param_dict.keys() - union_params) == 0, "parameters %s were not separated into either decay/no_decay set!" \
            % (str(param_dict.keys() - union_params), )

        # create the pytorch optimizer object
        optim_groups = [
            {"params": [param_dict[pn] for pn in sorted(list(p_decay))], "weight_decay": self.config["weight_decay"]},
            {"params": [param_dict[pn] for pn in sorted(list(p_no_decay))], "weight_decay": 0.0},
        ]
        optimizer = torch.optim.AdamW(
            optim_groups,
            lr=self.config["lr"],
            betas=(self.config["b1"], self.config["b2"])
        )
        return optimizer

    def sample_char(self, x):
        # Sample from the predicted distribution at the last position only.
        logits = self(x)
        probs = F.softmax(logits[:,-1,:], dim=1)
        return torch.multinomial(probs, num_samples=1).item()
gpt_micro = GPT(gpt_micro_config)
gpt_micro.load_state_dict(torch.load(gpt_micro_weights_path))
gpt_micro.eval()

gpt_rev = GPT(gpt_rev_config)
gpt_rev.load_state_dict(torch.load(gpt_rev_weights_path))
gpt_rev.eval()

gpt_first_rev = GPT(gpt_first_rev_config)
gpt_first_rev.load_state_dict(torch.load(gpt_first_rev_weights_path))
gpt_first_rev.eval()
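# Illustrative only: the GPT models consume the same fixed-size context window
# of character indices as the MLP and WaveNet, e.g.
#
#   context = torch.zeros((1, gpt_rev_config['window']), dtype=torch.long)
#   next_ix = gpt_rev.sample_char(context)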
##################################################################################
## Gradio App
##################################################################################
def generate_names(name_start, name_end, number_of_names, model):
    if number_of_names < 0:
        return "Error: Please enter a non-negative number of names to generate!"

    # Select model
    if model == "MLP":
        config = mlp_config
        sample_fcn = mlp.sample_char
    elif model == "WaveNet":
        config = wavenet_config
        sample_fcn = wavenet.sample_char
    elif model == "GPT Micro":
        config = gpt_micro_config
        sample_fcn = gpt_micro.sample_char
    elif model == "GPT Rev":
        config = gpt_rev_config
        sample_fcn = gpt_rev.sample_char
    elif model == "GPT First Rev":
        config = gpt_first_rev_config
        sample_fcn = gpt_first_rev.sample_char
    else:
        return "Error: Model not selected"

    stoi = config['stoi']
    itos = {i: s for s, i in stoi.items()}

    output = ""

    # Sanitize user inputs, and append warnings to the output
    name_end = name_end.lower()
    name_start = name_start.lower()

    for c in name_end:
        if c not in stoi:
            return "Please change the name end: \"" + c + "\" is not included in the training set."

    for c in name_start:
        if c not in stoi:
            return "Please change the name start: \"" + c + "\" is not included in the training set."

    if "num_final_chars_in_dataset" in config and len(name_end) > config["num_final_chars_in_dataset"]:
        name_end = name_end[-config["num_final_chars_in_dataset"]:]
        output += "Only accepts up to " + str(config["num_final_chars_in_dataset"]) + " final chars. Using: " + str(name_end) + "\n"
    elif "num_final_chars_in_dataset" not in config and name_end != "":
        output += "Final chars are ignored by this model. Use a \"Rev\" model trained with this feature.\n"

    ## Generate the requested names
    for _ in range(int(number_of_names)):
        name = ""
        context = [0] * config['window']

        # For "Rev" models, prime the context with the desired ending followed
        # by the '.' separator.
        if "num_final_chars_in_dataset" in config:
            for c in name_end:
                context = context[1:] + [stoi[c]]
            context = context[1:] + [stoi['.']]

        # Initialize name with user input
        for c in name_start:
            name += c
            context = context[1:] + [stoi[c]]

        # Run inference to finish off the name
        while True:
            x = torch.tensor(context).view(1, -1)
            ix = sample_fcn(x)
            context = context[1:] + [ix]
            name += itos[ix]

            if ix == 0:
                break

        output += name + "\n"

    return output
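# Illustrative only: the same function can be called without the UI, e.g.
#   print(generate_names("ka", "", 3, "GPT Micro"))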
demo = gr.Interface(
    fn=generate_names,
    inputs=[
        gr.Textbox(placeholder="Start name with..."),
        gr.Textbox(placeholder="End name with... (only works for rev model)"),
        gr.Number(value=5),
        gr.Dropdown(["MLP", "WaveNet", "GPT Micro", "GPT Rev", "GPT First Rev"], value="GPT Rev"),
    ],
    outputs="text",
)

demo.launch()