import gradio as gr
import torch
import torch.nn as nn
from transformers import AutoTokenizer

# Device and tokenizer setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

MAX_SEQ_LEN = 128  # context window the model was trained with


# Define the SmolLM model (a simplified Transformer standing in for SmolLM2-135M).
# The class definition must be available so torch.load can unpickle the checkpoint.
class SmolLM(nn.Module):
    def __init__(self, vocab_size, embed_dim, num_heads, num_layers, max_seq_len):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        # Learned positional embeddings, one vector per position up to max_seq_len
        self.pos_embedding = nn.Parameter(torch.zeros(1, max_seq_len, embed_dim))
        self.layers = nn.ModuleList([
            nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads, batch_first=True)
            for _ in range(num_layers)
        ])
        self.fc_out = nn.Linear(embed_dim, vocab_size)

    def forward(self, x):
        seq_len = x.size(1)
        x = self.embedding(x) + self.pos_embedding[:, :seq_len, :]
        for layer in self.layers:
            x = layer(x)
        return self.fc_out(x)  # (batch, seq_len, vocab_size) logits


def load_model():
    checkpoint_path = "final_checkpoint.pth"
    # The checkpoint stores the full model object (not just a state_dict),
    # so weights_only must be False.
    model = torch.load(checkpoint_path, map_location=device, weights_only=False)
    model = model.to(device)
    model.eval()  # evaluation mode: disables dropout etc.
    for param in model.parameters():
        param.requires_grad = False  # inference only; no gradients needed
    return model


model = load_model()


@torch.no_grad()
def generate_text(prompt, max_length=100, num_samples=1, temperature=0.8):
    """Autoregressively sample max_length new tokens, num_samples times."""
    samples = []
    for _ in range(int(num_samples)):
        input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
        for _ in range(int(max_length)):
            # Keep only the most recent tokens that fit in the context window
            context = input_ids[:, -MAX_SEQ_LEN:]
            # Take the logits for the last position and apply temperature scaling
            logits = model(context)[:, -1, :] / max(temperature, 1e-5)
            probs = torch.softmax(logits, dim=-1)
            next_token = torch.multinomial(probs, num_samples=1)
            input_ids = torch.cat([input_ids, next_token], dim=1)
        samples.append(tokenizer.decode(input_ids[0], skip_special_tokens=True))
    return "\n\n---\n\n".join(samples)


# Create Gradio interface
iface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Prompt", value="Good night, good night! Parting is such sweet sorrow"),
        gr.Slider(minimum=10, maximum=200, value=100, step=1, label="Max Length"),
        gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Number of Samples"),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="Shakespeare Text Generator",
    description="Enter a prompt and the model will continue it in the style of Shakespeare.",
    examples=[
        ["There are more things in heaven and earth, Horatio, than are dreamt of in your philosophy.", 100, 1],
        ["Love all, trust a few, do wrong to none.", 60, 2],
        ["It's not enough to speak, but to speak true", 50, 3],
        ["To be, or not to be: that is the question.", 100, 1],
        ["If you can look into the seeds of time, and say which grain will grow and which will not, speak then to me", 100, 1],
        ["Love sought is good, but given unsought is better.", 100, 1],
    ],
)

if __name__ == "__main__":
    iface.launch()