"""Gradio chat demo for a LoRA-fine-tuned GPT-2 that writes Shakespeare-style text.

Loads the base ``gpt2`` checkpoint, applies the LoRA adapter published at
``WalidEAGLE/lora-finetuned-gpt2-shakespeare``, and serves a chat UI whose
replies are revealed word by word.
"""

from collections.abc import Iterator

import gradio as gr
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

BASE_MODEL_ID = "gpt2"
ADAPTER_ID = "WalidEAGLE/lora-finetuned-gpt2-shakespeare"

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the base model first, then wrap it with the LoRA adapter weights.
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID).to(device)
lora_model = PeftModel.from_pretrained(base_model, ADAPTER_ID)
lora_model.to(device)
lora_model.eval()  # evaluation mode: this script only runs inference

# The tokenizer is loaded from the adapter repository, not from the base model.
tokenizer = AutoTokenizer.from_pretrained(ADAPTER_ID)


def chat_stream(prompt: str, history: list) -> Iterator[str]:
    """Generate a reply for *prompt* and yield it progressively.

    The whole completion is generated first; the decoded text is then
    replayed one whitespace-separated word at a time so the UI shows it
    growing.  The decoded text comes from the full output sequence, so it
    starts with the prompt itself followed by the model's continuation.

    Args:
        prompt: The user's latest message.
        history: Previous chat turns supplied by ``gr.ChatInterface``.
            Unused: every reply is generated from ``prompt`` alone.

    Yields:
        The reply accumulated so far, one more word on each step (each
        partial string ends with a single trailing space).
    """
    # An empty prompt tokenizes to zero tokens, which gives generate()
    # nothing to condition on; answer with a hint instead of failing.
    if not prompt:
        yield "Please enter a prompt."
        return

    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = lora_model.generate(
            **inputs,
            max_new_tokens=100,
            do_sample=True,
            temperature=0.7,
            # Reuse EOS as the padding id so generate() has one to work with.
            pad_token_id=tokenizer.eos_token_id,
        )

    full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Simulated streaming: split() collapses all whitespace (including line
    # breaks) and each yield extends the previous partial reply by one word.
    response = ""
    for word in full_text.split():
        response += word + " "
        yield response


# Create Gradio interface
demo = gr.ChatInterface(
    fn=chat_stream,
    title="🎭 Shakespearean GPT-2 Chatbot",
    description="Talk like Shakespeare! Enter a prompt and watch the Shakespearean text stream in real time.",
)
demo.launch()