import gradio as gr
import torch
from transformers import GPT2Tokenizer, AutoModelForCausalLM
from peft import PeftModel

# 1️⃣ Load fallback tokenizer (GPT-2)
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 has no pad token; reuse EOS for causal LM

# 2️⃣ Load base model
base_model_name = "TRM-coding/PythonCopilot"
device = "cuda" if torch.cuda.is_available() else "cpu"
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32
).to(device)

# 3️⃣ Resize embeddings to match the PEFT checkpoint vocabulary
checkpoint_vocab_size = 50257  # From the DSDUDEd/funfox PEFT checkpoint
base_model.resize_token_embeddings(checkpoint_vocab_size)

# 4️⃣ Load PEFT/LoRA adapter
peft_model_name = "DSDUDEd/funfox"
model = PeftModel.from_pretrained(base_model, peft_model_name)
model.eval()

# 5️⃣ Define generation function
def generate_text(prompt, max_tokens=50):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=int(max_tokens),  # Gradio sliders pass floats; generate expects an int
        do_sample=True,
        top_p=0.9,
        temperature=0.8,
        pad_token_id=tokenizer.eos_token_id  # Silence the missing-pad-token warning
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# 6️⃣ Build Gradio interface
iface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Enter Prompt", lines=2, placeholder="Type something..."),
        gr.Slider(minimum=10, maximum=200, step=10, value=50, label="Max Tokens")
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="FunFox PEFT Model",
    description="FunFox LoRA model fine-tuned on PythonCopilot base."
)

# 7️⃣ Launch
iface.launch(share=True)
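
# Optional smoke test (a minimal sketch, not part of the app): uncomment to call
# generate_text directly and verify the adapter loads before launching the UI.
# The prompt string below is illustrative, not from the original script.
# print(generate_text("def fibonacci(n):", max_tokens=60))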