import gradio as gr
import torch
from transformers import GPT2Tokenizer, AutoModelForCausalLM
from peft import PeftModel
# 1️⃣ Load fallback tokenizer (GPT-2)
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 defines no pad token; reuse EOS so generate() can pad
# 2️⃣ Load base model
base_model_name = "TRM-coding/PythonCopilot"
device = "cuda" if torch.cuda.is_available() else "cpu"
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
).to(device)
# 3️⃣ Resize embeddings to match PEFT checkpoint vocab
checkpoint_vocab_size = 50257  # Vocab size of the DSDUDEd/funfox PEFT checkpoint (standard GPT-2 vocab)
base_model.resize_token_embeddings(checkpoint_vocab_size)
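# Sanity check (a sketch, assuming the adapter was trained with the standard
# GPT-2 vocabulary): the embedding matrix should now have exactly
# checkpoint_vocab_size rows, or the adapter weights will fail to load.
assert base_model.get_input_embeddings().num_embeddings == checkpoint_vocab_size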
# 4️⃣ Load PEFT/LoRA adapter
peft_model_name = "DSDUDEd/funfox"
model = PeftModel.from_pretrained(base_model, peft_model_name)
model.eval()
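# Optional: merge the LoRA weights into the base model for faster inference.
# merge_and_unload() is the standard PeftModel helper for this; leave it
# commented out if you want to keep the adapter separate (e.g. to swap adapters later).
# model = model.merge_and_unload()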
# 5️⃣ Define generation function
def generate_text(prompt, max_tokens=50):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():  # inference only; no gradients needed
        outputs = model.generate(
            **inputs,
            max_new_tokens=int(max_tokens),  # Gradio sliders return floats
            do_sample=True,
            top_p=0.9,
            temperature=0.8,
            pad_token_id=tokenizer.eos_token_id,  # silence the missing-pad-token warning
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
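# Quick sanity check outside the UI (hypothetical prompt; uncomment to try):
# print(generate_text("def fibonacci(n):", max_tokens=60))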
# 6️⃣ Build Gradio interface
iface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Enter Prompt", lines=2, placeholder="Type something..."),
        gr.Slider(minimum=10, maximum=200, value=50, step=10, label="Max Tokens"),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="FunFox PEFT Model",
    description="FunFox LoRA adapter fine-tuned on the PythonCopilot base model.",
)
# 7️⃣ Launch
iface.launch(share=True)