Spaces:
Runtime error
Runtime error
File size: 1,717 Bytes
2b80913 4e98186 2b80913 4e98186 2b80913 4e98186 d5ac8a3 4e98186 2b80913 4e98186 2b80913 4e98186 2b80913 4e98186 2b80913 4e98186 3451045 4e98186 3451045 4e98186 3451045 e451b9e 3451045 4e98186 2b80913 4e98186 2b80913 d5ac8a3 4e98186 2b80913 4e98186 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
import gradio as gr
from unsloth import FastLanguageModel
import torch
# ----------------------------
# Load LoRA-finetuned model
# ----------------------------
# Sequence-length limit handed to the loader below.
max_seq_length = 1024

# Fetch the LoRA checkpoint together with its tokenizer in a single call.
# NOTE(review): 4-bit loading is requested; confirm that the quantization
# backend is actually supported on the hardware this demo is deployed to.
model, tokenizer = FastLanguageModel.from_pretrained(
    max_seq_length=max_seq_length,
    model_name="umarfarzan/my-finetuned-model2-lora",
    load_in_4bit=True,
    dtype=None,  # no explicit dtype is requested
)

# Hand the loaded model to Unsloth's inference setup before serving requests.
FastLanguageModel.for_inference(model)
# ----------------------------
# Inference function
# ----------------------------
# Alpaca-style template with three positional "{}" slots, filled in order:
# instruction, input, response. Lines are joined with "\n" and there is no
# trailing newline after the final slot.
alpaca_prompt = "\n".join(
    (
        "Below is an instruction that describes a task, paired with an input that "
        "provides further context. Write a response that appropriately completes "
        "the request.",
        "### Instruction:",
        "{}",
        "### Input:",
        "{}",
        "### Response:",
        "{}",
    )
)
def generate_response(instruction, input_text=""):
    """Generate the model's reply to an Alpaca-formatted instruction.

    Args:
        instruction: Task description placed in the template's first slot.
            ``None`` is treated as an empty string.
        input_text: Optional extra context placed in the second slot.
            ``None`` is treated as an empty string.

    Returns:
        Only the newly generated text (the prompt template is not echoed
        back), decoded with special tokens removed and surrounding
        whitespace stripped.
    """
    # Guard against None so the template never contains the literal "None".
    prompt = alpaca_prompt.format(instruction or "", input_text or "", "")

    # Follow the model's own device instead of hard-coding "cpu", so inputs
    # and weights cannot end up on different devices.
    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        use_cache=True,
    )

    # The returned sequence starts with the prompt tokens; slice them off so
    # the caller receives just the continuation.
    prompt_length = inputs["input_ids"].shape[1]
    completion_ids = outputs[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()
# ----------------------------
# Gradio UI
# ----------------------------
# Page layout: a heading, two input boxes, one output box and a trigger button.
with gr.Blocks() as demo:
    gr.Markdown(value="## LoRA Qwen2.5-7B Demo (CPU)")

    instruction_input = gr.Textbox(lines=3, label="Instruction")
    context_input = gr.Textbox(lines=2, label="Input (Optional)")
    output_box = gr.Textbox(lines=10, label="Output")

    submit_btn = gr.Button(value="Generate")
    # On click, pass both textbox values to generate_response and show the
    # returned text in the output box.
    submit_btn.click(
        fn=generate_response,
        inputs=[instruction_input, context_input],
        outputs=output_box,
    )

# Start serving the interface.
demo.launch()
|