"""Gradio demo that serves a LoRA-finetuned language model loaded through Unsloth."""
import gradio as gr
from unsloth import FastLanguageModel
import torch

# ----------------------------
# Load LoRA-finetuned model
# ----------------------------
# Maximum sequence length passed to the loader below.
max_seq_length = 1024
# Runs at import time and binds the module-level `model` and `tokenizer`
# that generate_response() uses.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="umarfarzan/my-finetuned-model2-lora",
    max_seq_length=max_seq_length,
    dtype=None,
    load_in_4bit=True  # NOTE(review): 4-bit loading usually relies on bitsandbytes with a CUDA GPU — confirm it really works on a CPU-only host
)

# Put the loaded model into Unsloth's inference mode before serving requests.
FastLanguageModel.for_inference(model)

# ----------------------------
# Inference function
# ----------------------------
# Alpaca-style prompt template with three positional `{}` slots. They are
# filled in order by generate_response(): the instruction, the optional
# input/context, and the response (passed as an empty string so the model
# writes the completion after "### Response:").
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

def generate_response(instruction, input_text=""):
    """Generate the model's answer for an Alpaca-style instruction.

    Args:
        instruction: Task description placed in the "### Instruction:" slot
            of the prompt template. ``None`` is treated as an empty string.
        input_text: Optional extra context placed in the "### Input:" slot.
            ``None`` is treated as an empty string.

    Returns:
        The decoded completion only, stripped of surrounding whitespace.
        The prompt template that the model receives is not echoed back.
    """
    # Guard against None so the prompt never contains the literal text "None".
    prompt = alpaca_prompt.format(instruction or "", input_text or "", "")

    # Move the inputs to wherever the model actually lives instead of
    # assuming CPU; a device mismatch would make generate() fail.
    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        use_cache=True,
    )

    # For a causal LM, generate() returns prompt tokens followed by the new
    # tokens. Drop the prompt part so only the answer is decoded.
    completion_ids = outputs[:, prompt_length:]
    decoded = tokenizer.batch_decode(completion_ids, skip_special_tokens=True)
    return decoded[0].strip()

# ----------------------------
# Gradio UI
# ----------------------------
# Build the page: a heading, two input boxes, one output box and a button.
# The components are created in the same order as they are declared here.
with gr.Blocks() as demo:
    gr.Markdown("## LoRA Qwen2.5-7B Demo (CPU)")
    instruction_input = gr.Textbox(lines=3, label="Instruction")
    context_input = gr.Textbox(lines=2, label="Input (Optional)")
    output_box = gr.Textbox(lines=10, label="Output")
    submit_btn = gr.Button(value="Generate")

    # Clicking the button sends both text boxes to generate_response() and
    # shows its return value in the output box.
    submit_btn.click(
        fn=generate_response,
        inputs=[instruction_input, context_input],
        outputs=output_box,
    )

# Start the Gradio server for the app defined above.
demo.launch()