# Inspaire — app.py
# Author: umarfarzan (Hugging Face Space; commit 4e98186, verified)
import gradio as gr
from unsloth import FastLanguageModel
import torch
# ----------------------------
# Load LoRA-finetuned model
# ----------------------------
# Maximum context length (prompt + generated tokens) the model accepts.
max_seq_length = 1024
# Load the LoRA-finetuned checkpoint via unsloth's fast loader; returns the
# model and its matching tokenizer in one call.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="umarfarzan/my-finetuned-model2-lora",
    max_seq_length=max_seq_length,
    dtype=None,  # let unsloth auto-select a dtype for the current hardware
    load_in_4bit=True  # still works on CPU with int4 quantization
    # NOTE(review): 4-bit loading normally requires bitsandbytes on GPU —
    # confirm the "works on CPU" claim actually holds in this Space.
)
# Switch unsloth's internal state to inference mode (disables training paths).
FastLanguageModel.for_inference(model)
# ----------------------------
# Inference function
# ----------------------------
# Alpaca-style prompt template. The three {} slots are filled, in order, with
# (instruction, input, response); at inference time the response slot is left
# empty so the model generates the completion after "### Response:".
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{}
### Input:
{}
### Response:
{}"""
def generate_response(instruction, input_text=""):
    """Generate a completion for an Alpaca-style prompt.

    Args:
        instruction: Task description placed in the "### Instruction" slot.
        input_text: Optional extra context for the "### Input" slot.

    Returns:
        Only the newly generated response text (the prompt is stripped;
        the original implementation echoed the whole prompt back too).
    """
    prompt = alpaca_prompt.format(instruction, input_text, "")
    # Send inputs to wherever the model actually lives instead of
    # hard-coding "cpu" — the 4-bit model may be placed on GPU.
    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
    # inference_mode: no autograd bookkeeping during generation.
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            use_cache=True,
        )
    # generate() returns prompt + completion; decode only the tokens
    # produced after the prompt so the UI shows just the response.
    prompt_len = inputs["input_ids"].shape[1]
    return tokenizer.batch_decode(
        outputs[:, prompt_len:], skip_special_tokens=True
    )[0]
# ----------------------------
# Gradio UI
# ----------------------------
# Single-page demo: instruction + optional context in, model completion out.
with gr.Blocks() as demo:
    gr.Markdown("## LoRA Qwen2.5-7B Demo (CPU)")

    # Input widgets.
    instruction_box = gr.Textbox(label="Instruction", lines=3)
    context_box = gr.Textbox(label="Input (Optional)", lines=2)

    # Output + trigger.
    result_box = gr.Textbox(label="Output", lines=10)
    generate_btn = gr.Button("Generate")

    generate_btn.click(
        fn=generate_response,
        inputs=[instruction_box, context_box],
        outputs=result_box,
    )

demo.launch()