Spaces:

rat45
/

sql-sft-lora-model

Running

sql-sft-lora-model / app.py

Upload folder using huggingface_hub

b507220 verified 10 months ago

1.55 kB

	import gradio as gr
	import torch
	from peft import PeftModel
	from transformers import AutoTokenizer, AutoModelForCausalLM

	# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
	device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

	# Tokenizer and base causal LM, both resolved from the same model id.
	tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
	model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
	model = model.to(device)

	# Wrap the base model with the LoRA adapter loaded from "LoRA_model"
	# (presumably a directory shipped next to this file -- verify on deploy).
	model = PeftModel.from_pretrained(model, "LoRA_model")


	# Define generation function
	def generate_sql(prompt):
	    """Generate a single SQL statement for a natural-language prompt.

	    Args:
	        prompt: Instruction text entered by the user.

	    Returns:
	        The generated text up to the first ';' with one ';' appended, or
	        an empty string when ``prompt`` is empty or whitespace only.
	    """
	    # Nothing to translate: skip the expensive generation call entirely.
	    if not prompt or not prompt.strip():
	        return ""

	    inputs = tokenizer(prompt, return_tensors="pt").to(device)
	    outputs = model.generate(
	        **inputs,
	        max_new_tokens=64,  # cap the completion length to keep latency down
	        do_sample=True,
	        temperature=0.7,
	        top_p=0.95,
	        eos_token_id=tokenizer.eos_token_id,
	        early_stopping=True,
	        num_beams=5,
	    )

	    # The returned sequence starts with the prompt tokens. Drop them by
	    # token count rather than by len(prompt) characters: decoding is not
	    # guaranteed to reproduce the prompt text character for character, so
	    # a character offset can cut into (or leave behind) part of the output.
	    prompt_token_count = inputs["input_ids"].shape[-1]
	    completion = tokenizer.decode(
	        outputs[0][prompt_token_count:], skip_special_tokens=True
	    )

	    # Keep only the first SQL statement and terminate it with one ';'.
	    statement, _, _ = completion.strip().partition(";")
	    return statement + ";"


	# Gradio front end: one multi-line text box in, generated SQL text out.
	interface = gr.Interface(
	    title="SQL Generator",
	    description="Type a natural language prompt and get a SQL query generated by the fine-tuned TinyLlama model.",
	    theme="default",
	    fn=generate_sql,
	    inputs=gr.Textbox(
	        lines=3,
	        placeholder=(
	            "Enter instruction, e.g. 'Show all users with age > 30' "
	            "or 'Show all users where gender is female.'"
	        ),
	    ),
	    outputs="text",
	)

	# Start the app; share=True additionally requests a public share link.
	interface.launch(share=True)