# SQL_chatbot_API / app.py — Hugging Face Space entry point
# Author: saadkhi · revision e95c2d3 (verified) · ~2.54 kB
# CPU SAFE HuggingFace Space (2026 stable)
import warnings
warnings.filterwarnings("ignore")
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
# Cap torch at a single thread: the free-tier Space shares a small vCPU,
# and oversubscribing it makes inference slower, not faster.
torch.set_num_threads(1)
# ─────────────────────────
# Config
# ─────────────────────────
BASE_MODEL = "unsloth/Phi-3-mini-4k-instruct"   # base checkpoint on the Hub
LORA_PATH = "saadkhi/SQL_Chat_finetuned_model"  # fine-tuned LoRA adapter repo
MAX_NEW_TOKENS = 180                            # generation budget per request
print("Loading model...")
# ─────────────────────────
# Load base model
# ─────────────────────────
# float32 on CPU (no half-precision kernels there); low_cpu_mem_usage keeps
# peak RSS down while the weights stream in.
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="cpu",
    torch_dtype=torch.float32,
    trust_remote_code=True,
    low_cpu_mem_usage=True,
)
print("Loading LoRA...")
# Attach the fine-tuned adapter, then fold its weights into the base model
# so inference runs on a single plain model (no PEFT overhead per forward).
model = PeftModel.from_pretrained(model, LORA_PATH)
print("Merging LoRA...")
model = model.merge_and_unload()
# Tokenizer comes from the base model; the LoRA repo does not change it.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
model.eval()  # disable dropout etc. for deterministic inference
print("Model ready")
# ─────────────────────────
# Inference
# ─────────────────────────
def generate_sql(question):
    """Generate SQL for a natural-language question via the merged model.

    Args:
        question: User's question as a string. ``None``, empty, or
            whitespace-only input short-circuits with a prompt message
            instead of running a costly CPU generation.

    Returns:
        str: The model's answer with chat-template artifacts stripped.
    """
    # Guard: gr.Textbox can yield "" or None; also reject whitespace-only.
    if not question or not question.strip():
        return "Enter a SQL question."
    messages = [{"role": "user", "content": question}]
    input_ids = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    )
    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_new_tokens=MAX_NEW_TOKENS,
            # Greedy decoding. `temperature=0` was removed: it is ignored
            # when do_sample=False and only triggers transformers warnings.
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id,
        )
    # BUG FIX: decode only the newly generated tokens. Decoding output[0]
    # in full echoed the chat-templated prompt (the user's question) back
    # into the answer.
    generated_tokens = output[0][input_ids.shape[-1]:]
    text = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    # Strip any Phi-3 chat markers the tokenizer left behind.
    for t in ["<|assistant|>", "<|user|>", "<|end|>"]:
        text = text.replace(t, "")
    return text.strip()
# ─────────────────────────
# UI
# ─────────────────────────
# Minimal Gradio frontend: one question box in, one SQL box out.
question_box = gr.Textbox(lines=3, label="SQL Question")
answer_box = gr.Textbox(lines=8, label="Generated SQL")
demo = gr.Interface(
    fn=generate_sql,
    inputs=question_box,
    outputs=answer_box,
    title="SQL Chat – Phi-3 mini",
    description="Free CPU Space. First response may take ~90s",
    cache_examples=False,
)
demo.launch()