# CPU SAFE HuggingFace Space (2026 stable)
import warnings
warnings.filterwarnings("ignore")  # keep free-tier Space logs readable
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
# reduce CPU overload on free tier
torch.set_num_threads(1)
# -------------------------
# Config
# -------------------------
BASE_MODEL = "unsloth/Phi-3-mini-4k-instruct"   # base checkpoint the LoRA adapter was trained on
LORA_PATH = "saadkhi/SQL_Chat_finetuned_model"  # fine-tuned LoRA adapter repo on the Hub
MAX_NEW_TOKENS = 180                            # cap generation length to bound CPU latency
print("Loading model...")
# -------------------------
# Load base model
# -------------------------
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="cpu",            # free Spaces have no GPU
    torch_dtype=torch.float32,   # fp32: half precision is slow/unsupported on CPU
    trust_remote_code=True,
    low_cpu_mem_usage=True,
)
print("Loading LoRA...")
model = PeftModel.from_pretrained(model, LORA_PATH)
print("Merging LoRA...")
# Fold adapter weights into the base model so inference carries no PEFT overhead.
model = model.merge_and_unload()
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
model.eval()  # inference mode: disables dropout etc.
print("Model ready")
# -------------------------
# Inference
# -------------------------
def generate_sql(question):
    """Generate SQL for a natural-language question via the fine-tuned model.

    Args:
        question: The user's question as a string. Falsy input (empty
            string / None) short-circuits without touching the model.

    Returns:
        The model's generated text with chat-template markers stripped,
        whitespace-trimmed. The prompt itself is NOT echoed back.
    """
    if not question:
        return "Enter a SQL question."
    messages = [{"role": "user", "content": question}]
    input_ids = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    )
    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_new_tokens=MAX_NEW_TOKENS,
            # Greedy decoding; passing temperature alongside do_sample=False
            # is ignored and emits a transformers warning, so it is omitted.
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Decode only the newly generated tokens — decoding output[0] in full
    # would echo the user's prompt back into the answer.
    new_tokens = output[0][input_ids.shape[-1]:]
    text = tokenizer.decode(new_tokens, skip_special_tokens=True)
    # clean residual chat-template artifacts
    for t in ["<|assistant|>", "<|user|>", "<|end|>"]:
        text = text.replace(t, "")
    return text.strip()
# -------------------------
# Gradio UI
# -------------------------
question_box = gr.Textbox(lines=3, label="SQL Question")
answer_box = gr.Textbox(lines=8, label="Generated SQL")

# Simple single-function interface: one text input, one text output.
demo = gr.Interface(
    fn=generate_sql,
    inputs=question_box,
    outputs=answer_box,
    title="SQL Chat β Phi-3 mini",
    description="Free CPU Space. First response may take ~90s",
    cache_examples=False,  # responses are model-generated; nothing to pre-cache
)
demo.launch()