Spaces:
Sleeping
Sleeping
| import os | |
| import sqlite3 | |
| import gradio as gr | |
| import pandas as pd | |
| import spaces | |
| import torch | |
| from huggingface_hub import hf_hub_download | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| # Import local prompt builders | |
| from build_prompt import ( | |
| build_prompt_0shot, | |
| build_prompt_1shot, | |
| build_prompt_5shot, | |
| ) | |
| # Import both versions of the dataset | |
| from dataset_generator import ( | |
| questions_llmsql_1, | |
| questions_llmsql_2, | |
| tables_llmsql_1, | |
| tables_llmsql_2, | |
| ) | |
| from dataset_generator import split_info as split_info_v1 | |
| from dataset_generator import split_info as split_info_v2 | |
| from evaluate import evaluate_sample | |
# Global variables for caching
# Process-wide lazy state shared across Gradio callbacks: the loaded model /
# tokenizer pair, which repo id it came from, and the active sqlite connection.
model = None              # currently loaded causal LM (None until first inference)
tokenizer = None          # tokenizer paired with `model`
current_model_id = None   # repo id of the resident model; used to skip reloads
conn = None               # sqlite3 connection to the active benchmark database
current_db_path = None    # path `conn` points at; used to detect version switches
# Mapping for dynamic access
# Per-version bundle: in-memory questions/tables, split metadata, and the
# HF Hub repo + local folder that hold the matching sqlite database.
DATASETS = {
    "LLMSQL 1.0": {
        "questions": questions_llmsql_1,
        "tables": tables_llmsql_1,
        # NOTE(review): split_info_v1 and split_info_v2 alias the SAME
        # `dataset_generator.split_info` object (see the imports above), so
        # both versions display identical ID ranges — confirm whether
        # version-specific split info was intended.
        "split_info": split_info_v1,
        "repo": "llmsql-bench/llmsql-benchmark",
        "folder": "llmsql_1.0",
    },
    "LLMSQL 2.0": {
        "questions": questions_llmsql_2,
        "tables": tables_llmsql_2,
        "split_info": split_info_v2,
        "repo": "llmsql-bench/llmsql-2.0",
        "folder": "llmsql_2.0",
    },
}
| # ===================== | |
| # Initialization Logic | |
| # ===================== | |
def initialize_data(version):
    """Download (if needed) and connect to the version-specific database.

    Args:
        version: A key of ``DATASETS`` (e.g. ``"LLMSQL 2.0"``).

    Side effects:
        Rebinds the module-level ``conn`` and ``current_db_path`` globals,
        closing any previously open connection.
    """
    global conn, current_db_path
    config = DATASETS[version]
    app_dir = os.getcwd()
    ver_dir = os.path.join(app_dir, config["folder"])
    os.makedirs(ver_dir, exist_ok=True)
    db_file = os.path.join(ver_dir, "sqlite_tables.db")
    # Reconnect when the version changed OR there is no live connection.
    # BUGFIX: the original only compared paths, so a None `conn` with an
    # unchanged `current_db_path` was never (re)connected, contradicting
    # the original comment "version changed or conn is None".
    if conn is None or current_db_path != db_file:
        if not os.path.exists(db_file):
            print(f"Downloading database for {version}...")
            hf_hub_download(
                repo_id=config["repo"],
                repo_type="dataset",
                filename="sqlite_tables.db",
                local_dir=ver_dir,
            )
        if conn:
            conn.close()
        # check_same_thread=False: Gradio may call back from worker threads.
        conn = sqlite3.connect(db_file, check_same_thread=False)
        current_db_path = db_file
def load_model_if_needed(model_id):
    """Load ``model_id`` into the global model/tokenizer slots.

    Skips the (expensive) load entirely when the requested model is
    already resident, so repeated inferences reuse the cached weights.
    """
    global model, tokenizer, current_model_id
    already_resident = model is not None and current_model_id == model_id
    if already_resident:
        return
    print(f"Loading model: {model_id}...")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype="auto",
        device_map="cuda",
        trust_remote_code=True,
    )
    current_model_id = model_id
    print(f"Model {model_id} loaded successfully.")
# Dispatch table: number of few-shot examples (as a string, matching the
# values of the UI dropdown) -> prompt-builder function.
few_shot_selection = {
    "0": build_prompt_0shot,
    "1": build_prompt_1shot,
    "5": build_prompt_5shot,
}
| # ===================== | |
| # Main Logic | |
| # ===================== | |
def run_inference(version, model_id, question_idx, few_shots):
    """Run text-to-SQL inference for a single benchmark question.

    Args:
        version: Key of ``DATASETS`` selecting the benchmark version.
        model_id: HF Hub repo id of the model to use.
        question_idx: Question ID; anything ``int()`` accepts.
        few_shots: "0" | "1" | "5" — key of ``few_shot_selection``.

    Returns:
        A 7-tuple ``(question, generated_sql, ground_truth_sql,
        prediction_results, gold_results, full_table_df, is_match)``.
        On bad input the first element is an error string and the rest are
        empty/None/False placeholders.
    """
    initialize_data(version)
    load_model_if_needed(model_id)
    dataset = DATASETS[version]
    qs = dataset["questions"]
    ts = dataset["tables"]
    # BUGFIX: narrowed the bare `except:` (which also swallowed
    # KeyboardInterrupt/SystemExit) to the failures int()/indexing raise.
    try:
        idx = int(question_idx)
        q_data = qs[idx]
    except (TypeError, ValueError, IndexError, KeyError):
        return "Invalid ID", "", "", None, None, None, False
    question = q_data["question"]
    ground_truth_sql = q_data["sql"]
    table = ts.get(q_data["table_id"])
    if not table:
        return "Table data missing", "", "", None, None, None, False
    # Guard against empty tables when picking the prompt's example row.
    example_row = table["rows"][0] if table["rows"] else []
    raw_prompt = few_shot_selection[few_shots](
        question, table["header"], table["types"], example_row
    )
    messages = [{"role": "user", "content": raw_prompt}]
    text_input = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tokenizer([text_input], return_tensors="pt").to("cuda")
    # Greedy decoding. BUGFIX: dropped temperature=0.0 — recent transformers
    # versions reject a non-positive temperature even with do_sample=False;
    # the sampled distribution is unused in greedy mode anyway.
    generated_ids = model.generate(
        **model_inputs, max_new_tokens=512, do_sample=False
    )
    # Strip the echoed prompt tokens; keep only the newly generated tail.
    new_tokens = generated_ids[0][len(model_inputs.input_ids[0]):]
    completion = tokenizer.decode(new_tokens, skip_special_tokens=True)
    is_match, mismatch_info, _ = evaluate_sample(
        item={"question_id": idx, "completion": completion},
        questions=qs,
        conn=conn,
    )
    return (
        question,
        completion,
        ground_truth_sql,
        mismatch_info["prediction_results"],
        mismatch_info["gold_results"],
        pd.DataFrame(table["rows"], columns=table["header"]),
        bool(is_match),
    )
| # ===================== | |
| # UI Helpers | |
| # ===================== | |
def get_range_display(version):
    """Render the split ID ranges for ``version`` as markdown, one per line."""
    split_meta = DATASETS[version]["split_info"]
    return "\n".join(
        f"**{split.capitalize()}**: IDs {meta.get('first')} to {meta.get('last')}"
        f" (Total: {meta.get('count')})"
        for split, meta in split_meta.items()
    )
# ---------------------------------------------------------------------------
# Gradio UI definition (flat script: widgets, then callbacks and wiring below)
# ---------------------------------------------------------------------------
with gr.Blocks(title="Text-to-SQL Debugger", theme=gr.themes.Soft()) as app:
    gr.Markdown("## 🔍 Text-to-SQL Interactive Debugger")
    # Top row: dataset version / model / few-shot configuration.
    with gr.Row():
        version_dropdown = gr.Dropdown(
            choices=["LLMSQL 1.0", "LLMSQL 2.0"],
            value="LLMSQL 2.0",
            label="Dataset Version",
            scale=1,
        )
        model_dropdown = gr.Dropdown(
            choices=["Qwen/Qwen2.5-1.5B-Instruct", "openai/gpt-oss-20b"],
            value="Qwen/Qwen2.5-1.5B-Instruct",
            label="1. Select Model",
            scale=1,
        )
        few_shot_dropdown = gr.Dropdown(
            choices=["0", "1", "5"],
            value="5",
            label="2. Few-shot Examples",
            scale=1,
        )
    # Second row: question ID input next to the valid-ID-range help text.
    with gr.Row():
        question_input = gr.Textbox(
            label="3. Enter Question ID",
            value="1",
            lines=2,
            placeholder="e.g. 15001",
            scale=2,
            min_width=200,
        )
        with gr.Column(scale=1):
            # Shows per-split ID ranges; refreshed when the version changes.
            range_md = gr.Markdown(
                get_range_display("LLMSQL 2.0"),
                line_breaks=True,
                padding=True,
            )
    run_button = gr.Button("Run Inference", variant="primary")
    question_box = gr.Textbox(
        label="Natural Language Question", lines=2, interactive=False
    )
    # Side-by-side SQL comparison: model output vs. gold query.
    with gr.Row():
        generated_sql_box = gr.Code(label="Generated SQL", language="sql", lines=3)
        gt_sql_box = gr.Code(label="Ground Truth SQL", language="sql", lines=3)
    gr.Markdown("### Data Comparison")
    # Side-by-side execution results of the two queries.
    with gr.Row():
        generated_table = gr.Dataframe(label="Generated Result", type="pandas")
        gt_table = gr.Dataframe(label="Ground Truth Result", type="pandas")
    with gr.Accordion("See Full Source Table", open=False):
        full_table = gr.Dataframe(label="Full Table Content", type="pandas")
| def update_ui_on_version_or_id(version, q_id): | |
| """Updates range text and pre-loads question data when version or ID changes.""" | |
| dataset = DATASETS[version] | |
| range_text = get_range_display(version) | |
| try: | |
| idx = int(q_id) if (q_id and str(q_id).isdigit()) else 1 | |
| q_data = dataset["questions"][idx] | |
| table_id = q_data["table_id"] | |
| raw_table = dataset["tables"].get(table_id, {}) | |
| df = pd.DataFrame( | |
| raw_table.get("rows", []), columns=raw_table.get("header", []) | |
| ) | |
| return ( | |
| q_data["question"], | |
| q_data["sql"], | |
| df, | |
| gr.update(label="Generated SQL", value=""), | |
| range_text, | |
| ) | |
| except Exception: | |
| return ( | |
| "ID not found in this version", | |
| "", | |
| pd.DataFrame(), | |
| gr.update(label="Generated SQL"), | |
| range_text, | |
| ) | |
| def handle_inference(version, model, few_shot, q_id): | |
| q_text, gen_sql, gt_sql, gen_df, gt_df, full_df, is_match = run_inference( | |
| version, model, q_id, few_shot | |
| ) | |
| status = ( | |
| "✅ Generated SQL (MATCH SUCCESS)" | |
| if is_match | |
| else "❌ Generated SQL (MATCH FAILED)" | |
| ) | |
| return gr.update(label=status, value=gen_sql), gen_df, gt_df | |
    # Event Listeners
    # Changing the dataset version or the question ID refreshes the preview
    # widgets (question, gold SQL, source table) without touching the model.
    version_dropdown.change(
        update_ui_on_version_or_id,
        inputs=[version_dropdown, question_input],
        outputs=[question_box, gt_sql_box, full_table, generated_sql_box, range_md],
    )
    question_input.change(
        update_ui_on_version_or_id,
        inputs=[version_dropdown, question_input],
        outputs=[question_box, gt_sql_box, full_table, generated_sql_box, range_md],
    )
    # The button runs the (GPU) inference path and fills the result tables.
    run_button.click(
        handle_inference,
        inputs=[version_dropdown, model_dropdown, few_shot_dropdown, question_input],
        outputs=[generated_sql_box, generated_table, gt_table],
    )
    # Populate the preview once on page load.
    app.load(
        update_ui_on_version_or_id,
        inputs=[version_dropdown, question_input],
        outputs=[question_box, gt_sql_box, full_table, generated_sql_box, range_md],
    )

app.launch()