| import os, sys, time, json, sqlite3, textwrap, requests
|
| import gradio as gr
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Hugging Face Inference API configuration
# ---------------------------------------------------------------------------

MODEL_ID = "gpt2"

API_URL = f"https://api-inference.huggingface.co/models/{MODEL_ID}"

# The token is injected via the environment (e.g. a Space secret).
# Fail fast at import time so a missing secret is reported immediately
# instead of surfacing later as a cryptic 401 from the API.
HF_TOKEN = os.getenv("HF_TOKEN")

if not HF_TOKEN:
    raise RuntimeError(
        "HF_TOKEN not found. Go to Space -> Settings -> Secrets and add it."
    )

HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}

# ---------------------------------------------------------------------------
# Local SQLite database
# ---------------------------------------------------------------------------

DB_PATH = "company.db"      # created lazily from SCHEMA_FILE on first use
SCHEMA_FILE = "schema.sql"  # DDL used both to build the DB and in the LLM prompt
|
|
|
|
|
|
|
|
|
def create_db_if_needed():
    """Create the SQLite database from SCHEMA_FILE on first run.

    No-op when DB_PATH already exists. The connection is closed explicitly:
    sqlite3's context manager only scopes the transaction (commit/rollback),
    it does NOT close the connection, so the previous version leaked an
    open handle on every cold start.
    """
    if os.path.exists(DB_PATH):
        return
    with open(SCHEMA_FILE, encoding="utf-8") as f:
        ddl = f.read()
    conn = sqlite3.connect(DB_PATH)
    try:
        conn.executescript(ddl)
        conn.commit()  # harmless if executescript already committed
    finally:
        conn.close()
|
|
|
|
|
|
|
|
|
def nlp_to_sql(question: str, schema_ddl: str) -> str:
    """Ask the HF Inference API to turn *question* into one SQLite query.

    Returns the generated SQL, or a bracketed marker string
    ("[ConnErr] ...", "[API <status>] ...", "[JSONErr] ...", "[Empty SQL]")
    describing the failure so the caller can surface it in the UI.
    """
    # Prompt lines are kept at column 0 on purpose: textwrap.dedent runs
    # AFTER f-string interpolation, and schema_ddl lines start at column 0,
    # so any indentation added here would survive into the prompt.
    prompt = textwrap.dedent(f"""
Translate the following natural language question into a single valid SQLite SQL query.

### Schema
{schema_ddl}

### Question
{question}

### SQL
""")
    payload = {"inputs": prompt, "parameters": {"max_new_tokens": 64}}

    print("=" * 60, file=sys.stderr)
    print("DEBUG URL:", API_URL, file=sys.stderr)
    # SECURITY: never log token material, even a prefix -- Space logs can be
    # shared, and a known prefix narrows a brute-force search. Log presence only.
    print("DEBUG token set:", bool(HF_TOKEN), file=sys.stderr)

    try:
        r = requests.post(API_URL, headers=HEADERS, json=payload, timeout=60)
    except requests.RequestException as e:  # narrow: network/HTTP failures only
        return f"[ConnErr] {e}"

    print("DEBUG status:", r.status_code, file=sys.stderr)
    print("DEBUG first 200 bytes:", r.text[:200], file=sys.stderr)
    print("=" * 60, file=sys.stderr)

    if r.status_code != 200:
        return f"[API {r.status_code}] {r.text[:100]}"

    try:
        # Expected shape: [{"generated_text": "<prompt + completion>"}].
        # ValueError covers invalid JSON; LookupError/TypeError cover
        # unexpected shapes such as an {"error": ...} payload.
        generated = r.json()[0]["generated_text"]
    except (ValueError, LookupError, TypeError) as e:
        return f"[JSONErr] {e}"

    # generated_text echoes the prompt, so everything after the last
    # "### SQL" heading is the model's completion.
    return generated.split("### SQL")[-1].strip() or "[Empty SQL]"
|
|
|
|
|
|
|
|
|
def run_pipeline(query: str):
    """End-to-end: NL question -> SQL (via LLM) -> execute -> UI outputs.

    Returns a 3-tuple of strings for the three Gradio widgets: the generated
    SQL, a JSON dump of the result (or the error), and a newline-joined
    trace of pipeline steps with total timing.
    """
    t0 = time.time()
    trace = []
    create_db_if_needed()

    with open(SCHEMA_FILE, encoding="utf-8") as f:
        schema = f.read()
    trace.append(("Schema", "loaded"))

    sql = nlp_to_sql(query, schema)
    trace.append(("LLM", sql))

    # SECURITY NOTE(review): the SQL comes straight from the LLM and is
    # executed unvalidated -- acceptable for a throwaway demo DB, but gate
    # this (e.g. allow SELECT only) before pointing it at real data.
    conn = sqlite3.connect(DB_PATH)
    try:
        # `with conn:` keeps the original transaction semantics
        # (commit on success, rollback on error)...
        with conn:
            cur = conn.execute(sql)
            rows = cur.fetchall()
            cols = [d[0] for d in cur.description] if cur.description else []
        result = {"columns": cols, "rows": rows}
        trace.append(("Exec", f"{len(rows)} rows"))
    except Exception as e:
        # Broad on purpose: any sqlite3 error (including garbage SQL from
        # the LLM step) should surface in the UI, not crash the handler.
        result = {"error": str(e)}
        trace.append(("Exec error", str(e)))
    finally:
        conn.close()  # ...but the context manager never closes; do it here

    trace.append(("Time", f"{time.time() - t0:.2f}s"))
    return sql, json.dumps(result, indent=2), "\n".join(f"{s}: {m}" for s, m in trace)
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------
with gr.Blocks(title="Debug HF Token & API") as demo:
    gr.Markdown("### Debugging HF TOKEN -> API (uses GPT-2)")
    q = gr.Textbox(label="Question", placeholder="e.g., How many employees?")
    with gr.Row():
        # SQL and result side by side; the step trace gets its own full row.
        sql_box = gr.Code(label="SQL / debug output")
        res_box = gr.Code(label="Result / error")
    trace_box = gr.Textbox(label="Trace")
    btn = gr.Button("Run")
    btn.click(run_pipeline, q, [sql_box, res_box, trace_box])

if __name__ == "__main__":
    demo.launch()
|
|
|