Dub973 committed on
Commit
aba90dc
·
verified ·
1 Parent(s): a9b473f

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +71 -0
  2. db_builder.py +13 -0
  3. requirements.txt +4 -0
  4. schema.sql +14 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# app.py — Gradio demo: translate a natural-language question to SQL with a
# hosted text-to-SQL model, then run it against a local SQLite database.
import os, json, sqlite3, time, textwrap, requests
import gradio as gr
from dotenv import load_dotenv
load_dotenv()  # load HF_TOKEN from a local .env file when present

HF_TOKEN = os.getenv("HF_TOKEN") # store in Space Secrets
MODEL_ID = "defog/sqlcoder-7b-2"  # text-to-SQL model served via HF Inference API
API_URL = f"https://api-inference.huggingface.co/models/{MODEL_ID}"
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}
DB_PATH = "company.db"  # SQLite file produced by db_builder.py from schema.sql
11
+
12
def nlp_to_sql(nl_query: str, schema_ddl: str) -> str:
    """Translate *nl_query* into one SQLite SQL statement via the HF Inference API.

    Parameters
    ----------
    nl_query : str
        Natural-language question from the user.
    schema_ddl : str
        DDL text describing the database; embedded verbatim in the prompt.

    Returns
    -------
    str
        SQL text extracted from the model completion (everything after the
        final "### SQL" marker — the API echoes the prompt in its output).

    Raises
    ------
    requests.HTTPError
        On a non-2xx response (bad token, rate limit, model unavailable).
    RuntimeError
        If the API answers 200 but with an ``{"error": ...}`` payload.
    """
    prompt = textwrap.dedent(f"""
### Task
Translate the following natural language question into ONE valid SQLite SQL query.

### Schema
{schema_ddl}

### Question
{nl_query}

### SQL
""")
    payload = {"inputs": prompt, "parameters": {"max_new_tokens": 256}}
    response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=45)
    # Fail loudly on HTTP errors instead of crashing later with a KeyError
    # while indexing the JSON body.
    response.raise_for_status()
    data = response.json()
    # The Inference API returns a dict like {"error": "Model ... is loading"}
    # instead of a generation list when the model is not ready.
    if isinstance(data, dict) and "error" in data:
        raise RuntimeError(f"Inference API error: {data['error']}")
    sql = data[0]["generated_text"].split("### SQL")[-1].strip()
    return sql
29
+
30
def run_pipeline(nl_query):
    """Run the full NL ➜ SQL ➜ result pipeline and record a step trace.

    Parameters
    ----------
    nl_query : str
        Natural-language question typed by the user.

    Returns
    -------
    tuple[str, str, str]
        (generated SQL, JSON-encoded result or error, newline-joined trace).
    """
    trace = []
    start = time.time()

    # 1. load schema (re-read each call so edits to schema.sql take effect)
    with open("schema.sql") as f:
        schema_ddl = f.read()
    trace.append(("Load Schema", f"{len(schema_ddl.splitlines())} lines loaded"))

    # 2. NL ➜ SQL
    sql_query = nlp_to_sql(nl_query, schema_ddl)
    trace.append(("LLM Output", sql_query))

    # 3. Execute SQL — the model output is untrusted, so only read-only
    # SELECT/WITH statements are executed; anything else (DROP, UPDATE,
    # INSERT, PRAGMA, ...) is refused rather than run against the database.
    normalized = sql_query.lstrip().lower()
    if not normalized.startswith(("select", "with")):
        result = {"error": "Refusing to run non-SELECT statement"}
        trace.append(("Execution Blocked", "only SELECT/WITH queries are executed"))
    else:
        try:
            with sqlite3.connect(DB_PATH) as conn:
                cursor = conn.execute(sql_query)
                rows = cursor.fetchall()
                colnames = [d[0] for d in cursor.description] if cursor.description else []
                result = {"columns": colnames, "rows": rows}
                trace.append(("Execution", f"{len(rows)} rows returned"))
        except Exception as e:  # UI boundary: report DB errors, don't crash the app
            result = {"error": str(e)}
            trace.append(("Execution Error", str(e)))

    # 4. timing
    trace.append(("Latency", f"{time.time() - start:0.2f}s"))

    return sql_query, json.dumps(result, indent=2), "\n".join(f"{s}: {m}" for s, m in trace)
59
+
60
# ---- Gradio UI ----------------------------------------------------------
# One question box in, three panes out: the generated SQL, the query result,
# and the pipeline trace.
with gr.Blocks(title="NLP ➜ SQL Demo") as demo:
    gr.Markdown("### NLP to SQL Query • SQLite • Trace Visibility")
    question_box = gr.Textbox(
        label="Natural-Language Question",
        placeholder="e.g. List employees in Engineering hired after 2021",
    )
    with gr.Row():
        generated_sql = gr.Code(label="Generated SQL")
        query_result = gr.Code(label="Query Result")
    trace_box = gr.Textbox(label="Trace", lines=6)
    run_button = gr.Button("Run")
    run_button.click(
        fn=run_pipeline,
        inputs=question_box,
        outputs=[generated_sql, query_result, trace_box],
    )

if __name__ == "__main__":
    demo.launch()
db_builder.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Build the demo SQLite database (company.db) from schema.sql."""
import sqlite3, pathlib

DB_PATH = pathlib.Path("company.db")
SCHEMA_FILE = pathlib.Path("schema.sql")

def build_db():
    """(Re)create DB_PATH by executing the DDL/seed script in SCHEMA_FILE.

    Removes any existing database file first so repeated runs are idempotent.
    """
    DB_PATH.unlink(missing_ok=True)  # start from a clean slate
    conn = sqlite3.connect(DB_PATH)
    try:
        with open(SCHEMA_FILE, encoding="utf-8") as f:
            conn.executescript(f.read())  # executescript issues its own COMMIT
    finally:
        # sqlite3's "with conn" context manager commits but never closes the
        # connection, so close it explicitly to release the file handle.
        conn.close()
    print("Database created")

if __name__ == "__main__":
    build_db()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio==4.28.3
2
+ requests
3
+ sqlite-utils
4
+ python-dotenv
schema.sql ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
-- Demo schema + seed data for company.db (executed by db_builder.py).
DROP TABLE IF EXISTS employees;
CREATE TABLE employees (
    emp_id INTEGER PRIMARY KEY, -- INTEGER PRIMARY KEY: alias for SQLite's rowid, auto-assigned
    name TEXT,
    department TEXT,
    hire_date DATE, -- seed values below store dates as ISO-8601 text ('YYYY-MM-DD')
    salary INTEGER
);

-- Seed rows; emp_id is omitted so SQLite assigns ids automatically.
INSERT INTO employees (name, department, hire_date, salary) VALUES
('Alice', 'Sales', '2022-01-10', 95000),
('Bob', 'Engineering','2023-03-14',115000),
('Carlos', 'Finance', '2021-07-22',100000),
('Dana', 'Engineering','2020-11-05',125000);