Dub973 committed on
Commit
03f92a0
·
verified ·
1 Parent(s): a796c9f

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -73
app.py CHANGED
@@ -1,42 +1,27 @@
1
- import os, time, json, sqlite3, textwrap, requests
2
  import gradio as gr
3
 
4
- # -------------------------------------------------
5
- # 1. CONFIGURATION
6
- # -------------------------------------------------
7
- MODEL_ID = "defog/sqlcoder-7b-nl2sql-beta" # working public model
8
  API_URL = f"https://api-inference.huggingface.co/models/{MODEL_ID}"
9
 
10
- HF_TOKEN = os.getenv("HF_TOKEN") # set in Space → Settings → Secrets
11
- if not HF_TOKEN:
12
- raise RuntimeError("HF_TOKEN secret not found. "
13
- "Add it in Space Settings → Secrets.")
14
-
15
- HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}
16
 
17
  DB_PATH = "company.db"
18
  SCHEMA_FILE = "schema.sql"
19
 
20
- # -------------------------------------------------
21
- # 2. UTIL: BUILD SQLITE DB IF NEEDED
22
- # -------------------------------------------------
23
  def create_db_if_needed():
24
- """Create SQLite DB from schema.sql the first time the app runs."""
25
  if os.path.exists(DB_PATH):
26
  return
27
- if not os.path.isfile(SCHEMA_FILE):
28
- raise FileNotFoundError("schema.sql file is missing in the Space.")
29
  with open(SCHEMA_FILE) as f, sqlite3.connect(DB_PATH) as conn:
30
  conn.executescript(f.read())
31
 
32
- # -------------------------------------------------
33
- # 3. UTIL: CALL HUGGING FACE MODEL
34
- # -------------------------------------------------
35
- def nlp_to_sql(question: str, schema_ddl: str) -> str:
36
- """Call HF model to convert NL question into SQL."""
37
  prompt = textwrap.dedent(f"""
38
- ### Task
39
- Translate the following natural language question into ONE valid SQLite SQL query.
40
 
41
  ### Schema
42
  {schema_ddl}
@@ -46,49 +31,51 @@ def nlp_to_sql(question: str, schema_ddl: str) -> str:
46
 
47
  ### SQL
48
  """)
49
- payload = {"inputs": prompt, "parameters": {"max_new_tokens": 256}}
 
 
 
 
 
 
50
 
51
  try:
52
  r = requests.post(API_URL, headers=HEADERS, json=payload, timeout=60)
53
  except Exception as e:
54
- return f" Connection error: {e}"
 
 
 
 
 
 
55
 
56
  if r.status_code != 200:
57
- return f"API error {r.status_code}: {r.text}"
58
 
59
- # Parse JSON
60
  try:
61
- generated = r.json()[0]["generated_text"]
62
- except Exception:
63
- return "✖ Invalid JSON response."
64
-
65
- # Extract SQL
66
- sql = generated.split("### SQL")[-1].strip()
67
- return sql or "✖ Empty SQL returned."
68
-
69
- # -------------------------------------------------
70
- # 4. PIPELINE: NL → SQL → EXECUTE
71
- # -------------------------------------------------
72
- def run_pipeline(nl_query: str):
73
- start = time.time()
74
- trace = []
75
-
76
- # DB setup
77
  create_db_if_needed()
78
 
79
- # Load schema
80
  with open(SCHEMA_FILE) as f:
81
- schema_ddl = f.read()
82
  trace.append(("Schema", "loaded"))
83
 
84
- # Convert NL → SQL
85
- sql_query = nlp_to_sql(nl_query, schema_ddl)
86
- trace.append(("LLM", sql_query))
87
 
88
- # Execute SQL
89
  try:
90
  with sqlite3.connect(DB_PATH) as conn:
91
- cur = conn.execute(sql_query)
92
  rows = cur.fetchall()
93
  cols = [d[0] for d in cur.description] if cur.description else []
94
  result = {"columns": cols, "rows": rows}
@@ -97,29 +84,19 @@ def run_pipeline(nl_query: str):
97
  result = {"error": str(e)}
98
  trace.append(("Exec error", str(e)))
99
 
100
- trace.append(("Time", f"{time.time() - start:.2f}s"))
101
-
102
- return (
103
- sql_query,
104
- json.dumps(result, indent=2, ensure_ascii=False),
105
- "\n".join(f"{s}: {m}" for s, m in trace),
106
- )
107
-
108
- # -------------------------------------------------
109
- # 5. GRADIO UI
110
- # -------------------------------------------------
111
- with gr.Blocks(title="NLP → SQL (SQLite, HF Hub)") as demo:
112
- gr.Markdown("### NLP → SQL demo • SQLite backend • Hugging Face Inference API")
113
- query_in = gr.Textbox(
114
- label="Natural-language question",
115
- placeholder="e.g. List all employees in Engineering hired after 2021",
116
- )
117
  with gr.Row():
118
- sql_out = gr.Code(label="Generated SQL", language="sql")
119
- res_out = gr.Code(label="Query result (JSON)")
120
- trace_out = gr.Textbox(label="Trace", lines=6)
121
- run_btn = gr.Button("Run")
122
- run_btn.click(run_pipeline, query_in, [sql_out, res_out, trace_out])
123
 
124
  if __name__ == "__main__":
125
  demo.launch()
 
# Demo Space: translate a natural-language question into SQL via the Hugging
# Face Inference API and execute it against a local SQLite database.
import os, time, json, sqlite3, textwrap, requests, sys
import gradio as gr

# ----------------- CONFIG -----------------
MODEL_ID = "gpt2" # always public; swap later for sqlcoder
# Hosted Inference API endpoint derived from MODEL_ID.
API_URL = f"https://api-inference.huggingface.co/models/{MODEL_ID}"

# Token is optional: without it requests go out unauthenticated
# (works for public models, subject to stricter rate limits).
HF_TOKEN = os.getenv("HF_TOKEN")
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}

DB_PATH = "company.db"      # SQLite file, built on first run
SCHEMA_FILE = "schema.sql"  # DDL script used to build DB_PATH
# -------------- UTIL: DB ------------------
def create_db_if_needed(db_path: str = "company.db",
                        schema_file: str = "schema.sql") -> None:
    """Build the SQLite database from *schema_file* on first run.

    No-op when *db_path* already exists.  The paths are parameters
    (defaulting to the same values as the module-level DB_PATH /
    SCHEMA_FILE constants) so the helper stays reusable and testable;
    existing zero-argument callers behave exactly as before.

    Raises:
        FileNotFoundError: when *schema_file* is missing — checked up
            front so the error points at the actual problem instead of
            a bare open() failure.
        sqlite3.Error: when the DDL in *schema_file* is invalid.
    """
    if os.path.exists(db_path):
        return
    if not os.path.isfile(schema_file):
        raise FileNotFoundError(
            f"{schema_file} is missing; cannot build {db_path}.")
    with open(schema_file) as f:
        ddl = f.read()
    # NOTE: sqlite3's connection context manager only commits/rolls back,
    # it does NOT close the connection — close explicitly to avoid a leak.
    conn = sqlite3.connect(db_path)
    try:
        conn.executescript(ddl)  # runs the whole DDL file in one shot
        conn.commit()
    finally:
        conn.close()
20
 
21
+ # -------------- UTIL: CALL API ------------
22
+ def nlp_to_sql(question, schema_ddl):
 
 
 
23
  prompt = textwrap.dedent(f"""
24
+ Translate the natural language question to a SQL query.
 
25
 
26
  ### Schema
27
  {schema_ddl}
 
31
 
32
  ### SQL
33
  """)
34
+ payload = {"inputs": prompt, "parameters": {"max_new_tokens": 64}}
35
+
36
+ # ---------- DEBUG PRINTS ----------
37
+ print("=" * 60, file=sys.stderr)
38
+ print("DEBUG URL:", API_URL, file=sys.stderr)
39
+ print("DEBUG Token present?:", bool(HF_TOKEN), file=sys.stderr)
40
+ # ----------------------------------
41
 
42
  try:
43
  r = requests.post(API_URL, headers=HEADERS, json=payload, timeout=60)
44
  except Exception as e:
45
+ return f"[ConnErr] {e}"
46
+
47
+ # ---------- MORE DEBUG ----------
48
+ print("DEBUG Status code:", r.status_code, file=sys.stderr)
49
+ print("DEBUG Raw response (first 500 bytes):", r.text[:500], file=sys.stderr)
50
+ print("=" * 60, file=sys.stderr)
51
+ # ---------------------------------
52
 
53
  if r.status_code != 200:
54
+ return f"[API {r.status_code}] {r.text[:200]}"
55
 
 
56
  try:
57
+ out = r.json()
58
+ generated = out[0].get("generated_text", "No generated_text")
59
+ except Exception as e:
60
+ return f"[JSONErr] {e}"
61
+
62
+ return generated.split("### SQL")[-1].strip() or "[Empty SQL]"
63
+
64
# -------------- PIPELINE ------------------
def run(query):
    """Full pipeline: ensure the DB exists, translate *query* to SQL, run it.

    Returns a 3-tuple matching the Gradio outputs:
        (generated SQL string,
         JSON-encoded result or error,
         newline-joined step trace).
    """
    t0, trace = time.time(), []

    create_db_if_needed()

    with open(SCHEMA_FILE) as f:
        schema = f.read()
    trace.append(("Schema", "loaded"))

    sql = nlp_to_sql(query, schema)
    trace.append(("LLM", sql))

    # SECURITY NOTE(review): the model output is executed verbatim against
    # the database — acceptable for a throwaway demo DB, never for shared
    # or writable data.
    try:
        with sqlite3.connect(DB_PATH) as conn:
            cur = conn.execute(sql)
            rows = cur.fetchall()
            # cur.description is None for statements that return no rows.
            cols = [d[0] for d in cur.description] if cur.description else []
            result = {"columns": cols, "rows": rows}
    except Exception as e:  # covers sqlite errors AND non-SQL LLM output
        result = {"error": str(e)}
        trace.append(("Exec error", str(e)))

    trace.append(("Time", f"{time.time()-t0:.2f}s"))
    # ensure_ascii=False keeps non-ASCII query results human-readable
    # (it was present in the previous revision and dropped by accident).
    return (
        sql,
        json.dumps(result, indent=2, ensure_ascii=False),
        "\n".join(f"{s}: {m}" for s, m in trace),
    )
89
+
90
# -------------- UI ------------------------
# Minimal Gradio front-end: one question box in, three read-only panes out.
with gr.Blocks(title="Debug NLP→SQL") as demo:
    gr.Markdown("### Debugging Hugging Face Inference API calls")
    question_in = gr.Textbox(label="Ask", placeholder="Example: List employees")
    with gr.Row():
        generated_sql = gr.Code(label="Generated SQL / debug output")
        query_result = gr.Code(label="Query result")
        trace_view = gr.Textbox(label="Trace")
    run_button = gr.Button("Run")
    # Wire the pipeline: one input component, three output components.
    run_button.click(run, question_in, [generated_sql, query_result, trace_view])

if __name__ == "__main__":
    demo.launch()