jtdearmon committed on
Commit
413107b
·
verified ·
1 Parent(s): 20c4c8c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +732 -145
app.py CHANGED
@@ -4,6 +4,7 @@
4
  # - Students practice SELECT, WHERE, JOINs (INNER/LEFT), aliases, views, CTAS/SELECT INTO.
5
  # - Validator enforces columns only when the prompt asks; otherwise focuses on rows.
6
  # - ERD shows all FK edges in light gray and dynamically HIGHLIGHTS edges implied by JOINs.
 
7
 
8
  import os
9
  import re
@@ -232,140 +233,686 @@ def init_progress_tables(con: sqlite3.Connection):
232
 
233
  init_progress_tables(CONN)
234
 
235
- # -------------------- Fallback dataset & questions --------------------
236
- FALLBACK_SCHEMA = {
237
- "domain": "bookstore",
238
- "tables": [
239
- {
240
- "name": "authors",
241
- "pk": ["author_id"],
242
- "columns": [
243
- {"name":"author_id","type":"INTEGER"},
244
- {"name":"name","type":"TEXT"},
245
- {"name":"country","type":"TEXT"},
246
- {"name":"birth_year","type":"INTEGER"},
247
- ],
248
- "fks": [],
249
- "rows": [
250
- {"author_id":1,"name":"Isaac Asimov","country":"USA","birth_year":1920},
251
- {"author_id":2,"name":"Ursula K. Le Guin","country":"USA","birth_year":1929},
252
- {"author_id":3,"name":"Haruki Murakami","country":"Japan","birth_year":1949},
253
- {"author_id":4,"name":"Chinua Achebe","country":"Nigeria","birth_year":1930},
254
- {"author_id":5,"name":"Jane Austen","country":"UK","birth_year":1775},
255
- {"author_id":6,"name":"J.K. Rowling","country":"UK","birth_year":1965},
256
- {"author_id":7,"name":"Yuval Noah Harari","country":"Israel","birth_year":1976},
257
- {"author_id":8,"name":"New Author","country":"Nowhere","birth_year":1990},
258
- ],
259
- },
260
- {
261
- "name": "bookstores",
262
- "pk": ["store_id"],
263
- "columns": [
264
- {"name":"store_id","type":"INTEGER"},
265
- {"name":"name","type":"TEXT"},
266
- {"name":"city","type":"TEXT"},
267
- {"name":"state","type":"TEXT"},
268
- ],
269
- "fks": [],
270
- "rows": [
271
- {"store_id":1,"name":"Downtown Books","city":"Oklahoma City","state":"OK"},
272
- {"store_id":2,"name":"Harbor Books","city":"Seattle","state":"WA"},
273
- {"store_id":3,"name":"Desert Pages","city":"Phoenix","state":"AZ"},
274
- ],
275
- },
276
- {
277
- "name": "books",
278
- "pk": ["book_id"],
279
- "columns": [
280
- {"name":"book_id","type":"INTEGER"},
281
- {"name":"title","type":"TEXT"},
282
- {"name":"author_id","type":"INTEGER"},
283
- {"name":"store_id","type":"INTEGER"},
284
- {"name":"category","type":"TEXT"},
285
- {"name":"price","type":"REAL"},
286
- {"name":"published_year","type":"INTEGER"},
287
- ],
288
- "fks": [
289
- {"columns":["author_id"],"ref_table":"authors","ref_columns":["author_id"]},
290
- {"columns":["store_id"],"ref_table":"bookstores","ref_columns":["store_id"]},
291
- ],
292
- "rows": [
293
- {"book_id":101,"title":"Foundation","author_id":1,"store_id":1,"category":"Sci-Fi","price":14.99,"published_year":1951},
294
- {"book_id":102,"title":"I, Robot","author_id":1,"store_id":1,"category":"Sci-Fi","price":12.50,"published_year":1950},
295
- {"book_id":103,"title":"The Left Hand of Darkness","author_id":2,"store_id":2,"category":"Sci-Fi","price":16.00,"published_year":1969},
296
- {"book_id":104,"title":"A Wizard of Earthsea","author_id":2,"store_id":2,"category":"Fantasy","price":11.50,"published_year":1968},
297
- {"book_id":105,"title":"Norwegian Wood","author_id":3,"store_id":3,"category":"Fiction","price":18.00,"published_year":1987},
298
- {"book_id":106,"title":"Kafka on the Shore","author_id":3,"store_id":1,"category":"Fiction","price":21.00,"published_year":2002},
299
- {"book_id":107,"title":"Things Fall Apart","author_id":4,"store_id":1,"category":"Fiction","price":10.00,"published_year":1958},
300
- {"book_id":108,"title":"Pride and Prejudice","author_id":5,"store_id":2,"category":"Fiction","price":9.00,"published_year":1813},
301
- {"book_id":109,"title":"Harry Potter and the Sorcerer's Stone","author_id":6,"store_id":3,"category":"Children","price":22.00,"published_year":1997},
302
- {"book_id":110,"title":"Harry Potter and the Chamber of Secrets","author_id":6,"store_id":3,"category":"Children","price":23.00,"published_year":1998},
303
- {"book_id":111,"title":"Sapiens","author_id":7,"store_id":1,"category":"History","price":26.00,"published_year":2011},
304
- {"book_id":112,"title":"Homo Deus","author_id":7,"store_id":2,"category":"History","price":28.00,"published_year":2015},
305
- ],
306
- },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
  ]
308
  }
309
 
310
- FALLBACK_QUESTIONS = [
311
- {"id":"Q01","category":"SELECT *","difficulty":1,
312
- "prompt_md":"Select all rows and columns from `authors`.",
313
- "answer_sql":["SELECT * FROM authors;"],
314
- "requires_aliases":False,"required_aliases":[]},
315
- {"id":"Q02","category":"SELECT columns","difficulty":1,
316
- "prompt_md":"Show `title` and `price` from `books`.",
317
- "answer_sql":["SELECT title, price FROM books;"],
318
- "requires_aliases":False,"required_aliases":[]},
319
- {"id":"Q03","category":"WHERE","difficulty":1,
320
- "prompt_md":"List Sci‑Fi books under $15 (show title, price).",
321
- "answer_sql":["SELECT title, price FROM books WHERE category='Sci-Fi' AND price < 15;"],
322
- "requires_aliases":False,"required_aliases":[]},
323
- {"id":"Q04","category":"Aliases","difficulty":1,
324
- "prompt_md":"Using aliases `b` and `a`, join `books` to `authors` and show `b.title` and `a.name` as `author_name`.",
325
- "answer_sql":["SELECT b.title, a.name AS author_name FROM books b JOIN authors a ON b.author_id=a.author_id;"],
326
- "requires_aliases":True,"required_aliases":["a","b"]},
327
- {"id":"Q05","category":"JOIN (INNER)","difficulty":2,
328
- "prompt_md":"Inner join `books` and `bookstores`. Return `title`, `name` as `store`.",
329
- "answer_sql":["SELECT b.title, s.name AS store FROM books b INNER JOIN bookstores s ON b.store_id=s.store_id;"],
330
- "requires_aliases":False,"required_aliases":[]},
331
- {"id":"Q06","category":"JOIN (LEFT)","difficulty":2,
332
- "prompt_md":"List each author and their number of books (include authors with zero): columns `name`, `book_count`.",
333
- "answer_sql":["SELECT a.name, COUNT(b.book_id) AS book_count FROM authors a LEFT JOIN books b ON a.author_id=b.author_id GROUP BY a.name;"],
334
- "requires_aliases":False,"required_aliases":[]},
335
- {"id":"Q07","category":"VIEW","difficulty":2,
336
- "prompt_md":"Create a view `vw_pricy` with `title`, `price` for books priced > 25.",
337
- "answer_sql":["CREATE VIEW vw_pricy AS SELECT title, price FROM books WHERE price > 25;"],
338
- "requires_aliases":False,"required_aliases":[]},
339
- {"id":"Q08","category":"CTAS / SELECT INTO","difficulty":2,
340
- "prompt_md":"Create a table `cheap_books` containing books priced < 12. Use CTAS or SELECT INTO.",
341
- "answer_sql":[
342
- "CREATE TABLE cheap_books AS SELECT * FROM books WHERE price < 12;",
343
- "SELECT * INTO cheap_books FROM books WHERE price < 12;"
344
- ],
345
- "requires_aliases":False,"required_aliases":[]},
346
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
347
 
348
- # --------------- OpenAI prompts + parsing helpers ---------------
349
- DOMAIN_AND_QUESTIONS_SCHEMA = {"required": ["domain", "tables", "questions"]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
350
 
351
- def _domain_prompt(prev_domain: Optional[str]) -> str:
352
- extra = f" Avoid using the previous domain '{prev_domain}' if possible." if prev_domain else ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
  return f"""
354
  Return ONLY a valid JSON object (no markdown, no prose).
355
- The JSON must have: domain (string), tables (3–4 table objects), and questions (8–12 question objects).{extra}
356
 
357
- Rules:
358
- - One domain chosen from: bookstore, retail sales, wholesaler, sales tax, oil and gas wells, marketing.
359
- - Tables: SQLite-friendly. Use snake_case. Each table has: name, pk (list of column names),
360
- columns (list of {{name,type}}), fks (list of {{columns,ref_table,ref_columns}}), rows (8–15 small seed rows).
361
- - Questions: categories among "SELECT *", "SELECT columns", "WHERE", "Aliases",
362
- "JOIN (INNER)", "JOIN (LEFT)", "Aggregation", "VIEW", "CTAS / SELECT INTO".
363
- Include at least one LEFT JOIN, one VIEW creation, one CTAS or SELECT INTO.
364
- Provide 1–3 'answer_sql' strings per question. Prefer SQLite-compatible SQL. Do NOT use RIGHT/FULL OUTER JOIN.
365
- For 1–2 questions, set requires_aliases=true and list required_aliases.
366
 
367
- Example top-level keys:
368
- {{"domain":"retail sales","tables":[...],"questions":[...]}}
 
 
 
 
 
 
 
 
369
  """
370
 
371
  def _loose_json_parse(s: str) -> Optional[dict]:
@@ -475,11 +1022,11 @@ def _canon_tables(tables: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
475
  })
476
  return out
477
 
478
- def llm_generate_domain_and_questions(prev_domain: Optional[str]):
479
  if not OPENAI_AVAILABLE or not os.getenv("OPENAI_API_KEY"):
480
  return None, "OpenAI client not available or OPENAI_API_KEY missing.", None, {"accepted_questions":0,"dropped_questions":0}
481
  errors = []
482
- prompt = _domain_prompt(prev_domain)
483
  for model in _candidate_models():
484
  try:
485
  try:
@@ -501,9 +1048,12 @@ def llm_generate_domain_and_questions(prev_domain: Optional[str]):
501
  obj_raw = _loose_json_parse(data_text or "")
502
  if not obj_raw:
503
  raise RuntimeError("Could not parse JSON from model output.")
504
- for k in DOMAIN_AND_QUESTIONS_SCHEMA["required"]:
505
  if k not in obj_raw:
506
  raise RuntimeError(f"Missing key '{k}'")
 
 
 
507
  tables = _canon_tables(obj_raw.get("tables", []))
508
  if not tables: raise RuntimeError("No usable tables in LLM output.")
509
  obj_raw["tables"] = tables
@@ -569,21 +1119,49 @@ def install_schema(con: sqlite3.Connection, schema: Dict[str,Any]):
569
  (schema.get("domain","unknown"), json.dumps(schema)))
570
  con.commit()
571
 
572
- def bootstrap_domain_with_llm_or_fallback(prev_domain: Optional[str]):
573
- obj, err, model_used, stats = llm_generate_domain_and_questions(prev_domain)
574
  if obj is None:
575
- return FALLBACK_SCHEMA, FALLBACK_QUESTIONS, {"source":"fallback","model":None,"error":err,"accepted":0,"dropped":0}
 
 
 
576
  return obj, obj["questions"], {"source":"openai","model":model_used,"error":None,"accepted":stats["accepted_questions"],"dropped":stats["dropped_questions"]}
577
 
578
- def install_schema_and_prepare_questions(prev_domain: Optional[str]):
579
- schema, questions, info = bootstrap_domain_with_llm_or_fallback(prev_domain)
580
  install_schema(CONN, schema)
581
  if not questions:
582
- questions = FALLBACK_QUESTIONS
 
 
 
583
  return schema, questions, info
584
 
585
- # -------------------- Session globals --------------------
586
- CURRENT_SCHEMA, CURRENT_QS, CURRENT_INFO = install_schema_and_prepare_questions(prev_domain=None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
587
 
588
  # -------------------- Progress + mastery --------------------
589
  def upsert_user(con: sqlite3.Connection, user_id: str, name: str):
@@ -617,7 +1195,7 @@ def fetch_attempts(con: sqlite3.Connection, user_id: str) -> pd.DataFrame:
617
  return pd.read_sql_query("SELECT * FROM attempts WHERE user_id=? ORDER BY id DESC", con, params=(user_id,))
618
 
619
  def pick_next_question(user_id: str) -> Dict[str,Any]:
620
- pool = CURRENT_QS if CURRENT_QS else FALLBACK_QUESTIONS
621
  df = fetch_attempts(CONN, user_id)
622
  stats = topic_stats(df)
623
  stats = stats.sort_values(by=["accuracy","attempts"], ascending=[True, True]) if not stats.empty else stats
@@ -910,8 +1488,19 @@ def show_hint(session: dict):
910
  }.get(cat, "Identify keys from the schema and join on them.")
911
  return gr.update(value=f"**Hint:** {hint}", visible=True)
912
 
 
 
 
 
 
 
 
 
 
 
 
913
  def _domain_status_md():
914
- if CURRENT_INFO.get("source","openai"):
915
  accepted = CURRENT_INFO.get("accepted",0); dropped = CURRENT_INFO.get("dropped",0)
916
  return (f"✅ **Domain via OpenAI** `{CURRENT_INFO.get('model','?')}` → **{CURRENT_SCHEMA.get('domain','?')}**. "
917
  f"Accepted questions: {accepted}, dropped: {dropped}. \n"
@@ -929,11 +1518,12 @@ def list_tables_for_preview():
929
  """)
930
  return df["name"].tolist() if not df.empty else ["(no tables)"]
931
 
932
- # >>> FIX: Always reseed a question on randomize (creates a guest session if needed)
933
  def regenerate_domain(session: dict):
934
  global CURRENT_SCHEMA, CURRENT_QS, CURRENT_INFO
935
  prev = CURRENT_SCHEMA.get("domain") if CURRENT_SCHEMA else None
936
- CURRENT_SCHEMA, CURRENT_QS, CURRENT_INFO = install_schema_and_prepare_questions(prev_domain=prev)
 
937
  erd = draw_dynamic_erd(CURRENT_SCHEMA)
938
  status = _domain_status_md()
939
 
@@ -947,11 +1537,8 @@ def regenerate_domain(session: dict):
947
  q = pick_next_question(session["user_id"])
948
  session.update({"qid": q["id"], "q": q, "start_ts": time.time()})
949
 
950
- # Fresh mastery and cleared result preview
951
  stats = topic_stats(fetch_attempts(CONN, session["user_id"]))
952
  empty_df = pd.DataFrame()
953
-
954
- # Refresh dropdown
955
  dd_update = gr.update(choices=list_tables_for_preview(), value=None)
956
 
957
  return (
@@ -1056,11 +1643,11 @@ with gr.Blocks(title="Adaptive SQL Trainer — Randomized Domains") as demo:
1056
  outputs=[feedback_md],
1057
  )
1058
  export_btn.click(
1059
- lambda user: os.path.abspath(os.path.join(EXPORT_DIR, f"{'-'.join((user or '').lower().split())[:64]}_progress.csv")),
1060
  inputs=[export_name],
1061
  outputs=[export_file],
1062
  )
1063
- regen_btn.click( # one callback: reseed question, refresh dropdown, clear previews
1064
  regenerate_domain,
1065
  inputs=[session_state],
1066
  outputs=[regen_fb, er_image, prompt_md, sql_input, tbl_dd, mastery_df, result_df, session_state],
 
4
  # - Students practice SELECT, WHERE, JOINs (INNER/LEFT), aliases, views, CTAS/SELECT INTO.
5
  # - Validator enforces columns only when the prompt asks; otherwise focuses on rows.
6
  # - ERD shows all FK edges in light gray and dynamically HIGHLIGHTS edges implied by JOINs.
7
+ # - Domain picker now round-robins across: bookstore, retail sales, wholesaler, sales tax, oil & gas wells, marketing.
8
 
9
  import os
10
  import re
 
233
 
234
  init_progress_tables(CONN)
235
 
236
+ # -------------------- Built-in fallback domain packs --------------------
237
+ # Each pack: {"schema": {...}, "questions": [...]}
238
+ FALLBACK_PACKS: Dict[str, Dict[str, Any]] = {}
239
+
240
+ # --- Bookstore (existing) ---
241
+ FALLBACK_PACKS["bookstore"] = {
242
+ "schema": {
243
+ "domain": "bookstore",
244
+ "tables": [
245
+ {
246
+ "name": "authors",
247
+ "pk": ["author_id"],
248
+ "columns": [
249
+ {"name":"author_id","type":"INTEGER"},
250
+ {"name":"name","type":"TEXT"},
251
+ {"name":"country","type":"TEXT"},
252
+ {"name":"birth_year","type":"INTEGER"},
253
+ ],
254
+ "fks": [],
255
+ "rows": [
256
+ {"author_id":1,"name":"Isaac Asimov","country":"USA","birth_year":1920},
257
+ {"author_id":2,"name":"Ursula K. Le Guin","country":"USA","birth_year":1929},
258
+ {"author_id":3,"name":"Haruki Murakami","country":"Japan","birth_year":1949},
259
+ {"author_id":4,"name":"Chinua Achebe","country":"Nigeria","birth_year":1930},
260
+ {"author_id":5,"name":"Jane Austen","country":"UK","birth_year":1775},
261
+ {"author_id":6,"name":"J.K. Rowling","country":"UK","birth_year":1965},
262
+ {"author_id":7,"name":"Yuval Noah Harari","country":"Israel","birth_year":1976},
263
+ {"author_id":8,"name":"New Author","country":"Nowhere","birth_year":1990},
264
+ ],
265
+ },
266
+ {
267
+ "name": "bookstores",
268
+ "pk": ["store_id"],
269
+ "columns": [
270
+ {"name":"store_id","type":"INTEGER"},
271
+ {"name":"name","type":"TEXT"},
272
+ {"name":"city","type":"TEXT"},
273
+ {"name":"state","type":"TEXT"},
274
+ ],
275
+ "fks": [],
276
+ "rows": [
277
+ {"store_id":1,"name":"Downtown Books","city":"Oklahoma City","state":"OK"},
278
+ {"store_id":2,"name":"Harbor Books","city":"Seattle","state":"WA"},
279
+ {"store_id":3,"name":"Desert Pages","city":"Phoenix","state":"AZ"},
280
+ ],
281
+ },
282
+ {
283
+ "name": "books",
284
+ "pk": ["book_id"],
285
+ "columns": [
286
+ {"name":"book_id","type":"INTEGER"},
287
+ {"name":"title","type":"TEXT"},
288
+ {"name":"author_id","type":"INTEGER"},
289
+ {"name":"store_id","type":"INTEGER"},
290
+ {"name":"category","type":"TEXT"},
291
+ {"name":"price","type":"REAL"},
292
+ {"name":"published_year","type":"INTEGER"},
293
+ ],
294
+ "fks": [
295
+ {"columns":["author_id"],"ref_table":"authors","ref_columns":["author_id"]},
296
+ {"columns":["store_id"],"ref_table":"bookstores","ref_columns":["store_id"]},
297
+ ],
298
+ "rows": [
299
+ {"book_id":101,"title":"Foundation","author_id":1,"store_id":1,"category":"Sci-Fi","price":14.99,"published_year":1951},
300
+ {"book_id":102,"title":"I, Robot","author_id":1,"store_id":1,"category":"Sci-Fi","price":12.50,"published_year":1950},
301
+ {"book_id":103,"title":"The Left Hand of Darkness","author_id":2,"store_id":2,"category":"Sci-Fi","price":16.00,"published_year":1969},
302
+ {"book_id":104,"title":"A Wizard of Earthsea","author_id":2,"store_id":2,"category":"Fantasy","price":11.50,"published_year":1968},
303
+ {"book_id":105,"title":"Norwegian Wood","author_id":3,"store_id":3,"category":"Fiction","price":18.00,"published_year":1987},
304
+ {"book_id":106,"title":"Kafka on the Shore","author_id":3,"store_id":1,"category":"Fiction","price":21.00,"published_year":2002},
305
+ {"book_id":107,"title":"Things Fall Apart","author_id":4,"store_id":1,"category":"Fiction","price":10.00,"published_year":1958},
306
+ {"book_id":108,"title":"Pride and Prejudice","author_id":5,"store_id":2,"category":"Fiction","price":9.00,"published_year":1813},
307
+ {"book_id":109,"title":"Harry Potter and the Sorcerer's Stone","author_id":6,"store_id":3,"category":"Children","price":22.00,"published_year":1997},
308
+ {"book_id":110,"title":"Harry Potter and the Chamber of Secrets","author_id":6,"store_id":3,"category":"Children","price":23.00,"published_year":1998},
309
+ {"book_id":111,"title":"Sapiens","author_id":7,"store_id":1,"category":"History","price":26.00,"published_year":2011},
310
+ {"book_id":112,"title":"Homo Deus","author_id":7,"store_id":2,"category":"History","price":28.00,"published_year":2015},
311
+ ],
312
+ },
313
+ ]
314
+ },
315
+ "questions": [
316
+ {"id":"B_Q01","category":"SELECT *","difficulty":1,
317
+ "prompt_md":"Select all rows and columns from `authors`.",
318
+ "answer_sql":["SELECT * FROM authors;"]},
319
+ {"id":"B_Q02","category":"SELECT columns","difficulty":1,
320
+ "prompt_md":"Show `title` and `price` from `books`.",
321
+ "answer_sql":["SELECT title, price FROM books;"]},
322
+ {"id":"B_Q03","category":"WHERE","difficulty":1,
323
+ "prompt_md":"List Sci‑Fi books under $15 (show title, price).",
324
+ "answer_sql":["SELECT title, price FROM books WHERE category='Sci-Fi' AND price < 15;"]},
325
+ {"id":"B_Q04","category":"Aliases","difficulty":1,
326
+ "prompt_md":"Using aliases `b` and `a`, join `books` to `authors` and show `b.title` and `a.name` as `author_name`.",
327
+ "answer_sql":["SELECT b.title, a.name AS author_name FROM books b JOIN authors a ON b.author_id=a.author_id;"],
328
+ "requires_aliases":True,"required_aliases":["a","b"]},
329
+ {"id":"B_Q05","category":"JOIN (INNER)","difficulty":2,
330
+ "prompt_md":"Inner join `books` and `bookstores`. Return `title`, `name` as `store`.",
331
+ "answer_sql":["SELECT b.title, s.name AS store FROM books b INNER JOIN bookstores s ON b.store_id=s.store_id;"]},
332
+ {"id":"B_Q06","category":"JOIN (LEFT)","difficulty":2,
333
+ "prompt_md":"List each author and their number of books (include authors with zero): columns `name`, `book_count`.",
334
+ "answer_sql":["SELECT a.name, COUNT(b.book_id) AS book_count FROM authors a LEFT JOIN books b ON a.author_id=b.author_id GROUP BY a.name;"]},
335
+ {"id":"B_Q07","category":"VIEW","difficulty":2,
336
+ "prompt_md":"Create a view `vw_pricy` with `title`, `price` for books priced > 25.",
337
+ "answer_sql":["CREATE VIEW vw_pricy AS SELECT title, price FROM books WHERE price > 25;"]},
338
+ {"id":"B_Q08","category":"CTAS / SELECT INTO","difficulty":2,
339
+ "prompt_md":"Create a table `cheap_books` containing books priced < 12. Use CTAS or SELECT INTO.",
340
+ "answer_sql":[
341
+ "CREATE TABLE cheap_books AS SELECT * FROM books WHERE price < 12;",
342
+ "SELECT * INTO cheap_books FROM books WHERE price < 12;"
343
+ ]},
344
  ]
345
  }
346
 
347
+ # --- Retail sales ---
348
+ FALLBACK_PACKS["retail sales"] = {
349
+ "schema": {
350
+ "domain": "retail sales",
351
+ "tables": [
352
+ {
353
+ "name":"customers","pk":["customer_id"],
354
+ "columns":[
355
+ {"name":"customer_id","type":"INTEGER"},
356
+ {"name":"name","type":"TEXT"},
357
+ {"name":"city","type":"TEXT"},
358
+ {"name":"state","type":"TEXT"}
359
+ ],
360
+ "fks":[],
361
+ "rows":[
362
+ {"customer_id":1,"name":"Ava Reed","city":"Seattle","state":"WA"},
363
+ {"customer_id":2,"name":"Mason Ortiz","city":"Portland","state":"OR"},
364
+ {"customer_id":3,"name":"Noah Patel","city":"Phoenix","state":"AZ"},
365
+ {"customer_id":4,"name":"Emma Kim","city":"San Diego","state":"CA"},
366
+ {"customer_id":5,"name":"Olivia Park","city":"Dallas","state":"TX"},
367
+ {"customer_id":6,"name":"Liam Gray","city":"Denver","state":"CO"},
368
+ {"customer_id":7,"name":"Sophia Lee","city":"Boston","state":"MA"},
369
+ {"customer_id":8,"name":"Elijah Hall","city":"Miami","state":"FL"}
370
+ ]
371
+ },
372
+ {
373
+ "name":"products","pk":["product_id"],
374
+ "columns":[
375
+ {"name":"product_id","type":"INTEGER"},
376
+ {"name":"product_name","type":"TEXT"},
377
+ {"name":"category","type":"TEXT"},
378
+ {"name":"price","type":"REAL"}
379
+ ],
380
+ "fks":[],
381
+ "rows":[
382
+ {"product_id":101,"product_name":"Coffee Maker","category":"Home","price":49.99},
383
+ {"product_id":102,"product_name":"Electric Kettle","category":"Home","price":29.99},
384
+ {"product_id":103,"product_name":"Headphones","category":"Electronics","price":79.00},
385
+ {"product_id":104,"product_name":"USB-C Cable","category":"Electronics","price":9.99},
386
+ {"product_id":105,"product_name":"Notebook","category":"Stationery","price":3.49},
387
+ {"product_id":106,"product_name":"Desk Lamp","category":"Home","price":19.99},
388
+ {"product_id":107,"product_name":"T-Shirt","category":"Clothing","price":15.00},
389
+ {"product_id":108,"product_name":"Sneakers","category":"Clothing","price":65.00}
390
+ ]
391
+ },
392
+ {
393
+ "name":"orders","pk":["order_id"],
394
+ "columns":[
395
+ {"name":"order_id","type":"INTEGER"},
396
+ {"name":"customer_id","type":"INTEGER"},
397
+ {"name":"order_date","type":"TEXT"}
398
+ ],
399
+ "fks":[{"columns":["customer_id"],"ref_table":"customers","ref_columns":["customer_id"]}],
400
+ "rows":[
401
+ {"order_id":201,"customer_id":1,"order_date":"2024-01-05"},
402
+ {"order_id":202,"customer_id":2,"order_date":"2024-01-07"},
403
+ {"order_id":203,"customer_id":1,"order_date":"2024-01-12"},
404
+ {"order_id":204,"customer_id":3,"order_date":"2024-02-01"},
405
+ {"order_id":205,"customer_id":4,"order_date":"2024-02-10"},
406
+ {"order_id":206,"customer_id":5,"order_date":"2024-03-02"},
407
+ {"order_id":207,"customer_id":6,"order_date":"2024-03-03"},
408
+ {"order_id":208,"customer_id":7,"order_date":"2024-03-09"},
409
+ {"order_id":209,"customer_id":8,"order_date":"2024-03-15"},
410
+ {"order_id":210,"customer_id":3,"order_date":"2024-03-20"}
411
+ ]
412
+ },
413
+ {
414
+ "name":"order_items","pk":["order_id","product_id"],
415
+ "columns":[
416
+ {"name":"order_id","type":"INTEGER"},
417
+ {"name":"product_id","type":"INTEGER"},
418
+ {"name":"qty","type":"INTEGER"},
419
+ {"name":"unit_price","type":"REAL"}
420
+ ],
421
+ "fks":[
422
+ {"columns":["order_id"],"ref_table":"orders","ref_columns":["order_id"]},
423
+ {"columns":["product_id"],"ref_table":"products","ref_columns":["product_id"]}
424
+ ],
425
+ "rows":[
426
+ {"order_id":201,"product_id":101,"qty":1,"unit_price":49.99},
427
+ {"order_id":201,"product_id":104,"qty":2,"unit_price":9.99},
428
+ {"order_id":202,"product_id":107,"qty":3,"unit_price":15.00},
429
+ {"order_id":203,"product_id":103,"qty":1,"unit_price":79.00},
430
+ {"order_id":203,"product_id":105,"qty":5,"unit_price":3.49},
431
+ {"order_id":204,"product_id":102,"qty":2,"unit_price":29.99},
432
+ {"order_id":205,"product_id":108,"qty":1,"unit_price":65.00},
433
+ {"order_id":206,"product_id":106,"qty":2,"unit_price":19.99},
434
+ {"order_id":207,"product_id":104,"qty":4,"unit_price":9.99},
435
+ {"order_id":208,"product_id":101,"qty":1,"unit_price":49.99},
436
+ {"order_id":209,"product_id":107,"qty":2,"unit_price":15.00},
437
+ {"order_id":210,"product_id":103,"qty":1,"unit_price":79.00}
438
+ ]
439
+ }
440
+ ]
441
+ },
442
+ "questions":[
443
+ {"id":"RS_Q01","category":"SELECT *","difficulty":1,
444
+ "prompt_md":"Show everything from `customers`.",
445
+ "answer_sql":["SELECT * FROM customers;"]},
446
+ {"id":"RS_Q02","category":"SELECT columns","difficulty":1,
447
+ "prompt_md":"List product name and price from `products`.",
448
+ "answer_sql":["SELECT product_name, price FROM products;"]},
449
+ {"id":"RS_Q03","category":"WHERE","difficulty":1,
450
+ "prompt_md":"Orders placed in March 2024 (return `order_id`, `order_date`).",
451
+ "answer_sql":["SELECT order_id, order_date FROM orders WHERE order_date BETWEEN '2024-03-01' AND '2024-03-31';"]},
452
+ {"id":"RS_Q04","category":"Aliases","difficulty":1,
453
+ "prompt_md":"Join `orders` (alias `o`) with `customers` (alias `c`) and show `o.order_id`, `c.name`.",
454
+ "answer_sql":["SELECT o.order_id, c.name FROM orders o JOIN customers c ON o.customer_id=c.customer_id;"],
455
+ "requires_aliases":True,"required_aliases":["o","c"]},
456
+ {"id":"RS_Q05","category":"JOIN (INNER)","difficulty":2,
457
+ "prompt_md":"Inner join `order_items` with `products` to show items where qty ≥ 3. Return `product_name`, `qty`.",
458
+ "answer_sql":["SELECT p.product_name, oi.qty FROM order_items oi INNER JOIN products p ON oi.product_id=p.product_id WHERE oi.qty >= 3;"]},
459
+ {"id":"RS_Q06","category":"JOIN (LEFT)","difficulty":2,
460
+ "prompt_md":"Customers and their number of orders (include zero). Columns: `name`, `order_count`.",
461
+ "answer_sql":["SELECT c.name, COUNT(o.order_id) AS order_count FROM customers c LEFT JOIN orders o ON c.customer_id=o.customer_id GROUP BY c.name;"]},
462
+ {"id":"RS_Q07","category":"VIEW","difficulty":2,
463
+ "prompt_md":"Create view `vw_top_qty` with total quantity by product: `product_id`, `total_qty`.",
464
+ "answer_sql":["CREATE VIEW vw_top_qty AS SELECT product_id, SUM(qty) AS total_qty FROM order_items GROUP BY product_id;"]},
465
+ {"id":"RS_Q08","category":"CTAS / SELECT INTO","difficulty":2,
466
+ "prompt_md":"Create table `cheap_products` with products priced < 10.",
467
+ "answer_sql":[
468
+ "CREATE TABLE cheap_products AS SELECT * FROM products WHERE price < 10;",
469
+ "SELECT * INTO cheap_products FROM products WHERE price < 10;"
470
+ ]}
471
+ ]
472
+ }
473
+
474
+ # --- Wholesaler ---
475
+ FALLBACK_PACKS["wholesaler"] = {
476
+ "schema":{
477
+ "domain":"wholesaler",
478
+ "tables":[
479
+ {"name":"suppliers","pk":["supplier_id"],
480
+ "columns":[
481
+ {"name":"supplier_id","type":"INTEGER"},
482
+ {"name":"supplier_name","type":"TEXT"},
483
+ {"name":"country","type":"TEXT"}
484
+ ],
485
+ "fks":[],
486
+ "rows":[
487
+ {"supplier_id":1,"supplier_name":"Nordic Foods","country":"SE"},
488
+ {"supplier_id":2,"supplier_name":"Metro Trade","country":"DE"},
489
+ {"supplier_id":3,"supplier_name":"Pacific Imports","country":"US"},
490
+ {"supplier_id":4,"supplier_name":"Andes Supply","country":"CL"},
491
+ {"supplier_id":5,"supplier_name":"Sahara Wholesale","country":"MA"}
492
+ ]},
493
+ {"name":"items","pk":["item_id"],
494
+ "columns":[
495
+ {"name":"item_id","type":"INTEGER"},
496
+ {"name":"item_name","type":"TEXT"},
497
+ {"name":"unit_cost","type":"REAL"}
498
+ ],
499
+ "fks":[],
500
+ "rows":[
501
+ {"item_id":101,"item_name":"Olive Oil 1L","unit_cost":4.20},
502
+ {"item_id":102,"item_name":"Canned Tuna","unit_cost":1.10},
503
+ {"item_id":103,"item_name":"Basmati Rice 5kg","unit_cost":6.30},
504
+ {"item_id":104,"item_name":"Black Tea 200g","unit_cost":2.70},
505
+ {"item_id":105,"item_name":"Peanut Butter","unit_cost":3.00},
506
+ {"item_id":106,"item_name":"Tomato Paste","unit_cost":0.95},
507
+ {"item_id":107,"item_name":"Chickpeas 1kg","unit_cost":1.60},
508
+ {"item_id":108,"item_name":"Soy Sauce 500ml","unit_cost":2.10}
509
+ ]},
510
+ {"name":"purchase_orders","pk":["po_id"],
511
+ "columns":[
512
+ {"name":"po_id","type":"INTEGER"},
513
+ {"name":"supplier_id","type":"INTEGER"},
514
+ {"name":"po_date","type":"TEXT"}
515
+ ],
516
+ "fks":[{"columns":["supplier_id"],"ref_table":"suppliers","ref_columns":["supplier_id"]}],
517
+ "rows":[
518
+ {"po_id":201,"supplier_id":1,"po_date":"2024-01-10"},
519
+ {"po_id":202,"supplier_id":2,"po_date":"2024-01-18"},
520
+ {"po_id":203,"supplier_id":3,"po_date":"2024-02-05"},
521
+ {"po_id":204,"supplier_id":1,"po_date":"2024-02-22"},
522
+ {"po_id":205,"supplier_id":5,"po_date":"2024-03-01"},
523
+ {"po_id":206,"supplier_id":4,"po_date":"2024-03-07"}
524
+ ]},
525
+ {"name":"po_lines","pk":["po_id","item_id"],
526
+ "columns":[
527
+ {"name":"po_id","type":"INTEGER"},
528
+ {"name":"item_id","type":"INTEGER"},
529
+ {"name":"qty","type":"INTEGER"},
530
+ {"name":"line_cost","type":"REAL"}
531
+ ],
532
+ "fks":[
533
+ {"columns":["po_id"],"ref_table":"purchase_orders","ref_columns":["po_id"]},
534
+ {"columns":["item_id"],"ref_table":"items","ref_columns":["item_id"]}
535
+ ],
536
+ "rows":[
537
+ {"po_id":201,"item_id":101,"qty":200,"line_cost":840.0},
538
+ {"po_id":201,"item_id":106,"qty":500,"line_cost":475.0},
539
+ {"po_id":202,"item_id":103,"qty":120,"line_cost":756.0},
540
+ {"po_id":203,"item_id":102,"qty":600,"line_cost":660.0},
541
+ {"po_id":203,"item_id":104,"qty":150,"line_cost":405.0},
542
+ {"po_id":204,"item_id":105,"qty":180,"line_cost":540.0},
543
+ {"po_id":205,"item_id":107,"qty":300,"line_cost":480.0},
544
+ {"po_id":206,"item_id":108,"qty":250,"line_cost":525.0}
545
+ ]}
546
+ ]
547
+ },
548
+ "questions":[
549
+ {"id":"W_Q01","category":"SELECT *","difficulty":1,
550
+ "prompt_md":"Show all suppliers.",
551
+ "answer_sql":["SELECT * FROM suppliers;"]},
552
+ {"id":"W_Q02","category":"SELECT columns","difficulty":1,
553
+ "prompt_md":"Return `item_name` and `unit_cost` from `items`.",
554
+ "answer_sql":["SELECT item_name, unit_cost FROM items;"]},
555
+ {"id":"W_Q03","category":"WHERE","difficulty":1,
556
+ "prompt_md":"Items costing more than 3.00 (show name, cost).",
557
+ "answer_sql":["SELECT item_name, unit_cost FROM items WHERE unit_cost > 3.00;"]},
558
+ {"id":"W_Q04","category":"Aliases","difficulty":1,
559
+ "prompt_md":"Using aliases `p` and `s`, show each `po_id` with `supplier_name`.",
560
+ "answer_sql":["SELECT p.po_id, s.supplier_name FROM purchase_orders p JOIN suppliers s ON p.supplier_id=s.supplier_id;"],
561
+ "requires_aliases":True,"required_aliases":["p","s"]},
562
+ {"id":"W_Q05","category":"JOIN (INNER)","difficulty":2,
563
+ "prompt_md":"Inner join `po_lines` and `items`; list `item_name`, total qty per item.",
564
+ "answer_sql":["SELECT i.item_name, SUM(l.qty) AS total_qty FROM po_lines l INNER JOIN items i ON l.item_id=i.item_id GROUP BY i.item_name;"]},
565
+ {"id":"W_Q06","category":"JOIN (LEFT)","difficulty":2,
566
+ "prompt_md":"Suppliers and count of POs (include those with zero). Columns `supplier_name`, `po_count`.",
567
+ "answer_sql":["SELECT s.supplier_name, COUNT(p.po_id) AS po_count FROM suppliers s LEFT JOIN purchase_orders p ON s.supplier_id=p.supplier_id GROUP BY s.supplier_name;"]},
568
+ {"id":"W_Q07","category":"VIEW","difficulty":2,
569
+ "prompt_md":"Create view `vw_po_value` with `po_id` and total `line_cost` per PO.",
570
+ "answer_sql":["CREATE VIEW vw_po_value AS SELECT po_id, SUM(line_cost) AS po_value FROM po_lines GROUP BY po_id;"]},
571
+ {"id":"W_Q08","category":"CTAS / SELECT INTO","difficulty":2,
572
+ "prompt_md":"Create table `budget_items` where unit_cost < 2.00.",
573
+ "answer_sql":[
574
+ "CREATE TABLE budget_items AS SELECT * FROM items WHERE unit_cost < 2.00;",
575
+ "SELECT * INTO budget_items FROM items WHERE unit_cost < 2.00;"
576
+ ]}
577
+ ]
578
+ }
579
 
580
+ # --- Sales tax ---
581
+ FALLBACK_PACKS["sales tax"] = {
582
+ "schema":{
583
+ "domain":"sales tax",
584
+ "tables":[
585
+ {"name":"jurisdictions","pk":["jurisdiction_id"],
586
+ "columns":[
587
+ {"name":"jurisdiction_id","type":"INTEGER"},
588
+ {"name":"name","type":"TEXT"},
589
+ {"name":"state","type":"TEXT"}
590
+ ],
591
+ "fks":[],
592
+ "rows":[
593
+ {"jurisdiction_id":1,"name":"King County","state":"WA"},
594
+ {"jurisdiction_id":2,"name":"Multnomah","state":"OR"},
595
+ {"jurisdiction_id":3,"name":"Maricopa","state":"AZ"},
596
+ {"jurisdiction_id":4,"name":"Travis","state":"TX"},
597
+ {"jurisdiction_id":5,"name":"Denver","state":"CO"},
598
+ {"jurisdiction_id":6,"name":"Miami-Dade","state":"FL"}
599
+ ]},
600
+ {"name":"tax_rates","pk":["rate_id"],
601
+ "columns":[
602
+ {"name":"rate_id","type":"INTEGER"},
603
+ {"name":"jurisdiction_id","type":"INTEGER"},
604
+ {"name":"category","type":"TEXT"},
605
+ {"name":"rate","type":"REAL"}
606
+ ],
607
+ "fks":[{"columns":["jurisdiction_id"],"ref_table":"jurisdictions","ref_columns":["jurisdiction_id"]}],
608
+ "rows":[
609
+ {"rate_id":101,"jurisdiction_id":1,"category":"general","rate":0.102},
610
+ {"rate_id":102,"jurisdiction_id":2,"category":"general","rate":0.000}, # OR no sales tax
611
+ {"rate_id":103,"jurisdiction_id":3,"category":"general","rate":0.056},
612
+ {"rate_id":104,"jurisdiction_id":4,"category":"general","rate":0.0825},
613
+ {"rate_id":105,"jurisdiction_id":5,"category":"general","rate":0.081},
614
+ {"rate_id":106,"jurisdiction_id":6,"category":"general","rate":0.070}
615
+ ]},
616
+ {"name":"transactions","pk":["txn_id"],
617
+ "columns":[
618
+ {"name":"txn_id","type":"INTEGER"},
619
+ {"name":"txn_date","type":"TEXT"},
620
+ {"name":"amount","type":"REAL"},
621
+ {"name":"category","type":"TEXT"},
622
+ {"name":"jurisdiction_id","type":"INTEGER"}
623
+ ],
624
+ "fks":[{"columns":["jurisdiction_id"],"ref_table":"jurisdictions","ref_columns":["jurisdiction_id"]}],
625
+ "rows":[
626
+ {"txn_id":201,"txn_date":"2024-01-03","amount":120.00,"category":"general","jurisdiction_id":1},
627
+ {"txn_id":202,"txn_date":"2024-01-05","amount":55.25,"category":"general","jurisdiction_id":2},
628
+ {"txn_id":203,"txn_date":"2024-01-10","amount":300.00,"category":"general","jurisdiction_id":3},
629
+ {"txn_id":204,"txn_date":"2024-02-01","amount":240.55,"category":"general","jurisdiction_id":4},
630
+ {"txn_id":205,"txn_date":"2024-02-14","amount":89.99,"category":"general","jurisdiction_id":5},
631
+ {"txn_id":206,"txn_date":"2024-03-02","amount":150.00,"category":"general","jurisdiction_id":6},
632
+ {"txn_id":207,"txn_date":"2024-03-09","amount":70.00,"category":"general","jurisdiction_id":1},
633
+ {"txn_id":208,"txn_date":"2024-03-15","amount":18.50,"category":"general","jurisdiction_id":2},
634
+ {"txn_id":209,"txn_date":"2024-03-20","amount":99.95,"category":"general","jurisdiction_id":3},
635
+ {"txn_id":210,"txn_date":"2024-03-25","amount":199.99,"category":"general","jurisdiction_id":4}
636
+ ]}
637
+ ]
638
+ },
639
+ "questions":[
640
+ {"id":"TX_Q01","category":"SELECT *","difficulty":1,
641
+ "prompt_md":"Show all records from `jurisdictions`.",
642
+ "answer_sql":["SELECT * FROM jurisdictions;"]},
643
+ {"id":"TX_Q02","category":"SELECT columns","difficulty":1,
644
+ "prompt_md":"Show `jurisdiction_id`, `rate` from `tax_rates`.",
645
+ "answer_sql":["SELECT jurisdiction_id, rate FROM tax_rates;"]},
646
+ {"id":"TX_Q03","category":"WHERE","difficulty":1,
647
+ "prompt_md":"Transactions over $150 (return `txn_id`, `amount`).",
648
+ "answer_sql":["SELECT txn_id, amount FROM transactions WHERE amount > 150;"]},
649
+ {"id":"TX_Q04","category":"Aliases","difficulty":1,
650
+ "prompt_md":"Join `transactions` (`t`) with `jurisdictions` (`j`), returning `t.txn_id`, `j.name`.",
651
+ "answer_sql":["SELECT t.txn_id, j.name FROM transactions t JOIN jurisdictions j ON t.jurisdiction_id=j.jurisdiction_id;"],
652
+ "requires_aliases":True,"required_aliases":["t","j"]},
653
+ {"id":"TX_Q05","category":"JOIN (INNER)","difficulty":2,
654
+ "prompt_md":"Compute tax for each transaction: return `txn_id`, `amount*rate` as `tax`.",
655
+ "answer_sql":["SELECT t.txn_id, t.amount * r.rate AS tax FROM transactions t INNER JOIN tax_rates r ON t.jurisdiction_id=r.jurisdiction_id AND t.category=r.category;"]},
656
+ {"id":"TX_Q06","category":"JOIN (LEFT)","difficulty":2,
657
+ "prompt_md":"Jurisdictions and count of transactions (include zero). Columns `name`, `txn_count`.",
658
+ "answer_sql":["SELECT j.name, COUNT(t.txn_id) AS txn_count FROM jurisdictions j LEFT JOIN transactions t ON j.jurisdiction_id=t.jurisdiction_id GROUP BY j.name;"]},
659
+ {"id":"TX_Q07","category":"VIEW","difficulty":2,
660
+ "prompt_md":"Create view `vw_high_txn` with transactions amount > 150.",
661
+ "answer_sql":["CREATE VIEW vw_high_txn AS SELECT * FROM transactions WHERE amount > 150;"]},
662
+ {"id":"TX_Q08","category":"CTAS / SELECT INTO","difficulty":2,
663
+ "prompt_md":"Create table `low_rate` with tax_rates where rate < 0.06.",
664
+ "answer_sql":[
665
+ "CREATE TABLE low_rate AS SELECT * FROM tax_rates WHERE rate < 0.06;",
666
+ "SELECT * INTO low_rate FROM tax_rates WHERE rate < 0.06;"
667
+ ]}
668
+ ]
669
+ }
670
+
671
+ # --- Oil & gas wells ---
672
+ FALLBACK_PACKS["oil and gas wells"] = {
673
+ "schema":{
674
+ "domain":"oil and gas wells",
675
+ "tables":[
676
+ {"name":"wells","pk":["well_id"],
677
+ "columns":[
678
+ {"name":"well_id","type":"INTEGER"},
679
+ {"name":"well_name","type":"TEXT"},
680
+ {"name":"location","type":"TEXT"},
681
+ {"name":"status","type":"TEXT"},
682
+ {"name":"depth","type":"INTEGER"}
683
+ ],
684
+ "fks":[],
685
+ "rows":[
686
+ {"well_id":1,"well_name":"Alpha-1","location":"TX-TRV","status":"producing","depth":12000},
687
+ {"well_id":2,"well_name":"Bravo-2","location":"TX-TRV","status":"shut-in","depth":10500},
688
+ {"well_id":3,"well_name":"Cedar-7","location":"OK-CAD","status":"producing","depth":9800},
689
+ {"well_id":4,"well_name":"Delta-3","location":"ND-WIL","status":"drilling","depth":7000},
690
+ {"well_id":5,"well_name":"Eagle-5","location":"CO-DNV","status":"producing","depth":8500},
691
+ {"well_id":6,"well_name":"Fox-9","location":"NM-LEA","status":"producing","depth":11000}
692
+ ]},
693
+ {"name":"operators","pk":["operator_id"],
694
+ "columns":[
695
+ {"name":"operator_id","type":"INTEGER"},
696
+ {"name":"name","type":"TEXT"},
697
+ {"name":"contact","type":"TEXT"}
698
+ ],
699
+ "fks":[],
700
+ "rows":[
701
+ {"operator_id":10,"name":"PetroMax","contact":"pmx@example.com"},
702
+ {"operator_id":11,"name":"BlueRock Energy","contact":"blue@example.com"},
703
+ {"operator_id":12,"name":"HighPlains LLC","contact":"hp@example.com"},
704
+ {"operator_id":13,"name":"Mesa Oil","contact":"mesa@example.com"}
705
+ ]},
706
+ {"name":"well_operators","pk":["well_id","operator_id"],
707
+ "columns":[
708
+ {"name":"well_id","type":"INTEGER"},
709
+ {"name":"operator_id","type":"INTEGER"},
710
+ {"name":"start_date","type":"TEXT"}
711
+ ],
712
+ "fks":[
713
+ {"columns":["well_id"],"ref_table":"wells","ref_columns":["well_id"]},
714
+ {"columns":["operator_id"],"ref_table":"operators","ref_columns":["operator_id"]}
715
+ ],
716
+ "rows":[
717
+ {"well_id":1,"operator_id":10,"start_date":"2023-01-01"},
718
+ {"well_id":2,"operator_id":10,"start_date":"2023-06-01"},
719
+ {"well_id":3,"operator_id":11,"start_date":"2022-03-15"},
720
+ {"well_id":4,"operator_id":12,"start_date":"2024-02-01"},
721
+ {"well_id":5,"operator_id":13,"start_date":"2022-10-10"},
722
+ {"well_id":6,"operator_id":11,"start_date":"2021-08-05"}
723
+ ]},
724
+ {"name":"production","pk":["prod_id"],
725
+ "columns":[
726
+ {"name":"prod_id","type":"INTEGER"},
727
+ {"name":"well_id","type":"INTEGER"},
728
+ {"name":"month","type":"TEXT"},
729
+ {"name":"oil_bbl","type":"REAL"},
730
+ {"name":"gas_mcf","type":"REAL"}
731
+ ],
732
+ "fks":[{"columns":["well_id"],"ref_table":"wells","ref_columns":["well_id"]}],
733
+ "rows":[
734
+ {"prod_id":1001,"well_id":1,"month":"2024-01","oil_bbl":1200,"gas_mcf":5000},
735
+ {"prod_id":1002,"well_id":1,"month":"2024-02","oil_bbl":1180,"gas_mcf":5100},
736
+ {"prod_id":1003,"well_id":3,"month":"2024-01","oil_bbl":900,"gas_mcf":3000},
737
+ {"prod_id":1004,"well_id":3,"month":"2024-02","oil_bbl":950,"gas_mcf":3100},
738
+ {"prod_id":1005,"well_id":5,"month":"2024-01","oil_bbl":600,"gas_mcf":2200},
739
+ {"prod_id":1006,"well_id":6,"month":"2024-01","oil_bbl":750,"gas_mcf":2600},
740
+ {"prod_id":1007,"well_id":2,"month":"2024-01","oil_bbl":0,"gas_mcf":0},
741
+ {"prod_id":1008,"well_id":4,"month":"2024-02","oil_bbl":100,"gas_mcf":400}
742
+ ]}
743
+ ]
744
+ },
745
+ "questions":[
746
+ {"id":"OG_Q01","category":"SELECT *","difficulty":1,
747
+ "prompt_md":"List all rows from `wells`.",
748
+ "answer_sql":["SELECT * FROM wells;"]},
749
+ {"id":"OG_Q02","category":"SELECT columns","difficulty":1,
750
+ "prompt_md":"Return `well_name`, `status` from `wells`.",
751
+ "answer_sql":["SELECT well_name, status FROM wells;"]},
752
+ {"id":"OG_Q03","category":"WHERE","difficulty":1,
753
+ "prompt_md":"Wells deeper than 10,000 ft (return `well_name`, `depth`).",
754
+ "answer_sql":["SELECT well_name, depth FROM wells WHERE depth > 10000;"]},
755
+ {"id":"OG_Q04","category":"Aliases","difficulty":1,
756
+ "prompt_md":"Using `w` and `o`, show `w.well_name` with `o.name` (operator).",
757
+ "answer_sql":["SELECT w.well_name, o.name FROM wells w JOIN well_operators wo ON w.well_id=wo.well_id JOIN operators o ON wo.operator_id=o.operator_id;"],
758
+ "requires_aliases":True,"required_aliases":["w","o"]},
759
+ {"id":"OG_Q05","category":"JOIN (INNER)","difficulty":2,
760
+ "prompt_md":"Join `production` to `wells` and return `well_name`, total `oil_bbl` per well.",
761
+ "answer_sql":["SELECT w.well_name, SUM(p.oil_bbl) AS total_oil FROM production p INNER JOIN wells w ON p.well_id=w.well_id GROUP BY w.well_name;"]},
762
+ {"id":"OG_Q06","category":"JOIN (LEFT)","difficulty":2,
763
+ "prompt_md":"List operators and count of wells (include operators with zero). Columns `name`, `well_count`.",
764
+ "answer_sql":["SELECT o.name, COUNT(wo.well_id) AS well_count FROM operators o LEFT JOIN well_operators wo ON o.operator_id=wo.operator_id GROUP BY o.name;"]},
765
+ {"id":"OG_Q07","category":"VIEW","difficulty":2,
766
+ "prompt_md":"Create view `vw_prod_jan` for January 2024 production.",
767
+ "answer_sql":["CREATE VIEW vw_prod_jan AS SELECT * FROM production WHERE month='2024-01';"]},
768
+ {"id":"OG_Q08","category":"CTAS / SELECT INTO","difficulty":2,
769
+ "prompt_md":"Create table `active_wells` for wells with status='producing'.",
770
+ "answer_sql":[
771
+ "CREATE TABLE active_wells AS SELECT * FROM wells WHERE status='producing';",
772
+ "SELECT * INTO active_wells FROM wells WHERE status='producing';"
773
+ ]}
774
+ ]
775
+ }
776
 
777
+ # --- Marketing ---
778
+ FALLBACK_PACKS["marketing"] = {
779
+ "schema":{
780
+ "domain":"marketing",
781
+ "tables":[
782
+ {"name":"channels","pk":["channel_id"],
783
+ "columns":[
784
+ {"name":"channel_id","type":"INTEGER"},
785
+ {"name":"channel_name","type":"TEXT"}
786
+ ],
787
+ "fks":[],
788
+ "rows":[
789
+ {"channel_id":1,"channel_name":"Search"},
790
+ {"channel_id":2,"channel_name":"Social"},
791
+ {"channel_id":3,"channel_name":"Email"},
792
+ {"channel_id":4,"channel_name":"Display"}
793
+ ]},
794
+ {"name":"campaigns","pk":["campaign_id"],
795
+ "columns":[
796
+ {"name":"campaign_id","type":"INTEGER"},
797
+ {"name":"campaign_name","type":"TEXT"},
798
+ {"name":"channel_id","type":"INTEGER"},
799
+ {"name":"start_date","type":"TEXT"},
800
+ {"name":"budget","type":"REAL"}
801
+ ],
802
+ "fks":[{"columns":["channel_id"],"ref_table":"channels","ref_columns":["channel_id"]}],
803
+ "rows":[
804
+ {"campaign_id":101,"campaign_name":"Spring Search","channel_id":1,"start_date":"2024-03-01","budget":5000},
805
+ {"campaign_id":102,"campaign_name":"Brand Social","channel_id":2,"start_date":"2024-03-05","budget":3000},
806
+ {"campaign_id":103,"campaign_name":"Welcome Email","channel_id":3,"start_date":"2024-03-07","budget":1000},
807
+ {"campaign_id":104,"campaign_name":"Retargeting","channel_id":4,"start_date":"2024-03-10","budget":2000},
808
+ {"campaign_id":105,"campaign_name":"Summer Search","channel_id":1,"start_date":"2024-06-01","budget":6000},
809
+ {"campaign_id":106,"campaign_name":"Promo Social","channel_id":2,"start_date":"2024-06-05","budget":3500}
810
+ ]},
811
+ {"name":"ad_stats","pk":["campaign_id","day"],
812
+ "columns":[
813
+ {"name":"campaign_id","type":"INTEGER"},
814
+ {"name":"day","type":"TEXT"},
815
+ {"name":"impressions","type":"INTEGER"},
816
+ {"name":"clicks","type":"INTEGER"},
817
+ {"name":"spend","type":"REAL"}
818
+ ],
819
+ "fks":[{"columns":["campaign_id"],"ref_table":"campaigns","ref_columns":["campaign_id"]}],
820
+ "rows":[
821
+ {"campaign_id":101,"day":"2024-03-12","impressions":10000,"clicks":500,"spend":200.0},
822
+ {"campaign_id":101,"day":"2024-03-13","impressions":12000,"clicks":600,"spend":230.0},
823
+ {"campaign_id":102,"day":"2024-03-12","impressions":8000,"clicks":400,"spend":150.0},
824
+ {"campaign_id":103,"day":"2024-03-12","impressions":5000,"clicks":250,"spend":80.0},
825
+ {"campaign_id":104,"day":"2024-03-12","impressions":7000,"clicks":210,"spend":110.0},
826
+ {"campaign_id":106,"day":"2024-06-12","impressions":9500,"clicks":520,"spend":190.0}
827
+ ]},
828
+ {"name":"leads","pk":["lead_id"],
829
+ "columns":[
830
+ {"name":"lead_id","type":"INTEGER"},
831
+ {"name":"campaign_id","type":"INTEGER"},
832
+ {"name":"source","type":"TEXT"},
833
+ {"name":"qualified","type":"INTEGER"},
834
+ {"name":"revenue","type":"REAL"}
835
+ ],
836
+ "fks":[{"columns":["campaign_id"],"ref_table":"campaigns","ref_columns":["campaign_id"]}],
837
+ "rows":[
838
+ {"lead_id":1,"campaign_id":101,"source":"LP1","qualified":1,"revenue":400},
839
+ {"lead_id":2,"campaign_id":101,"source":"LP2","qualified":0,"revenue":0},
840
+ {"lead_id":3,"campaign_id":102,"source":"FB","qualified":1,"revenue":250},
841
+ {"lead_id":4,"campaign_id":103,"source":"Email","qualified":1,"revenue":300},
842
+ {"lead_id":5,"campaign_id":104,"source":"DSP","qualified":0,"revenue":0},
843
+ {"lead_id":6,"campaign_id":106,"source":"FB","qualified":1,"revenue":500}
844
+ ]}
845
+ ]
846
+ },
847
+ "questions":[
848
+ {"id":"M_Q01","category":"SELECT *","difficulty":1,
849
+ "prompt_md":"Show all channels.",
850
+ "answer_sql":["SELECT * FROM channels;"]},
851
+ {"id":"M_Q02","category":"SELECT columns","difficulty":1,
852
+ "prompt_md":"Return `campaign_name`, `budget` from `campaigns`.",
853
+ "answer_sql":["SELECT campaign_name, budget FROM campaigns;"]},
854
+ {"id":"M_Q03","category":"WHERE","difficulty":1,
855
+ "prompt_md":"Campaigns with budget ≥ 3000 (show `campaign_name`, `budget`).",
856
+ "answer_sql":["SELECT campaign_name, budget FROM campaigns WHERE budget >= 3000;"]},
857
+ {"id":"M_Q04","category":"Aliases","difficulty":1,
858
+ "prompt_md":"Join `campaigns` (`c`) with `channels` (`ch`) and show `c.campaign_name`, `ch.channel_name`.",
859
+ "answer_sql":["SELECT c.campaign_name, ch.channel_name FROM campaigns c JOIN channels ch ON c.channel_id=ch.channel_id;"],
860
+ "requires_aliases":True,"required_aliases":["c","ch"]},
861
+ {"id":"M_Q05","category":"JOIN (INNER)","difficulty":2,
862
+ "prompt_md":"Join `ad_stats` with `campaigns` and return `campaign_name`, total `clicks`.",
863
+ "answer_sql":["SELECT c.campaign_name, SUM(s.clicks) AS total_clicks FROM ad_stats s INNER JOIN campaigns c ON s.campaign_id=c.campaign_id GROUP BY c.campaign_name;"]},
864
+ {"id":"M_Q06","category":"JOIN (LEFT)","difficulty":2,
865
+ "prompt_md":"Channels and number of campaigns (include channels with zero). Columns `channel_name`, `campaigns`.",
866
+ "answer_sql":["SELECT ch.channel_name, COUNT(c.campaign_id) AS campaigns FROM channels ch LEFT JOIN campaigns c ON ch.channel_id=c.channel_id GROUP BY ch.channel_name;"]},
867
+ {"id":"M_Q07","category":"VIEW","difficulty":2,
868
+ "prompt_md":"Create view `vw_cost_per_click` with `campaign_id`, `day`, `spend/clicks` as `cpc` (avoid divide-by-zero).",
869
+ "answer_sql":["CREATE VIEW vw_cost_per_click AS SELECT campaign_id, day, CASE WHEN clicks=0 THEN NULL ELSE spend*1.0/clicks END AS cpc FROM ad_stats;"]},
870
+ {"id":"M_Q08","category":"CTAS / SELECT INTO","difficulty":2,
871
+ "prompt_md":"Create table `qualified_leads` of leads where `qualified=1`.",
872
+ "answer_sql":[
873
+ "CREATE TABLE qualified_leads AS SELECT * FROM leads WHERE qualified=1;",
874
+ "SELECT * INTO qualified_leads FROM leads WHERE qualified=1;"
875
+ ]}
876
+ ]
877
+ }
878
+
879
+ # Helpers to get a pack by domain (normalize key)
880
def get_fallback_pack_for(domain_name: str) -> Tuple[Dict[str,Any], List[Dict[str,Any]]]:
    """Return the built-in ``(schema, questions)`` pair for a domain.

    The requested name is trimmed and lower-cased, then compared against the
    lower-cased keys of ``FALLBACK_PACKS``. If nothing matches (this includes
    an empty or ``None`` name) the "bookstore" pack is returned instead.
    """
    wanted = (domain_name or "").strip().lower()
    chosen = next(
        (pack for pack_key, pack in FALLBACK_PACKS.items() if pack_key.lower() == wanted),
        None,
    )
    if chosen is None:
        # No pack registered under that name: default to bookstore.
        chosen = FALLBACK_PACKS["bookstore"]
    return chosen["schema"], chosen["questions"]
888
+
889
+ # -------------------- OpenAI prompts + parsing helpers --------------------
890
+ ALLOWED_DOMAINS = ["bookstore", "retail sales", "wholesaler", "sales tax", "oil and gas wells", "marketing"]
891
+
892
+ DOMAIN_CYCLE_POS = 0 # will be set after first install
893
+
894
def _domain_prompt(prev_domain: Optional[str], preferred_domain: Optional[str]) -> str:
    """Build the prompt asking the model for a schema + question pack as JSON.

    Args:
        prev_domain: Domain used for the previous pack, if any. When given and
            different from the target, the prompt tells the model not to
            reuse it.
        preferred_domain: Domain the model must emit in the top-level
            "domain" property. Falls back to "bookstore" when empty/None.

    Returns:
        The prompt text.
    """
    target = preferred_domain or "bookstore"
    # Telling the model "domain must be X" and "previous domain was X, do not
    # reuse it" in the same prompt is contradictory, so the note about the
    # previous domain is only added when it really is a different domain.
    same_as_target = (prev_domain or "").strip().lower() == target.strip().lower()
    extra = (
        f" (previous domain was '{prev_domain}', do not reuse it)"
        if prev_domain and not same_as_target
        else ""
    )
    return f"""
Return ONLY a valid JSON object (no markdown, no prose).
You MUST set the top-level property "domain" to EXACTLY "{target}" (string match).{extra}

The JSON must have:
- "domain": "{target}"
- "tables": 3–4 table objects
- "questions": 8–12 question objects

Tables:
- SQLite-friendly. Use snake_case.
- Each table: name, pk (list), columns (list of {{name,type}}), fks (list of {{columns,ref_table,ref_columns}}), rows (8–15 small seed rows).

Questions:
- Categories among: "SELECT *", "SELECT columns", "WHERE", "Aliases",
  "JOIN (INNER)", "JOIN (LEFT)", "Aggregation", "VIEW", "CTAS / SELECT INTO".
- Include at least one LEFT JOIN, one VIEW, one CTAS or SELECT INTO.
- Provide 1–3 'answer_sql' strings per question.
- Prefer SQLite-compatible SQL. Do NOT use RIGHT/FULL OUTER JOIN.
"""
917
 
918
  def _loose_json_parse(s: str) -> Optional[dict]:
 
1022
  })
1023
  return out
1024
 
1025
+ def llm_generate_domain_and_questions(prev_domain: Optional[str], preferred_domain: Optional[str]):
1026
  if not OPENAI_AVAILABLE or not os.getenv("OPENAI_API_KEY"):
1027
  return None, "OpenAI client not available or OPENAI_API_KEY missing.", None, {"accepted_questions":0,"dropped_questions":0}
1028
  errors = []
1029
+ prompt = _domain_prompt(prev_domain, preferred_domain)
1030
  for model in _candidate_models():
1031
  try:
1032
  try:
 
1048
  obj_raw = _loose_json_parse(data_text or "")
1049
  if not obj_raw:
1050
  raise RuntimeError("Could not parse JSON from model output.")
1051
+ for k in ["domain","tables","questions"]:
1052
  if k not in obj_raw:
1053
  raise RuntimeError(f"Missing key '{k}'")
1054
+ # Force domain to preferred
1055
+ if preferred_domain and (str(obj_raw.get("domain","")).strip().lower() != preferred_domain.strip().lower()):
1056
+ raise RuntimeError(f"Model returned domain '{obj_raw.get('domain')}', expected '{preferred_domain}'.")
1057
  tables = _canon_tables(obj_raw.get("tables", []))
1058
  if not tables: raise RuntimeError("No usable tables in LLM output.")
1059
  obj_raw["tables"] = tables
 
1119
  (schema.get("domain","unknown"), json.dumps(schema)))
1120
  con.commit()
1121
 
1122
def bootstrap_domain_with_llm_or_fallback(prev_domain: Optional[str], preferred_domain: str):
    """Obtain a schema and question list from the LLM, or from the fallback pack.

    Calls ``llm_generate_domain_and_questions``; when it yields no object the
    built-in pack for ``preferred_domain`` is used and the LLM error is
    recorded in the returned info dict.

    Returns:
        ``(schema, questions, info)`` where ``info`` has the keys
        "source", "model", "error", "accepted" and "dropped".
    """
    generated, failure, model_name, counts = llm_generate_domain_and_questions(prev_domain, preferred_domain)

    if obj_is_usable := generated is not None:
        llm_questions = generated["questions"]
        llm_info = {
            "source":"openai",
            "model":model_name,
            "error":None,
            "accepted":counts["accepted_questions"],
            "dropped":counts["dropped_questions"],
        }
        return generated, llm_questions, llm_info

    # LLM path unavailable or failed: use the domain-specific fallback pack.
    fallback_schema, fallback_questions = get_fallback_pack_for(preferred_domain)
    fallback_info = {"source":"fallback","model":None,"error":failure,"accepted":0,"dropped":0}
    return fallback_schema, fallback_questions, fallback_info
1130
 
1131
def install_schema_and_prepare_questions(prev_domain: Optional[str], preferred_domain: str):
    """Install a schema on ``CONN`` and return it with its questions.

    The schema comes from ``bootstrap_domain_with_llm_or_fallback`` and is
    installed first. If that source produced no questions, the fallback pack
    for ``preferred_domain`` is installed on top and returned instead, with an
    info dict marking the source as "fallback".

    Returns:
        ``(schema, questions, info)``.
    """
    schema, questions, info = bootstrap_domain_with_llm_or_fallback(prev_domain, preferred_domain)
    install_schema(CONN, schema)
    if questions:
        return schema, questions, info

    # Nothing usable to ask: switch to the fallback pack for the same domain.
    pack_schema, pack_questions = get_fallback_pack_for(preferred_domain)
    install_schema(CONN, pack_schema)
    pack_info = {
        "source":"fallback",
        "model":None,
        "error":"No usable questions from LLM",
        "accepted":0,
        "dropped":0,
    }
    return pack_schema, pack_questions, pack_info
1140
 
1141
+ # -------------------- Domain cycling --------------------
1142
def _norm(s: str) -> str:
    """Return ``s`` trimmed and lower-cased; falsy input yields ``""``."""
    if not s:
        return ""
    return s.strip().lower()
1144
+
1145
def _index_of_domain(name: str) -> int:
    """Return the position of ``name`` in ``ALLOWED_DOMAINS``.

    Both sides are compared after ``_norm``. A name that is not in the list
    maps to index 0.
    """
    wanted = _norm(name)
    return next(
        (pos for pos, domain in enumerate(ALLOWED_DOMAINS) if _norm(domain) == wanted),
        0,
    )
1151
+
1152
def choose_next_domain(prev_domain: Optional[str]) -> str:
    """Pick the domain that follows ``prev_domain`` in ``ALLOWED_DOMAINS``.

    With no previous domain the first entry is chosen. Otherwise the entry
    after the previous one is chosen, wrapping around at the end of the list.
    The chosen position is also stored in the module-level
    ``DOMAIN_CYCLE_POS``.
    """
    global DOMAIN_CYCLE_POS
    if prev_domain is None:
        next_pos = 0
    else:
        next_pos = (_index_of_domain(prev_domain) + 1) % len(ALLOWED_DOMAINS)
    DOMAIN_CYCLE_POS = next_pos
    return ALLOWED_DOMAINS[next_pos]
1160
+
1161
+ # -------------------- Initialize first domain --------------------
1162
+ # Start with bookstore for determinism on first boot
1163
+ CURRENT_SCHEMA, CURRENT_QS, CURRENT_INFO = install_schema_and_prepare_questions(prev_domain=None, preferred_domain="bookstore")
1164
+ DOMAIN_CYCLE_POS = _index_of_domain(CURRENT_SCHEMA.get("domain","bookstore"))
1165
 
1166
  # -------------------- Progress + mastery --------------------
1167
  def upsert_user(con: sqlite3.Connection, user_id: str, name: str):
 
1195
  return pd.read_sql_query("SELECT * FROM attempts WHERE user_id=? ORDER BY id DESC", con, params=(user_id,))
1196
 
1197
  def pick_next_question(user_id: str) -> Dict[str,Any]:
1198
+ pool = CURRENT_QS if CURRENT_QS else get_fallback_pack_for(CURRENT_SCHEMA.get("domain","bookstore"))[1]
1199
  df = fetch_attempts(CONN, user_id)
1200
  stats = topic_stats(df)
1201
  stats = stats.sort_values(by=["accuracy","attempts"], ascending=[True, True]) if not stats.empty else stats
 
1488
  }.get(cat, "Identify keys from the schema and join on them.")
1489
  return gr.update(value=f"**Hint:** {hint}", visible=True)
1490
 
1491
def export_progress(user_name: str):
    """Write a user's attempts to a CSV file and return the file's path.

    The user id is derived from ``user_name`` by lower-casing it, joining its
    whitespace-separated words with "-" and truncating to 64 characters. The
    user's rows from ``attempts`` (newest first) are written to
    ``<EXPORT_DIR>/<name>_progress.csv``; when there are none, a one-row
    placeholder ("No attempts yet.") is written instead.

    Args:
        user_name: Display name typed by the user (untrusted input).

    Returns:
        Absolute path of the CSV file, or ``None`` when ``user_name`` is
        empty or only whitespace.
    """
    slug = "-".join((user_name or "").lower().split())
    if not slug:
        return None
    user_id = slug[:64]
    with DB_LOCK:
        df = pd.read_sql_query(
            "SELECT * FROM attempts WHERE user_id=? ORDER BY id DESC",
            CONN,
            params=(user_id,),
        )
    os.makedirs(EXPORT_DIR, exist_ok=True)
    # The name comes straight from a text box, so it may contain path
    # separators ("../x", "a/b"). Replace everything except word characters,
    # "." and "-" so the file can only ever be created inside EXPORT_DIR.
    # The query above still uses the unmodified user_id.
    safe_name = re.sub(r"[^\w.\-]", "_", user_id)
    path = os.path.abspath(os.path.join(EXPORT_DIR, f"{safe_name}_progress.csv"))
    export_df = pd.DataFrame([{"info":"No attempts yet."}]) if df.empty else df
    export_df.to_csv(path, index=False)
    return path
1501
+
1502
  def _domain_status_md():
1503
+ if CURRENT_INFO.get("source","openai") == "openai":
1504
  accepted = CURRENT_INFO.get("accepted",0); dropped = CURRENT_INFO.get("dropped",0)
1505
  return (f"✅ **Domain via OpenAI** `{CURRENT_INFO.get('model','?')}` → **{CURRENT_SCHEMA.get('domain','?')}**. "
1506
  f"Accepted questions: {accepted}, dropped: {dropped}. \n"
 
1518
  """)
1519
  return df["name"].tolist() if not df.empty else ["(no tables)"]
1520
 
1521
+ # Always reseed a question on randomize (creates a guest session if needed)
1522
  def regenerate_domain(session: dict):
1523
  global CURRENT_SCHEMA, CURRENT_QS, CURRENT_INFO
1524
  prev = CURRENT_SCHEMA.get("domain") if CURRENT_SCHEMA else None
1525
+ preferred = choose_next_domain(prev)
1526
+ CURRENT_SCHEMA, CURRENT_QS, CURRENT_INFO = install_schema_and_prepare_questions(prev_domain=prev, preferred_domain=preferred)
1527
  erd = draw_dynamic_erd(CURRENT_SCHEMA)
1528
  status = _domain_status_md()
1529
 
 
1537
  q = pick_next_question(session["user_id"])
1538
  session.update({"qid": q["id"], "q": q, "start_ts": time.time()})
1539
 
 
1540
  stats = topic_stats(fetch_attempts(CONN, session["user_id"]))
1541
  empty_df = pd.DataFrame()
 
 
1542
  dd_update = gr.update(choices=list_tables_for_preview(), value=None)
1543
 
1544
  return (
 
1643
  outputs=[feedback_md],
1644
  )
1645
  export_btn.click(
1646
+ export_progress,
1647
  inputs=[export_name],
1648
  outputs=[export_file],
1649
  )
1650
+ regen_btn.click(
1651
  regenerate_domain,
1652
  inputs=[session_state],
1653
  outputs=[regen_fb, er_image, prompt_md, sql_input, tbl_dd, mastery_df, result_df, session_state],