Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,6 +4,7 @@
|
|
| 4 |
# - Students practice SELECT, WHERE, JOINs (INNER/LEFT), aliases, views, CTAS/SELECT INTO.
|
| 5 |
# - Validator enforces columns only when the prompt asks; otherwise focuses on rows.
|
| 6 |
# - ERD shows all FK edges in light gray and dynamically HIGHLIGHTS edges implied by JOINs.
|
|
|
|
| 7 |
|
| 8 |
import os
|
| 9 |
import re
|
|
@@ -232,140 +233,686 @@ def init_progress_tables(con: sqlite3.Connection):
|
|
| 232 |
|
| 233 |
init_progress_tables(CONN)
|
| 234 |
|
| 235 |
-
# --------------------
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
]
|
| 308 |
}
|
| 309 |
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
|
| 348 |
-
#
|
| 349 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
|
| 351 |
-
|
| 352 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 353 |
return f"""
|
| 354 |
Return ONLY a valid JSON object (no markdown, no prose).
|
| 355 |
-
|
| 356 |
|
| 357 |
-
|
| 358 |
-
-
|
| 359 |
-
-
|
| 360 |
-
|
| 361 |
-
- Questions: categories among "SELECT *", "SELECT columns", "WHERE", "Aliases",
|
| 362 |
-
"JOIN (INNER)", "JOIN (LEFT)", "Aggregation", "VIEW", "CTAS / SELECT INTO".
|
| 363 |
-
Include at least one LEFT JOIN, one VIEW creation, one CTAS or SELECT INTO.
|
| 364 |
-
Provide 1–3 'answer_sql' strings per question. Prefer SQLite-compatible SQL. Do NOT use RIGHT/FULL OUTER JOIN.
|
| 365 |
-
For 1–2 questions, set requires_aliases=true and list required_aliases.
|
| 366 |
|
| 367 |
-
|
| 368 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 369 |
"""
|
| 370 |
|
| 371 |
def _loose_json_parse(s: str) -> Optional[dict]:
|
|
@@ -475,11 +1022,11 @@ def _canon_tables(tables: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
| 475 |
})
|
| 476 |
return out
|
| 477 |
|
| 478 |
-
def llm_generate_domain_and_questions(prev_domain: Optional[str]):
|
| 479 |
if not OPENAI_AVAILABLE or not os.getenv("OPENAI_API_KEY"):
|
| 480 |
return None, "OpenAI client not available or OPENAI_API_KEY missing.", None, {"accepted_questions":0,"dropped_questions":0}
|
| 481 |
errors = []
|
| 482 |
-
prompt = _domain_prompt(prev_domain)
|
| 483 |
for model in _candidate_models():
|
| 484 |
try:
|
| 485 |
try:
|
|
@@ -501,9 +1048,12 @@ def llm_generate_domain_and_questions(prev_domain: Optional[str]):
|
|
| 501 |
obj_raw = _loose_json_parse(data_text or "")
|
| 502 |
if not obj_raw:
|
| 503 |
raise RuntimeError("Could not parse JSON from model output.")
|
| 504 |
-
for k in
|
| 505 |
if k not in obj_raw:
|
| 506 |
raise RuntimeError(f"Missing key '{k}'")
|
|
|
|
|
|
|
|
|
|
| 507 |
tables = _canon_tables(obj_raw.get("tables", []))
|
| 508 |
if not tables: raise RuntimeError("No usable tables in LLM output.")
|
| 509 |
obj_raw["tables"] = tables
|
|
@@ -569,21 +1119,49 @@ def install_schema(con: sqlite3.Connection, schema: Dict[str,Any]):
|
|
| 569 |
(schema.get("domain","unknown"), json.dumps(schema)))
|
| 570 |
con.commit()
|
| 571 |
|
| 572 |
-
def bootstrap_domain_with_llm_or_fallback(prev_domain: Optional[str]):
|
| 573 |
-
obj, err, model_used, stats = llm_generate_domain_and_questions(prev_domain)
|
| 574 |
if obj is None:
|
| 575 |
-
|
|
|
|
|
|
|
|
|
|
| 576 |
return obj, obj["questions"], {"source":"openai","model":model_used,"error":None,"accepted":stats["accepted_questions"],"dropped":stats["dropped_questions"]}
|
| 577 |
|
| 578 |
-
def install_schema_and_prepare_questions(prev_domain: Optional[str]):
|
| 579 |
-
schema, questions, info = bootstrap_domain_with_llm_or_fallback(prev_domain)
|
| 580 |
install_schema(CONN, schema)
|
| 581 |
if not questions:
|
| 582 |
-
|
|
|
|
|
|
|
|
|
|
| 583 |
return schema, questions, info
|
| 584 |
|
| 585 |
-
# --------------------
|
| 586 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 587 |
|
| 588 |
# -------------------- Progress + mastery --------------------
|
| 589 |
def upsert_user(con: sqlite3.Connection, user_id: str, name: str):
|
|
@@ -617,7 +1195,7 @@ def fetch_attempts(con: sqlite3.Connection, user_id: str) -> pd.DataFrame:
|
|
| 617 |
return pd.read_sql_query("SELECT * FROM attempts WHERE user_id=? ORDER BY id DESC", con, params=(user_id,))
|
| 618 |
|
| 619 |
def pick_next_question(user_id: str) -> Dict[str,Any]:
|
| 620 |
-
pool = CURRENT_QS if CURRENT_QS else
|
| 621 |
df = fetch_attempts(CONN, user_id)
|
| 622 |
stats = topic_stats(df)
|
| 623 |
stats = stats.sort_values(by=["accuracy","attempts"], ascending=[True, True]) if not stats.empty else stats
|
|
@@ -910,8 +1488,19 @@ def show_hint(session: dict):
|
|
| 910 |
}.get(cat, "Identify keys from the schema and join on them.")
|
| 911 |
return gr.update(value=f"**Hint:** {hint}", visible=True)
|
| 912 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 913 |
def _domain_status_md():
|
| 914 |
-
if CURRENT_INFO.get("source","openai"):
|
| 915 |
accepted = CURRENT_INFO.get("accepted",0); dropped = CURRENT_INFO.get("dropped",0)
|
| 916 |
return (f"✅ **Domain via OpenAI** `{CURRENT_INFO.get('model','?')}` → **{CURRENT_SCHEMA.get('domain','?')}**. "
|
| 917 |
f"Accepted questions: {accepted}, dropped: {dropped}. \n"
|
|
@@ -929,11 +1518,12 @@ def list_tables_for_preview():
|
|
| 929 |
""")
|
| 930 |
return df["name"].tolist() if not df.empty else ["(no tables)"]
|
| 931 |
|
| 932 |
-
#
|
| 933 |
def regenerate_domain(session: dict):
|
| 934 |
global CURRENT_SCHEMA, CURRENT_QS, CURRENT_INFO
|
| 935 |
prev = CURRENT_SCHEMA.get("domain") if CURRENT_SCHEMA else None
|
| 936 |
-
|
|
|
|
| 937 |
erd = draw_dynamic_erd(CURRENT_SCHEMA)
|
| 938 |
status = _domain_status_md()
|
| 939 |
|
|
@@ -947,11 +1537,8 @@ def regenerate_domain(session: dict):
|
|
| 947 |
q = pick_next_question(session["user_id"])
|
| 948 |
session.update({"qid": q["id"], "q": q, "start_ts": time.time()})
|
| 949 |
|
| 950 |
-
# Fresh mastery and cleared result preview
|
| 951 |
stats = topic_stats(fetch_attempts(CONN, session["user_id"]))
|
| 952 |
empty_df = pd.DataFrame()
|
| 953 |
-
|
| 954 |
-
# Refresh dropdown
|
| 955 |
dd_update = gr.update(choices=list_tables_for_preview(), value=None)
|
| 956 |
|
| 957 |
return (
|
|
@@ -1056,11 +1643,11 @@ with gr.Blocks(title="Adaptive SQL Trainer — Randomized Domains") as demo:
|
|
| 1056 |
outputs=[feedback_md],
|
| 1057 |
)
|
| 1058 |
export_btn.click(
|
| 1059 |
-
|
| 1060 |
inputs=[export_name],
|
| 1061 |
outputs=[export_file],
|
| 1062 |
)
|
| 1063 |
-
regen_btn.click(
|
| 1064 |
regenerate_domain,
|
| 1065 |
inputs=[session_state],
|
| 1066 |
outputs=[regen_fb, er_image, prompt_md, sql_input, tbl_dd, mastery_df, result_df, session_state],
|
|
|
|
| 4 |
# - Students practice SELECT, WHERE, JOINs (INNER/LEFT), aliases, views, CTAS/SELECT INTO.
|
| 5 |
# - Validator enforces columns only when the prompt asks; otherwise focuses on rows.
|
| 6 |
# - ERD shows all FK edges in light gray and dynamically HIGHLIGHTS edges implied by JOINs.
|
| 7 |
+
# - Domain picker now round-robins across: bookstore, retail sales, wholesaler, sales tax, oil & gas wells, marketing.
|
| 8 |
|
| 9 |
import os
|
| 10 |
import re
|
|
|
|
| 233 |
|
| 234 |
init_progress_tables(CONN)
|
| 235 |
|
| 236 |
+
# -------------------- Built-in fallback domain packs --------------------
|
| 237 |
+
# Each pack: {"schema": {...}, "questions": [...]}
|
| 238 |
+
FALLBACK_PACKS: Dict[str, Dict[str, Any]] = {}
|
| 239 |
+
|
| 240 |
+
# --- Bookstore (existing) ---
|
| 241 |
+
FALLBACK_PACKS["bookstore"] = {
|
| 242 |
+
"schema": {
|
| 243 |
+
"domain": "bookstore",
|
| 244 |
+
"tables": [
|
| 245 |
+
{
|
| 246 |
+
"name": "authors",
|
| 247 |
+
"pk": ["author_id"],
|
| 248 |
+
"columns": [
|
| 249 |
+
{"name":"author_id","type":"INTEGER"},
|
| 250 |
+
{"name":"name","type":"TEXT"},
|
| 251 |
+
{"name":"country","type":"TEXT"},
|
| 252 |
+
{"name":"birth_year","type":"INTEGER"},
|
| 253 |
+
],
|
| 254 |
+
"fks": [],
|
| 255 |
+
"rows": [
|
| 256 |
+
{"author_id":1,"name":"Isaac Asimov","country":"USA","birth_year":1920},
|
| 257 |
+
{"author_id":2,"name":"Ursula K. Le Guin","country":"USA","birth_year":1929},
|
| 258 |
+
{"author_id":3,"name":"Haruki Murakami","country":"Japan","birth_year":1949},
|
| 259 |
+
{"author_id":4,"name":"Chinua Achebe","country":"Nigeria","birth_year":1930},
|
| 260 |
+
{"author_id":5,"name":"Jane Austen","country":"UK","birth_year":1775},
|
| 261 |
+
{"author_id":6,"name":"J.K. Rowling","country":"UK","birth_year":1965},
|
| 262 |
+
{"author_id":7,"name":"Yuval Noah Harari","country":"Israel","birth_year":1976},
|
| 263 |
+
{"author_id":8,"name":"New Author","country":"Nowhere","birth_year":1990},
|
| 264 |
+
],
|
| 265 |
+
},
|
| 266 |
+
{
|
| 267 |
+
"name": "bookstores",
|
| 268 |
+
"pk": ["store_id"],
|
| 269 |
+
"columns": [
|
| 270 |
+
{"name":"store_id","type":"INTEGER"},
|
| 271 |
+
{"name":"name","type":"TEXT"},
|
| 272 |
+
{"name":"city","type":"TEXT"},
|
| 273 |
+
{"name":"state","type":"TEXT"},
|
| 274 |
+
],
|
| 275 |
+
"fks": [],
|
| 276 |
+
"rows": [
|
| 277 |
+
{"store_id":1,"name":"Downtown Books","city":"Oklahoma City","state":"OK"},
|
| 278 |
+
{"store_id":2,"name":"Harbor Books","city":"Seattle","state":"WA"},
|
| 279 |
+
{"store_id":3,"name":"Desert Pages","city":"Phoenix","state":"AZ"},
|
| 280 |
+
],
|
| 281 |
+
},
|
| 282 |
+
{
|
| 283 |
+
"name": "books",
|
| 284 |
+
"pk": ["book_id"],
|
| 285 |
+
"columns": [
|
| 286 |
+
{"name":"book_id","type":"INTEGER"},
|
| 287 |
+
{"name":"title","type":"TEXT"},
|
| 288 |
+
{"name":"author_id","type":"INTEGER"},
|
| 289 |
+
{"name":"store_id","type":"INTEGER"},
|
| 290 |
+
{"name":"category","type":"TEXT"},
|
| 291 |
+
{"name":"price","type":"REAL"},
|
| 292 |
+
{"name":"published_year","type":"INTEGER"},
|
| 293 |
+
],
|
| 294 |
+
"fks": [
|
| 295 |
+
{"columns":["author_id"],"ref_table":"authors","ref_columns":["author_id"]},
|
| 296 |
+
{"columns":["store_id"],"ref_table":"bookstores","ref_columns":["store_id"]},
|
| 297 |
+
],
|
| 298 |
+
"rows": [
|
| 299 |
+
{"book_id":101,"title":"Foundation","author_id":1,"store_id":1,"category":"Sci-Fi","price":14.99,"published_year":1951},
|
| 300 |
+
{"book_id":102,"title":"I, Robot","author_id":1,"store_id":1,"category":"Sci-Fi","price":12.50,"published_year":1950},
|
| 301 |
+
{"book_id":103,"title":"The Left Hand of Darkness","author_id":2,"store_id":2,"category":"Sci-Fi","price":16.00,"published_year":1969},
|
| 302 |
+
{"book_id":104,"title":"A Wizard of Earthsea","author_id":2,"store_id":2,"category":"Fantasy","price":11.50,"published_year":1968},
|
| 303 |
+
{"book_id":105,"title":"Norwegian Wood","author_id":3,"store_id":3,"category":"Fiction","price":18.00,"published_year":1987},
|
| 304 |
+
{"book_id":106,"title":"Kafka on the Shore","author_id":3,"store_id":1,"category":"Fiction","price":21.00,"published_year":2002},
|
| 305 |
+
{"book_id":107,"title":"Things Fall Apart","author_id":4,"store_id":1,"category":"Fiction","price":10.00,"published_year":1958},
|
| 306 |
+
{"book_id":108,"title":"Pride and Prejudice","author_id":5,"store_id":2,"category":"Fiction","price":9.00,"published_year":1813},
|
| 307 |
+
{"book_id":109,"title":"Harry Potter and the Sorcerer's Stone","author_id":6,"store_id":3,"category":"Children","price":22.00,"published_year":1997},
|
| 308 |
+
{"book_id":110,"title":"Harry Potter and the Chamber of Secrets","author_id":6,"store_id":3,"category":"Children","price":23.00,"published_year":1998},
|
| 309 |
+
{"book_id":111,"title":"Sapiens","author_id":7,"store_id":1,"category":"History","price":26.00,"published_year":2011},
|
| 310 |
+
{"book_id":112,"title":"Homo Deus","author_id":7,"store_id":2,"category":"History","price":28.00,"published_year":2015},
|
| 311 |
+
],
|
| 312 |
+
},
|
| 313 |
+
]
|
| 314 |
+
},
|
| 315 |
+
"questions": [
|
| 316 |
+
{"id":"B_Q01","category":"SELECT *","difficulty":1,
|
| 317 |
+
"prompt_md":"Select all rows and columns from `authors`.",
|
| 318 |
+
"answer_sql":["SELECT * FROM authors;"]},
|
| 319 |
+
{"id":"B_Q02","category":"SELECT columns","difficulty":1,
|
| 320 |
+
"prompt_md":"Show `title` and `price` from `books`.",
|
| 321 |
+
"answer_sql":["SELECT title, price FROM books;"]},
|
| 322 |
+
{"id":"B_Q03","category":"WHERE","difficulty":1,
|
| 323 |
+
"prompt_md":"List Sci‑Fi books under $15 (show title, price).",
|
| 324 |
+
"answer_sql":["SELECT title, price FROM books WHERE category='Sci-Fi' AND price < 15;"]},
|
| 325 |
+
{"id":"B_Q04","category":"Aliases","difficulty":1,
|
| 326 |
+
"prompt_md":"Using aliases `b` and `a`, join `books` to `authors` and show `b.title` and `a.name` as `author_name`.",
|
| 327 |
+
"answer_sql":["SELECT b.title, a.name AS author_name FROM books b JOIN authors a ON b.author_id=a.author_id;"],
|
| 328 |
+
"requires_aliases":True,"required_aliases":["a","b"]},
|
| 329 |
+
{"id":"B_Q05","category":"JOIN (INNER)","difficulty":2,
|
| 330 |
+
"prompt_md":"Inner join `books` and `bookstores`. Return `title`, `name` as `store`.",
|
| 331 |
+
"answer_sql":["SELECT b.title, s.name AS store FROM books b INNER JOIN bookstores s ON b.store_id=s.store_id;"]},
|
| 332 |
+
{"id":"B_Q06","category":"JOIN (LEFT)","difficulty":2,
|
| 333 |
+
"prompt_md":"List each author and their number of books (include authors with zero): columns `name`, `book_count`.",
|
| 334 |
+
"answer_sql":["SELECT a.name, COUNT(b.book_id) AS book_count FROM authors a LEFT JOIN books b ON a.author_id=b.author_id GROUP BY a.name;"]},
|
| 335 |
+
{"id":"B_Q07","category":"VIEW","difficulty":2,
|
| 336 |
+
"prompt_md":"Create a view `vw_pricy` with `title`, `price` for books priced > 25.",
|
| 337 |
+
"answer_sql":["CREATE VIEW vw_pricy AS SELECT title, price FROM books WHERE price > 25;"]},
|
| 338 |
+
{"id":"B_Q08","category":"CTAS / SELECT INTO","difficulty":2,
|
| 339 |
+
"prompt_md":"Create a table `cheap_books` containing books priced < 12. Use CTAS or SELECT INTO.",
|
| 340 |
+
"answer_sql":[
|
| 341 |
+
"CREATE TABLE cheap_books AS SELECT * FROM books WHERE price < 12;",
|
| 342 |
+
"SELECT * INTO cheap_books FROM books WHERE price < 12;"
|
| 343 |
+
]},
|
| 344 |
]
|
| 345 |
}
|
| 346 |
|
| 347 |
+
# --- Retail sales ---
|
| 348 |
+
FALLBACK_PACKS["retail sales"] = {
|
| 349 |
+
"schema": {
|
| 350 |
+
"domain": "retail sales",
|
| 351 |
+
"tables": [
|
| 352 |
+
{
|
| 353 |
+
"name":"customers","pk":["customer_id"],
|
| 354 |
+
"columns":[
|
| 355 |
+
{"name":"customer_id","type":"INTEGER"},
|
| 356 |
+
{"name":"name","type":"TEXT"},
|
| 357 |
+
{"name":"city","type":"TEXT"},
|
| 358 |
+
{"name":"state","type":"TEXT"}
|
| 359 |
+
],
|
| 360 |
+
"fks":[],
|
| 361 |
+
"rows":[
|
| 362 |
+
{"customer_id":1,"name":"Ava Reed","city":"Seattle","state":"WA"},
|
| 363 |
+
{"customer_id":2,"name":"Mason Ortiz","city":"Portland","state":"OR"},
|
| 364 |
+
{"customer_id":3,"name":"Noah Patel","city":"Phoenix","state":"AZ"},
|
| 365 |
+
{"customer_id":4,"name":"Emma Kim","city":"San Diego","state":"CA"},
|
| 366 |
+
{"customer_id":5,"name":"Olivia Park","city":"Dallas","state":"TX"},
|
| 367 |
+
{"customer_id":6,"name":"Liam Gray","city":"Denver","state":"CO"},
|
| 368 |
+
{"customer_id":7,"name":"Sophia Lee","city":"Boston","state":"MA"},
|
| 369 |
+
{"customer_id":8,"name":"Elijah Hall","city":"Miami","state":"FL"}
|
| 370 |
+
]
|
| 371 |
+
},
|
| 372 |
+
{
|
| 373 |
+
"name":"products","pk":["product_id"],
|
| 374 |
+
"columns":[
|
| 375 |
+
{"name":"product_id","type":"INTEGER"},
|
| 376 |
+
{"name":"product_name","type":"TEXT"},
|
| 377 |
+
{"name":"category","type":"TEXT"},
|
| 378 |
+
{"name":"price","type":"REAL"}
|
| 379 |
+
],
|
| 380 |
+
"fks":[],
|
| 381 |
+
"rows":[
|
| 382 |
+
{"product_id":101,"product_name":"Coffee Maker","category":"Home","price":49.99},
|
| 383 |
+
{"product_id":102,"product_name":"Electric Kettle","category":"Home","price":29.99},
|
| 384 |
+
{"product_id":103,"product_name":"Headphones","category":"Electronics","price":79.00},
|
| 385 |
+
{"product_id":104,"product_name":"USB-C Cable","category":"Electronics","price":9.99},
|
| 386 |
+
{"product_id":105,"product_name":"Notebook","category":"Stationery","price":3.49},
|
| 387 |
+
{"product_id":106,"product_name":"Desk Lamp","category":"Home","price":19.99},
|
| 388 |
+
{"product_id":107,"product_name":"T-Shirt","category":"Clothing","price":15.00},
|
| 389 |
+
{"product_id":108,"product_name":"Sneakers","category":"Clothing","price":65.00}
|
| 390 |
+
]
|
| 391 |
+
},
|
| 392 |
+
{
|
| 393 |
+
"name":"orders","pk":["order_id"],
|
| 394 |
+
"columns":[
|
| 395 |
+
{"name":"order_id","type":"INTEGER"},
|
| 396 |
+
{"name":"customer_id","type":"INTEGER"},
|
| 397 |
+
{"name":"order_date","type":"TEXT"}
|
| 398 |
+
],
|
| 399 |
+
"fks":[{"columns":["customer_id"],"ref_table":"customers","ref_columns":["customer_id"]}],
|
| 400 |
+
"rows":[
|
| 401 |
+
{"order_id":201,"customer_id":1,"order_date":"2024-01-05"},
|
| 402 |
+
{"order_id":202,"customer_id":2,"order_date":"2024-01-07"},
|
| 403 |
+
{"order_id":203,"customer_id":1,"order_date":"2024-01-12"},
|
| 404 |
+
{"order_id":204,"customer_id":3,"order_date":"2024-02-01"},
|
| 405 |
+
{"order_id":205,"customer_id":4,"order_date":"2024-02-10"},
|
| 406 |
+
{"order_id":206,"customer_id":5,"order_date":"2024-03-02"},
|
| 407 |
+
{"order_id":207,"customer_id":6,"order_date":"2024-03-03"},
|
| 408 |
+
{"order_id":208,"customer_id":7,"order_date":"2024-03-09"},
|
| 409 |
+
{"order_id":209,"customer_id":8,"order_date":"2024-03-15"},
|
| 410 |
+
{"order_id":210,"customer_id":3,"order_date":"2024-03-20"}
|
| 411 |
+
]
|
| 412 |
+
},
|
| 413 |
+
{
|
| 414 |
+
"name":"order_items","pk":["order_id","product_id"],
|
| 415 |
+
"columns":[
|
| 416 |
+
{"name":"order_id","type":"INTEGER"},
|
| 417 |
+
{"name":"product_id","type":"INTEGER"},
|
| 418 |
+
{"name":"qty","type":"INTEGER"},
|
| 419 |
+
{"name":"unit_price","type":"REAL"}
|
| 420 |
+
],
|
| 421 |
+
"fks":[
|
| 422 |
+
{"columns":["order_id"],"ref_table":"orders","ref_columns":["order_id"]},
|
| 423 |
+
{"columns":["product_id"],"ref_table":"products","ref_columns":["product_id"]}
|
| 424 |
+
],
|
| 425 |
+
"rows":[
|
| 426 |
+
{"order_id":201,"product_id":101,"qty":1,"unit_price":49.99},
|
| 427 |
+
{"order_id":201,"product_id":104,"qty":2,"unit_price":9.99},
|
| 428 |
+
{"order_id":202,"product_id":107,"qty":3,"unit_price":15.00},
|
| 429 |
+
{"order_id":203,"product_id":103,"qty":1,"unit_price":79.00},
|
| 430 |
+
{"order_id":203,"product_id":105,"qty":5,"unit_price":3.49},
|
| 431 |
+
{"order_id":204,"product_id":102,"qty":2,"unit_price":29.99},
|
| 432 |
+
{"order_id":205,"product_id":108,"qty":1,"unit_price":65.00},
|
| 433 |
+
{"order_id":206,"product_id":106,"qty":2,"unit_price":19.99},
|
| 434 |
+
{"order_id":207,"product_id":104,"qty":4,"unit_price":9.99},
|
| 435 |
+
{"order_id":208,"product_id":101,"qty":1,"unit_price":49.99},
|
| 436 |
+
{"order_id":209,"product_id":107,"qty":2,"unit_price":15.00},
|
| 437 |
+
{"order_id":210,"product_id":103,"qty":1,"unit_price":79.00}
|
| 438 |
+
]
|
| 439 |
+
}
|
| 440 |
+
]
|
| 441 |
+
},
|
| 442 |
+
"questions":[
|
| 443 |
+
{"id":"RS_Q01","category":"SELECT *","difficulty":1,
|
| 444 |
+
"prompt_md":"Show everything from `customers`.",
|
| 445 |
+
"answer_sql":["SELECT * FROM customers;"]},
|
| 446 |
+
{"id":"RS_Q02","category":"SELECT columns","difficulty":1,
|
| 447 |
+
"prompt_md":"List product name and price from `products`.",
|
| 448 |
+
"answer_sql":["SELECT product_name, price FROM products;"]},
|
| 449 |
+
{"id":"RS_Q03","category":"WHERE","difficulty":1,
|
| 450 |
+
"prompt_md":"Orders placed in March 2024 (return `order_id`, `order_date`).",
|
| 451 |
+
"answer_sql":["SELECT order_id, order_date FROM orders WHERE order_date BETWEEN '2024-03-01' AND '2024-03-31';"]},
|
| 452 |
+
{"id":"RS_Q04","category":"Aliases","difficulty":1,
|
| 453 |
+
"prompt_md":"Join `orders` (alias `o`) with `customers` (alias `c`) and show `o.order_id`, `c.name`.",
|
| 454 |
+
"answer_sql":["SELECT o.order_id, c.name FROM orders o JOIN customers c ON o.customer_id=c.customer_id;"],
|
| 455 |
+
"requires_aliases":True,"required_aliases":["o","c"]},
|
| 456 |
+
{"id":"RS_Q05","category":"JOIN (INNER)","difficulty":2,
|
| 457 |
+
"prompt_md":"Inner join `order_items` with `products` to show items where qty ≥ 3. Return `product_name`, `qty`.",
|
| 458 |
+
"answer_sql":["SELECT p.product_name, oi.qty FROM order_items oi INNER JOIN products p ON oi.product_id=p.product_id WHERE oi.qty >= 3;"]},
|
| 459 |
+
{"id":"RS_Q06","category":"JOIN (LEFT)","difficulty":2,
|
| 460 |
+
"prompt_md":"Customers and their number of orders (include zero). Columns: `name`, `order_count`.",
|
| 461 |
+
"answer_sql":["SELECT c.name, COUNT(o.order_id) AS order_count FROM customers c LEFT JOIN orders o ON c.customer_id=o.customer_id GROUP BY c.name;"]},
|
| 462 |
+
{"id":"RS_Q07","category":"VIEW","difficulty":2,
|
| 463 |
+
"prompt_md":"Create view `vw_top_qty` with total quantity by product: `product_id`, `total_qty`.",
|
| 464 |
+
"answer_sql":["CREATE VIEW vw_top_qty AS SELECT product_id, SUM(qty) AS total_qty FROM order_items GROUP BY product_id;"]},
|
| 465 |
+
{"id":"RS_Q08","category":"CTAS / SELECT INTO","difficulty":2,
|
| 466 |
+
"prompt_md":"Create table `cheap_products` with products priced < 10.",
|
| 467 |
+
"answer_sql":[
|
| 468 |
+
"CREATE TABLE cheap_products AS SELECT * FROM products WHERE price < 10;",
|
| 469 |
+
"SELECT * INTO cheap_products FROM products WHERE price < 10;"
|
| 470 |
+
]}
|
| 471 |
+
]
|
| 472 |
+
}
|
| 473 |
+
|
| 474 |
+
# --- Wholesaler ---
|
| 475 |
+
FALLBACK_PACKS["wholesaler"] = {
|
| 476 |
+
"schema":{
|
| 477 |
+
"domain":"wholesaler",
|
| 478 |
+
"tables":[
|
| 479 |
+
{"name":"suppliers","pk":["supplier_id"],
|
| 480 |
+
"columns":[
|
| 481 |
+
{"name":"supplier_id","type":"INTEGER"},
|
| 482 |
+
{"name":"supplier_name","type":"TEXT"},
|
| 483 |
+
{"name":"country","type":"TEXT"}
|
| 484 |
+
],
|
| 485 |
+
"fks":[],
|
| 486 |
+
"rows":[
|
| 487 |
+
{"supplier_id":1,"supplier_name":"Nordic Foods","country":"SE"},
|
| 488 |
+
{"supplier_id":2,"supplier_name":"Metro Trade","country":"DE"},
|
| 489 |
+
{"supplier_id":3,"supplier_name":"Pacific Imports","country":"US"},
|
| 490 |
+
{"supplier_id":4,"supplier_name":"Andes Supply","country":"CL"},
|
| 491 |
+
{"supplier_id":5,"supplier_name":"Sahara Wholesale","country":"MA"}
|
| 492 |
+
]},
|
| 493 |
+
{"name":"items","pk":["item_id"],
|
| 494 |
+
"columns":[
|
| 495 |
+
{"name":"item_id","type":"INTEGER"},
|
| 496 |
+
{"name":"item_name","type":"TEXT"},
|
| 497 |
+
{"name":"unit_cost","type":"REAL"}
|
| 498 |
+
],
|
| 499 |
+
"fks":[],
|
| 500 |
+
"rows":[
|
| 501 |
+
{"item_id":101,"item_name":"Olive Oil 1L","unit_cost":4.20},
|
| 502 |
+
{"item_id":102,"item_name":"Canned Tuna","unit_cost":1.10},
|
| 503 |
+
{"item_id":103,"item_name":"Basmati Rice 5kg","unit_cost":6.30},
|
| 504 |
+
{"item_id":104,"item_name":"Black Tea 200g","unit_cost":2.70},
|
| 505 |
+
{"item_id":105,"item_name":"Peanut Butter","unit_cost":3.00},
|
| 506 |
+
{"item_id":106,"item_name":"Tomato Paste","unit_cost":0.95},
|
| 507 |
+
{"item_id":107,"item_name":"Chickpeas 1kg","unit_cost":1.60},
|
| 508 |
+
{"item_id":108,"item_name":"Soy Sauce 500ml","unit_cost":2.10}
|
| 509 |
+
]},
|
| 510 |
+
{"name":"purchase_orders","pk":["po_id"],
|
| 511 |
+
"columns":[
|
| 512 |
+
{"name":"po_id","type":"INTEGER"},
|
| 513 |
+
{"name":"supplier_id","type":"INTEGER"},
|
| 514 |
+
{"name":"po_date","type":"TEXT"}
|
| 515 |
+
],
|
| 516 |
+
"fks":[{"columns":["supplier_id"],"ref_table":"suppliers","ref_columns":["supplier_id"]}],
|
| 517 |
+
"rows":[
|
| 518 |
+
{"po_id":201,"supplier_id":1,"po_date":"2024-01-10"},
|
| 519 |
+
{"po_id":202,"supplier_id":2,"po_date":"2024-01-18"},
|
| 520 |
+
{"po_id":203,"supplier_id":3,"po_date":"2024-02-05"},
|
| 521 |
+
{"po_id":204,"supplier_id":1,"po_date":"2024-02-22"},
|
| 522 |
+
{"po_id":205,"supplier_id":5,"po_date":"2024-03-01"},
|
| 523 |
+
{"po_id":206,"supplier_id":4,"po_date":"2024-03-07"}
|
| 524 |
+
]},
|
| 525 |
+
{"name":"po_lines","pk":["po_id","item_id"],
|
| 526 |
+
"columns":[
|
| 527 |
+
{"name":"po_id","type":"INTEGER"},
|
| 528 |
+
{"name":"item_id","type":"INTEGER"},
|
| 529 |
+
{"name":"qty","type":"INTEGER"},
|
| 530 |
+
{"name":"line_cost","type":"REAL"}
|
| 531 |
+
],
|
| 532 |
+
"fks":[
|
| 533 |
+
{"columns":["po_id"],"ref_table":"purchase_orders","ref_columns":["po_id"]},
|
| 534 |
+
{"columns":["item_id"],"ref_table":"items","ref_columns":["item_id"]}
|
| 535 |
+
],
|
| 536 |
+
"rows":[
|
| 537 |
+
{"po_id":201,"item_id":101,"qty":200,"line_cost":840.0},
|
| 538 |
+
{"po_id":201,"item_id":106,"qty":500,"line_cost":475.0},
|
| 539 |
+
{"po_id":202,"item_id":103,"qty":120,"line_cost":756.0},
|
| 540 |
+
{"po_id":203,"item_id":102,"qty":600,"line_cost":660.0},
|
| 541 |
+
{"po_id":203,"item_id":104,"qty":150,"line_cost":405.0},
|
| 542 |
+
{"po_id":204,"item_id":105,"qty":180,"line_cost":540.0},
|
| 543 |
+
{"po_id":205,"item_id":107,"qty":300,"line_cost":480.0},
|
| 544 |
+
{"po_id":206,"item_id":108,"qty":250,"line_cost":525.0}
|
| 545 |
+
]}
|
| 546 |
+
]
|
| 547 |
+
},
|
| 548 |
+
"questions":[
|
| 549 |
+
{"id":"W_Q01","category":"SELECT *","difficulty":1,
|
| 550 |
+
"prompt_md":"Show all suppliers.",
|
| 551 |
+
"answer_sql":["SELECT * FROM suppliers;"]},
|
| 552 |
+
{"id":"W_Q02","category":"SELECT columns","difficulty":1,
|
| 553 |
+
"prompt_md":"Return `item_name` and `unit_cost` from `items`.",
|
| 554 |
+
"answer_sql":["SELECT item_name, unit_cost FROM items;"]},
|
| 555 |
+
{"id":"W_Q03","category":"WHERE","difficulty":1,
|
| 556 |
+
"prompt_md":"Items costing more than 3.00 (show name, cost).",
|
| 557 |
+
"answer_sql":["SELECT item_name, unit_cost FROM items WHERE unit_cost > 3.00;"]},
|
| 558 |
+
{"id":"W_Q04","category":"Aliases","difficulty":1,
|
| 559 |
+
"prompt_md":"Using aliases `p` and `s`, show each `po_id` with `supplier_name`.",
|
| 560 |
+
"answer_sql":["SELECT p.po_id, s.supplier_name FROM purchase_orders p JOIN suppliers s ON p.supplier_id=s.supplier_id;"],
|
| 561 |
+
"requires_aliases":True,"required_aliases":["p","s"]},
|
| 562 |
+
{"id":"W_Q05","category":"JOIN (INNER)","difficulty":2,
|
| 563 |
+
"prompt_md":"Inner join `po_lines` and `items`; list `item_name`, total qty per item.",
|
| 564 |
+
"answer_sql":["SELECT i.item_name, SUM(l.qty) AS total_qty FROM po_lines l INNER JOIN items i ON l.item_id=i.item_id GROUP BY i.item_name;"]},
|
| 565 |
+
{"id":"W_Q06","category":"JOIN (LEFT)","difficulty":2,
|
| 566 |
+
"prompt_md":"Suppliers and count of POs (include those with zero). Columns `supplier_name`, `po_count`.",
|
| 567 |
+
"answer_sql":["SELECT s.supplier_name, COUNT(p.po_id) AS po_count FROM suppliers s LEFT JOIN purchase_orders p ON s.supplier_id=p.supplier_id GROUP BY s.supplier_name;"]},
|
| 568 |
+
{"id":"W_Q07","category":"VIEW","difficulty":2,
|
| 569 |
+
"prompt_md":"Create view `vw_po_value` with `po_id` and total `line_cost` per PO.",
|
| 570 |
+
"answer_sql":["CREATE VIEW vw_po_value AS SELECT po_id, SUM(line_cost) AS po_value FROM po_lines GROUP BY po_id;"]},
|
| 571 |
+
{"id":"W_Q08","category":"CTAS / SELECT INTO","difficulty":2,
|
| 572 |
+
"prompt_md":"Create table `budget_items` where unit_cost < 2.00.",
|
| 573 |
+
"answer_sql":[
|
| 574 |
+
"CREATE TABLE budget_items AS SELECT * FROM items WHERE unit_cost < 2.00;",
|
| 575 |
+
"SELECT * INTO budget_items FROM items WHERE unit_cost < 2.00;"
|
| 576 |
+
]}
|
| 577 |
+
]
|
| 578 |
+
}
|
| 579 |
|
| 580 |
+
# --- Sales tax ---
|
| 581 |
+
# Offline pack for the "sales tax" domain.
# Schema: jurisdictions, tax_rates (FK -> jurisdictions) and transactions
# (FK -> jurisdictions).  Questions TX_Q01..TX_Q08 each carry one or more
# accepted answers in "answer_sql".
FALLBACK_PACKS["sales tax"] = {
    "schema": {
        "domain": "sales tax",
        "tables": [
            {
                "name": "jurisdictions",
                "pk": ["jurisdiction_id"],
                "columns": [
                    {"name": "jurisdiction_id", "type": "INTEGER"},
                    {"name": "name", "type": "TEXT"},
                    {"name": "state", "type": "TEXT"},
                ],
                "fks": [],
                "rows": [
                    {"jurisdiction_id": 1, "name": "King County", "state": "WA"},
                    {"jurisdiction_id": 2, "name": "Multnomah", "state": "OR"},
                    {"jurisdiction_id": 3, "name": "Maricopa", "state": "AZ"},
                    {"jurisdiction_id": 4, "name": "Travis", "state": "TX"},
                    {"jurisdiction_id": 5, "name": "Denver", "state": "CO"},
                    {"jurisdiction_id": 6, "name": "Miami-Dade", "state": "FL"},
                ],
            },
            {
                "name": "tax_rates",
                "pk": ["rate_id"],
                "columns": [
                    {"name": "rate_id", "type": "INTEGER"},
                    {"name": "jurisdiction_id", "type": "INTEGER"},
                    {"name": "category", "type": "TEXT"},
                    {"name": "rate", "type": "REAL"},
                ],
                "fks": [
                    {
                        "columns": ["jurisdiction_id"],
                        "ref_table": "jurisdictions",
                        "ref_columns": ["jurisdiction_id"],
                    },
                ],
                "rows": [
                    {"rate_id": 101, "jurisdiction_id": 1, "category": "general", "rate": 0.102},
                    # Oregon row: no sales tax, so the rate is zero.
                    {"rate_id": 102, "jurisdiction_id": 2, "category": "general", "rate": 0.000},
                    {"rate_id": 103, "jurisdiction_id": 3, "category": "general", "rate": 0.056},
                    {"rate_id": 104, "jurisdiction_id": 4, "category": "general", "rate": 0.0825},
                    {"rate_id": 105, "jurisdiction_id": 5, "category": "general", "rate": 0.081},
                    {"rate_id": 106, "jurisdiction_id": 6, "category": "general", "rate": 0.070},
                ],
            },
            {
                "name": "transactions",
                "pk": ["txn_id"],
                "columns": [
                    {"name": "txn_id", "type": "INTEGER"},
                    {"name": "txn_date", "type": "TEXT"},
                    {"name": "amount", "type": "REAL"},
                    {"name": "category", "type": "TEXT"},
                    {"name": "jurisdiction_id", "type": "INTEGER"},
                ],
                "fks": [
                    {
                        "columns": ["jurisdiction_id"],
                        "ref_table": "jurisdictions",
                        "ref_columns": ["jurisdiction_id"],
                    },
                ],
                "rows": [
                    {"txn_id": 201, "txn_date": "2024-01-03", "amount": 120.00, "category": "general", "jurisdiction_id": 1},
                    {"txn_id": 202, "txn_date": "2024-01-05", "amount": 55.25, "category": "general", "jurisdiction_id": 2},
                    {"txn_id": 203, "txn_date": "2024-01-10", "amount": 300.00, "category": "general", "jurisdiction_id": 3},
                    {"txn_id": 204, "txn_date": "2024-02-01", "amount": 240.55, "category": "general", "jurisdiction_id": 4},
                    {"txn_id": 205, "txn_date": "2024-02-14", "amount": 89.99, "category": "general", "jurisdiction_id": 5},
                    {"txn_id": 206, "txn_date": "2024-03-02", "amount": 150.00, "category": "general", "jurisdiction_id": 6},
                    {"txn_id": 207, "txn_date": "2024-03-09", "amount": 70.00, "category": "general", "jurisdiction_id": 1},
                    {"txn_id": 208, "txn_date": "2024-03-15", "amount": 18.50, "category": "general", "jurisdiction_id": 2},
                    {"txn_id": 209, "txn_date": "2024-03-20", "amount": 99.95, "category": "general", "jurisdiction_id": 3},
                    {"txn_id": 210, "txn_date": "2024-03-25", "amount": 199.99, "category": "general", "jurisdiction_id": 4},
                ],
            },
        ],
    },
    "questions": [
        {
            "id": "TX_Q01",
            "category": "SELECT *",
            "difficulty": 1,
            "prompt_md": "Show all records from `jurisdictions`.",
            "answer_sql": ["SELECT * FROM jurisdictions;"],
        },
        {
            "id": "TX_Q02",
            "category": "SELECT columns",
            "difficulty": 1,
            "prompt_md": "Show `jurisdiction_id`, `rate` from `tax_rates`.",
            "answer_sql": ["SELECT jurisdiction_id, rate FROM tax_rates;"],
        },
        {
            "id": "TX_Q03",
            "category": "WHERE",
            "difficulty": 1,
            "prompt_md": "Transactions over $150 (return `txn_id`, `amount`).",
            "answer_sql": ["SELECT txn_id, amount FROM transactions WHERE amount > 150;"],
        },
        {
            "id": "TX_Q04",
            "category": "Aliases",
            "difficulty": 1,
            "prompt_md": "Join `transactions` (`t`) with `jurisdictions` (`j`), returning `t.txn_id`, `j.name`.",
            "answer_sql": ["SELECT t.txn_id, j.name FROM transactions t JOIN jurisdictions j ON t.jurisdiction_id=j.jurisdiction_id;"],
            "requires_aliases": True,
            "required_aliases": ["t", "j"],
        },
        {
            "id": "TX_Q05",
            "category": "JOIN (INNER)",
            "difficulty": 2,
            "prompt_md": "Compute tax for each transaction: return `txn_id`, `amount*rate` as `tax`.",
            "answer_sql": ["SELECT t.txn_id, t.amount * r.rate AS tax FROM transactions t INNER JOIN tax_rates r ON t.jurisdiction_id=r.jurisdiction_id AND t.category=r.category;"],
        },
        {
            "id": "TX_Q06",
            "category": "JOIN (LEFT)",
            "difficulty": 2,
            "prompt_md": "Jurisdictions and count of transactions (include zero). Columns `name`, `txn_count`.",
            "answer_sql": ["SELECT j.name, COUNT(t.txn_id) AS txn_count FROM jurisdictions j LEFT JOIN transactions t ON j.jurisdiction_id=t.jurisdiction_id GROUP BY j.name;"],
        },
        {
            "id": "TX_Q07",
            "category": "VIEW",
            "difficulty": 2,
            "prompt_md": "Create view `vw_high_txn` with transactions amount > 150.",
            "answer_sql": ["CREATE VIEW vw_high_txn AS SELECT * FROM transactions WHERE amount > 150;"],
        },
        {
            "id": "TX_Q08",
            "category": "CTAS / SELECT INTO",
            "difficulty": 2,
            "prompt_md": "Create table `low_rate` with tax_rates where rate < 0.06.",
            # Both the CREATE TABLE AS form and the SELECT INTO form are listed.
            "answer_sql": [
                "CREATE TABLE low_rate AS SELECT * FROM tax_rates WHERE rate < 0.06;",
                "SELECT * INTO low_rate FROM tax_rates WHERE rate < 0.06;",
            ],
        },
    ],
}
|
| 670 |
+
|
| 671 |
+
# --- Oil & gas wells ---
|
| 672 |
+
# Offline pack for the "oil and gas wells" domain.
# Schema: wells, operators, well_operators (composite PK, FKs -> wells and
# operators) and production (FK -> wells).  Questions OG_Q01..OG_Q08.
FALLBACK_PACKS["oil and gas wells"] = {
    "schema": {
        "domain": "oil and gas wells",
        "tables": [
            {
                "name": "wells",
                "pk": ["well_id"],
                "columns": [
                    {"name": "well_id", "type": "INTEGER"},
                    {"name": "well_name", "type": "TEXT"},
                    {"name": "location", "type": "TEXT"},
                    {"name": "status", "type": "TEXT"},
                    {"name": "depth", "type": "INTEGER"},
                ],
                "fks": [],
                "rows": [
                    {"well_id": 1, "well_name": "Alpha-1", "location": "TX-TRV", "status": "producing", "depth": 12000},
                    {"well_id": 2, "well_name": "Bravo-2", "location": "TX-TRV", "status": "shut-in", "depth": 10500},
                    {"well_id": 3, "well_name": "Cedar-7", "location": "OK-CAD", "status": "producing", "depth": 9800},
                    {"well_id": 4, "well_name": "Delta-3", "location": "ND-WIL", "status": "drilling", "depth": 7000},
                    {"well_id": 5, "well_name": "Eagle-5", "location": "CO-DNV", "status": "producing", "depth": 8500},
                    {"well_id": 6, "well_name": "Fox-9", "location": "NM-LEA", "status": "producing", "depth": 11000},
                ],
            },
            {
                "name": "operators",
                "pk": ["operator_id"],
                "columns": [
                    {"name": "operator_id", "type": "INTEGER"},
                    {"name": "name", "type": "TEXT"},
                    {"name": "contact", "type": "TEXT"},
                ],
                "fks": [],
                "rows": [
                    {"operator_id": 10, "name": "PetroMax", "contact": "pmx@example.com"},
                    {"operator_id": 11, "name": "BlueRock Energy", "contact": "blue@example.com"},
                    {"operator_id": 12, "name": "HighPlains LLC", "contact": "hp@example.com"},
                    {"operator_id": 13, "name": "Mesa Oil", "contact": "mesa@example.com"},
                ],
            },
            {
                "name": "well_operators",
                "pk": ["well_id", "operator_id"],
                "columns": [
                    {"name": "well_id", "type": "INTEGER"},
                    {"name": "operator_id", "type": "INTEGER"},
                    {"name": "start_date", "type": "TEXT"},
                ],
                "fks": [
                    {"columns": ["well_id"], "ref_table": "wells", "ref_columns": ["well_id"]},
                    {"columns": ["operator_id"], "ref_table": "operators", "ref_columns": ["operator_id"]},
                ],
                "rows": [
                    {"well_id": 1, "operator_id": 10, "start_date": "2023-01-01"},
                    {"well_id": 2, "operator_id": 10, "start_date": "2023-06-01"},
                    {"well_id": 3, "operator_id": 11, "start_date": "2022-03-15"},
                    {"well_id": 4, "operator_id": 12, "start_date": "2024-02-01"},
                    {"well_id": 5, "operator_id": 13, "start_date": "2022-10-10"},
                    {"well_id": 6, "operator_id": 11, "start_date": "2021-08-05"},
                ],
            },
            {
                "name": "production",
                "pk": ["prod_id"],
                "columns": [
                    {"name": "prod_id", "type": "INTEGER"},
                    {"name": "well_id", "type": "INTEGER"},
                    {"name": "month", "type": "TEXT"},
                    {"name": "oil_bbl", "type": "REAL"},
                    {"name": "gas_mcf", "type": "REAL"},
                ],
                "fks": [
                    {"columns": ["well_id"], "ref_table": "wells", "ref_columns": ["well_id"]},
                ],
                "rows": [
                    {"prod_id": 1001, "well_id": 1, "month": "2024-01", "oil_bbl": 1200, "gas_mcf": 5000},
                    {"prod_id": 1002, "well_id": 1, "month": "2024-02", "oil_bbl": 1180, "gas_mcf": 5100},
                    {"prod_id": 1003, "well_id": 3, "month": "2024-01", "oil_bbl": 900, "gas_mcf": 3000},
                    {"prod_id": 1004, "well_id": 3, "month": "2024-02", "oil_bbl": 950, "gas_mcf": 3100},
                    {"prod_id": 1005, "well_id": 5, "month": "2024-01", "oil_bbl": 600, "gas_mcf": 2200},
                    {"prod_id": 1006, "well_id": 6, "month": "2024-01", "oil_bbl": 750, "gas_mcf": 2600},
                    {"prod_id": 1007, "well_id": 2, "month": "2024-01", "oil_bbl": 0, "gas_mcf": 0},
                    {"prod_id": 1008, "well_id": 4, "month": "2024-02", "oil_bbl": 100, "gas_mcf": 400},
                ],
            },
        ],
    },
    "questions": [
        {
            "id": "OG_Q01",
            "category": "SELECT *",
            "difficulty": 1,
            "prompt_md": "List all rows from `wells`.",
            "answer_sql": ["SELECT * FROM wells;"],
        },
        {
            "id": "OG_Q02",
            "category": "SELECT columns",
            "difficulty": 1,
            "prompt_md": "Return `well_name`, `status` from `wells`.",
            "answer_sql": ["SELECT well_name, status FROM wells;"],
        },
        {
            "id": "OG_Q03",
            "category": "WHERE",
            "difficulty": 1,
            "prompt_md": "Wells deeper than 10,000 ft (return `well_name`, `depth`).",
            "answer_sql": ["SELECT well_name, depth FROM wells WHERE depth > 10000;"],
        },
        {
            "id": "OG_Q04",
            "category": "Aliases",
            "difficulty": 1,
            "prompt_md": "Using `w` and `o`, show `w.well_name` with `o.name` (operator).",
            "answer_sql": ["SELECT w.well_name, o.name FROM wells w JOIN well_operators wo ON w.well_id=wo.well_id JOIN operators o ON wo.operator_id=o.operator_id;"],
            "requires_aliases": True,
            "required_aliases": ["w", "o"],
        },
        {
            "id": "OG_Q05",
            "category": "JOIN (INNER)",
            "difficulty": 2,
            "prompt_md": "Join `production` to `wells` and return `well_name`, total `oil_bbl` per well.",
            "answer_sql": ["SELECT w.well_name, SUM(p.oil_bbl) AS total_oil FROM production p INNER JOIN wells w ON p.well_id=w.well_id GROUP BY w.well_name;"],
        },
        {
            "id": "OG_Q06",
            "category": "JOIN (LEFT)",
            "difficulty": 2,
            "prompt_md": "List operators and count of wells (include operators with zero). Columns `name`, `well_count`.",
            "answer_sql": ["SELECT o.name, COUNT(wo.well_id) AS well_count FROM operators o LEFT JOIN well_operators wo ON o.operator_id=wo.operator_id GROUP BY o.name;"],
        },
        {
            "id": "OG_Q07",
            "category": "VIEW",
            "difficulty": 2,
            "prompt_md": "Create view `vw_prod_jan` for January 2024 production.",
            "answer_sql": ["CREATE VIEW vw_prod_jan AS SELECT * FROM production WHERE month='2024-01';"],
        },
        {
            "id": "OG_Q08",
            "category": "CTAS / SELECT INTO",
            "difficulty": 2,
            "prompt_md": "Create table `active_wells` for wells with status='producing'.",
            # Both the CREATE TABLE AS form and the SELECT INTO form are listed.
            "answer_sql": [
                "CREATE TABLE active_wells AS SELECT * FROM wells WHERE status='producing';",
                "SELECT * INTO active_wells FROM wells WHERE status='producing';",
            ],
        },
    ],
}
|
| 776 |
|
| 777 |
+
# --- Marketing ---
|
| 778 |
+
# Offline pack for the "marketing" domain.
# Schema: channels, campaigns (FK -> channels), ad_stats (composite PK,
# FK -> campaigns) and leads (FK -> campaigns).  Questions M_Q01..M_Q08.
FALLBACK_PACKS["marketing"] = {
    "schema": {
        "domain": "marketing",
        "tables": [
            {
                "name": "channels",
                "pk": ["channel_id"],
                "columns": [
                    {"name": "channel_id", "type": "INTEGER"},
                    {"name": "channel_name", "type": "TEXT"},
                ],
                "fks": [],
                "rows": [
                    {"channel_id": 1, "channel_name": "Search"},
                    {"channel_id": 2, "channel_name": "Social"},
                    {"channel_id": 3, "channel_name": "Email"},
                    {"channel_id": 4, "channel_name": "Display"},
                ],
            },
            {
                "name": "campaigns",
                "pk": ["campaign_id"],
                "columns": [
                    {"name": "campaign_id", "type": "INTEGER"},
                    {"name": "campaign_name", "type": "TEXT"},
                    {"name": "channel_id", "type": "INTEGER"},
                    {"name": "start_date", "type": "TEXT"},
                    {"name": "budget", "type": "REAL"},
                ],
                "fks": [
                    {"columns": ["channel_id"], "ref_table": "channels", "ref_columns": ["channel_id"]},
                ],
                "rows": [
                    {"campaign_id": 101, "campaign_name": "Spring Search", "channel_id": 1, "start_date": "2024-03-01", "budget": 5000},
                    {"campaign_id": 102, "campaign_name": "Brand Social", "channel_id": 2, "start_date": "2024-03-05", "budget": 3000},
                    {"campaign_id": 103, "campaign_name": "Welcome Email", "channel_id": 3, "start_date": "2024-03-07", "budget": 1000},
                    {"campaign_id": 104, "campaign_name": "Retargeting", "channel_id": 4, "start_date": "2024-03-10", "budget": 2000},
                    {"campaign_id": 105, "campaign_name": "Summer Search", "channel_id": 1, "start_date": "2024-06-01", "budget": 6000},
                    {"campaign_id": 106, "campaign_name": "Promo Social", "channel_id": 2, "start_date": "2024-06-05", "budget": 3500},
                ],
            },
            {
                "name": "ad_stats",
                "pk": ["campaign_id", "day"],
                "columns": [
                    {"name": "campaign_id", "type": "INTEGER"},
                    {"name": "day", "type": "TEXT"},
                    {"name": "impressions", "type": "INTEGER"},
                    {"name": "clicks", "type": "INTEGER"},
                    {"name": "spend", "type": "REAL"},
                ],
                "fks": [
                    {"columns": ["campaign_id"], "ref_table": "campaigns", "ref_columns": ["campaign_id"]},
                ],
                "rows": [
                    {"campaign_id": 101, "day": "2024-03-12", "impressions": 10000, "clicks": 500, "spend": 200.0},
                    {"campaign_id": 101, "day": "2024-03-13", "impressions": 12000, "clicks": 600, "spend": 230.0},
                    {"campaign_id": 102, "day": "2024-03-12", "impressions": 8000, "clicks": 400, "spend": 150.0},
                    {"campaign_id": 103, "day": "2024-03-12", "impressions": 5000, "clicks": 250, "spend": 80.0},
                    {"campaign_id": 104, "day": "2024-03-12", "impressions": 7000, "clicks": 210, "spend": 110.0},
                    {"campaign_id": 106, "day": "2024-06-12", "impressions": 9500, "clicks": 520, "spend": 190.0},
                ],
            },
            {
                "name": "leads",
                "pk": ["lead_id"],
                "columns": [
                    {"name": "lead_id", "type": "INTEGER"},
                    {"name": "campaign_id", "type": "INTEGER"},
                    {"name": "source", "type": "TEXT"},
                    {"name": "qualified", "type": "INTEGER"},
                    {"name": "revenue", "type": "REAL"},
                ],
                "fks": [
                    {"columns": ["campaign_id"], "ref_table": "campaigns", "ref_columns": ["campaign_id"]},
                ],
                "rows": [
                    {"lead_id": 1, "campaign_id": 101, "source": "LP1", "qualified": 1, "revenue": 400},
                    {"lead_id": 2, "campaign_id": 101, "source": "LP2", "qualified": 0, "revenue": 0},
                    {"lead_id": 3, "campaign_id": 102, "source": "FB", "qualified": 1, "revenue": 250},
                    {"lead_id": 4, "campaign_id": 103, "source": "Email", "qualified": 1, "revenue": 300},
                    {"lead_id": 5, "campaign_id": 104, "source": "DSP", "qualified": 0, "revenue": 0},
                    {"lead_id": 6, "campaign_id": 106, "source": "FB", "qualified": 1, "revenue": 500},
                ],
            },
        ],
    },
    "questions": [
        {
            "id": "M_Q01",
            "category": "SELECT *",
            "difficulty": 1,
            "prompt_md": "Show all channels.",
            "answer_sql": ["SELECT * FROM channels;"],
        },
        {
            "id": "M_Q02",
            "category": "SELECT columns",
            "difficulty": 1,
            "prompt_md": "Return `campaign_name`, `budget` from `campaigns`.",
            "answer_sql": ["SELECT campaign_name, budget FROM campaigns;"],
        },
        {
            "id": "M_Q03",
            "category": "WHERE",
            "difficulty": 1,
            "prompt_md": "Campaigns with budget ≥ 3000 (show `campaign_name`, `budget`).",
            "answer_sql": ["SELECT campaign_name, budget FROM campaigns WHERE budget >= 3000;"],
        },
        {
            "id": "M_Q04",
            "category": "Aliases",
            "difficulty": 1,
            "prompt_md": "Join `campaigns` (`c`) with `channels` (`ch`) and show `c.campaign_name`, `ch.channel_name`.",
            "answer_sql": ["SELECT c.campaign_name, ch.channel_name FROM campaigns c JOIN channels ch ON c.channel_id=ch.channel_id;"],
            "requires_aliases": True,
            "required_aliases": ["c", "ch"],
        },
        {
            "id": "M_Q05",
            "category": "JOIN (INNER)",
            "difficulty": 2,
            "prompt_md": "Join `ad_stats` with `campaigns` and return `campaign_name`, total `clicks`.",
            "answer_sql": ["SELECT c.campaign_name, SUM(s.clicks) AS total_clicks FROM ad_stats s INNER JOIN campaigns c ON s.campaign_id=c.campaign_id GROUP BY c.campaign_name;"],
        },
        {
            "id": "M_Q06",
            "category": "JOIN (LEFT)",
            "difficulty": 2,
            "prompt_md": "Channels and number of campaigns (include channels with zero). Columns `channel_name`, `campaigns`.",
            "answer_sql": ["SELECT ch.channel_name, COUNT(c.campaign_id) AS campaigns FROM channels ch LEFT JOIN campaigns c ON ch.channel_id=c.channel_id GROUP BY ch.channel_name;"],
        },
        {
            "id": "M_Q07",
            "category": "VIEW",
            "difficulty": 2,
            "prompt_md": "Create view `vw_cost_per_click` with `campaign_id`, `day`, `spend/clicks` as `cpc` (avoid divide-by-zero).",
            "answer_sql": ["CREATE VIEW vw_cost_per_click AS SELECT campaign_id, day, CASE WHEN clicks=0 THEN NULL ELSE spend*1.0/clicks END AS cpc FROM ad_stats;"],
        },
        {
            "id": "M_Q08",
            "category": "CTAS / SELECT INTO",
            "difficulty": 2,
            "prompt_md": "Create table `qualified_leads` of leads where `qualified=1`.",
            # Both the CREATE TABLE AS form and the SELECT INTO form are listed.
            "answer_sql": [
                "CREATE TABLE qualified_leads AS SELECT * FROM leads WHERE qualified=1;",
                "SELECT * INTO qualified_leads FROM leads WHERE qualified=1;",
            ],
        },
    ],
}
|
| 878 |
+
|
| 879 |
+
# Helpers to get a pack by domain (normalize key)
|
| 880 |
+
def get_fallback_pack_for(domain_name: str) -> Tuple[Dict[str,Any], List[Dict[str,Any]]]:
    """Return the ``(schema, questions)`` pair of the fallback pack for a domain.

    The lookup ignores case and surrounding whitespace.  When no key of
    ``FALLBACK_PACKS`` matches (including ``None`` or an empty name), the
    "bookstore" pack is returned instead.
    """
    wanted = (domain_name or "").strip().lower()
    # First pack whose lower-cased key equals the normalized name, if any.
    chosen = next(
        (pack for name, pack in FALLBACK_PACKS.items() if name.lower() == wanted),
        None,
    )
    if chosen is None:
        chosen = FALLBACK_PACKS["bookstore"]
    return chosen["schema"], chosen["questions"]
|
| 888 |
+
|
| 889 |
+
# -------------------- OpenAI prompts + parsing helpers --------------------
|
| 890 |
+
# Domains the app rotates through, in cycling order.
ALLOWED_DOMAINS = [
    "bookstore",
    "retail sales",
    "wholesaler",
    "sales tax",
    "oil and gas wells",
    "marketing",
]

# Index into ALLOWED_DOMAINS; will be set after first install.
DOMAIN_CYCLE_POS = 0
|
| 893 |
+
|
| 894 |
+
def _domain_prompt(prev_domain: Optional[str], preferred_domain: Optional[str]) -> str:
    """Build the instruction text asking the model for a domain JSON object.

    Args:
        prev_domain: Domain used previously; when truthy, a note telling the
            model not to reuse it is appended to the "domain" instruction.
        preferred_domain: Domain the model must return; "bookstore" is used
            when this is empty or ``None``.

    Returns:
        The prompt string, starting and ending with a newline.
    """
    if preferred_domain:
        wanted = preferred_domain
    else:
        wanted = "bookstore"
    if prev_domain:
        avoid_note = f" (previous domain was '{prev_domain}', do not reuse it)"
    else:
        avoid_note = ""
    # Doubled braces below are literal braces in the rendered prompt.
    return f"""
Return ONLY a valid JSON object (no markdown, no prose).
You MUST set the top-level property "domain" to EXACTLY "{wanted}" (string match).{avoid_note}

The JSON must have:
- "domain": "{wanted}"
- "tables": 3–4 table objects
- "questions": 8–12 question objects

Tables:
- SQLite-friendly. Use snake_case.
- Each table: name, pk (list), columns (list of {{name,type}}), fks (list of {{columns,ref_table,ref_columns}}), rows (8–15 small seed rows).

Questions:
- Categories among: "SELECT *", "SELECT columns", "WHERE", "Aliases",
"JOIN (INNER)", "JOIN (LEFT)", "Aggregation", "VIEW", "CTAS / SELECT INTO".
- Include at least one LEFT JOIN, one VIEW, one CTAS or SELECT INTO.
- Provide 1–3 'answer_sql' strings per question.
- Prefer SQLite-compatible SQL. Do NOT use RIGHT/FULL OUTER JOIN.
"""
|
| 917 |
|
| 918 |
def _loose_json_parse(s: str) -> Optional[dict]:
|
|
|
|
| 1022 |
})
|
| 1023 |
return out
|
| 1024 |
|
| 1025 |
+
def llm_generate_domain_and_questions(prev_domain: Optional[str], preferred_domain: Optional[str]):
|
| 1026 |
if not OPENAI_AVAILABLE or not os.getenv("OPENAI_API_KEY"):
|
| 1027 |
return None, "OpenAI client not available or OPENAI_API_KEY missing.", None, {"accepted_questions":0,"dropped_questions":0}
|
| 1028 |
errors = []
|
| 1029 |
+
prompt = _domain_prompt(prev_domain, preferred_domain)
|
| 1030 |
for model in _candidate_models():
|
| 1031 |
try:
|
| 1032 |
try:
|
|
|
|
| 1048 |
obj_raw = _loose_json_parse(data_text or "")
|
| 1049 |
if not obj_raw:
|
| 1050 |
raise RuntimeError("Could not parse JSON from model output.")
|
| 1051 |
+
for k in ["domain","tables","questions"]:
|
| 1052 |
if k not in obj_raw:
|
| 1053 |
raise RuntimeError(f"Missing key '{k}'")
|
| 1054 |
+
# Force domain to preferred
|
| 1055 |
+
if preferred_domain and (str(obj_raw.get("domain","")).strip().lower() != preferred_domain.strip().lower()):
|
| 1056 |
+
raise RuntimeError(f"Model returned domain '{obj_raw.get('domain')}', expected '{preferred_domain}'.")
|
| 1057 |
tables = _canon_tables(obj_raw.get("tables", []))
|
| 1058 |
if not tables: raise RuntimeError("No usable tables in LLM output.")
|
| 1059 |
obj_raw["tables"] = tables
|
|
|
|
| 1119 |
(schema.get("domain","unknown"), json.dumps(schema)))
|
| 1120 |
con.commit()
|
| 1121 |
|
| 1122 |
+
def bootstrap_domain_with_llm_or_fallback(prev_domain: Optional[str], preferred_domain: str):
    """Get a schema and question list from the LLM, else from the fallback pack.

    Returns a ``(schema, questions, info)`` triple.  ``info`` records the
    source ("openai" or "fallback"), the model used, any error message, and
    the accepted/dropped question counts (both 0 on the fallback path).
    """
    generated, failure, model_name, counts = llm_generate_domain_and_questions(prev_domain, preferred_domain)
    if generated is not None:
        provenance = {
            "source": "openai",
            "model": model_name,
            "error": None,
            "accepted": counts["accepted_questions"],
            "dropped": counts["dropped_questions"],
        }
        return generated, generated["questions"], provenance

    # No LLM result: use the domain-specific fallback pack.
    fallback_schema, fallback_questions = get_fallback_pack_for(preferred_domain)
    provenance = {
        "source": "fallback",
        "model": None,
        "error": failure,
        "accepted": 0,
        "dropped": 0,
    }
    return fallback_schema, fallback_questions, provenance
|
| 1130 |
|
| 1131 |
+
def install_schema_and_prepare_questions(prev_domain: Optional[str], preferred_domain: str):
    """Install a schema for ``preferred_domain`` and return its questions.

    The schema from ``bootstrap_domain_with_llm_or_fallback`` is installed on
    ``CONN`` first.  If it came with no questions, the fallback pack for the
    same domain is installed over it and returned instead.

    Returns:
        ``(schema, questions, info)`` for whichever schema ended up installed.
    """
    schema, questions, info = bootstrap_domain_with_llm_or_fallback(prev_domain, preferred_domain)
    install_schema(CONN, schema)
    if questions:
        return schema, questions, info

    # Nothing usable to ask: switch to the fallback pack (still preferred domain).
    schema, questions = get_fallback_pack_for(preferred_domain)
    install_schema(CONN, schema)
    info = {
        "source": "fallback",
        "model": None,
        "error": "No usable questions from LLM",
        "accepted": 0,
        "dropped": 0,
    }
    return schema, questions, info
|
| 1140 |
|
| 1141 |
+
# -------------------- Domain cycling --------------------
|
| 1142 |
+
def _norm(s: str) -> str:
    """Return ``s`` trimmed and lower-cased; falsy input yields ``""``."""
    text = s if s else ""
    trimmed = text.strip()
    return trimmed.lower()
|
| 1144 |
+
|
| 1145 |
+
def _index_of_domain(name: str) -> int:
    """Return the position of ``name`` in ``ALLOWED_DOMAINS``.

    Names are compared after ``_norm``.  An unrecognised name yields 0.
    """
    wanted = _norm(name)
    matches = (pos for pos, domain in enumerate(ALLOWED_DOMAINS) if _norm(domain) == wanted)
    return next(matches, 0)
|
| 1151 |
+
|
| 1152 |
+
def choose_next_domain(prev_domain: Optional[str]) -> str:
    """Pick the domain that follows ``prev_domain`` in ``ALLOWED_DOMAINS``.

    With ``prev_domain`` of ``None`` the first domain is chosen.  Otherwise the
    entry after ``prev_domain`` is chosen, wrapping round at the end of the
    list.  The chosen index is also stored in the module-level
    ``DOMAIN_CYCLE_POS``.
    """
    global DOMAIN_CYCLE_POS
    if prev_domain is None:
        next_pos = 0
    else:
        next_pos = (_index_of_domain(prev_domain) + 1) % len(ALLOWED_DOMAINS)
    DOMAIN_CYCLE_POS = next_pos
    return ALLOWED_DOMAINS[next_pos]
|
| 1160 |
+
|
| 1161 |
+
# -------------------- Initialize first domain --------------------
|
| 1162 |
+
# First boot always targets "bookstore" so start-up is deterministic.
CURRENT_SCHEMA, CURRENT_QS, CURRENT_INFO = install_schema_and_prepare_questions(
    prev_domain=None,
    preferred_domain="bookstore",
)
# Point the cycle at whichever domain was actually installed.
DOMAIN_CYCLE_POS = _index_of_domain(
    CURRENT_SCHEMA.get("domain", "bookstore")
)
|
| 1165 |
|
| 1166 |
# -------------------- Progress + mastery --------------------
|
| 1167 |
def upsert_user(con: sqlite3.Connection, user_id: str, name: str):
|
|
|
|
| 1195 |
return pd.read_sql_query("SELECT * FROM attempts WHERE user_id=? ORDER BY id DESC", con, params=(user_id,))
|
| 1196 |
|
| 1197 |
def pick_next_question(user_id: str) -> Dict[str,Any]:
|
| 1198 |
+
pool = CURRENT_QS if CURRENT_QS else get_fallback_pack_for(CURRENT_SCHEMA.get("domain","bookstore"))[1]
|
| 1199 |
df = fetch_attempts(CONN, user_id)
|
| 1200 |
stats = topic_stats(df)
|
| 1201 |
stats = stats.sort_values(by=["accuracy","attempts"], ascending=[True, True]) if not stats.empty else stats
|
|
|
|
| 1488 |
}.get(cat, "Identify keys from the schema and join on them.")
|
| 1489 |
return gr.update(value=f"**Hint:** {hint}", visible=True)
|
| 1490 |
|
| 1491 |
+
def export_progress(user_name: str):
    """Write a user's attempt history to a CSV file and return its path.

    Args:
        user_name: Display name typed into the UI.  It is lower-cased, its
            whitespace runs are replaced by "-", and the result is cut to 64
            characters to form the ``user_id`` used in the ``attempts`` query.

    Returns:
        Absolute path of the written CSV, or ``None`` when the name is empty
        or only whitespace.  A user with no attempts gets a one-row CSV
        holding the message "No attempts yet.".
    """
    slug = "-".join((user_name or "").lower().split())
    if not slug:
        return None
    user_id = slug[:64]

    with DB_LOCK:
        df = pd.read_sql_query(
            "SELECT * FROM attempts WHERE user_id=? ORDER BY id DESC",
            CONN,
            params=(user_id,),
        )

    # The name is free text from the UI, so it may hold "/", "\\" or "..".
    # Only the filename is sanitized; user_id itself stays untouched so the
    # query above still matches.  Anything outside word characters, "." and
    # "-" becomes "_", and leading dots are dropped, which keeps the export
    # inside EXPORT_DIR.
    safe_name = re.sub(r"[^\w.-]+", "_", user_id).lstrip(".") or "user"

    os.makedirs(EXPORT_DIR, exist_ok=True)
    path = os.path.abspath(os.path.join(EXPORT_DIR, f"{safe_name}_progress.csv"))
    report = pd.DataFrame([{"info": "No attempts yet."}]) if df.empty else df
    report.to_csv(path, index=False)
    return path
|
| 1501 |
+
|
| 1502 |
def _domain_status_md():
|
| 1503 |
+
if CURRENT_INFO.get("source","openai") == "openai":
|
| 1504 |
accepted = CURRENT_INFO.get("accepted",0); dropped = CURRENT_INFO.get("dropped",0)
|
| 1505 |
return (f"✅ **Domain via OpenAI** `{CURRENT_INFO.get('model','?')}` → **{CURRENT_SCHEMA.get('domain','?')}**. "
|
| 1506 |
f"Accepted questions: {accepted}, dropped: {dropped}. \n"
|
|
|
|
| 1518 |
""")
|
| 1519 |
return df["name"].tolist() if not df.empty else ["(no tables)"]
|
| 1520 |
|
| 1521 |
+
# Always reseed a question on randomize (creates a guest session if needed)
|
| 1522 |
def regenerate_domain(session: dict):
|
| 1523 |
global CURRENT_SCHEMA, CURRENT_QS, CURRENT_INFO
|
| 1524 |
prev = CURRENT_SCHEMA.get("domain") if CURRENT_SCHEMA else None
|
| 1525 |
+
preferred = choose_next_domain(prev)
|
| 1526 |
+
CURRENT_SCHEMA, CURRENT_QS, CURRENT_INFO = install_schema_and_prepare_questions(prev_domain=prev, preferred_domain=preferred)
|
| 1527 |
erd = draw_dynamic_erd(CURRENT_SCHEMA)
|
| 1528 |
status = _domain_status_md()
|
| 1529 |
|
|
|
|
| 1537 |
q = pick_next_question(session["user_id"])
|
| 1538 |
session.update({"qid": q["id"], "q": q, "start_ts": time.time()})
|
| 1539 |
|
|
|
|
| 1540 |
stats = topic_stats(fetch_attempts(CONN, session["user_id"]))
|
| 1541 |
empty_df = pd.DataFrame()
|
|
|
|
|
|
|
| 1542 |
dd_update = gr.update(choices=list_tables_for_preview(), value=None)
|
| 1543 |
|
| 1544 |
return (
|
|
|
|
| 1643 |
outputs=[feedback_md],
|
| 1644 |
)
|
| 1645 |
export_btn.click(
|
| 1646 |
+
export_progress,
|
| 1647 |
inputs=[export_name],
|
| 1648 |
outputs=[export_file],
|
| 1649 |
)
|
| 1650 |
+
regen_btn.click(
|
| 1651 |
regenerate_domain,
|
| 1652 |
inputs=[session_state],
|
| 1653 |
outputs=[regen_fb, er_image, prompt_md, sql_input, tbl_dd, mastery_df, result_df, session_state],
|