Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
# - 3–4 related tables with seed rows installed in SQLite.
|
| 4 |
# - Students practice SELECT, WHERE, JOINs (INNER/LEFT), aliases, views, CTAS/SELECT INTO.
|
| 5 |
# - Validator enforces columns only when the prompt asks; otherwise focuses on rows.
|
| 6 |
-
# - ERD shows
|
| 7 |
# - Domain picker now round-robins across: bookstore, retail sales, wholesaler, sales tax, oil & gas wells, marketing.
|
| 8 |
|
| 9 |
import os
|
|
@@ -62,15 +62,12 @@ def _fig_to_pil(fig) -> Image.Image:
|
|
| 62 |
def draw_dynamic_erd(
|
| 63 |
schema: Dict[str, Any],
|
| 64 |
highlight_tables: Optional[Set[str]] = None,
|
| 65 |
-
highlight_edges: Optional[Set[Tuple[str, str]]] = None,
|
| 66 |
) -> Image.Image:
|
| 67 |
"""
|
| 68 |
-
Draw tables
|
| 69 |
-
highlight_edges uses (src_table, dst_table) with dst_table = referenced table.
|
| 70 |
"""
|
| 71 |
highlight_tables = set(highlight_tables or [])
|
| 72 |
-
def _norm_edge(a, b): return tuple(sorted([a, b]))
|
| 73 |
-
H = set(_norm_edge(*e) for e in (highlight_edges or set()))
|
| 74 |
|
| 75 |
tables = schema.get("tables", [])
|
| 76 |
fig, ax = plt.subplots(figsize=PLOT_FIGSIZE); ax.axis("off")
|
|
@@ -78,23 +75,18 @@ def draw_dynamic_erd(
|
|
| 78 |
ax.text(0.5, 0.5, "No tables to diagram.", ha="center", va="center")
|
| 79 |
return _fig_to_pil(fig)
|
| 80 |
|
|
|
|
| 81 |
n = len(tables)
|
| 82 |
margin = 0.03
|
| 83 |
width = (1 - margin * (n + 1)) / max(n, 1)
|
| 84 |
height = 0.70
|
| 85 |
y = 0.20
|
| 86 |
|
| 87 |
-
|
| 88 |
-
for t in tables:
|
| 89 |
-
for fk in t.get("fks", []) or []:
|
| 90 |
-
dst = fk.get("ref_table")
|
| 91 |
-
if dst:
|
| 92 |
-
fk_edges.append((t["name"], dst))
|
| 93 |
-
|
| 94 |
-
boxes: Dict[str, Tuple[float,float,float,float]] = {}
|
| 95 |
for i, t in enumerate(tables):
|
| 96 |
tx = margin + i * (width + margin)
|
| 97 |
-
|
|
|
|
| 98 |
lw = 2.0 if t["name"] in highlight_tables else 1.2
|
| 99 |
ax.add_patch(Rectangle((tx, y), width, height, fill=False, lw=lw))
|
| 100 |
ax.text(tx + 0.01, y + height - 0.04, t["name"], fontsize=10, ha="left", va="top", weight="bold")
|
|
@@ -118,28 +110,10 @@ def draw_dynamic_erd(
|
|
| 118 |
ax.text(tx + 0.016, yy, f"{nm}{tag}", fontsize=9, ha="left", va="top")
|
| 119 |
yy -= 0.055
|
| 120 |
|
| 121 |
-
for (src, dst) in fk_edges:
|
| 122 |
-
if src not in boxes or dst not in boxes:
|
| 123 |
-
continue
|
| 124 |
-
(x1, y1, w1, h1) = boxes[src]
|
| 125 |
-
(x2, y2, w2, h2) = boxes[dst]
|
| 126 |
-
ax.annotate("",
|
| 127 |
-
xy=(x2 + w2/2.0, y2 + h2),
|
| 128 |
-
xytext=(x1 + w1/2.0, y1),
|
| 129 |
-
arrowprops=dict(arrowstyle="->", lw=1.0, color="#cccccc"))
|
| 130 |
-
|
| 131 |
-
for (src, dst) in fk_edges:
|
| 132 |
-
if _norm_edge(src, dst) in H:
|
| 133 |
-
(x1, y1, w1, h1) = boxes[src]
|
| 134 |
-
(x2, y2, w2, h2) = boxes[dst]
|
| 135 |
-
ax.annotate("",
|
| 136 |
-
xy=(x2 + w2/2.0, y2 + h2),
|
| 137 |
-
xytext=(x1 + w1/2.0, y1),
|
| 138 |
-
arrowprops=dict(arrowstyle="->", lw=2.6, color="#2b6cb0"))
|
| 139 |
-
|
| 140 |
ax.text(0.5, 0.06, f"Domain: {schema.get('domain','unknown')}", fontsize=9, ha="center")
|
| 141 |
return _fig_to_pil(fig)
|
| 142 |
|
|
|
|
| 143 |
JOIN_TBL_RE = re.compile(r"\b(?:from|join)\s+([a-z_]\w*)(?:\s+(?:as\s+)?([a-z_]\w*))?", re.IGNORECASE)
|
| 144 |
EQ_ON_RE = re.compile(r"([a-z_]\w*)\.[a-z_]\w*\s*=\s*([a-z_]\w*)\.[a-z_]\w*", re.IGNORECASE)
|
| 145 |
USING_RE = re.compile(r"\bjoin\s+([a-z_]\w*)(?:\s+(?:as\s+)?([a-z_]\w*))?\s+using\s*\(", re.IGNORECASE)
|
|
@@ -158,6 +132,7 @@ def sql_highlights(sql: str, schema: Dict[str, Any]) -> Tuple[Set[str], Set[Tupl
|
|
| 158 |
alias_to_table[alias] = table
|
| 159 |
join_order.append(alias)
|
| 160 |
|
|
|
|
| 161 |
edges: Set[Tuple[str, str]] = set()
|
| 162 |
for a1, a2 in EQ_ON_RE.findall(low):
|
| 163 |
t1 = alias_to_table.get(a1, a1)
|
|
@@ -174,10 +149,77 @@ def sql_highlights(sql: str, schema: Dict[str, Any]) -> Tuple[Set[str], Set[Tupl
|
|
| 174 |
|
| 175 |
used_tables = {alias_to_table.get(a, a) for a in join_order}
|
| 176 |
schema_tables = {t["name"] for t in schema.get("tables", [])}
|
| 177 |
-
edges = {
|
| 178 |
-
used_tables = {
|
| 179 |
return used_tables, edges
|
| 180 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
# -------------------- SQLite + locking --------------------
|
| 182 |
DB_DIR = "/data" if os.path.exists("/data") else "."
|
| 183 |
DB_PATH = os.path.join(DB_DIR, "sql_trainer_dynamic.db")
|
|
@@ -234,9 +276,8 @@ def init_progress_tables(con: sqlite3.Connection):
|
|
| 234 |
init_progress_tables(CONN)
|
| 235 |
|
| 236 |
# -------------------- Built-in fallback domain packs --------------------
|
| 237 |
-
#
|
| 238 |
FALLBACK_PACKS: Dict[str, Dict[str, Any]] = {}
|
| 239 |
-
|
| 240 |
# --- Bookstore (existing) ---
|
| 241 |
FALLBACK_PACKS["bookstore"] = {
|
| 242 |
"schema": {
|
|
@@ -300,14 +341,14 @@ FALLBACK_PACKS["bookstore"] = {
|
|
| 300 |
{"book_id":102,"title":"I, Robot","author_id":1,"store_id":1,"category":"Sci-Fi","price":12.50,"published_year":1950},
|
| 301 |
{"book_id":103,"title":"The Left Hand of Darkness","author_id":2,"store_id":2,"category":"Sci-Fi","price":16.00,"published_year":1969},
|
| 302 |
{"book_id":104,"title":"A Wizard of Earthsea","author_id":2,"store_id":2,"category":"Fantasy","price":11.50,"published_year":1968},
|
| 303 |
-
{"book_id":105,"title":"Norwegian Wood","author_id":3,"store_id":3,"category":"Fiction","price":18.00
|
| 304 |
-
{"book_id":106,"title":"Kafka on the Shore","author_id":3,"store_id":1,"category":"Fiction","price":21.00
|
| 305 |
-
{"book_id":107,"title":"Things Fall Apart","author_id":4,"store_id":1,"category":"Fiction","price":10.00
|
| 306 |
-
{"book_id":108,"title":"Pride and Prejudice","author_id":5,"store_id":2,"category":"Fiction","price":9.00
|
| 307 |
-
{"book_id":109,"title":"Harry Potter and the Sorcerer's Stone","author_id":6,"store_id":3,"category":"Children","price":22.00
|
| 308 |
-
{"book_id":110,"title":"Harry Potter and the Chamber of Secrets","author_id":6,"store_id":3,"category":"Children","price":23.00
|
| 309 |
-
{"book_id":111,"title":"Sapiens","author_id":7,"store_id":1,"category":"History","price":26.00
|
| 310 |
-
{"book_id":112,"title":"Homo Deus","author_id":7,"store_id":2,"category":"History","price":28.00
|
| 311 |
],
|
| 312 |
},
|
| 313 |
]
|
|
@@ -343,102 +384,82 @@ FALLBACK_PACKS["bookstore"] = {
|
|
| 343 |
]},
|
| 344 |
]
|
| 345 |
}
|
| 346 |
-
|
| 347 |
# --- Retail sales ---
|
| 348 |
FALLBACK_PACKS["retail sales"] = {
|
| 349 |
"schema": {
|
| 350 |
"domain": "retail sales",
|
| 351 |
"tables": [
|
| 352 |
-
{
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
{"columns":["product_id"],"ref_table":"products","ref_columns":["product_id"]}
|
| 424 |
-
],
|
| 425 |
-
"rows":[
|
| 426 |
-
{"order_id":201,"product_id":101,"qty":1,"unit_price":49.99},
|
| 427 |
-
{"order_id":201,"product_id":104,"qty":2,"unit_price":9.99},
|
| 428 |
-
{"order_id":202,"product_id":107,"qty":3,"unit_price":15.00},
|
| 429 |
-
{"order_id":203,"product_id":103,"qty":1,"unit_price":79.00},
|
| 430 |
-
{"order_id":203,"product_id":105,"qty":5,"unit_price":3.49},
|
| 431 |
-
{"order_id":204,"product_id":102,"qty":2,"unit_price":29.99},
|
| 432 |
-
{"order_id":205,"product_id":108,"qty":1,"unit_price":65.00},
|
| 433 |
-
{"order_id":206,"product_id":106,"qty":2,"unit_price":19.99},
|
| 434 |
-
{"order_id":207,"product_id":104,"qty":4,"unit_price":9.99},
|
| 435 |
-
{"order_id":208,"product_id":101,"qty":1,"unit_price":49.99},
|
| 436 |
-
{"order_id":209,"product_id":107,"qty":2,"unit_price":15.00},
|
| 437 |
-
{"order_id":210,"product_id":103,"qty":1,"unit_price":79.00}
|
| 438 |
-
]
|
| 439 |
-
}
|
| 440 |
-
]
|
| 441 |
-
},
|
| 442 |
"questions":[
|
| 443 |
{"id":"RS_Q01","category":"SELECT *","difficulty":1,
|
| 444 |
"prompt_md":"Show everything from `customers`.",
|
|
@@ -470,7 +491,6 @@ FALLBACK_PACKS["retail sales"] = {
|
|
| 470 |
]}
|
| 471 |
]
|
| 472 |
}
|
| 473 |
-
|
| 474 |
# --- Wholesaler ---
|
| 475 |
FALLBACK_PACKS["wholesaler"] = {
|
| 476 |
"schema":{
|
|
@@ -480,22 +500,19 @@ FALLBACK_PACKS["wholesaler"] = {
|
|
| 480 |
"columns":[
|
| 481 |
{"name":"supplier_id","type":"INTEGER"},
|
| 482 |
{"name":"supplier_name","type":"TEXT"},
|
| 483 |
-
{"name":"country","type":"TEXT"}
|
| 484 |
-
],
|
| 485 |
"fks":[],
|
| 486 |
"rows":[
|
| 487 |
{"supplier_id":1,"supplier_name":"Nordic Foods","country":"SE"},
|
| 488 |
{"supplier_id":2,"supplier_name":"Metro Trade","country":"DE"},
|
| 489 |
{"supplier_id":3,"supplier_name":"Pacific Imports","country":"US"},
|
| 490 |
{"supplier_id":4,"supplier_name":"Andes Supply","country":"CL"},
|
| 491 |
-
{"supplier_id":5,"supplier_name":"Sahara Wholesale","country":"MA"}
|
| 492 |
-
]},
|
| 493 |
{"name":"items","pk":["item_id"],
|
| 494 |
"columns":[
|
| 495 |
{"name":"item_id","type":"INTEGER"},
|
| 496 |
{"name":"item_name","type":"TEXT"},
|
| 497 |
-
{"name":"unit_cost","type":"REAL"}
|
| 498 |
-
],
|
| 499 |
"fks":[],
|
| 500 |
"rows":[
|
| 501 |
{"item_id":101,"item_name":"Olive Oil 1L","unit_cost":4.20},
|
|
@@ -505,14 +522,12 @@ FALLBACK_PACKS["wholesaler"] = {
|
|
| 505 |
{"item_id":105,"item_name":"Peanut Butter","unit_cost":3.00},
|
| 506 |
{"item_id":106,"item_name":"Tomato Paste","unit_cost":0.95},
|
| 507 |
{"item_id":107,"item_name":"Chickpeas 1kg","unit_cost":1.60},
|
| 508 |
-
{"item_id":108,"item_name":"Soy Sauce 500ml","unit_cost":2.10}
|
| 509 |
-
]},
|
| 510 |
{"name":"purchase_orders","pk":["po_id"],
|
| 511 |
"columns":[
|
| 512 |
{"name":"po_id","type":"INTEGER"},
|
| 513 |
{"name":"supplier_id","type":"INTEGER"},
|
| 514 |
-
{"name":"po_date","type":"TEXT"}
|
| 515 |
-
],
|
| 516 |
"fks":[{"columns":["supplier_id"],"ref_table":"suppliers","ref_columns":["supplier_id"]}],
|
| 517 |
"rows":[
|
| 518 |
{"po_id":201,"supplier_id":1,"po_date":"2024-01-10"},
|
|
@@ -520,19 +535,16 @@ FALLBACK_PACKS["wholesaler"] = {
|
|
| 520 |
{"po_id":203,"supplier_id":3,"po_date":"2024-02-05"},
|
| 521 |
{"po_id":204,"supplier_id":1,"po_date":"2024-02-22"},
|
| 522 |
{"po_id":205,"supplier_id":5,"po_date":"2024-03-01"},
|
| 523 |
-
{"po_id":206,"supplier_id":4,"po_date":"2024-03-07"}
|
| 524 |
-
]},
|
| 525 |
{"name":"po_lines","pk":["po_id","item_id"],
|
| 526 |
"columns":[
|
| 527 |
{"name":"po_id","type":"INTEGER"},
|
| 528 |
{"name":"item_id","type":"INTEGER"},
|
| 529 |
{"name":"qty","type":"INTEGER"},
|
| 530 |
-
{"name":"line_cost","type":"REAL"}
|
| 531 |
-
],
|
| 532 |
"fks":[
|
| 533 |
{"columns":["po_id"],"ref_table":"purchase_orders","ref_columns":["po_id"]},
|
| 534 |
-
{"columns":["item_id"],"ref_table":"items","ref_columns":["item_id"]}
|
| 535 |
-
],
|
| 536 |
"rows":[
|
| 537 |
{"po_id":201,"item_id":101,"qty":200,"line_cost":840.0},
|
| 538 |
{"po_id":201,"item_id":106,"qty":500,"line_cost":475.0},
|
|
@@ -541,9 +553,7 @@ FALLBACK_PACKS["wholesaler"] = {
|
|
| 541 |
{"po_id":203,"item_id":104,"qty":150,"line_cost":405.0},
|
| 542 |
{"po_id":204,"item_id":105,"qty":180,"line_cost":540.0},
|
| 543 |
{"po_id":205,"item_id":107,"qty":300,"line_cost":480.0},
|
| 544 |
-
{"po_id":206,"item_id":108,"qty":250,"line_cost":525.0}
|
| 545 |
-
]}
|
| 546 |
-
]
|
| 547 |
},
|
| 548 |
"questions":[
|
| 549 |
{"id":"W_Q01","category":"SELECT *","difficulty":1,
|
|
@@ -576,7 +586,6 @@ FALLBACK_PACKS["wholesaler"] = {
|
|
| 576 |
]}
|
| 577 |
]
|
| 578 |
}
|
| 579 |
-
|
| 580 |
# --- Sales tax ---
|
| 581 |
FALLBACK_PACKS["sales tax"] = {
|
| 582 |
"schema":{
|
|
@@ -586,8 +595,7 @@ FALLBACK_PACKS["sales tax"] = {
|
|
| 586 |
"columns":[
|
| 587 |
{"name":"jurisdiction_id","type":"INTEGER"},
|
| 588 |
{"name":"name","type":"TEXT"},
|
| 589 |
-
{"name":"state","type":"TEXT"}
|
| 590 |
-
],
|
| 591 |
"fks":[],
|
| 592 |
"rows":[
|
| 593 |
{"jurisdiction_id":1,"name":"King County","state":"WA"},
|
|
@@ -595,32 +603,28 @@ FALLBACK_PACKS["sales tax"] = {
|
|
| 595 |
{"jurisdiction_id":3,"name":"Maricopa","state":"AZ"},
|
| 596 |
{"jurisdiction_id":4,"name":"Travis","state":"TX"},
|
| 597 |
{"jurisdiction_id":5,"name":"Denver","state":"CO"},
|
| 598 |
-
{"jurisdiction_id":6,"name":"Miami-Dade","state":"FL"}
|
| 599 |
-
]},
|
| 600 |
{"name":"tax_rates","pk":["rate_id"],
|
| 601 |
"columns":[
|
| 602 |
{"name":"rate_id","type":"INTEGER"},
|
| 603 |
{"name":"jurisdiction_id","type":"INTEGER"},
|
| 604 |
{"name":"category","type":"TEXT"},
|
| 605 |
-
{"name":"rate","type":"REAL"}
|
| 606 |
-
],
|
| 607 |
"fks":[{"columns":["jurisdiction_id"],"ref_table":"jurisdictions","ref_columns":["jurisdiction_id"]}],
|
| 608 |
"rows":[
|
| 609 |
{"rate_id":101,"jurisdiction_id":1,"category":"general","rate":0.102},
|
| 610 |
-
{"rate_id":102,"jurisdiction_id":2,"category":"general","rate":0.000},
|
| 611 |
{"rate_id":103,"jurisdiction_id":3,"category":"general","rate":0.056},
|
| 612 |
{"rate_id":104,"jurisdiction_id":4,"category":"general","rate":0.0825},
|
| 613 |
{"rate_id":105,"jurisdiction_id":5,"category":"general","rate":0.081},
|
| 614 |
-
{"rate_id":106,"jurisdiction_id":6,"category":"general","rate":0.070}
|
| 615 |
-
]},
|
| 616 |
{"name":"transactions","pk":["txn_id"],
|
| 617 |
"columns":[
|
| 618 |
{"name":"txn_id","type":"INTEGER"},
|
| 619 |
{"name":"txn_date","type":"TEXT"},
|
| 620 |
{"name":"amount","type":"REAL"},
|
| 621 |
{"name":"category","type":"TEXT"},
|
| 622 |
-
{"name":"jurisdiction_id","type":"INTEGER"}
|
| 623 |
-
],
|
| 624 |
"fks":[{"columns":["jurisdiction_id"],"ref_table":"jurisdictions","ref_columns":["jurisdiction_id"]}],
|
| 625 |
"rows":[
|
| 626 |
{"txn_id":201,"txn_date":"2024-01-03","amount":120.00,"category":"general","jurisdiction_id":1},
|
|
@@ -632,9 +636,7 @@ FALLBACK_PACKS["sales tax"] = {
|
|
| 632 |
{"txn_id":207,"txn_date":"2024-03-09","amount":70.00,"category":"general","jurisdiction_id":1},
|
| 633 |
{"txn_id":208,"txn_date":"2024-03-15","amount":18.50,"category":"general","jurisdiction_id":2},
|
| 634 |
{"txn_id":209,"txn_date":"2024-03-20","amount":99.95,"category":"general","jurisdiction_id":3},
|
| 635 |
-
{"txn_id":210,"txn_date":"2024-03-25","amount":199.99,"category":"general","jurisdiction_id":4}
|
| 636 |
-
]}
|
| 637 |
-
]
|
| 638 |
},
|
| 639 |
"questions":[
|
| 640 |
{"id":"TX_Q01","category":"SELECT *","difficulty":1,
|
|
@@ -667,7 +669,6 @@ FALLBACK_PACKS["sales tax"] = {
|
|
| 667 |
]}
|
| 668 |
]
|
| 669 |
}
|
| 670 |
-
|
| 671 |
# --- Oil & gas wells ---
|
| 672 |
FALLBACK_PACKS["oil and gas wells"] = {
|
| 673 |
"schema":{
|
|
@@ -679,8 +680,7 @@ FALLBACK_PACKS["oil and gas wells"] = {
|
|
| 679 |
{"name":"well_name","type":"TEXT"},
|
| 680 |
{"name":"location","type":"TEXT"},
|
| 681 |
{"name":"status","type":"TEXT"},
|
| 682 |
-
{"name":"depth","type":"INTEGER"}
|
| 683 |
-
],
|
| 684 |
"fks":[],
|
| 685 |
"rows":[
|
| 686 |
{"well_id":1,"well_name":"Alpha-1","location":"TX-TRV","status":"producing","depth":12000},
|
|
@@ -688,47 +688,40 @@ FALLBACK_PACKS["oil and gas wells"] = {
|
|
| 688 |
{"well_id":3,"well_name":"Cedar-7","location":"OK-CAD","status":"producing","depth":9800},
|
| 689 |
{"well_id":4,"well_name":"Delta-3","location":"ND-WIL","status":"drilling","depth":7000},
|
| 690 |
{"well_id":5,"well_name":"Eagle-5","location":"CO-DNV","status":"producing","depth":8500},
|
| 691 |
-
{"well_id":6,"well_name":"Fox-9","location":"NM-LEA","status":"producing","depth":11000}
|
| 692 |
-
]},
|
| 693 |
{"name":"operators","pk":["operator_id"],
|
| 694 |
"columns":[
|
| 695 |
{"name":"operator_id","type":"INTEGER"},
|
| 696 |
{"name":"name","type":"TEXT"},
|
| 697 |
-
{"name":"contact","type":"TEXT"}
|
| 698 |
-
],
|
| 699 |
"fks":[],
|
| 700 |
"rows":[
|
| 701 |
{"operator_id":10,"name":"PetroMax","contact":"pmx@example.com"},
|
| 702 |
{"operator_id":11,"name":"BlueRock Energy","contact":"blue@example.com"},
|
| 703 |
{"operator_id":12,"name":"HighPlains LLC","contact":"hp@example.com"},
|
| 704 |
-
{"operator_id":13,"name":"Mesa Oil","contact":"mesa@example.com"}
|
| 705 |
-
]},
|
| 706 |
{"name":"well_operators","pk":["well_id","operator_id"],
|
| 707 |
"columns":[
|
| 708 |
{"name":"well_id","type":"INTEGER"},
|
| 709 |
{"name":"operator_id","type":"INTEGER"},
|
| 710 |
-
{"name":"start_date","type":"TEXT"}
|
| 711 |
-
],
|
| 712 |
"fks":[
|
| 713 |
{"columns":["well_id"],"ref_table":"wells","ref_columns":["well_id"]},
|
| 714 |
-
{"columns":["operator_id"],"ref_table":"operators","ref_columns":["operator_id"]}
|
| 715 |
-
],
|
| 716 |
"rows":[
|
| 717 |
{"well_id":1,"operator_id":10,"start_date":"2023-01-01"},
|
| 718 |
{"well_id":2,"operator_id":10,"start_date":"2023-06-01"},
|
| 719 |
{"well_id":3,"operator_id":11,"start_date":"2022-03-15"},
|
| 720 |
{"well_id":4,"operator_id":12,"start_date":"2024-02-01"},
|
| 721 |
{"well_id":5,"operator_id":13,"start_date":"2022-10-10"},
|
| 722 |
-
{"well_id":6,"operator_id":11,"start_date":"2021-08-05"}
|
| 723 |
-
]},
|
| 724 |
{"name":"production","pk":["prod_id"],
|
| 725 |
"columns":[
|
| 726 |
{"name":"prod_id","type":"INTEGER"},
|
| 727 |
{"name":"well_id","type":"INTEGER"},
|
| 728 |
{"name":"month","type":"TEXT"},
|
| 729 |
{"name":"oil_bbl","type":"REAL"},
|
| 730 |
-
{"name":"gas_mcf","type":"REAL"}
|
| 731 |
-
],
|
| 732 |
"fks":[{"columns":["well_id"],"ref_table":"wells","ref_columns":["well_id"]}],
|
| 733 |
"rows":[
|
| 734 |
{"prod_id":1001,"well_id":1,"month":"2024-01","oil_bbl":1200,"gas_mcf":5000},
|
|
@@ -738,9 +731,7 @@ FALLBACK_PACKS["oil and gas wells"] = {
|
|
| 738 |
{"prod_id":1005,"well_id":5,"month":"2024-01","oil_bbl":600,"gas_mcf":2200},
|
| 739 |
{"prod_id":1006,"well_id":6,"month":"2024-01","oil_bbl":750,"gas_mcf":2600},
|
| 740 |
{"prod_id":1007,"well_id":2,"month":"2024-01","oil_bbl":0,"gas_mcf":0},
|
| 741 |
-
{"prod_id":1008,"well_id":4,"month":"2024-02","oil_bbl":100,"gas_mcf":400}
|
| 742 |
-
]}
|
| 743 |
-
]
|
| 744 |
},
|
| 745 |
"questions":[
|
| 746 |
{"id":"OG_Q01","category":"SELECT *","difficulty":1,
|
|
@@ -773,7 +764,6 @@ FALLBACK_PACKS["oil and gas wells"] = {
|
|
| 773 |
]}
|
| 774 |
]
|
| 775 |
}
|
| 776 |
-
|
| 777 |
# --- Marketing ---
|
| 778 |
FALLBACK_PACKS["marketing"] = {
|
| 779 |
"schema":{
|
|
@@ -782,23 +772,20 @@ FALLBACK_PACKS["marketing"] = {
|
|
| 782 |
{"name":"channels","pk":["channel_id"],
|
| 783 |
"columns":[
|
| 784 |
{"name":"channel_id","type":"INTEGER"},
|
| 785 |
-
{"name":"channel_name","type":"TEXT"}
|
| 786 |
-
],
|
| 787 |
"fks":[],
|
| 788 |
"rows":[
|
| 789 |
{"channel_id":1,"channel_name":"Search"},
|
| 790 |
{"channel_id":2,"channel_name":"Social"},
|
| 791 |
{"channel_id":3,"channel_name":"Email"},
|
| 792 |
-
{"channel_id":4,"channel_name":"Display"}
|
| 793 |
-
]},
|
| 794 |
{"name":"campaigns","pk":["campaign_id"],
|
| 795 |
"columns":[
|
| 796 |
{"name":"campaign_id","type":"INTEGER"},
|
| 797 |
{"name":"campaign_name","type":"TEXT"},
|
| 798 |
{"name":"channel_id","type":"INTEGER"},
|
| 799 |
{"name":"start_date","type":"TEXT"},
|
| 800 |
-
{"name":"budget","type":"REAL"}
|
| 801 |
-
],
|
| 802 |
"fks":[{"columns":["channel_id"],"ref_table":"channels","ref_columns":["channel_id"]}],
|
| 803 |
"rows":[
|
| 804 |
{"campaign_id":101,"campaign_name":"Spring Search","channel_id":1,"start_date":"2024-03-01","budget":5000},
|
|
@@ -806,16 +793,14 @@ FALLBACK_PACKS["marketing"] = {
|
|
| 806 |
{"campaign_id":103,"campaign_name":"Welcome Email","channel_id":3,"start_date":"2024-03-07","budget":1000},
|
| 807 |
{"campaign_id":104,"campaign_name":"Retargeting","channel_id":4,"start_date":"2024-03-10","budget":2000},
|
| 808 |
{"campaign_id":105,"campaign_name":"Summer Search","channel_id":1,"start_date":"2024-06-01","budget":6000},
|
| 809 |
-
{"campaign_id":106,"campaign_name":"Promo Social","channel_id":2,"start_date":"2024-06-05","budget":3500}
|
| 810 |
-
]},
|
| 811 |
{"name":"ad_stats","pk":["campaign_id","day"],
|
| 812 |
"columns":[
|
| 813 |
{"name":"campaign_id","type":"INTEGER"},
|
| 814 |
{"name":"day","type":"TEXT"},
|
| 815 |
{"name":"impressions","type":"INTEGER"},
|
| 816 |
{"name":"clicks","type":"INTEGER"},
|
| 817 |
-
{"name":"spend","type":"REAL"}
|
| 818 |
-
],
|
| 819 |
"fks":[{"columns":["campaign_id"],"ref_table":"campaigns","ref_columns":["campaign_id"]}],
|
| 820 |
"rows":[
|
| 821 |
{"campaign_id":101,"day":"2024-03-12","impressions":10000,"clicks":500,"spend":200.0},
|
|
@@ -823,16 +808,14 @@ FALLBACK_PACKS["marketing"] = {
|
|
| 823 |
{"campaign_id":102,"day":"2024-03-12","impressions":8000,"clicks":400,"spend":150.0},
|
| 824 |
{"campaign_id":103,"day":"2024-03-12","impressions":5000,"clicks":250,"spend":80.0},
|
| 825 |
{"campaign_id":104,"day":"2024-03-12","impressions":7000,"clicks":210,"spend":110.0},
|
| 826 |
-
{"campaign_id":106,"day":"2024-06-12","impressions":9500,"clicks":520,"spend":190.0}
|
| 827 |
-
]},
|
| 828 |
{"name":"leads","pk":["lead_id"],
|
| 829 |
"columns":[
|
| 830 |
{"name":"lead_id","type":"INTEGER"},
|
| 831 |
{"name":"campaign_id","type":"INTEGER"},
|
| 832 |
{"name":"source","type":"TEXT"},
|
| 833 |
{"name":"qualified","type":"INTEGER"},
|
| 834 |
-
{"name":"revenue","type":"REAL"}
|
| 835 |
-
],
|
| 836 |
"fks":[{"columns":["campaign_id"],"ref_table":"campaigns","ref_columns":["campaign_id"]}],
|
| 837 |
"rows":[
|
| 838 |
{"lead_id":1,"campaign_id":101,"source":"LP1","qualified":1,"revenue":400},
|
|
@@ -840,9 +823,7 @@ FALLBACK_PACKS["marketing"] = {
|
|
| 840 |
{"lead_id":3,"campaign_id":102,"source":"FB","qualified":1,"revenue":250},
|
| 841 |
{"lead_id":4,"campaign_id":103,"source":"Email","qualified":1,"revenue":300},
|
| 842 |
{"lead_id":5,"campaign_id":104,"source":"DSP","qualified":0,"revenue":0},
|
| 843 |
-
{"lead_id":6,"campaign_id":106,"source":"FB","qualified":1,"revenue":500}
|
| 844 |
-
]}
|
| 845 |
-
]
|
| 846 |
},
|
| 847 |
"questions":[
|
| 848 |
{"id":"M_Q01","category":"SELECT *","difficulty":1,
|
|
@@ -985,6 +966,13 @@ def _canon_question(q: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
|
| 985 |
req_aliases = [a.strip() for a in re.split(r"[,\s]+", req_aliases) if a.strip()]
|
| 986 |
elif not isinstance(req_aliases, list):
|
| 987 |
req_aliases = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 988 |
return {
|
| 989 |
"id": str(q.get("id") or f"LLM_{int(time.time()*1000)}_{random.randint(100,999)}"),
|
| 990 |
"category": cat,
|
|
@@ -1122,7 +1110,6 @@ def install_schema(con: sqlite3.Connection, schema: Dict[str,Any]):
|
|
| 1122 |
def bootstrap_domain_with_llm_or_fallback(prev_domain: Optional[str], preferred_domain: str):
|
| 1123 |
obj, err, model_used, stats = llm_generate_domain_and_questions(prev_domain, preferred_domain)
|
| 1124 |
if obj is None:
|
| 1125 |
-
# Use domain-specific fallback pack
|
| 1126 |
schema, questions = get_fallback_pack_for(preferred_domain)
|
| 1127 |
info = {"source":"fallback","model":None,"error":err,"accepted":0,"dropped":0}
|
| 1128 |
return schema, questions, info
|
|
@@ -1132,7 +1119,6 @@ def install_schema_and_prepare_questions(prev_domain: Optional[str], preferred_d
|
|
| 1132 |
schema, questions, info = bootstrap_domain_with_llm_or_fallback(prev_domain, preferred_domain)
|
| 1133 |
install_schema(CONN, schema)
|
| 1134 |
if not questions:
|
| 1135 |
-
# fallback to domain pack (still preferred)
|
| 1136 |
schema2, questions2 = get_fallback_pack_for(preferred_domain)
|
| 1137 |
install_schema(CONN, schema2)
|
| 1138 |
schema, questions, info = schema2, questions2, {"source":"fallback","model":None,"error":"No usable questions from LLM","accepted":0,"dropped":0}
|
|
@@ -1159,7 +1145,6 @@ def choose_next_domain(prev_domain: Optional[str]) -> str:
|
|
| 1159 |
return ALLOWED_DOMAINS[DOMAIN_CYCLE_POS]
|
| 1160 |
|
| 1161 |
# -------------------- Initialize first domain --------------------
|
| 1162 |
-
# Start with bookstore for determinism on first boot
|
| 1163 |
CURRENT_SCHEMA, CURRENT_QS, CURRENT_INFO = install_schema_and_prepare_questions(prev_domain=None, preferred_domain="bookstore")
|
| 1164 |
DOMAIN_CYCLE_POS = _index_of_domain(CURRENT_SCHEMA.get("domain","bookstore"))
|
| 1165 |
|
|
@@ -1250,17 +1235,27 @@ def detect_cartesian(con: sqlite3.Connection, sql: str, df_result: pd.DataFrame)
|
|
| 1250 |
return "Possible cartesian product: no join condition detected."
|
| 1251 |
return None
|
| 1252 |
|
|
|
|
| 1253 |
def should_enforce_columns(q: Dict[str, Any]) -> bool:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1254 |
cat = (q.get("category") or "").strip()
|
| 1255 |
-
if cat in ("SELECT columns", "
|
| 1256 |
return True
|
| 1257 |
-
|
| 1258 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1259 |
return True
|
| 1260 |
-
if re.search(r"\((?:show|return|display)[^)]+\)",
|
| 1261 |
return True
|
| 1262 |
-
if re.search(r"\b(show|return|display|select)\b[^.]{0,
|
| 1263 |
return True
|
|
|
|
| 1264 |
return False
|
| 1265 |
|
| 1266 |
def _normalize_columns(df: pd.DataFrame) -> pd.DataFrame:
|
|
@@ -1363,13 +1358,19 @@ def answer_df(answer_sql: List[str]) -> Optional[pd.DataFrame]:
|
|
| 1363 |
continue
|
| 1364 |
return None
|
| 1365 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1366 |
def validate_answer(q: Dict[str,Any], student_sql: str, df_student: Optional[pd.DataFrame]) -> Tuple[bool, str]:
|
| 1367 |
df_expected = answer_df(q["answer_sql"])
|
| 1368 |
if df_expected is None:
|
| 1369 |
return (df_student is not None), f"**Explanation:** Your statement executed successfully for this task."
|
| 1370 |
if df_student is None:
|
| 1371 |
return False, f"**Explanation:** Expected data result differs."
|
| 1372 |
-
|
|
|
|
|
|
|
| 1373 |
ok, note = results_equal_or_superset(df_student, df_expected)
|
| 1374 |
if ok and note == "extra_columns":
|
| 1375 |
return True, "**Note:** You returned extra columns. The rows match; try selecting only the requested columns next time."
|
|
@@ -1377,9 +1378,11 @@ def validate_answer(q: Dict[str,Any], student_sql: str, df_student: Optional[pd.
|
|
| 1377 |
return True, "**Explanation:** Your result matches a canonical solution."
|
| 1378 |
return False, f"**Explanation:** Compare your result to a canonical solution."
|
| 1379 |
else:
|
| 1380 |
-
|
| 1381 |
-
if
|
| 1382 |
return True, "**Explanation:** Columns weren’t specified for this task; row count matches the canonical answer."
|
|
|
|
|
|
|
| 1383 |
return False, "**Explanation:** For this task we compared row counts (projection not enforced) and they didn’t match."
|
| 1384 |
|
| 1385 |
def log_attempt(user_id: str, qid: str, category: str, correct: bool, sql_text: str,
|
|
@@ -1432,7 +1435,8 @@ def render_preview(sql_text: str, session: dict):
|
|
| 1432 |
return gr.update(value="", visible=False), draw_dynamic_erd(CURRENT_SCHEMA)
|
| 1433 |
hi_tables, hi_edges = sql_highlights(s, CURRENT_SCHEMA)
|
| 1434 |
erd = draw_dynamic_erd(CURRENT_SCHEMA, highlight_tables=hi_tables, highlight_edges=hi_edges)
|
| 1435 |
-
|
|
|
|
| 1436 |
|
| 1437 |
def submit_answer(sql_text: str, session: dict):
|
| 1438 |
if not session or "user_id" not in session or "q" not in session:
|
|
@@ -1561,14 +1565,28 @@ def preview_table(tbl: str):
|
|
| 1561 |
return pd.DataFrame([{"error": str(e)}])
|
| 1562 |
|
| 1563 |
# -------------------- UI --------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1564 |
with gr.Blocks(title="Adaptive SQL Trainer — Randomized Domains") as demo:
|
|
|
|
| 1565 |
gr.Markdown(
|
| 1566 |
"""
|
| 1567 |
# 🧪 Adaptive SQL Trainer — Randomized Domains (SQLite)
|
| 1568 |
- Uses **OpenAI** (if configured) to randomize a domain (bookstore, retail sales, wholesaler,
|
| 1569 |
sales tax, oil & gas wells, marketing), generate **3–4 tables** and **8–12** questions.
|
| 1570 |
- Practice `SELECT`, `WHERE`, `JOIN` (INNER/LEFT), **aliases**, **views**, and **CTAS / SELECT INTO**.
|
| 1571 |
-
- **ERD
|
| 1572 |
"""
|
| 1573 |
)
|
| 1574 |
|
|
@@ -1659,4 +1677,4 @@ with gr.Blocks(title="Adaptive SQL Trainer — Randomized Domains") as demo:
|
|
| 1659 |
)
|
| 1660 |
|
| 1661 |
if __name__ == "__main__":
|
| 1662 |
-
demo.launch()
|
|
|
|
| 3 |
# - 3–4 related tables with seed rows installed in SQLite.
|
| 4 |
# - Students practice SELECT, WHERE, JOINs (INNER/LEFT), aliases, views, CTAS/SELECT INTO.
|
| 5 |
# - Validator enforces columns only when the prompt asks; otherwise focuses on rows.
|
| 6 |
+
# - ERD shows tables only (no edges) and bolds the tables you reference while typing.
|
| 7 |
# - Domain picker now round-robins across: bookstore, retail sales, wholesaler, sales tax, oil & gas wells, marketing.
|
| 8 |
|
| 9 |
import os
|
|
|
|
| 62 |
def draw_dynamic_erd(
|
| 63 |
schema: Dict[str, Any],
|
| 64 |
highlight_tables: Optional[Set[str]] = None,
|
| 65 |
+
highlight_edges: Optional[Set[Tuple[str, str]]] = None, # kept for API compatibility; ignored
|
| 66 |
) -> Image.Image:
|
| 67 |
"""
|
| 68 |
+
Draw tables (no FK edges). If highlight_tables provided, those table borders are bolded.
|
|
|
|
| 69 |
"""
|
| 70 |
highlight_tables = set(highlight_tables or [])
|
|
|
|
|
|
|
| 71 |
|
| 72 |
tables = schema.get("tables", [])
|
| 73 |
fig, ax = plt.subplots(figsize=PLOT_FIGSIZE); ax.axis("off")
|
|
|
|
| 75 |
ax.text(0.5, 0.5, "No tables to diagram.", ha="center", va="center")
|
| 76 |
return _fig_to_pil(fig)
|
| 77 |
|
| 78 |
+
# Layout tables horizontally
|
| 79 |
n = len(tables)
|
| 80 |
margin = 0.03
|
| 81 |
width = (1 - margin * (n + 1)) / max(n, 1)
|
| 82 |
height = 0.70
|
| 83 |
y = 0.20
|
| 84 |
|
| 85 |
+
# Draw table boxes + columns (no edges)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
for i, t in enumerate(tables):
|
| 87 |
tx = margin + i * (width + margin)
|
| 88 |
+
|
| 89 |
+
# Border weight highlights usage
|
| 90 |
lw = 2.0 if t["name"] in highlight_tables else 1.2
|
| 91 |
ax.add_patch(Rectangle((tx, y), width, height, fill=False, lw=lw))
|
| 92 |
ax.text(tx + 0.01, y + height - 0.04, t["name"], fontsize=10, ha="left", va="top", weight="bold")
|
|
|
|
| 110 |
ax.text(tx + 0.016, yy, f"{nm}{tag}", fontsize=9, ha="left", va="top")
|
| 111 |
yy -= 0.055
|
| 112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
ax.text(0.5, 0.06, f"Domain: {schema.get('domain','unknown')}", fontsize=9, ha="center")
|
| 114 |
return _fig_to_pil(fig)
|
| 115 |
|
| 116 |
+
# Parse JOINs from SQL to highlight tables (edges ignored now)
|
| 117 |
JOIN_TBL_RE = re.compile(r"\b(?:from|join)\s+([a-z_]\w*)(?:\s+(?:as\s+)?([a-z_]\w*))?", re.IGNORECASE)
|
| 118 |
EQ_ON_RE = re.compile(r"([a-z_]\w*)\.[a-z_]\w*\s*=\s*([a-z_]\w*)\.[a-z_]\w*", re.IGNORECASE)
|
| 119 |
USING_RE = re.compile(r"\bjoin\s+([a-z_]\w*)(?:\s+(?:as\s+)?([a-z_]\w*))?\s+using\s*\(", re.IGNORECASE)
|
|
|
|
| 132 |
alias_to_table[alias] = table
|
| 133 |
join_order.append(alias)
|
| 134 |
|
| 135 |
+
# Edges are computed but not used by the renderer anymore
|
| 136 |
edges: Set[Tuple[str, str]] = set()
|
| 137 |
for a1, a2 in EQ_ON_RE.findall(low):
|
| 138 |
t1 = alias_to_table.get(a1, a1)
|
|
|
|
| 149 |
|
| 150 |
used_tables = {alias_to_table.get(a, a) for a in join_order}
|
| 151 |
schema_tables = {t["name"] for t in schema.get("tables", [])}
|
| 152 |
+
edges = {(a, b) for (a, b) in edges if a in schema_tables and b in schema_tables}
|
| 153 |
+
used_tables = {t for t in used_tables if t in schema_tables}
|
| 154 |
return used_tables, edges
|
| 155 |
|
| 156 |
+
# -------------------- SQL keyword highlighting (preview) --------------------
|
| 157 |
+
def _html_escape(s: str) -> str:
|
| 158 |
+
return s.replace("&", "&").replace("<", "<").replace(">", ">")
|
| 159 |
+
|
| 160 |
+
# Multi-word phrases first (to avoid double-highlighting)
|
| 161 |
+
# (regex, CSS class) pairs for multi-word SQL keywords. The pairs are generated
# from per-class groups; the resulting list order is group order, then the
# order of the patterns inside each group.
PHRASE_CLASSES = [
    (phrase_pattern, css_class)
    for css_class, phrase_patterns in (
        ("kw-group", (r"\bgroup\s+by\b",)),
        ("kw-order", (r"\border\s+by\b",)),
        ("kw-join", (
            r"\bleft\s+join\b",
            r"\binner\s+join\b",
            r"\bright\s+join\b",
            r"\bfull\s+(?:outer\s+)?join\b",
        )),
        ("kw-ddl", (
            r"\bcreate\s+view\b",
            r"\bcreate\s+table\b",
            r"\binsert\s+into\b",
            r"\bselect\s+into\b",
            r"\bprimary\s+key\b",
            r"\bforeign\s+key\b",
        )),
    )
    for phrase_pattern in phrase_patterns
]
|
| 175 |
+
|
| 176 |
+
# (regex, CSS class) pairs for single-word SQL keywords. Each word becomes the
# whole-word pattern r"\b<word>\b"; the list order is group order, then word
# order inside each group ("kw-core" appears twice to keep that order).
TOKEN_CLASSES = [
    (rf"\b{word}\b", css_class)
    for css_class, words in (
        ("kw-core", "select from where"),
        ("kw-join", "join on using"),
        ("kw-group", "having"),
        ("kw-order", "limit offset"),
        ("kw-core", "distinct case when then else end and or not in is null like between union all"),
        ("kw-ddl", "update delete view table into values create"),
    )
    for word in words.split()
]
|
| 210 |
+
|
| 211 |
+
def highlight_sql_html(sql: str) -> str:
    """Render *sql* as an HTML preview with SQL keywords upper-cased and colour-classed.

    The text is HTML-escaped, then every keyword listed in ``PHRASE_CLASSES``
    and ``TOKEN_CLASSES`` is wrapped in ``<span class="kw <class>">`` in a
    single regex pass. One pass matters: running one substitution per rule
    re-scans markup produced by earlier rules, so ``\\bjoin\\b`` would match
    inside ``class="kw kw-join"`` and inside an already wrapped ``LEFT JOIN``,
    yielding nested, broken HTML.

    Args:
        sql: The SQL text to preview; may be empty or ``None``.

    Returns:
        ``""`` for empty input, otherwise a ``<div class="sql-preview">``
        fragment containing the highlighted statement.
    """
    if not sql:
        return ""

    txt = _html_escape(sql)

    # Phrases are listed before single tokens so that, at any given position,
    # "left join" / "select into" win over the bare "join" / "select" rule.
    rules = [*PHRASE_CLASSES, *TOKEN_CLASSES]
    if rules:
        # One named group per rule lets the replacement look up the CSS class
        # of whichever alternative matched via ``Match.lastgroup``.
        css_by_group = {f"r{i}": cls for i, (_, cls) in enumerate(rules)}
        combined = re.compile(
            "|".join(f"(?P<r{i}>{pat})" for i, (pat, _) in enumerate(rules)),
            re.IGNORECASE,
        )

        def _wrap(m):
            cls = css_by_group[m.lastgroup]
            return f'<span class="kw {cls}">{m.group(0).upper()}</span>'

        txt = combined.sub(_wrap, txt)

    return f'<div class="sql-preview"><pre><code>{txt}</code></pre></div>'
|
| 222 |
+
|
| 223 |
# -------------------- SQLite + locking --------------------
|
| 224 |
DB_DIR = "/data" if os.path.exists("/data") else "."
|
| 225 |
DB_PATH = os.path.join(DB_DIR, "sql_trainer_dynamic.db")
|
|
|
|
| 276 |
init_progress_tables(CONN)
|
| 277 |
|
| 278 |
# -------------------- Built-in fallback domain packs --------------------
|
| 279 |
+
# Each pack maps a domain name to a built-in schema (with seed rows) and its questions; used when the LLM yields nothing usable.
|
| 280 |
FALLBACK_PACKS: Dict[str, Dict[str, Any]] = {}
|
|
|
|
| 281 |
# --- Bookstore (existing) ---
|
| 282 |
FALLBACK_PACKS["bookstore"] = {
|
| 283 |
"schema": {
|
|
|
|
| 341 |
{"book_id":102,"title":"I, Robot","author_id":1,"store_id":1,"category":"Sci-Fi","price":12.50,"published_year":1950},
|
| 342 |
{"book_id":103,"title":"The Left Hand of Darkness","author_id":2,"store_id":2,"category":"Sci-Fi","price":16.00,"published_year":1969},
|
| 343 |
{"book_id":104,"title":"A Wizard of Earthsea","author_id":2,"store_id":2,"category":"Fantasy","price":11.50,"published_year":1968},
|
| 344 |
+
{"book_id":105,"title":"Norwegian Wood","author_id":3,"store_id":3,"category":"Fiction","price":18.00},
|
| 345 |
+
{"book_id":106,"title":"Kafka on the Shore","author_id":3,"store_id":1,"category":"Fiction","price":21.00},
|
| 346 |
+
{"book_id":107,"title":"Things Fall Apart","author_id":4,"store_id":1,"category":"Fiction","price":10.00},
|
| 347 |
+
{"book_id":108,"title":"Pride and Prejudice","author_id":5,"store_id":2,"category":"Fiction","price":9.00},
|
| 348 |
+
{"book_id":109,"title":"Harry Potter and the Sorcerer's Stone","author_id":6,"store_id":3,"category":"Children","price":22.00},
|
| 349 |
+
{"book_id":110,"title":"Harry Potter and the Chamber of Secrets","author_id":6,"store_id":3,"category":"Children","price":23.00},
|
| 350 |
+
{"book_id":111,"title":"Sapiens","author_id":7,"store_id":1,"category":"History","price":26.00},
|
| 351 |
+
{"book_id":112,"title":"Homo Deus","author_id":7,"store_id":2,"category":"History","price":28.00},
|
| 352 |
],
|
| 353 |
},
|
| 354 |
]
|
|
|
|
| 384 |
]},
|
| 385 |
]
|
| 386 |
}
|
|
|
|
| 387 |
# --- Retail sales ---
|
| 388 |
FALLBACK_PACKS["retail sales"] = {
|
| 389 |
"schema": {
|
| 390 |
"domain": "retail sales",
|
| 391 |
"tables": [
|
| 392 |
+
{"name":"customers","pk":["customer_id"],
|
| 393 |
+
"columns":[
|
| 394 |
+
{"name":"customer_id","type":"INTEGER"},
|
| 395 |
+
{"name":"name","type":"TEXT"},
|
| 396 |
+
{"name":"city","type":"TEXT"},
|
| 397 |
+
{"name":"state","type":"TEXT"}],
|
| 398 |
+
"fks":[],
|
| 399 |
+
"rows":[
|
| 400 |
+
{"customer_id":1,"name":"Ava Reed","city":"Seattle","state":"WA"},
|
| 401 |
+
{"customer_id":2,"name":"Mason Ortiz","city":"Portland","state":"OR"},
|
| 402 |
+
{"customer_id":3,"name":"Noah Patel","city":"Phoenix","state":"AZ"},
|
| 403 |
+
{"customer_id":4,"name":"Emma Kim","city":"San Diego","state":"CA"},
|
| 404 |
+
{"customer_id":5,"name":"Olivia Park","city":"Dallas","state":"TX"},
|
| 405 |
+
{"customer_id":6,"name":"Liam Gray","city":"Denver","state":"CO"},
|
| 406 |
+
{"customer_id":7,"name":"Sophia Lee","city":"Boston","state":"MA"},
|
| 407 |
+
{"customer_id":8,"name":"Elijah Hall","city":"Miami","state":"FL"}]},
|
| 408 |
+
{"name":"products","pk":["product_id"],
|
| 409 |
+
"columns":[
|
| 410 |
+
{"name":"product_id","type":"INTEGER"},
|
| 411 |
+
{"name":"product_name","type":"TEXT"},
|
| 412 |
+
{"name":"category","type":"TEXT"},
|
| 413 |
+
{"name":"price","type":"REAL"}],
|
| 414 |
+
"fks":[],
|
| 415 |
+
"rows":[
|
| 416 |
+
{"product_id":101,"product_name":"Coffee Maker","category":"Home","price":49.99},
|
| 417 |
+
{"product_id":102,"product_name":"Electric Kettle","category":"Home","price":29.99},
|
| 418 |
+
{"product_id":103,"product_name":"Headphones","category":"Electronics","price":79.00},
|
| 419 |
+
{"product_id":104,"product_name":"USB-C Cable","category":"Electronics","price":9.99},
|
| 420 |
+
{"product_id":105,"product_name":"Notebook","category":"Stationery","price":3.49},
|
| 421 |
+
{"product_id":106,"product_name":"Desk Lamp","category":"Home","price":19.99},
|
| 422 |
+
{"product_id":107,"product_name":"T-Shirt","category":"Clothing","price":15.00},
|
| 423 |
+
{"product_id":108,"product_name":"Sneakers","category":"Clothing","price":65.00}]},
|
| 424 |
+
{"name":"orders","pk":["order_id"],
|
| 425 |
+
"columns":[
|
| 426 |
+
{"name":"order_id","type":"INTEGER"},
|
| 427 |
+
{"name":"customer_id","type":"INTEGER"},
|
| 428 |
+
{"name":"order_date","type":"TEXT"}],
|
| 429 |
+
"fks":[{"columns":["customer_id"],"ref_table":"customers","ref_columns":["customer_id"]}],
|
| 430 |
+
"rows":[
|
| 431 |
+
{"order_id":201,"customer_id":1,"order_date":"2024-01-05"},
|
| 432 |
+
{"order_id":202,"customer_id":2,"order_date":"2024-01-07"},
|
| 433 |
+
{"order_id":203,"customer_id":1,"order_date":"2024-01-12"},
|
| 434 |
+
{"order_id":204,"customer_id":3,"order_date":"2024-02-01"},
|
| 435 |
+
{"order_id":205,"customer_id":4,"order_date":"2024-02-10"},
|
| 436 |
+
{"order_id":206,"customer_id":5,"order_date":"2024-03-02"},
|
| 437 |
+
{"order_id":207,"customer_id":6,"order_date":"2024-03-03"},
|
| 438 |
+
{"order_id":208,"customer_id":7,"order_date":"2024-03-09"},
|
| 439 |
+
{"order_id":209,"customer_id":8,"order_date":"2024-03-15"},
|
| 440 |
+
{"order_id":210,"customer_id":3,"order_date":"2024-03-20"}]},
|
| 441 |
+
{"name":"order_items","pk":["order_id","product_id"],
|
| 442 |
+
"columns":[
|
| 443 |
+
{"name":"order_id","type":"INTEGER"},
|
| 444 |
+
{"name":"product_id","type":"INTEGER"},
|
| 445 |
+
{"name":"qty","type":"INTEGER"},
|
| 446 |
+
{"name":"unit_price","type":"REAL"}],
|
| 447 |
+
"fks":[
|
| 448 |
+
{"columns":["order_id"],"ref_table":"orders","ref_columns":["order_id"]},
|
| 449 |
+
{"columns":["product_id"],"ref_table":"products","ref_columns":["product_id"]}],
|
| 450 |
+
"rows":[
|
| 451 |
+
{"order_id":201,"product_id":101,"qty":1,"unit_price":49.99},
|
| 452 |
+
{"order_id":201,"product_id":104,"qty":2,"unit_price":9.99},
|
| 453 |
+
{"order_id":202,"product_id":107,"qty":3,"unit_price":15.00},
|
| 454 |
+
{"order_id":203,"product_id":103,"qty":1,"unit_price":79.00},
|
| 455 |
+
{"order_id":203,"product_id":105,"qty":5,"unit_price":3.49},
|
| 456 |
+
{"order_id":204,"product_id":102,"qty":2,"unit_price":29.99},
|
| 457 |
+
{"order_id":205,"product_id":108,"qty":1,"unit_price":65.00},
|
| 458 |
+
{"order_id":206,"product_id":106,"qty":2,"unit_price":19.99},
|
| 459 |
+
{"order_id":207,"product_id":104,"qty":4,"unit_price":9.99},
|
| 460 |
+
{"order_id":208,"product_id":101,"qty":1,"unit_price":49.99},
|
| 461 |
+
{"order_id":209,"product_id":107,"qty":2,"unit_price":15.00},
|
| 462 |
+
{"order_id":210,"product_id":103,"qty":1,"unit_price":79.00}]}] },
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 463 |
"questions":[
|
| 464 |
{"id":"RS_Q01","category":"SELECT *","difficulty":1,
|
| 465 |
"prompt_md":"Show everything from `customers`.",
|
|
|
|
| 491 |
]}
|
| 492 |
]
|
| 493 |
}
|
|
|
|
| 494 |
# --- Wholesaler ---
|
| 495 |
FALLBACK_PACKS["wholesaler"] = {
|
| 496 |
"schema":{
|
|
|
|
| 500 |
"columns":[
|
| 501 |
{"name":"supplier_id","type":"INTEGER"},
|
| 502 |
{"name":"supplier_name","type":"TEXT"},
|
| 503 |
+
{"name":"country","type":"TEXT"}],
|
|
|
|
| 504 |
"fks":[],
|
| 505 |
"rows":[
|
| 506 |
{"supplier_id":1,"supplier_name":"Nordic Foods","country":"SE"},
|
| 507 |
{"supplier_id":2,"supplier_name":"Metro Trade","country":"DE"},
|
| 508 |
{"supplier_id":3,"supplier_name":"Pacific Imports","country":"US"},
|
| 509 |
{"supplier_id":4,"supplier_name":"Andes Supply","country":"CL"},
|
| 510 |
+
{"supplier_id":5,"supplier_name":"Sahara Wholesale","country":"MA"}]},
|
|
|
|
| 511 |
{"name":"items","pk":["item_id"],
|
| 512 |
"columns":[
|
| 513 |
{"name":"item_id","type":"INTEGER"},
|
| 514 |
{"name":"item_name","type":"TEXT"},
|
| 515 |
+
{"name":"unit_cost","type":"REAL"}],
|
|
|
|
| 516 |
"fks":[],
|
| 517 |
"rows":[
|
| 518 |
{"item_id":101,"item_name":"Olive Oil 1L","unit_cost":4.20},
|
|
|
|
| 522 |
{"item_id":105,"item_name":"Peanut Butter","unit_cost":3.00},
|
| 523 |
{"item_id":106,"item_name":"Tomato Paste","unit_cost":0.95},
|
| 524 |
{"item_id":107,"item_name":"Chickpeas 1kg","unit_cost":1.60},
|
| 525 |
+
{"item_id":108,"item_name":"Soy Sauce 500ml","unit_cost":2.10}]},
|
|
|
|
| 526 |
{"name":"purchase_orders","pk":["po_id"],
|
| 527 |
"columns":[
|
| 528 |
{"name":"po_id","type":"INTEGER"},
|
| 529 |
{"name":"supplier_id","type":"INTEGER"},
|
| 530 |
+
{"name":"po_date","type":"TEXT"}],
|
|
|
|
| 531 |
"fks":[{"columns":["supplier_id"],"ref_table":"suppliers","ref_columns":["supplier_id"]}],
|
| 532 |
"rows":[
|
| 533 |
{"po_id":201,"supplier_id":1,"po_date":"2024-01-10"},
|
|
|
|
| 535 |
{"po_id":203,"supplier_id":3,"po_date":"2024-02-05"},
|
| 536 |
{"po_id":204,"supplier_id":1,"po_date":"2024-02-22"},
|
| 537 |
{"po_id":205,"supplier_id":5,"po_date":"2024-03-01"},
|
| 538 |
+
{"po_id":206,"supplier_id":4,"po_date":"2024-03-07"}]},
|
|
|
|
| 539 |
{"name":"po_lines","pk":["po_id","item_id"],
|
| 540 |
"columns":[
|
| 541 |
{"name":"po_id","type":"INTEGER"},
|
| 542 |
{"name":"item_id","type":"INTEGER"},
|
| 543 |
{"name":"qty","type":"INTEGER"},
|
| 544 |
+
{"name":"line_cost","type":"REAL"}],
|
|
|
|
| 545 |
"fks":[
|
| 546 |
{"columns":["po_id"],"ref_table":"purchase_orders","ref_columns":["po_id"]},
|
| 547 |
+
{"columns":["item_id"],"ref_table":"items","ref_columns":["item_id"]}],
|
|
|
|
| 548 |
"rows":[
|
| 549 |
{"po_id":201,"item_id":101,"qty":200,"line_cost":840.0},
|
| 550 |
{"po_id":201,"item_id":106,"qty":500,"line_cost":475.0},
|
|
|
|
| 553 |
{"po_id":203,"item_id":104,"qty":150,"line_cost":405.0},
|
| 554 |
{"po_id":204,"item_id":105,"qty":180,"line_cost":540.0},
|
| 555 |
{"po_id":205,"item_id":107,"qty":300,"line_cost":480.0},
|
| 556 |
+
{"po_id":206,"item_id":108,"qty":250,"line_cost":525.0}]}]
|
|
|
|
|
|
|
| 557 |
},
|
| 558 |
"questions":[
|
| 559 |
{"id":"W_Q01","category":"SELECT *","difficulty":1,
|
|
|
|
| 586 |
]}
|
| 587 |
]
|
| 588 |
}
|
|
|
|
| 589 |
# --- Sales tax ---
|
| 590 |
FALLBACK_PACKS["sales tax"] = {
|
| 591 |
"schema":{
|
|
|
|
| 595 |
"columns":[
|
| 596 |
{"name":"jurisdiction_id","type":"INTEGER"},
|
| 597 |
{"name":"name","type":"TEXT"},
|
| 598 |
+
{"name":"state","type":"TEXT"}],
|
|
|
|
| 599 |
"fks":[],
|
| 600 |
"rows":[
|
| 601 |
{"jurisdiction_id":1,"name":"King County","state":"WA"},
|
|
|
|
| 603 |
{"jurisdiction_id":3,"name":"Maricopa","state":"AZ"},
|
| 604 |
{"jurisdiction_id":4,"name":"Travis","state":"TX"},
|
| 605 |
{"jurisdiction_id":5,"name":"Denver","state":"CO"},
|
| 606 |
+
{"jurisdiction_id":6,"name":"Miami-Dade","state":"FL"}]},
|
|
|
|
| 607 |
{"name":"tax_rates","pk":["rate_id"],
|
| 608 |
"columns":[
|
| 609 |
{"name":"rate_id","type":"INTEGER"},
|
| 610 |
{"name":"jurisdiction_id","type":"INTEGER"},
|
| 611 |
{"name":"category","type":"TEXT"},
|
| 612 |
+
{"name":"rate","type":"REAL"}],
|
|
|
|
| 613 |
"fks":[{"columns":["jurisdiction_id"],"ref_table":"jurisdictions","ref_columns":["jurisdiction_id"]}],
|
| 614 |
"rows":[
|
| 615 |
{"rate_id":101,"jurisdiction_id":1,"category":"general","rate":0.102},
|
| 616 |
+
{"rate_id":102,"jurisdiction_id":2,"category":"general","rate":0.000},
|
| 617 |
{"rate_id":103,"jurisdiction_id":3,"category":"general","rate":0.056},
|
| 618 |
{"rate_id":104,"jurisdiction_id":4,"category":"general","rate":0.0825},
|
| 619 |
{"rate_id":105,"jurisdiction_id":5,"category":"general","rate":0.081},
|
| 620 |
+
{"rate_id":106,"jurisdiction_id":6,"category":"general","rate":0.070}]},
|
|
|
|
| 621 |
{"name":"transactions","pk":["txn_id"],
|
| 622 |
"columns":[
|
| 623 |
{"name":"txn_id","type":"INTEGER"},
|
| 624 |
{"name":"txn_date","type":"TEXT"},
|
| 625 |
{"name":"amount","type":"REAL"},
|
| 626 |
{"name":"category","type":"TEXT"},
|
| 627 |
+
{"name":"jurisdiction_id","type":"INTEGER"}],
|
|
|
|
| 628 |
"fks":[{"columns":["jurisdiction_id"],"ref_table":"jurisdictions","ref_columns":["jurisdiction_id"]}],
|
| 629 |
"rows":[
|
| 630 |
{"txn_id":201,"txn_date":"2024-01-03","amount":120.00,"category":"general","jurisdiction_id":1},
|
|
|
|
| 636 |
{"txn_id":207,"txn_date":"2024-03-09","amount":70.00,"category":"general","jurisdiction_id":1},
|
| 637 |
{"txn_id":208,"txn_date":"2024-03-15","amount":18.50,"category":"general","jurisdiction_id":2},
|
| 638 |
{"txn_id":209,"txn_date":"2024-03-20","amount":99.95,"category":"general","jurisdiction_id":3},
|
| 639 |
+
{"txn_id":210,"txn_date":"2024-03-25","amount":199.99,"category":"general","jurisdiction_id":4}]}]
|
|
|
|
|
|
|
| 640 |
},
|
| 641 |
"questions":[
|
| 642 |
{"id":"TX_Q01","category":"SELECT *","difficulty":1,
|
|
|
|
| 669 |
]}
|
| 670 |
]
|
| 671 |
}
|
|
|
|
| 672 |
# --- Oil & gas wells ---
|
| 673 |
FALLBACK_PACKS["oil and gas wells"] = {
|
| 674 |
"schema":{
|
|
|
|
| 680 |
{"name":"well_name","type":"TEXT"},
|
| 681 |
{"name":"location","type":"TEXT"},
|
| 682 |
{"name":"status","type":"TEXT"},
|
| 683 |
+
{"name":"depth","type":"INTEGER"}],
|
|
|
|
| 684 |
"fks":[],
|
| 685 |
"rows":[
|
| 686 |
{"well_id":1,"well_name":"Alpha-1","location":"TX-TRV","status":"producing","depth":12000},
|
|
|
|
| 688 |
{"well_id":3,"well_name":"Cedar-7","location":"OK-CAD","status":"producing","depth":9800},
|
| 689 |
{"well_id":4,"well_name":"Delta-3","location":"ND-WIL","status":"drilling","depth":7000},
|
| 690 |
{"well_id":5,"well_name":"Eagle-5","location":"CO-DNV","status":"producing","depth":8500},
|
| 691 |
+
{"well_id":6,"well_name":"Fox-9","location":"NM-LEA","status":"producing","depth":11000}]},
|
|
|
|
| 692 |
{"name":"operators","pk":["operator_id"],
|
| 693 |
"columns":[
|
| 694 |
{"name":"operator_id","type":"INTEGER"},
|
| 695 |
{"name":"name","type":"TEXT"},
|
| 696 |
+
{"name":"contact","type":"TEXT"}],
|
|
|
|
| 697 |
"fks":[],
|
| 698 |
"rows":[
|
| 699 |
{"operator_id":10,"name":"PetroMax","contact":"pmx@example.com"},
|
| 700 |
{"operator_id":11,"name":"BlueRock Energy","contact":"blue@example.com"},
|
| 701 |
{"operator_id":12,"name":"HighPlains LLC","contact":"hp@example.com"},
|
| 702 |
+
{"operator_id":13,"name":"Mesa Oil","contact":"mesa@example.com"}]},
|
|
|
|
| 703 |
{"name":"well_operators","pk":["well_id","operator_id"],
|
| 704 |
"columns":[
|
| 705 |
{"name":"well_id","type":"INTEGER"},
|
| 706 |
{"name":"operator_id","type":"INTEGER"},
|
| 707 |
+
{"name":"start_date","type":"TEXT"}],
|
|
|
|
| 708 |
"fks":[
|
| 709 |
{"columns":["well_id"],"ref_table":"wells","ref_columns":["well_id"]},
|
| 710 |
+
{"columns":["operator_id"],"ref_table":"operators","ref_columns":["operator_id"]}],
|
|
|
|
| 711 |
"rows":[
|
| 712 |
{"well_id":1,"operator_id":10,"start_date":"2023-01-01"},
|
| 713 |
{"well_id":2,"operator_id":10,"start_date":"2023-06-01"},
|
| 714 |
{"well_id":3,"operator_id":11,"start_date":"2022-03-15"},
|
| 715 |
{"well_id":4,"operator_id":12,"start_date":"2024-02-01"},
|
| 716 |
{"well_id":5,"operator_id":13,"start_date":"2022-10-10"},
|
| 717 |
+
{"well_id":6,"operator_id":11,"start_date":"2021-08-05"}]},
|
|
|
|
| 718 |
{"name":"production","pk":["prod_id"],
|
| 719 |
"columns":[
|
| 720 |
{"name":"prod_id","type":"INTEGER"},
|
| 721 |
{"name":"well_id","type":"INTEGER"},
|
| 722 |
{"name":"month","type":"TEXT"},
|
| 723 |
{"name":"oil_bbl","type":"REAL"},
|
| 724 |
+
{"name":"gas_mcf","type":"REAL"}],
|
|
|
|
| 725 |
"fks":[{"columns":["well_id"],"ref_table":"wells","ref_columns":["well_id"]}],
|
| 726 |
"rows":[
|
| 727 |
{"prod_id":1001,"well_id":1,"month":"2024-01","oil_bbl":1200,"gas_mcf":5000},
|
|
|
|
| 731 |
{"prod_id":1005,"well_id":5,"month":"2024-01","oil_bbl":600,"gas_mcf":2200},
|
| 732 |
{"prod_id":1006,"well_id":6,"month":"2024-01","oil_bbl":750,"gas_mcf":2600},
|
| 733 |
{"prod_id":1007,"well_id":2,"month":"2024-01","oil_bbl":0,"gas_mcf":0},
|
| 734 |
+
{"prod_id":1008,"well_id":4,"month":"2024-02","oil_bbl":100,"gas_mcf":400}]}]
|
|
|
|
|
|
|
| 735 |
},
|
| 736 |
"questions":[
|
| 737 |
{"id":"OG_Q01","category":"SELECT *","difficulty":1,
|
|
|
|
| 764 |
]}
|
| 765 |
]
|
| 766 |
}
|
|
|
|
| 767 |
# --- Marketing ---
|
| 768 |
FALLBACK_PACKS["marketing"] = {
|
| 769 |
"schema":{
|
|
|
|
| 772 |
{"name":"channels","pk":["channel_id"],
|
| 773 |
"columns":[
|
| 774 |
{"name":"channel_id","type":"INTEGER"},
|
| 775 |
+
{"name":"channel_name","type":"TEXT"}],
|
|
|
|
| 776 |
"fks":[],
|
| 777 |
"rows":[
|
| 778 |
{"channel_id":1,"channel_name":"Search"},
|
| 779 |
{"channel_id":2,"channel_name":"Social"},
|
| 780 |
{"channel_id":3,"channel_name":"Email"},
|
| 781 |
+
{"channel_id":4,"channel_name":"Display"}]},
|
|
|
|
| 782 |
{"name":"campaigns","pk":["campaign_id"],
|
| 783 |
"columns":[
|
| 784 |
{"name":"campaign_id","type":"INTEGER"},
|
| 785 |
{"name":"campaign_name","type":"TEXT"},
|
| 786 |
{"name":"channel_id","type":"INTEGER"},
|
| 787 |
{"name":"start_date","type":"TEXT"},
|
| 788 |
+
{"name":"budget","type":"REAL"}],
|
|
|
|
| 789 |
"fks":[{"columns":["channel_id"],"ref_table":"channels","ref_columns":["channel_id"]}],
|
| 790 |
"rows":[
|
| 791 |
{"campaign_id":101,"campaign_name":"Spring Search","channel_id":1,"start_date":"2024-03-01","budget":5000},
|
|
|
|
| 793 |
{"campaign_id":103,"campaign_name":"Welcome Email","channel_id":3,"start_date":"2024-03-07","budget":1000},
|
| 794 |
{"campaign_id":104,"campaign_name":"Retargeting","channel_id":4,"start_date":"2024-03-10","budget":2000},
|
| 795 |
{"campaign_id":105,"campaign_name":"Summer Search","channel_id":1,"start_date":"2024-06-01","budget":6000},
|
| 796 |
+
{"campaign_id":106,"campaign_name":"Promo Social","channel_id":2,"start_date":"2024-06-05","budget":3500}]},
|
|
|
|
| 797 |
{"name":"ad_stats","pk":["campaign_id","day"],
|
| 798 |
"columns":[
|
| 799 |
{"name":"campaign_id","type":"INTEGER"},
|
| 800 |
{"name":"day","type":"TEXT"},
|
| 801 |
{"name":"impressions","type":"INTEGER"},
|
| 802 |
{"name":"clicks","type":"INTEGER"},
|
| 803 |
+
{"name":"spend","type":"REAL"}],
|
|
|
|
| 804 |
"fks":[{"columns":["campaign_id"],"ref_table":"campaigns","ref_columns":["campaign_id"]}],
|
| 805 |
"rows":[
|
| 806 |
{"campaign_id":101,"day":"2024-03-12","impressions":10000,"clicks":500,"spend":200.0},
|
|
|
|
| 808 |
{"campaign_id":102,"day":"2024-03-12","impressions":8000,"clicks":400,"spend":150.0},
|
| 809 |
{"campaign_id":103,"day":"2024-03-12","impressions":5000,"clicks":250,"spend":80.0},
|
| 810 |
{"campaign_id":104,"day":"2024-03-12","impressions":7000,"clicks":210,"spend":110.0},
|
| 811 |
+
{"campaign_id":106,"day":"2024-06-12","impressions":9500,"clicks":520,"spend":190.0}]},
|
|
|
|
| 812 |
{"name":"leads","pk":["lead_id"],
|
| 813 |
"columns":[
|
| 814 |
{"name":"lead_id","type":"INTEGER"},
|
| 815 |
{"name":"campaign_id","type":"INTEGER"},
|
| 816 |
{"name":"source","type":"TEXT"},
|
| 817 |
{"name":"qualified","type":"INTEGER"},
|
| 818 |
+
{"name":"revenue","type":"REAL"}],
|
|
|
|
| 819 |
"fks":[{"columns":["campaign_id"],"ref_table":"campaigns","ref_columns":["campaign_id"]}],
|
| 820 |
"rows":[
|
| 821 |
{"lead_id":1,"campaign_id":101,"source":"LP1","qualified":1,"revenue":400},
|
|
|
|
| 823 |
{"lead_id":3,"campaign_id":102,"source":"FB","qualified":1,"revenue":250},
|
| 824 |
{"lead_id":4,"campaign_id":103,"source":"Email","qualified":1,"revenue":300},
|
| 825 |
{"lead_id":5,"campaign_id":104,"source":"DSP","qualified":0,"revenue":0},
|
| 826 |
+
{"lead_id":6,"campaign_id":106,"source":"FB","qualified":1,"revenue":500}]}]
|
|
|
|
|
|
|
| 827 |
},
|
| 828 |
"questions":[
|
| 829 |
{"id":"M_Q01","category":"SELECT *","difficulty":1,
|
|
|
|
| 966 |
req_aliases = [a.strip() for a in re.split(r"[,\s]+", req_aliases) if a.strip()]
|
| 967 |
elif not isinstance(req_aliases, list):
|
| 968 |
req_aliases = []
|
| 969 |
+
|
| 970 |
+
# Soften ambiguous aggregation prompts so users know what to return
|
| 971 |
+
if cat == "Aggregation":
|
| 972 |
+
pl = (prompt or "").lower()
|
| 973 |
+
if ("for each" in pl or "per " in pl) and not re.search(r"`[^`]+`", prompt or ""):
|
| 974 |
+
prompt += "\n\n*Note:* Return the grouping column and the aggregate. If the wording says “for each …”, include entities with zero by using a LEFT JOIN."
|
| 975 |
+
|
| 976 |
return {
|
| 977 |
"id": str(q.get("id") or f"LLM_{int(time.time()*1000)}_{random.randint(100,999)}"),
|
| 978 |
"category": cat,
|
|
|
|
| 1110 |
def bootstrap_domain_with_llm_or_fallback(prev_domain: Optional[str], preferred_domain: str):
|
| 1111 |
obj, err, model_used, stats = llm_generate_domain_and_questions(prev_domain, preferred_domain)
|
| 1112 |
if obj is None:
|
|
|
|
| 1113 |
schema, questions = get_fallback_pack_for(preferred_domain)
|
| 1114 |
info = {"source":"fallback","model":None,"error":err,"accepted":0,"dropped":0}
|
| 1115 |
return schema, questions, info
|
|
|
|
| 1119 |
schema, questions, info = bootstrap_domain_with_llm_or_fallback(prev_domain, preferred_domain)
|
| 1120 |
install_schema(CONN, schema)
|
| 1121 |
if not questions:
|
|
|
|
| 1122 |
schema2, questions2 = get_fallback_pack_for(preferred_domain)
|
| 1123 |
install_schema(CONN, schema2)
|
| 1124 |
schema, questions, info = schema2, questions2, {"source":"fallback","model":None,"error":"No usable questions from LLM","accepted":0,"dropped":0}
|
|
|
|
| 1145 |
return ALLOWED_DOMAINS[DOMAIN_CYCLE_POS]
|
| 1146 |
|
| 1147 |
# -------------------- Initialize first domain --------------------
|
|
|
|
| 1148 |
CURRENT_SCHEMA, CURRENT_QS, CURRENT_INFO = install_schema_and_prepare_questions(prev_domain=None, preferred_domain="bookstore")
|
| 1149 |
DOMAIN_CYCLE_POS = _index_of_domain(CURRENT_SCHEMA.get("domain","bookstore"))
|
| 1150 |
|
|
|
|
| 1235 |
return "Possible cartesian product: no join condition detected."
|
| 1236 |
return None
|
| 1237 |
|
| 1238 |
+
# Column enforcement policy — relax unless prompt makes it explicit
|
| 1239 |
def should_enforce_columns(q: Dict[str, Any]) -> bool:
    """Tell whether the answer to question *q* must match the expected projection.

    Columns are enforced for the "SELECT columns", "VIEW" and
    "CTAS / SELECT INTO" categories, and for any prompt that names what to
    return: a backticked identifier, a parenthesised "(show/return/display …)"
    hint, or a show/return/display/select verb followed within the same
    sentence by a column-like word. Everything else is left relaxed.

    Args:
        q: Question dict; only ``category`` and ``prompt_md`` are read, and
            either may be missing or ``None``.

    Returns:
        ``True`` when the projection should be checked, ``False`` otherwise.
    """
    always_strict = {"SELECT columns", "VIEW", "CTAS / SELECT INTO"}
    if (q.get("category") or "").strip() in always_strict:
        return True

    raw_prompt = q.get("prompt_md") or ""
    lowered = raw_prompt.lower()

    # (pattern, text to search): backticks are looked up in the prompt as
    # written, the wording patterns in its lower-cased form.
    signals = (
        (r"`[^`]+`", raw_prompt),
        (r"\((?:show|return|display)[^)]+\)", lowered),
        (r"\b(show|return|display|select)\b[^.]{0,120}\b(columns?|fields?|name|title|price|count|sum|avg|max|min)\b", lowered),
    )
    return any(re.search(pattern, text) is not None for pattern, text in signals)
|
| 1260 |
|
| 1261 |
def _normalize_columns(df: pd.DataFrame) -> pd.DataFrame:
|
|
|
|
| 1358 |
continue
|
| 1359 |
return None
|
| 1360 |
|
| 1361 |
+
def _canonical_expects_left_join(q: Dict[str, Any]) -> bool:
|
| 1362 |
+
canon = " ".join(q.get("answer_sql", [])).lower()
|
| 1363 |
+
return " left join " in canon
|
| 1364 |
+
|
| 1365 |
def validate_answer(q: Dict[str,Any], student_sql: str, df_student: Optional[pd.DataFrame]) -> Tuple[bool, str]:
|
| 1366 |
df_expected = answer_df(q["answer_sql"])
|
| 1367 |
if df_expected is None:
|
| 1368 |
return (df_student is not None), f"**Explanation:** Your statement executed successfully for this task."
|
| 1369 |
if df_student is None:
|
| 1370 |
return False, f"**Explanation:** Expected data result differs."
|
| 1371 |
+
|
| 1372 |
+
enforce_cols = should_enforce_columns(q)
|
| 1373 |
+
if enforce_cols:
|
| 1374 |
ok, note = results_equal_or_superset(df_student, df_expected)
|
| 1375 |
if ok and note == "extra_columns":
|
| 1376 |
return True, "**Note:** You returned extra columns. The rows match; try selecting only the requested columns next time."
|
|
|
|
| 1378 |
return True, "**Explanation:** Your result matches a canonical solution."
|
| 1379 |
return False, f"**Explanation:** Compare your result to a canonical solution."
|
| 1380 |
else:
|
| 1381 |
+
# Projection not enforced → compare coverage (row counts)
|
| 1382 |
+
if df_student.shape[0] == df_expected.shape[0]:
|
| 1383 |
return True, "**Explanation:** Columns weren’t specified for this task; row count matches the canonical answer."
|
| 1384 |
+
if df_student.shape[0] < df_expected.shape[0] and _canonical_expects_left_join(q):
|
| 1385 |
+
return False, "**Hint:** This task expects *every* entity (including those with zero related rows). Use a `LEFT JOIN` from the dimension table."
|
| 1386 |
return False, "**Explanation:** For this task we compared row counts (projection not enforced) and they didn’t match."
|
| 1387 |
|
| 1388 |
def log_attempt(user_id: str, qid: str, category: str, correct: bool, sql_text: str,
|
|
|
|
| 1435 |
return gr.update(value="", visible=False), draw_dynamic_erd(CURRENT_SCHEMA)
|
| 1436 |
hi_tables, hi_edges = sql_highlights(s, CURRENT_SCHEMA)
|
| 1437 |
# draw_dynamic_erd() takes only (schema, highlight_tables); passing the removed
# highlight_edges keyword raises TypeError, so only the tables are forwarded.
erd = draw_dynamic_erd(CURRENT_SCHEMA, highlight_tables=hi_tables)
|
| 1438 |
+
html = highlight_sql_html(s)
|
| 1439 |
+
return gr.update(value=html, visible=True), erd
|
| 1440 |
|
| 1441 |
def submit_answer(sql_text: str, session: dict):
|
| 1442 |
if not session or "user_id" not in session or "q" not in session:
|
|
|
|
| 1565 |
return pd.DataFrame([{"error": str(e)}])
|
| 1566 |
|
| 1567 |
# -------------------- UI --------------------
|
| 1568 |
+
SQL_PREVIEW_STYLES = """
|
| 1569 |
+
<style>
|
| 1570 |
+
.sql-preview pre { white-space: pre-wrap; margin: 0; }
|
| 1571 |
+
.sql-preview code { font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, "Liberation Mono", monospace; font-size: 13px; }
|
| 1572 |
+
.kw { font-weight: 600; }
|
| 1573 |
+
.kw-core { color: #1f6feb; } /* SELECT, FROM, WHERE, etc. */
|
| 1574 |
+
.kw-join { color: #b80672; } /* JOIN family */
|
| 1575 |
+
.kw-group { color: #107c41; } /* GROUP BY / HAVING */
|
| 1576 |
+
.kw-order { color: #6f42c1; } /* ORDER BY / LIMIT / OFFSET */
|
| 1577 |
+
.kw-ddl { color: #b26c12; } /* CREATE/INSERT/UPDATE/DELETE/etc. */
|
| 1578 |
+
</style>
|
| 1579 |
+
"""
|
| 1580 |
+
|
| 1581 |
with gr.Blocks(title="Adaptive SQL Trainer — Randomized Domains") as demo:
|
| 1582 |
+
gr.Markdown(SQL_PREVIEW_STYLES)
|
| 1583 |
gr.Markdown(
|
| 1584 |
"""
|
| 1585 |
# 🧪 Adaptive SQL Trainer — Randomized Domains (SQLite)
|
| 1586 |
- Uses **OpenAI** (if configured) to randomize a domain (bookstore, retail sales, wholesaler,
|
| 1587 |
sales tax, oil & gas wells, marketing), generate **3–4 tables** and **8–12** questions.
|
| 1588 |
- Practice `SELECT`, `WHERE`, `JOIN` (INNER/LEFT), **aliases**, **views**, and **CTAS / SELECT INTO**.
|
| 1589 |
+
- **ERD shows tables only** and bolds the ones referenced in your SQL.
|
| 1590 |
"""
|
| 1591 |
)
|
| 1592 |
|
|
|
|
| 1677 |
)
|
| 1678 |
|
| 1679 |
if __name__ == "__main__":
|
| 1680 |
+
demo.launch()
|