Spaces:
Running
Running
Commit ·
057135b
1
Parent(s): 4791304
sql 'per' pattern checker
Browse files
- ai/signatures.py +31 -0
- ai/sql_pattern_checker.py +25 -0
ai/signatures.py
CHANGED
|
@@ -160,6 +160,29 @@ class AnalyzeAndPlan(dspy.Signature):
|
|
| 160 |
There is no product_master, products, or product_catalog table.
|
| 161 |
Use product_id as the only product identifier. Never invent table names.
|
| 162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
──────────────────────────────────────────────────────────────
|
| 164 |
RULE 1E — WHICH TABLE OWNS WHICH COLUMNS (DO NOT MIX)
|
| 165 |
──────────────────────────────────────────────────────────────
|
|
@@ -482,6 +505,14 @@ class SQLGeneration(dspy.Signature):
|
|
| 482 |
|
| 483 |
4d. NO product_master table — never reference it; use product_id only.
|
| 484 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 485 |
4e. TABLE COLUMN OWNERSHIP — never use a column from the wrong table:
|
| 486 |
sales_order_line_pricing → has: gold_amount_per_unit, diamond_amount_per_unit,
|
| 487 |
making_charges_per_unit, line_total, quantity, sol_id, variant_sku, product_id
|
|
|
|
| 160 |
There is no product_master, products, or product_catalog table.
|
| 161 |
Use product_id as the only product identifier. Never invent table names.
|
| 162 |
|
| 163 |
+
──────────────────────────────────────────────────────────────
|
| 164 |
+
RULE 1D2 — "PER X" DENOMINATOR — READ THE QUESTION CAREFULLY
|
| 165 |
+
──────────────────────────────────────────────────────────────
|
| 166 |
+
The word after "per" tells you exactly what the denominator must be.
|
| 167 |
+
Using the wrong denominator gives a completely different metric.
|
| 168 |
+
|
| 169 |
+
"per order" → COUNT(DISTINCT so_id) ← number of sales orders
|
| 170 |
+
"per unit" → SUM(quantity) ← number of pieces/items sold
|
| 171 |
+
"per customer" → COUNT(DISTINCT customer_id)
|
| 172 |
+
"per product" → COUNT(DISTINCT product_id)
|
| 173 |
+
"per vendor" → COUNT(DISTINCT vendor_id)
|
| 174 |
+
"per SKU" → COUNT(DISTINCT variant_sku)
|
| 175 |
+
|
| 176 |
+
WRONG — "per order" using quantity as denominator:
|
| 177 |
+
SUM(lp.line_total) / SUM(sol.quantity) ← this is revenue per UNIT, not per ORDER
|
| 178 |
+
|
| 179 |
+
CORRECT — "per order" using distinct order count:
|
| 180 |
+
SUM(lp.line_total) / COUNT(DISTINCT so.so_id) ← this is revenue per ORDER
|
| 181 |
+
|
| 182 |
+
Similarly for AOV (average order value):
|
| 183 |
+
AVG(total_amount) or SUM(total_amount) / COUNT(DISTINCT so_id)
|
| 184 |
+
NEVER SUM(total_amount) / SUM(quantity)
|
| 185 |
+
|
| 186 |
──────────────────────────────────────────────────────────────
|
| 187 |
RULE 1E — WHICH TABLE OWNS WHICH COLUMNS (DO NOT MIX)
|
| 188 |
──────────────────────────────────────────────────────────────
|
|
|
|
| 505 |
|
| 506 |
4d. NO product_master table — never reference it; use product_id only.
|
| 507 |
|
| 508 |
+
4d2. "PER X" DENOMINATOR — use the correct divisor for what "per" refers to:
|
| 509 |
+
"per order" → COUNT(DISTINCT so.so_id) NOT SUM(quantity)
|
| 510 |
+
"per unit" → SUM(quantity) NOT COUNT(DISTINCT so_id)
|
| 511 |
+
"per customer" → COUNT(DISTINCT so.customer_id)
|
| 512 |
+
"per vendor" → COUNT(DISTINCT vendor_id)
|
| 513 |
+
WRONG: SUM(line_total) / SUM(quantity) ← revenue per unit, not per order
|
| 514 |
+
CORRECT: SUM(line_total) / COUNT(DISTINCT so.so_id) ← revenue per order
|
| 515 |
+
|
| 516 |
4e. TABLE COLUMN OWNERSHIP — never use a column from the wrong table:
|
| 517 |
sales_order_line_pricing → has: gold_amount_per_unit, diamond_amount_per_unit,
|
| 518 |
making_charges_per_unit, line_total, quantity, sol_id, variant_sku, product_id
|
ai/sql_pattern_checker.py
CHANGED
|
@@ -232,6 +232,31 @@ def check_sql_patterns(sql: str) -> list[dict[str, Any]]:
|
|
| 232 |
),
|
| 233 |
})
|
| 234 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
# ── Pattern 4 ────────────────────────────────────────────────────────────
|
| 236 |
# Schema-aware: detect alias.column where column doesn't exist in that table.
|
| 237 |
# Generic — works for gold_kt on pricing table, or any future similar mistake.
|
|
|
|
| 232 |
),
|
| 233 |
})
|
| 234 |
|
| 235 |
+
# ── Pattern 3b ───────────────────────────────────────────────────────────
|
| 236 |
+
# "per order" metric computed with SUM(quantity) as denominator instead of
|
| 237 |
+
# COUNT(DISTINCT so_id). SUM(quantity) = revenue per unit; "per order"
|
| 238 |
+
# requires COUNT(DISTINCT so_id).
|
| 239 |
+
# Heuristic: division where the denominator contains sum(...quantity...)
|
| 240 |
+
if re.search(r"/\s*sum\s*\([^)]*quantit", sql_lower):
|
| 241 |
+
issues.append({
|
| 242 |
+
"pattern_name": "per_unit_instead_of_per_order",
|
| 243 |
+
"description": (
|
| 244 |
+
"POSSIBLE BUG — dividing by SUM(quantity) gives revenue per UNIT (per piece). "
|
| 245 |
+
"If the question asks for 'per order', the denominator must be "
|
| 246 |
+
"COUNT(DISTINCT so_id), not SUM(quantity). "
|
| 247 |
+
"These are completely different metrics: "
|
| 248 |
+
"SUM(line_total)/SUM(quantity) = avg revenue per item sold; "
|
| 249 |
+
"SUM(line_total)/COUNT(DISTINCT so_id) = avg revenue each time product appears in an order."
|
| 250 |
+
),
|
| 251 |
+
"correction": (
|
| 252 |
+
"Check the question: does it say 'per order' or 'per unit/piece'?\n"
|
| 253 |
+
" 'per order' → SUM(lp.line_total) / COUNT(DISTINCT so.so_id)\n"
|
| 254 |
+
" 'per unit' → SUM(lp.line_total) / SUM(lp.quantity)\n"
|
| 255 |
+
" 'per customer' → SUM(lp.line_total) / COUNT(DISTINCT so.customer_id)\n"
|
| 256 |
+
"If the question says 'per order', rewrite using COUNT(DISTINCT so.so_id)."
|
| 257 |
+
),
|
| 258 |
+
})
|
| 259 |
+
|
| 260 |
# ── Pattern 4 ────────────────────────────────────────────────────────────
|
| 261 |
# Schema-aware: detect alias.column where column doesn't exist in that table.
|
| 262 |
# Generic — works for gold_kt on pricing table, or any future similar mistake.
|