Spaces:
Running
Running
Commit ·
29bf2eb
1
Parent(s): 057135b
postgres ROUND fixing
Browse files
- ai/signatures.py +48 -0
- ai/sql_pattern_checker.py +65 -0
ai/signatures.py
CHANGED
|
@@ -160,6 +160,44 @@ class AnalyzeAndPlan(dspy.Signature):
|
|
| 160 |
There is no product_master, products, or product_catalog table.
|
| 161 |
Use product_id as the only product identifier. Never invent table names.
|
| 162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
──────────────────────────────────────────────────────────────
|
| 164 |
RULE 1D2 — "PER X" DENOMINATOR — READ THE QUESTION CAREFULLY
|
| 165 |
──────────────────────────────────────────────────────────────
|
|
@@ -505,6 +543,16 @@ class SQLGeneration(dspy.Signature):
|
|
| 505 |
|
| 506 |
4d. NO product_master table — never reference it; use product_id only.
|
| 507 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 508 |
4d2. "PER X" DENOMINATOR — use the correct divisor for what "per" refers to:
|
| 509 |
"per order" → COUNT(DISTINCT so.so_id) NOT SUM(quantity)
|
| 510 |
"per unit" → SUM(quantity) NOT COUNT(DISTINCT so_id)
|
|
|
|
| 160 |
There is no product_master, products, or product_catalog table.
|
| 161 |
Use product_id as the only product identifier. Never invent table names.
|
| 162 |
|
| 163 |
+
──────────────────────────────────────────────────────────────
|
| 164 |
+
RULE 1D0 — PERCENTAGE / RATIO WITH CASE WHEN — NEVER PRE-FILTER STATUS
|
| 165 |
+
──────────────────────────────────────────────────────────────
|
| 166 |
+
When computing a percentage breakdown across different statuses
|
| 167 |
+
(e.g. "% closed vs % cancelled"), the denominator must be ALL orders.
|
| 168 |
+
Adding WHERE status IN ('closed', 'cancelled') before grouping removes
|
| 169 |
+
other statuses from the denominator → inflated percentages.
|
| 170 |
+
|
| 171 |
+
WRONG (WHERE filter shrinks denominator):
|
| 172 |
+
SELECT customer_id,
|
| 173 |
+
SUM(CASE WHEN status = 'closed' THEN total_amount ELSE 0 END) * 100.0
|
| 174 |
+
/ SUM(total_amount) AS pct_closed
|
| 175 |
+
FROM sales_table_v2_sales_order
|
| 176 |
+
WHERE status IN ('closed', 'cancelled') ← removes open/processing rows
|
| 177 |
+
GROUP BY customer_id
|
| 178 |
+
|
| 179 |
+
CORRECT (no WHERE on status — CASE WHEN handles the split):
|
| 180 |
+
SELECT cm.customer_id, cm.customer_name,
|
| 181 |
+
ROUND((SUM(CASE WHEN so.status = 'closed' THEN so.total_amount ELSE 0 END)
|
| 182 |
+
* 100.0 / SUM(so.total_amount))::numeric, 2) AS pct_closed,
|
| 183 |
+
ROUND((SUM(CASE WHEN so.status = 'cancelled' THEN so.total_amount ELSE 0 END)
|
| 184 |
+
* 100.0 / SUM(so.total_amount))::numeric, 2) AS pct_cancelled
|
| 185 |
+
FROM sales_table_v2_sales_order so
|
| 186 |
+
JOIN sales_table_v2_customer_master cm ON so.customer_id = cm.customer_id
|
| 187 |
+
GROUP BY cm.customer_id, cm.customer_name
|
| 188 |
+
|
| 189 |
+
──────────────────────────────────────────────────────────────
|
| 190 |
+
RULE 1D1 — PostgreSQL ROUND() REQUIRES ::numeric CAST
|
| 191 |
+
──────────────────────────────────────────────────────────────
|
| 192 |
+
PostgreSQL's ROUND(value, N) only accepts numeric as the first argument.
|
| 193 |
+
Division or SUM() results are often double precision — passing them to
|
| 194 |
+
ROUND() directly raises: "function round(double precision, integer) does not exist".
|
| 195 |
+
|
| 196 |
+
WRONG: ROUND(SUM(x) * 100.0 / SUM(y), 2)
|
| 197 |
+
CORRECT: ROUND((SUM(x) * 100.0 / SUM(y))::numeric, 2)
|
| 198 |
+
|
| 199 |
+
Always cast the expression to ::numeric inside every ROUND(..., N) call.
|
| 200 |
+
|
| 201 |
──────────────────────────────────────────────────────────────
|
| 202 |
RULE 1D2 — "PER X" DENOMINATOR — READ THE QUESTION CAREFULLY
|
| 203 |
──────────────────────────────────────────────────────────────
|
|
|
|
| 543 |
|
| 544 |
4d. NO product_master table — never reference it; use product_id only.
|
| 545 |
|
| 546 |
+
4d0. PERCENTAGE WITH CASE WHEN — never add WHERE status filter on the same column:
|
| 547 |
+
When splitting by status with CASE WHEN, the denominator must include ALL rows.
|
| 548 |
+
WRONG: WHERE status IN ('closed','cancelled') ... SUM(total_amount) as denominator
|
| 549 |
+
CORRECT: No WHERE on status. CASE WHEN handles split; SUM(total_amount) = all orders.
|
| 550 |
+
|
| 551 |
+
4d1. ROUND() IN PostgreSQL — always cast to ::numeric first:
|
| 552 |
+
WRONG: ROUND(SUM(x) / SUM(y), 2)
|
| 553 |
+
CORRECT: ROUND((SUM(x) / SUM(y))::numeric, 2)
|
| 554 |
+
Applies to every ROUND(..., N) call — division results are double precision by default.
|
| 555 |
+
|
| 556 |
4d2. "PER X" DENOMINATOR — use the correct divisor for what "per" refers to:
|
| 557 |
"per order" → COUNT(DISTINCT so.so_id) NOT SUM(quantity)
|
| 558 |
"per unit" → SUM(quantity) NOT COUNT(DISTINCT so_id)
|
ai/sql_pattern_checker.py
CHANGED
|
@@ -232,6 +232,43 @@ def check_sql_patterns(sql: str) -> list[dict[str, Any]]:
|
|
| 232 |
),
|
| 233 |
})
|
| 234 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
# ── Pattern 3b ───────────────────────────────────────────────────────────
|
| 236 |
# "per order" metric computed with SUM(quantity) as denominator instead of
|
| 237 |
# COUNT(DISTINCT so_id). SUM(quantity) = revenue per unit; "per order"
|
|
@@ -257,6 +294,34 @@ def check_sql_patterns(sql: str) -> list[dict[str, Any]]:
|
|
| 257 |
),
|
| 258 |
})
|
| 259 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
# ── Pattern 4 ────────────────────────────────────────────────────────────
|
| 261 |
# Schema-aware: detect alias.column where column doesn't exist in that table.
|
| 262 |
# Generic — works for gold_kt on pricing table, or any future similar mistake.
|
|
|
|
| 232 |
),
|
| 233 |
})
|
| 234 |
|
| 235 |
+
# ── Pattern 3a ───────────────────────────────────────────────────────────
|
| 236 |
+
# WHERE status filter alongside CASE WHEN status → wrong denominator.
|
| 237 |
+
# When computing "percentage of X vs Y", the WHERE clause must NOT pre-filter
|
| 238 |
+
# by status because that shrinks the denominator (misses open/processing orders).
|
| 239 |
+
# CASE WHEN inside SUM() handles the split; no WHERE on status needed.
|
| 240 |
+
if (
|
| 241 |
+
re.search(r"\bcase\s+when\b.*?\bstatus\b", sql_lower, re.DOTALL)
|
| 242 |
+
and re.search(r"\bwhere\b.*?\bstatus\s+in\s*\(", sql_lower, re.DOTALL)
|
| 243 |
+
and re.search(r"\bsum\s*\(", sql_lower)
|
| 244 |
+
):
|
| 245 |
+
issues.append({
|
| 246 |
+
"pattern_name": "case_when_status_with_where_filter",
|
| 247 |
+
"description": (
|
| 248 |
+
"WRONG DENOMINATOR — a WHERE status IN (...) filter is combined with "
|
| 249 |
+
"CASE WHEN so.status = ... inside SUM(). "
|
| 250 |
+
"The WHERE clause removes rows before aggregation, making the denominator "
|
| 251 |
+
"(SUM of all orders) too small and inflating every percentage. "
|
| 252 |
+
"For percentage breakdowns across statuses, the CASE WHEN handles the split "
|
| 253 |
+
"and the WHERE clause on status must be removed."
|
| 254 |
+
),
|
| 255 |
+
"correction": (
|
| 256 |
+
"Remove the WHERE status filter. Let CASE WHEN handle the split:\n"
|
| 257 |
+
"\n"
|
| 258 |
+
"CORRECT pattern:\n"
|
| 259 |
+
"SELECT cm.customer_id, cm.customer_name,\n"
|
| 260 |
+
" ROUND((SUM(CASE WHEN so.status = 'closed' THEN so.total_amount ELSE 0 END)\n"
|
| 261 |
+
" * 100.0 / SUM(so.total_amount))::numeric, 2) AS pct_closed,\n"
|
| 262 |
+
" ROUND((SUM(CASE WHEN so.status = 'cancelled' THEN so.total_amount ELSE 0 END)\n"
|
| 263 |
+
" * 100.0 / SUM(so.total_amount))::numeric, 2) AS pct_cancelled\n"
|
| 264 |
+
"FROM sales_table_v2_sales_order so\n"
|
| 265 |
+
"JOIN sales_table_v2_customer_master cm ON so.customer_id = cm.customer_id\n"
|
| 266 |
+
"GROUP BY cm.customer_id, cm.customer_name\n"
|
| 267 |
+
"\n"
|
| 268 |
+
"No WHERE on status — SUM(so.total_amount) must include ALL orders as denominator."
|
| 269 |
+
),
|
| 270 |
+
})
|
| 271 |
+
|
| 272 |
# ── Pattern 3b ───────────────────────────────────────────────────────────
|
| 273 |
# "per order" metric computed with SUM(quantity) as denominator instead of
|
| 274 |
# COUNT(DISTINCT so_id). SUM(quantity) = revenue per unit; "per order"
|
|
|
|
| 294 |
),
|
| 295 |
})
|
| 296 |
|
| 297 |
+
# ── Pattern 3c ───────────────────────────────────────────────────────────
|
| 298 |
+
# PostgreSQL ROUND() requires numeric, not double precision.
|
| 299 |
+
# ROUND(expr, N) fails with "function round(double precision, integer) does not exist"
|
| 300 |
+
# if expr evaluates to double precision. Fix: cast to ::numeric before ROUND().
|
| 301 |
+
if re.search(r"\bround\s*\(", sql_lower):
|
| 302 |
+
# Check if any ROUND( call lacks a ::numeric cast inside it
|
| 303 |
+
round_calls = re.findall(r"round\s*\(([^;]+?),\s*\d+\s*\)", sql, re.IGNORECASE)
|
| 304 |
+
for call in round_calls:
|
| 305 |
+
if "::numeric" not in call.lower() and "::decimal" not in call.lower():
|
| 306 |
+
issues.append({
|
| 307 |
+
"pattern_name": "round_missing_numeric_cast",
|
| 308 |
+
"description": (
|
| 309 |
+
"PostgreSQL TYPE ERROR — ROUND(value, N) only accepts numeric as first "
|
| 310 |
+
"argument. If value is double precision (e.g. result of division or "
|
| 311 |
+
"SUM()), PostgreSQL raises: "
|
| 312 |
+
"'function round(double precision, integer) does not exist'. "
|
| 313 |
+
"You must cast to ::numeric before calling ROUND."
|
| 314 |
+
),
|
| 315 |
+
"correction": (
|
| 316 |
+
"Always cast the expression to ::numeric inside ROUND:\n"
|
| 317 |
+
" WRONG: ROUND(SUM(x) * 100.0 / SUM(y), 2)\n"
|
| 318 |
+
" CORRECT: ROUND((SUM(x) * 100.0 / SUM(y))::numeric, 2)\n"
|
| 319 |
+
"\n"
|
| 320 |
+
"Apply this to every ROUND(..., N) call in the query."
|
| 321 |
+
),
|
| 322 |
+
})
|
| 323 |
+
break # one report per query is enough
|
| 324 |
+
|
| 325 |
# ── Pattern 4 ────────────────────────────────────────────────────────────
|
| 326 |
# Schema-aware: detect alias.column where column doesn't exist in that table.
|
| 327 |
# Generic — works for gold_kt on pricing table, or any future similar mistake.
|