jashdoshi77 committed on
Commit
29bf2eb
·
1 Parent(s): 057135b

postgres ROUND fixing

Browse files
Files changed (2) hide show
  1. ai/signatures.py +48 -0
  2. ai/sql_pattern_checker.py +65 -0
ai/signatures.py CHANGED
@@ -160,6 +160,44 @@ class AnalyzeAndPlan(dspy.Signature):
160
  There is no product_master, products, or product_catalog table.
161
  Use product_id as the only product identifier. Never invent table names.
162
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  ══════════════════════════════════════════════════════════════
164
  RULE 1D2 — "PER X" DENOMINATOR — READ THE QUESTION CAREFULLY
165
  ══════════════════════════════════════════════════════════════
@@ -505,6 +543,16 @@ class SQLGeneration(dspy.Signature):
505
 
506
  4d. NO product_master table — never reference it; use product_id only.
507
 
 
 
 
 
 
 
 
 
 
 
508
  4d2. "PER X" DENOMINATOR — use the correct divisor for what "per" refers to:
509
  "per order" → COUNT(DISTINCT so.so_id) NOT SUM(quantity)
510
  "per unit" → SUM(quantity) NOT COUNT(DISTINCT so_id)
 
160
  There is no product_master, products, or product_catalog table.
161
  Use product_id as the only product identifier. Never invent table names.
162
 
163
+ ══════════════════════════════════════════════════════════════
164
+ RULE 1D0 — PERCENTAGE / RATIO WITH CASE WHEN — NEVER PRE-FILTER STATUS
165
+ ══════════════════════════════════════════════════════════════
166
+ When computing a percentage breakdown across different statuses
167
+ (e.g. "% closed vs % cancelled"), the denominator must be ALL orders.
168
+ Adding WHERE status IN ('closed', 'cancelled') before grouping removes
169
+ other statuses from the denominator → inflated percentages.
170
+
171
+ WRONG (WHERE filter shrinks denominator):
172
+ SELECT customer_id,
173
+ SUM(CASE WHEN status = 'closed' THEN total_amount ELSE 0 END) * 100.0
174
+ / SUM(total_amount) AS pct_closed
175
+ FROM sales_table_v2_sales_order
176
+ WHERE status IN ('closed', 'cancelled') ← removes open/processing rows
177
+ GROUP BY customer_id
178
+
179
+ CORRECT (no WHERE on status — CASE WHEN handles the split):
180
+ SELECT cm.customer_id, cm.customer_name,
181
+ ROUND((SUM(CASE WHEN so.status = 'closed' THEN so.total_amount ELSE 0 END)
182
+ * 100.0 / SUM(so.total_amount))::numeric, 2) AS pct_closed,
183
+ ROUND((SUM(CASE WHEN so.status = 'cancelled' THEN so.total_amount ELSE 0 END)
184
+ * 100.0 / SUM(so.total_amount))::numeric, 2) AS pct_cancelled
185
+ FROM sales_table_v2_sales_order so
186
+ JOIN sales_table_v2_customer_master cm ON so.customer_id = cm.customer_id
187
+ GROUP BY cm.customer_id, cm.customer_name
188
+
189
+ ══════════════════════════════════════════════════════════════
190
+ RULE 1D1 — PostgreSQL ROUND() REQUIRES ::numeric CAST
191
+ ══════════════════════════════════════════════════════════════
192
+ PostgreSQL's ROUND(value, N) only accepts numeric as the first argument.
193
+ Division or SUM() results are often double precision — passing them to
194
+ ROUND() directly raises: "function round(double precision, integer) does not exist".
195
+
196
+ WRONG: ROUND(SUM(x) * 100.0 / SUM(y), 2)
197
+ CORRECT: ROUND((SUM(x) * 100.0 / SUM(y))::numeric, 2)
198
+
199
+ Always cast the expression to ::numeric inside every ROUND(..., N) call.
200
+
201
  ══════════════════════════════════════════════════════════════
202
  RULE 1D2 — "PER X" DENOMINATOR — READ THE QUESTION CAREFULLY
203
  ══════════════════════════════════════════════════════════════
 
543
 
544
  4d. NO product_master table — never reference it; use product_id only.
545
 
546
+ 4d0. PERCENTAGE WITH CASE WHEN — never add WHERE status filter on the same column:
547
+ When splitting by status with CASE WHEN, the denominator must include ALL rows.
548
+ WRONG: WHERE status IN ('closed','cancelled') ... SUM(total_amount) as denominator
549
+ CORRECT: No WHERE on status. CASE WHEN handles split; SUM(total_amount) = all orders.
550
+
551
+ 4d1. ROUND() IN PostgreSQL — always cast to ::numeric first:
552
+ WRONG: ROUND(SUM(x) / SUM(y), 2)
553
+ CORRECT: ROUND((SUM(x) / SUM(y))::numeric, 2)
554
+ Applies to every ROUND(..., N) call — division results are double precision whenever an operand is a float column.
555
+
556
  4d2. "PER X" DENOMINATOR — use the correct divisor for what "per" refers to:
557
  "per order" → COUNT(DISTINCT so.so_id) NOT SUM(quantity)
558
  "per unit" → SUM(quantity) NOT COUNT(DISTINCT so_id)
ai/sql_pattern_checker.py CHANGED
@@ -232,6 +232,43 @@ def check_sql_patterns(sql: str) -> list[dict[str, Any]]:
232
  ),
233
  })
234
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
  # ── Pattern 3b ───────────────────────────────────────────────────────────
236
  # "per order" metric computed with SUM(quantity) as denominator instead of
237
  # COUNT(DISTINCT so_id). SUM(quantity) = revenue per unit; "per order"
@@ -257,6 +294,34 @@ def check_sql_patterns(sql: str) -> list[dict[str, Any]]:
257
  ),
258
  })
259
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  # ── Pattern 4 ────────────────────────────────────────────────────────────
261
  # Schema-aware: detect alias.column where column doesn't exist in that table.
262
  # Generic β€” works for gold_kt on pricing table, or any future similar mistake.
 
232
  ),
233
  })
234
 
235
+ # ── Pattern 3a ───────────────────────────────────────────────────────────
236
+ # WHERE status filter alongside CASE WHEN status — wrong denominator.
237
+ # When computing "percentage of X vs Y", the WHERE clause must NOT pre-filter
238
+ # by status because that shrinks the denominator (misses open/processing orders).
239
+ # CASE WHEN inside SUM() handles the split; no WHERE on status needed.
240
+ if (
241
+ re.search(r"\bcase\s+when\b.*?\bstatus\b", sql_lower, re.DOTALL)
242
+ and re.search(r"\bwhere\b.*?\bstatus\s+in\s*\(", sql_lower, re.DOTALL)
243
+ and re.search(r"\bsum\s*\(", sql_lower)
244
+ ):
245
+ issues.append({
246
+ "pattern_name": "case_when_status_with_where_filter",
247
+ "description": (
248
+ "WRONG DENOMINATOR β€” a WHERE status IN (...) filter is combined with "
249
+ "CASE WHEN so.status = ... inside SUM(). "
250
+ "The WHERE clause removes rows before aggregation, making the denominator "
251
+ "(SUM of all orders) too small and inflating every percentage. "
252
+ "For percentage breakdowns across statuses, the CASE WHEN handles the split "
253
+ "and the WHERE clause on status must be removed."
254
+ ),
255
+ "correction": (
256
+ "Remove the WHERE status filter. Let CASE WHEN handle the split:\n"
257
+ "\n"
258
+ "CORRECT pattern:\n"
259
+ "SELECT cm.customer_id, cm.customer_name,\n"
260
+ " ROUND((SUM(CASE WHEN so.status = 'closed' THEN so.total_amount ELSE 0 END)\n"
261
+ " * 100.0 / SUM(so.total_amount))::numeric, 2) AS pct_closed,\n"
262
+ " ROUND((SUM(CASE WHEN so.status = 'cancelled' THEN so.total_amount ELSE 0 END)\n"
263
+ " * 100.0 / SUM(so.total_amount))::numeric, 2) AS pct_cancelled\n"
264
+ "FROM sales_table_v2_sales_order so\n"
265
+ "JOIN sales_table_v2_customer_master cm ON so.customer_id = cm.customer_id\n"
266
+ "GROUP BY cm.customer_id, cm.customer_name\n"
267
+ "\n"
268
+ "No WHERE on status β€” SUM(so.total_amount) must include ALL orders as denominator."
269
+ ),
270
+ })
271
+
272
  # ── Pattern 3b ───────────────────────────────────────────────────────────
273
  # "per order" metric computed with SUM(quantity) as denominator instead of
274
  # COUNT(DISTINCT so_id). SUM(quantity) = revenue per unit; "per order"
 
294
  ),
295
  })
296
 
297
+ # ── Pattern 3c ───────────────────────────────────────────────────────────
298
+ # PostgreSQL ROUND() requires numeric, not double precision.
299
+ # ROUND(expr, N) fails with "function round(double precision, integer) does not exist"
300
+ # if expr evaluates to double precision. Fix: cast to ::numeric before ROUND().
301
+ if re.search(r"\bround\s*\(", sql_lower):
302
+ # Check if any ROUND( call lacks a ::numeric cast inside it
303
+ round_calls = re.findall(r"round\s*\(([^;]+?),\s*\d+\s*\)", sql, re.IGNORECASE)
304
+ for call in round_calls:
305
+ if "::numeric" not in call.lower() and "::decimal" not in call.lower():
306
+ issues.append({
307
+ "pattern_name": "round_missing_numeric_cast",
308
+ "description": (
309
+ "PostgreSQL TYPE ERROR β€” ROUND(value, N) only accepts numeric as first "
310
+ "argument. If value is double precision (e.g. result of division or "
311
+ "SUM()), PostgreSQL raises: "
312
+ "'function round(double precision, integer) does not exist'. "
313
+ "You must cast to ::numeric before calling ROUND."
314
+ ),
315
+ "correction": (
316
+ "Always cast the expression to ::numeric inside ROUND:\n"
317
+ " WRONG: ROUND(SUM(x) * 100.0 / SUM(y), 2)\n"
318
+ " CORRECT: ROUND((SUM(x) * 100.0 / SUM(y))::numeric, 2)\n"
319
+ "\n"
320
+ "Apply this to every ROUND(..., N) call in the query."
321
+ ),
322
+ })
323
+ break # one report per query is enough
324
+
325
  # ── Pattern 4 ────────────────────────────────────────────────────────────
326
  # Schema-aware: detect alias.column where column doesn't exist in that table.
327
  # Generic β€” works for gold_kt on pricing table, or any future similar mistake.