jashdoshi77 committed on
Commit
c442c5a
·
1 Parent(s): c36c8e5

added column mismatch detector

Browse files
Files changed (2) hide show
  1. ai/signatures.py +48 -0
  2. ai/sql_pattern_checker.py +95 -0
ai/signatures.py CHANGED
@@ -160,6 +160,43 @@ class AnalyzeAndPlan(dspy.Signature):
160
  There is no product_master, products, or product_catalog table.
161
  Use product_id as the only product identifier. Never invent table names.
162
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  ══════════════════════════════════════════════════════════════
164
  RULE 1A — FAN-OUT: DEDUPLICATE BEFORE AGGREGATING ON JOIN CHAINS
165
  ══════════════════════════════════════════════════════════════
@@ -436,6 +473,17 @@ class SQLGeneration(dspy.Signature):
436
 
437
  4d. NO product_master table — never reference it; use product_id only.
438
 
 
 
 
 
 
 
 
 
 
 
 
439
  5. USE PRE-COMPUTED TOTALS — NEVER RECONSTRUCT THEM:
440
  - For order-level metrics (revenue, AOV): use sales_table_v2_sales_order.total_amount
441
  - For PO totals: use purchase_orders_v6_purchase_order.total_amount
 
160
  There is no product_master, products, or product_catalog table.
161
  Use product_id as the only product identifier. Never invent table names.
162
 
163
+ ══════════════════════════════════════════════════════════════
164
+ RULE 1E — WHICH TABLE OWNS WHICH COLUMNS (DO NOT MIX)
165
+ ══════════════════════════════════════════════════════════════
166
+ sales_order_line_pricing → financial rollup only:
167
+ gold_amount_per_unit, diamond_amount_per_unit, making_charges_per_unit,
168
+ base_price_per_unit, selling_price_per_unit, line_total, final_amount,
169
+ quantity, sol_id, variant_sku, product_id
170
+ ✗ Does NOT have: gold_kt, gold_colour, gold_rate_per_gm, metal_weight,
171
+ diamond_id, shape, quality, pointer, carats
172
+
173
+ sales_order_line_gold → physical gold attributes:
174
+ gold_kt, gold_colour, gold_rate_per_gm, metal_weight_per_unit,
175
+ finding_per_unit, gross_weight_per_unit, gold_amount_per_unit, sol_id
176
+ → JOIN to pricing on sol_id when you need both gold attributes AND costs.
177
+
178
+ sales_order_line_diamond → physical diamond attributes:
179
+ diamond_id, shape, quality, size_mm, pointer, pieces_per_unit,
180
+ carats_per_unit, rate_per_carat, diamond_amount_per_unit, sol_id
181
+ → JOIN to pricing on sol_id ONLY when the question asks about diamond
182
+ properties (shape, quality, pointer, carat) — NOT for cost aggregation.
183
+
184
+ RULE: If the question asks "by karat" / "by gold_kt" / "by colour" etc.,
185
+ you MUST join sales_order_line_gold. You cannot get gold_kt from pricing.
186
+
187
+ Example — total costs by karat type:
188
+ SELECT g.gold_kt,
189
+ SUM(lp.gold_amount_per_unit * lp.quantity) AS total_gold_amount,
190
+ SUM(lp.diamond_amount_per_unit * lp.quantity) AS total_diamond_amount,
191
+ SUM(lp.making_charges_per_unit * lp.quantity) AS total_making_charges
192
+ FROM sales_table_v2_sales_order_line_pricing lp
193
+ JOIN sales_table_v2_sales_order_line_gold g ON lp.sol_id = g.sol_id
194
+ JOIN sales_table_v2_sales_order_line sol ON lp.sol_id = sol.sol_id
195
+ JOIN sales_table_v2_sales_order so ON sol.so_id = so.so_id
196
+ WHERE so.status = 'closed'
197
+ GROUP BY g.gold_kt
198
+ ORDER BY g.gold_kt
199
+
200
  ══════════════════════════════════════════════════════════════
201
  RULE 1A — FAN-OUT: DEDUPLICATE BEFORE AGGREGATING ON JOIN CHAINS
202
  ══════════════════════════════════════════════════════════════
 
473
 
474
  4d. NO product_master table — never reference it; use product_id only.
475
 
476
+ 4e. TABLE COLUMN OWNERSHIP — never use a column from the wrong table:
477
+ sales_order_line_pricing → has: gold_amount_per_unit, diamond_amount_per_unit,
478
+ making_charges_per_unit, line_total, quantity, sol_id, variant_sku, product_id
479
+ ✗ does NOT have: gold_kt, gold_colour, shape, quality, diamond_id
480
+ sales_order_line_gold → has: gold_kt, gold_colour, gold_rate_per_gm,
481
+ metal_weight_per_unit (JOIN on sol_id when grouping/filtering by karat or colour)
482
+ sales_order_line_diamond → has: shape, quality, diamond_id, carats_per_unit
483
+ (JOIN on sol_id only for property filters, never for cost aggregation)
484
+ WRONG: SELECT lp.gold_kt ... FROM sales_order_line_pricing lp
485
+ CORRECT: JOIN sales_order_line_gold g ON lp.sol_id = g.sol_id, then use g.gold_kt
486
+
487
  5. USE PRE-COMPUTED TOTALS — NEVER RECONSTRUCT THEM:
488
  - For order-level metrics (revenue, AOV): use sales_table_v2_sales_order.total_amount
489
  - For PO totals: use purchase_orders_v6_purchase_order.total_amount
ai/sql_pattern_checker.py CHANGED
@@ -9,6 +9,96 @@ import re
9
  from typing import Any
10
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def check_sql_patterns(sql: str) -> list[dict[str, Any]]:
13
  """Detect known bad patterns in a generated SQL string.
14
 
@@ -142,6 +232,11 @@ def check_sql_patterns(sql: str) -> list[dict[str, Any]]:
142
  ),
143
  })
144
 
 
 
 
 
 
145
  return issues
146
 
147
 
 
9
  from typing import Any
10
 
11
 
12
+ def _build_alias_map(sql: str) -> dict[str, str]:
13
+ """Extract alias β†’ full_table_name mapping from FROM / JOIN clauses.
14
+
15
+ Handles: FROM table_name alias
16
+ FROM table_name AS alias
17
+ JOIN table_name alias
18
+ JOIN table_name AS alias
19
+ Returns lower-cased keys and values.
20
+ """
21
+ alias_map: dict[str, str] = {}
22
+ pattern = re.compile(
23
+ r'(?:FROM|JOIN)\s+"?(\w+)"?\s+(?:AS\s+)?"?(\w+)"?',
24
+ re.IGNORECASE,
25
+ )
26
+ for table, alias in pattern.findall(sql):
27
+ alias_map[alias.lower()] = table.lower()
28
+ # also map table β†’ table in case no alias is used
29
+ alias_map[table.lower()] = table.lower()
30
+ return alias_map
31
+
32
+
33
def check_column_table_mismatches(sql: str) -> list[dict[str, Any]]:
    """Detect ``alias.column`` references whose column is not in that table.

    The alias is resolved to a table through the FROM / JOIN clauses of the
    statement, and the column is looked up in the schema returned by
    ``db.schema.get_schema()``, so no table or column names are hardcoded.

    String literals and SQL comments are blanked out before scanning so that
    text such as ``'lp.gold_kt'`` or ``-- g.shape`` is never reported.
    Double-quoted identifiers (``lp."gold_kt"``) are recognised.

    Args:
        sql: The generated SQL statement to inspect.

    Returns:
        A list of issue dicts with the keys ``pattern_name``, ``description``
        and ``correction`` (the same shape check_sql_patterns emits). The list
        is empty when nothing is wrong, when ``sql`` is empty, or when the
        schema cannot be loaded.
    """
    if not sql:
        return []

    # One pass so that quotes inside comments and comment markers inside
    # strings are each handled by whichever construct starts first.
    scrubbed = re.sub(
        r"'(?:[^']|'')*'|--[^\n]*|/\*.*?\*/",
        lambda m: "''" if m.group(0).startswith("'") else " ",
        sql,
        flags=re.DOTALL,
    )

    # All alias.column references; the lookbehind keeps the scan from
    # starting in the middle of a dotted chain such as schema.table.column.
    references = re.findall(r'(?<![\w.])"?(\w+)"?\."?(\w+)"?', scrubbed)
    if not references:
        return []  # nothing to validate, so do not touch the schema at all

    try:
        from db.schema import get_schema
        schema = get_schema()
    except Exception:
        # Deliberately best-effort: this check is advisory, so any failure to
        # obtain the schema disables it instead of failing the caller.
        return []

    # Build {table_name_lower: {col_lower, ...}}
    table_cols: dict[str, set[str]] = {
        t.lower(): {c["column_name"].lower() for c in cols}
        for t, cols in schema.items()
    }

    alias_map = _build_alias_map(scrubbed)
    issues: list[dict[str, Any]] = []
    seen: set[str] = set()

    for alias, col in references:
        alias_l = alias.lower()
        col_l = col.lower()
        key = f"{alias_l}.{col_l}"
        if key in seen:
            continue  # report each alias.column pair once
        seen.add(key)

        table_l = alias_map.get(alias_l)
        if table_l is None:
            continue  # unknown alias (subquery alias, CTE name, etc.): skip
        if table_l not in table_cols:
            continue  # table not in schema, so there is nothing to compare
        if col_l in table_cols[table_l]:
            continue  # column exists where it is referenced

        # Find which tables DO have this column
        tables_with_col = [
            t for t, cols in table_cols.items() if col_l in cols
        ]
        # Build a helpful correction hint
        if tables_with_col:
            hint = (
                f"Column '{col}' does NOT exist in '{table_l}'. "
                f"It is available in: {', '.join(tables_with_col)}. "
                f"JOIN the correct table on sol_id / so_id / po_id as appropriate "
                f"and reference that table's alias instead."
            )
        else:
            hint = (
                f"Column '{col}' does NOT exist in '{table_l}' "
                f"or any other table in the schema. "
                f"Remove it or use a column that actually exists."
            )
        issues.append({
            "pattern_name": f"wrong_table_for_{col_l}",
            "description": (
                f"CRITICAL BUG — column '{col}' referenced via alias '{alias}' "
                f"which maps to table '{table_l}', but that column does not exist there."
            ),
            "correction": hint,
        })

    return issues
100
+
101
+
102
  def check_sql_patterns(sql: str) -> list[dict[str, Any]]:
103
  """Detect known bad patterns in a generated SQL string.
104
 
 
232
  ),
233
  })
234
 
235
+ # ── Pattern 4 ────────────────────────────────────────────────────────────
236
+ # Schema-aware: detect alias.column where column doesn't exist in that table.
237
+ # Generic β€” works for gold_kt on pricing table, or any future similar mistake.
238
+ issues.extend(check_column_table_mismatches(sql))
239
+
240
  return issues
241
 
242