bhavika24 committed on
Commit
8c6bb96
·
verified ·
1 Parent(s): 4ddd28a

Upload engine.py

Browse files
Files changed (1) hide show
  1. engine.py +97 -29
engine.py CHANGED
@@ -38,6 +38,25 @@ def load_metadata():
38
 
39
 
40
  def resolve_operator(op, value):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  mapping = {
42
  "equals": "=",
43
  "not_equals": "!=",
@@ -57,18 +76,27 @@ def resolve_operator(op, value):
57
 
58
  sql_op = mapping[op]
59
 
 
 
 
 
60
  if op == "contains":
61
- return sql_op, f"'%{value}%'"
 
62
  if op == "starts_with":
63
- return sql_op, f"'{value}%'"
 
64
  if op == "ends_with":
65
- return sql_op, f"'%{value}'"
 
66
  if op in ("in", "not_in"):
67
  if not isinstance(value, list):
68
  raise ValueError("IN operator requires list")
69
- return sql_op, f"({','.join(map(repr, value))})"
 
 
 
70
 
71
- return sql_op, f"'{value}'"
72
 
73
 
74
  # =========================
@@ -156,10 +184,10 @@ def safe_json_loads(text):
156
  # INTENT PARSING (LLM)
157
  # =========================
158
 
159
- def parse_intent(question):
160
  meta = load_metadata()
161
 
162
- # ✅ Build schema safely (skip empty modules)
163
  schema_description = "\n".join([
164
  f"{module}: {', '.join(fields)}"
165
  for module in meta["modules"]
@@ -173,39 +201,61 @@ def parse_intent(question):
173
  You are a SQL query planner.
174
 
175
  You MUST only use fields listed below.
176
- If a field does not exist, choose the closest valid field.
177
  Do NOT invent column names.
178
 
179
  Available schema:
180
  {schema_description}
181
 
182
- Extract:
183
- - module
184
- - filters (field, operator, value)
185
- - selected fields
186
-
187
- Return ONLY valid JSON.
 
 
 
 
 
 
 
 
 
188
 
189
  User question:
190
  {question}
191
  """
192
 
193
- res = client.chat.completions.create(
194
- model="gpt-4.1-mini",
195
- messages=[
196
- {"role": "system", "content": "You are a JSON-only API. Return valid JSON only."},
197
- {"role": "user", "content": prompt}
198
- ],
199
- temperature=0
200
- )
 
 
 
 
 
201
 
 
 
202
 
203
- # ✅ Safe JSON parsing
204
- try:
205
- return json.loads(res.choices[0].message.content)
206
- except json.JSONDecodeError:
207
- raise ValueError("LLM returned invalid JSON")
 
208
 
 
 
 
 
 
209
 
210
  # =========================
211
  # SQL GENERATOR
@@ -214,7 +264,9 @@ User question:
214
  def build_sql(plan):
215
  meta = load_metadata()
216
 
217
- module = plan["module"]
 
 
218
  if module not in meta["modules"]:
219
  raise ValueError(f"Unknown module: {module}")
220
 
@@ -255,12 +307,15 @@ def build_sql(plan):
255
  f"{table}.{column} {sql_op} {sql_value}"
256
  )
257
 
 
 
 
258
  # ---------- FINAL SQL ----------
259
  sql = f"""
260
  SELECT {select_sql}
261
  FROM {base_table}
262
  {' '.join(joins)}
263
- WHERE {' AND '.join(where_clauses)}
264
  LIMIT 100
265
  """
266
 
@@ -268,6 +323,7 @@ def build_sql(plan):
268
 
269
 
270
 
 
271
  # =========================
272
  # VALIDATION
273
  # =========================
@@ -290,6 +346,18 @@ def validate_sql(sql):
290
 
291
  def run(question):
292
  plan = parse_intent(question)
 
 
 
 
 
 
 
 
 
 
 
 
293
  sql = build_sql(plan)
294
  sql = validate_sql(sql)
295
 
 
38
 
39
 
40
  def resolve_operator(op, value):
41
+ # 🔴 Normalize operator input
42
+ op = op.lower().strip()
43
+
44
+ OPERATOR_ALIASES = {
45
+ "=": "equals",
46
+ "==": "equals",
47
+ "eq": "equals",
48
+ "!=": "not_equals",
49
+ "<>": "not_equals",
50
+ ">": "greater_than",
51
+ "<": "less_than",
52
+ ">=": "greater_or_equal",
53
+ "<=": "less_or_equal",
54
+ "greater than": "greater_than",
55
+ "less than": "less_than"
56
+ }
57
+
58
+ op = OPERATOR_ALIASES.get(op, op)
59
+
60
  mapping = {
61
  "equals": "=",
62
  "not_equals": "!=",
 
76
 
77
  sql_op = mapping[op]
78
 
79
+ # 🔴 Escape string values safely
80
+ def sql_escape(val):
81
+ return str(val).replace("'", "''")
82
+
83
  if op == "contains":
84
+ return sql_op, f"'%{sql_escape(value)}%'"
85
+
86
  if op == "starts_with":
87
+ return sql_op, f"'{sql_escape(value)}%'"
88
+
89
  if op == "ends_with":
90
+ return sql_op, f"'%{sql_escape(value)}'"
91
+
92
  if op in ("in", "not_in"):
93
  if not isinstance(value, list):
94
  raise ValueError("IN operator requires list")
95
+ escaped = [f"'{sql_escape(v)}'" for v in value]
96
+ return sql_op, f"({','.join(escaped)})"
97
+
98
+ return sql_op, f"'{sql_escape(value)}'"
99
 
 
100
 
101
 
102
  # =========================
 
184
  # INTENT PARSING (LLM)
185
  # =========================
186
 
187
+ def parse_intent(question, retries=2):
188
  meta = load_metadata()
189
 
190
+ # ✅ Build schema safely
191
  schema_description = "\n".join([
192
  f"{module}: {', '.join(fields)}"
193
  for module in meta["modules"]
 
201
  You are a SQL query planner.
202
 
203
  You MUST only use fields listed below.
204
+ If a field does not exist, OMIT it.
205
  Do NOT invent column names.
206
 
207
  Available schema:
208
  {schema_description}
209
 
210
+ Extract EXACTLY this JSON structure:
211
+ {{
212
+ "module": "string",
213
+ "select": ["string"],
214
+ "filters": [
215
+ {{
216
+ "field": "string",
217
+ "operator": "equals|not_equals|greater_than|less_than|contains|in",
218
+ "value": "string|number|array"
219
+ }}
220
+ ]
221
+ }}
222
+
223
+ Return ONLY valid minified JSON.
224
+ No explanation. No comments.
225
 
226
  User question:
227
  {question}
228
  """
229
 
230
+ for attempt in range(retries):
231
+ try:
232
+ res = client.chat.completions.create(
233
+ model="gpt-4.1-mini",
234
+ messages=[
235
+ {
236
+ "role": "system",
237
+ "content": "Return ONLY valid minified JSON. No text. No explanation."
238
+ },
239
+ {"role": "user", "content": prompt}
240
+ ],
241
+ temperature=0
242
+ )
243
 
244
+ content = res.choices[0].message.content
245
+ plan = safe_json_loads(content)
246
 
247
+ # ✅ NORMALIZE + STABILIZE INTENT SHAPE
248
+ if "module" in plan:
249
+ plan["module"] = plan["module"].lower().strip()
250
+
251
+ plan.setdefault("filters", [])
252
+ plan.setdefault("select", [])
253
 
254
+ return plan
255
+
256
+ except Exception:
257
+ if attempt == retries - 1:
258
+ raise ValueError("LLM failed to return valid JSON")
259
 
260
  # =========================
261
  # SQL GENERATOR
 
264
  def build_sql(plan):
265
  meta = load_metadata()
266
 
267
+ # 🔴 Defensive: normalize module
268
+ module = plan["module"].lower().strip()
269
+
270
  if module not in meta["modules"]:
271
  raise ValueError(f"Unknown module: {module}")
272
 
 
307
  f"{table}.{column} {sql_op} {sql_value}"
308
  )
309
 
310
+ # 🔴 FIX: safe WHERE clause
311
+ where_sql = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else ""
312
+
313
  # ---------- FINAL SQL ----------
314
  sql = f"""
315
  SELECT {select_sql}
316
  FROM {base_table}
317
  {' '.join(joins)}
318
+ {where_sql}
319
  LIMIT 100
320
  """
321
 
 
323
 
324
 
325
 
326
+
327
  # =========================
328
  # VALIDATION
329
  # =========================
 
346
 
347
  def run(question):
348
  plan = parse_intent(question)
349
+
350
+ # 🔴 REQUIRED: validate minimum intent
351
+ if not isinstance(plan, dict):
352
+ raise ValueError("Invalid intent format")
353
+
354
+ if "module" not in plan:
355
+ raise ValueError("Unable to determine module from question")
356
+
357
+ # Optional but safe defaults
358
+ plan.setdefault("filters", [])
359
+ plan.setdefault("select", [])
360
+
361
  sql = build_sql(plan)
362
  sql = validate_sql(sql)
363