Spaces:
Sleeping
Sleeping
Upload engine.py
Browse files
engine.py
CHANGED
|
@@ -38,6 +38,25 @@ def load_metadata():
|
|
| 38 |
|
| 39 |
|
| 40 |
def resolve_operator(op, value):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
mapping = {
|
| 42 |
"equals": "=",
|
| 43 |
"not_equals": "!=",
|
|
@@ -57,18 +76,27 @@ def resolve_operator(op, value):
|
|
| 57 |
|
| 58 |
sql_op = mapping[op]
|
| 59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
if op == "contains":
|
| 61 |
-
return sql_op, f"'%{value}%'"
|
|
|
|
| 62 |
if op == "starts_with":
|
| 63 |
-
return sql_op, f"'{value}%'"
|
|
|
|
| 64 |
if op == "ends_with":
|
| 65 |
-
return sql_op, f"'%{value}'"
|
|
|
|
| 66 |
if op in ("in", "not_in"):
|
| 67 |
if not isinstance(value, list):
|
| 68 |
raise ValueError("IN operator requires list")
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
-
return sql_op, f"'{value}'"
|
| 72 |
|
| 73 |
|
| 74 |
# =========================
|
|
@@ -156,10 +184,10 @@ def safe_json_loads(text):
|
|
| 156 |
# INTENT PARSING (LLM)
|
| 157 |
# =========================
|
| 158 |
|
| 159 |
-
def parse_intent(question):
|
| 160 |
meta = load_metadata()
|
| 161 |
|
| 162 |
-
# ✅ Build schema safely
|
| 163 |
schema_description = "\n".join([
|
| 164 |
f"{module}: {', '.join(fields)}"
|
| 165 |
for module in meta["modules"]
|
|
@@ -173,39 +201,61 @@ def parse_intent(question):
|
|
| 173 |
You are a SQL query planner.
|
| 174 |
|
| 175 |
You MUST only use fields listed below.
|
| 176 |
-
If a field does not exist,
|
| 177 |
Do NOT invent column names.
|
| 178 |
|
| 179 |
Available schema:
|
| 180 |
{schema_description}
|
| 181 |
|
| 182 |
-
Extract:
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
|
| 189 |
User question:
|
| 190 |
{question}
|
| 191 |
"""
|
| 192 |
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
|
|
|
|
|
|
|
| 202 |
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
|
|
|
| 208 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
|
| 210 |
# =========================
|
| 211 |
# SQL GENERATOR
|
|
@@ -214,7 +264,9 @@ User question:
|
|
| 214 |
def build_sql(plan):
|
| 215 |
meta = load_metadata()
|
| 216 |
|
| 217 |
-
|
|
|
|
|
|
|
| 218 |
if module not in meta["modules"]:
|
| 219 |
raise ValueError(f"Unknown module: {module}")
|
| 220 |
|
|
@@ -255,12 +307,15 @@ def build_sql(plan):
|
|
| 255 |
f"{table}.{column} {sql_op} {sql_value}"
|
| 256 |
)
|
| 257 |
|
|
|
|
|
|
|
|
|
|
| 258 |
# ---------- FINAL SQL ----------
|
| 259 |
sql = f"""
|
| 260 |
SELECT {select_sql}
|
| 261 |
FROM {base_table}
|
| 262 |
{' '.join(joins)}
|
| 263 |
-
|
| 264 |
LIMIT 100
|
| 265 |
"""
|
| 266 |
|
|
@@ -268,6 +323,7 @@ def build_sql(plan):
|
|
| 268 |
|
| 269 |
|
| 270 |
|
|
|
|
| 271 |
# =========================
|
| 272 |
# VALIDATION
|
| 273 |
# =========================
|
|
@@ -290,6 +346,18 @@ def validate_sql(sql):
|
|
| 290 |
|
| 291 |
def run(question):
|
| 292 |
plan = parse_intent(question)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
sql = build_sql(plan)
|
| 294 |
sql = validate_sql(sql)
|
| 295 |
|
|
|
|
| 38 |
|
| 39 |
|
| 40 |
def resolve_operator(op, value):
|
| 41 |
+
# 🔴 Normalize operator input
|
| 42 |
+
op = op.lower().strip()
|
| 43 |
+
|
| 44 |
+
OPERATOR_ALIASES = {
|
| 45 |
+
"=": "equals",
|
| 46 |
+
"==": "equals",
|
| 47 |
+
"eq": "equals",
|
| 48 |
+
"!=": "not_equals",
|
| 49 |
+
"<>": "not_equals",
|
| 50 |
+
">": "greater_than",
|
| 51 |
+
"<": "less_than",
|
| 52 |
+
">=": "greater_or_equal",
|
| 53 |
+
"<=": "less_or_equal",
|
| 54 |
+
"greater than": "greater_than",
|
| 55 |
+
"less than": "less_than"
|
| 56 |
+
}
|
| 57 |
+
|
| 58 |
+
op = OPERATOR_ALIASES.get(op, op)
|
| 59 |
+
|
| 60 |
mapping = {
|
| 61 |
"equals": "=",
|
| 62 |
"not_equals": "!=",
|
|
|
|
| 76 |
|
| 77 |
sql_op = mapping[op]
|
| 78 |
|
| 79 |
+
# 🔴 Escape string values safely
|
| 80 |
+
def sql_escape(val):
|
| 81 |
+
return str(val).replace("'", "''")
|
| 82 |
+
|
| 83 |
if op == "contains":
|
| 84 |
+
return sql_op, f"'%{sql_escape(value)}%'"
|
| 85 |
+
|
| 86 |
if op == "starts_with":
|
| 87 |
+
return sql_op, f"'{sql_escape(value)}%'"
|
| 88 |
+
|
| 89 |
if op == "ends_with":
|
| 90 |
+
return sql_op, f"'%{sql_escape(value)}'"
|
| 91 |
+
|
| 92 |
if op in ("in", "not_in"):
|
| 93 |
if not isinstance(value, list):
|
| 94 |
raise ValueError("IN operator requires list")
|
| 95 |
+
escaped = [f"'{sql_escape(v)}'" for v in value]
|
| 96 |
+
return sql_op, f"({','.join(escaped)})"
|
| 97 |
+
|
| 98 |
+
return sql_op, f"'{sql_escape(value)}'"
|
| 99 |
|
|
|
|
| 100 |
|
| 101 |
|
| 102 |
# =========================
|
|
|
|
| 184 |
# INTENT PARSING (LLM)
|
| 185 |
# =========================
|
| 186 |
|
| 187 |
+
def parse_intent(question, retries=2):
|
| 188 |
meta = load_metadata()
|
| 189 |
|
| 190 |
+
# ✅ Build schema safely
|
| 191 |
schema_description = "\n".join([
|
| 192 |
f"{module}: {', '.join(fields)}"
|
| 193 |
for module in meta["modules"]
|
|
|
|
| 201 |
You are a SQL query planner.
|
| 202 |
|
| 203 |
You MUST only use fields listed below.
|
| 204 |
+
If a field does not exist, OMIT it.
|
| 205 |
Do NOT invent column names.
|
| 206 |
|
| 207 |
Available schema:
|
| 208 |
{schema_description}
|
| 209 |
|
| 210 |
+
Extract EXACTLY this JSON structure:
|
| 211 |
+
{{
|
| 212 |
+
"module": "string",
|
| 213 |
+
"select": ["string"],
|
| 214 |
+
"filters": [
|
| 215 |
+
{{
|
| 216 |
+
"field": "string",
|
| 217 |
+
"operator": "equals|not_equals|greater_than|less_than|contains|in",
|
| 218 |
+
"value": "string|number|array"
|
| 219 |
+
}}
|
| 220 |
+
]
|
| 221 |
+
}}
|
| 222 |
+
|
| 223 |
+
Return ONLY valid minified JSON.
|
| 224 |
+
No explanation. No comments.
|
| 225 |
|
| 226 |
User question:
|
| 227 |
{question}
|
| 228 |
"""
|
| 229 |
|
| 230 |
+
for attempt in range(retries):
|
| 231 |
+
try:
|
| 232 |
+
res = client.chat.completions.create(
|
| 233 |
+
model="gpt-4.1-mini",
|
| 234 |
+
messages=[
|
| 235 |
+
{
|
| 236 |
+
"role": "system",
|
| 237 |
+
"content": "Return ONLY valid minified JSON. No text. No explanation."
|
| 238 |
+
},
|
| 239 |
+
{"role": "user", "content": prompt}
|
| 240 |
+
],
|
| 241 |
+
temperature=0
|
| 242 |
+
)
|
| 243 |
|
| 244 |
+
content = res.choices[0].message.content
|
| 245 |
+
plan = safe_json_loads(content)
|
| 246 |
|
| 247 |
+
# ✅ NORMALIZE + STABILIZE INTENT SHAPE
|
| 248 |
+
if "module" in plan:
|
| 249 |
+
plan["module"] = plan["module"].lower().strip()
|
| 250 |
+
|
| 251 |
+
plan.setdefault("filters", [])
|
| 252 |
+
plan.setdefault("select", [])
|
| 253 |
|
| 254 |
+
return plan
|
| 255 |
+
|
| 256 |
+
except Exception:
|
| 257 |
+
if attempt == retries - 1:
|
| 258 |
+
raise ValueError("LLM failed to return valid JSON")
|
| 259 |
|
| 260 |
# =========================
|
| 261 |
# SQL GENERATOR
|
|
|
|
| 264 |
def build_sql(plan):
|
| 265 |
meta = load_metadata()
|
| 266 |
|
| 267 |
+
# 🔴 Defensive: normalize module
|
| 268 |
+
module = plan["module"].lower().strip()
|
| 269 |
+
|
| 270 |
if module not in meta["modules"]:
|
| 271 |
raise ValueError(f"Unknown module: {module}")
|
| 272 |
|
|
|
|
| 307 |
f"{table}.{column} {sql_op} {sql_value}"
|
| 308 |
)
|
| 309 |
|
| 310 |
+
# 🔴 FIX: safe WHERE clause
|
| 311 |
+
where_sql = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else ""
|
| 312 |
+
|
| 313 |
# ---------- FINAL SQL ----------
|
| 314 |
sql = f"""
|
| 315 |
SELECT {select_sql}
|
| 316 |
FROM {base_table}
|
| 317 |
{' '.join(joins)}
|
| 318 |
+
{where_sql}
|
| 319 |
LIMIT 100
|
| 320 |
"""
|
| 321 |
|
|
|
|
| 323 |
|
| 324 |
|
| 325 |
|
| 326 |
+
|
| 327 |
# =========================
|
| 328 |
# VALIDATION
|
| 329 |
# =========================
|
|
|
|
| 346 |
|
| 347 |
def run(question):
|
| 348 |
plan = parse_intent(question)
|
| 349 |
+
|
| 350 |
+
# 🔴 REQUIRED: validate minimum intent
|
| 351 |
+
if not isinstance(plan, dict):
|
| 352 |
+
raise ValueError("Invalid intent format")
|
| 353 |
+
|
| 354 |
+
if "module" not in plan:
|
| 355 |
+
raise ValueError("Unable to determine module from question")
|
| 356 |
+
|
| 357 |
+
# Optional but safe defaults
|
| 358 |
+
plan.setdefault("filters", [])
|
| 359 |
+
plan.setdefault("select", [])
|
| 360 |
+
|
| 361 |
sql = build_sql(plan)
|
| 362 |
sql = validate_sql(sql)
|
| 363 |
|