Spaces:

nilotpaldhar2004
/

text2sql-chatbot

Sleeping

App Files Files Community

nilotpaldhar2004 committed 23 days ago

Commit

1e5473b

verified ·

1 Parent(s): 1d456a9

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -21

app.py CHANGED Viewed

@@ -97,36 +97,55 @@ def generate_sql(question: str, schema: str) -> str:
     # 2. Advanced Rule-Based Shortcuts
-    # FILTERING (e.g., "is Paris", "where answer is Paris")
-    if "is" in q or "=" in q:
-        # Extract the value (e.g., "Paris")
-        value_match = re.search(r"is\s+(['\"]?\w+['\"]?)", q)
-        if value_match:
-            val = value_match.group(1).strip("'\"")
-            # If "question" is in the text, user probably wants the question for that answer
-            select_col = col_match[0] if "question" in q else "*"
-            filter_col = target_col if target_col else col_match[1]
             return f'SELECT "{select_col}" FROM {quoted} WHERE "{filter_col}" LIKE "%{val}%"'
-    # SELECT DISTINCT (List the names) vs COUNT DISTINCT (How many)
     if re.search(r'unique|distinct', q):
         col = target_col if target_col else col_match[0]
-        if re.search(r'show|list|get|give|what are', q):
             return f'SELECT DISTINCT "{col}" FROM {quoted} LIMIT 50'
         return f'SELECT COUNT(DISTINCT "{col}") FROM {quoted}'
-    # SPECIFIC COLUMN SELECTION (e.g., "show all answers")
-    if re.search(r'show|list|get', q) and target_col:
-        if not re.search(r'count|avg|mean|sum', q):
-            return f'SELECT "{target_col}" FROM {quoted} LIMIT 50'
-    # GROUP BY
-    if re.search(r'group.*by|per|each', q):
-        col = target_col if target_col else col_match[0]
-        return f'SELECT "{col}", COUNT(*) FROM {quoted} GROUP BY "{col}"'
-    # 3. T5 Fallback (Existing logic)
-    # ... [Keep your T5 code and Sanitization here] ...
     return sql
 def execute_sql(sql: str, db_bytes: bytes) -> list[dict]:

     # 2. Advanced Rule-Based Shortcuts
+    # FILTERING (e.g., "ans is Asia")
+    if "is" in q or "where" in q:
+        # Improved value extraction: look for the last word in the sentence
+        words = q.split()
+        val = words[-1].strip("?.!")
+        # Determine columns
+        select_col = col_match[0] if "question" in q else "*"
+        filter_col = target_col if target_col else (col_match[1] if len(col_match)>1 else col_match[0])
+        # Don't trigger if the 'value' is just a common instruction word
+        if val not in ["null", "not", "the", "average", "rows"]:
             return f'SELECT "{select_col}" FROM {quoted} WHERE "{filter_col}" LIKE "%{val}%"'
+    # SELECT DISTINCT (List) vs COUNT DISTINCT (Number)
     if re.search(r'unique|distinct', q):
         col = target_col if target_col else col_match[0]
+        if re.search(r'show|list|get|give', q):
             return f'SELECT DISTINCT "{col}" FROM {quoted} LIMIT 50'
         return f'SELECT COUNT(DISTINCT "{col}") FROM {quoted}'
+    # AGGREGATIONS
+    if re.search(r'average|mean|avg', q):
+        num_col = target_col if target_col else (col_match[1] if len(col_match)>1 else col_match[0])
+        return f'SELECT AVG("{num_col}") FROM {quoted}'
+    # LIMIT/SHOW
+    if re.search(r'show|display|get|first|top', q) and not target_col:
+        n_match = re.search(r'\d+', q)
+        return f'SELECT * FROM {quoted} LIMIT {n_match.group() if n_match else 10}'
+    # 3. Transformer Fallback (MANDATORY FIX)
+    # Ensure this part is NOT skipped
+    col_hint = ", ".join(col_match) if col_match else ""
+    prompt = f"Translate English to SQL: {question} | Table: {table_name} | Columns: {col_hint}"
+    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(DEVICE)
+    with torch.no_grad():
+        outputs = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS, num_beams=4, early_stopping=True)
+    sql = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
+    # Sanitization
+    if "|" in sql: sql = sql.split("|")[-1].strip()
+    sql = re.sub(r'\bFROM\s+("?\w+"?)', f'FROM {quoted}', sql, flags=re.IGNORECASE)
+    if not re.search(r'\bSELECT\b', sql, re.IGNORECASE):
+        sql = f'SELECT * FROM {quoted} LIMIT 10'
     return sql
 def execute_sql(sql: str, db_bytes: bytes) -> list[dict]: