Spaces:

Shizu0n
/

phi3-mini-sql-generator-demo

Sleeping

App Files Files Community

Shizu0n committed 12 days ago

Commit

737eaac

1 Parent(s): 8aa688e

feat: add schema-aware validation

Browse files

Files changed (6) hide show

README.md +3 -3
app.py +55 -10
model_io.py +25 -10
sql_tools.py +135 -2
tests/test_chatbot_behavior.py +87 -0
tests/test_chatbot_core.py +1 -1

README.md CHANGED Viewed

@@ -63,7 +63,7 @@ The product layer exists because a fine-tuned model is not the same thing as a r
 - Intent routing limits the model path to `SQL_QUERY`.
 - Deterministic schema tools handle explicit create/edit requests without loading the model.
 - High-confidence SQL templates handle simple ranking, aggregation, count, and comparison queries before the CPU model path.
-- SQL output is validated with `sqlparse` and separated from status/error text.
 - Lazy loading keeps startup cheap; the model is downloaded and loaded only when needed.
 - Load and generation timeouts protect the UI from indefinite waits.
 - Static fallbacks make unsupported behavior visible instead of laundering it as AI.
@@ -80,7 +80,7 @@ The product layer exists because a fine-tuned model is not the same thing as a r
 3. Enter the question in the chat input.
 4. Click **Send**.
 5. Review the result in `gr.Code(language="sql")` and the source/status message.
-   - The app shows a validation badge powered by `sqlparse`.
    - Known-source errors should still identify their source path; unknown-source errors should not fake certainty.
 ## Usage Examples
@@ -127,7 +127,7 @@ The probe prints JSON with pass/fail checks for static fallback, deterministic C
 set PYTHONPATH=. && pytest tests/test_chatbot_core.py tests/test_chatbot_behavior.py -q
 ```
-Current unit suite: **129 tests**. These tests avoid loading the 3.8B model and focus on routing, deterministic tools, prompt construction, model-output rejection, SQL validation, UI schema-context synchronization, and error handling.
 ## Run Locally

 - Intent routing limits the model path to `SQL_QUERY`.
 - Deterministic schema tools handle explicit create/edit requests without loading the model.
 - High-confidence SQL templates handle simple ranking, aggregation, count, and comparison queries before the CPU model path.
+- SQL output is validated with `sqlparse`; model output is also checked against the active `CREATE TABLE` schema before it can be shown as accepted model SQL.
 - Lazy loading keeps startup cheap; the model is downloaded and loaded only when needed.
 - Load and generation timeouts protect the UI from indefinite waits.
 - Static fallbacks make unsupported behavior visible instead of laundering it as AI.
 3. Enter the question in the chat input.
 4. Click **Send**.
 5. Review the result in `gr.Code(language="sql")` and the source/status message.
+   - The app shows a validation badge powered by `sqlparse` plus active-schema checks for model output.
    - Known-source errors should still identify their source path; unknown-source errors should not fake certainty.
 ## Usage Examples
 set PYTHONPATH=. && pytest tests/test_chatbot_core.py tests/test_chatbot_behavior.py -q
 ```
+Current unit suite: **136 tests**. These tests avoid loading the 3.8B model and focus on routing, deterministic tools, prompt construction, model-output rejection, active-schema validation, SQL validation, UI schema-context synchronization, and error handling.
 ## Run Locally

app.py CHANGED Viewed

@@ -1270,15 +1270,24 @@ def generate_response(message, chat_history, active_schema, loaded_key, conversa
             source_label=SOURCE_FINE_TUNED_MODEL,
         )
-    sql_text, _chat_text, validator = model_core.format_generation_result(generated_text)
     model_def = model_by_key(loaded_key)
     if not sql_text:
         return _response_tuple(
             chat_history,
             message,
             state,
-            "The fine-tuned model output was rejected because it was not SELECT/WITH SQL.",
-            f"{SOURCE_FINE_TUNED_MODEL}. Rejected non-SELECT/WITH model output from {model_def['model_id']} in {elapsed}s.",
             sql_text="",
             validator=validator,
             status_kind="error",
@@ -1308,12 +1317,12 @@ def normalize_sql_question_to_english(message, schema=""):
     return sql_core.normalize_sql_question_to_english(message, schema)
-def format_generation_result(text):
-    return model_core.format_generation_result(text)
-def validate_sql(sql_text):
-    return sql_core.validate_sql(sql_text)
 def create_table_from_message(message):
@@ -1359,7 +1368,7 @@ def sync_on_load():
 CSS = """
 @import url('https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;500;700&display=swap');
-/* Prevent Gradio dark theme from overriding text in light-bg components */
 [class*="badge"],
 [class*="validator-"],
 [class*="model-tag"],
@@ -1386,6 +1395,12 @@ CSS = """
   --amber-text: #854F0B;
 }
 * {
   box-sizing: border-box;
 }
@@ -1397,6 +1412,36 @@ CSS = """
   background: var(--bg-base) !important;
   color: var(--text-primary) !important;
   font-family: Space Mono, ui-monospace, SFMono-Regular, Menlo, Consolas, monospace !important;
 }
 .app-shell {
@@ -1561,7 +1606,7 @@ CSS = """
 }
 .model-card.selected .model-score span {
-  color: var(--teal);
 }
 .model-score small,
@@ -1779,7 +1824,7 @@ CSS = """
 }
 .schema-context span {
-  color: var(--teal);
   font-size: 11px;
   font-weight: 500;
 }

             source_label=SOURCE_FINE_TUNED_MODEL,
         )
+    sql_text, _chat_text, validator = model_core.format_generation_result(
+        generated_text,
+        state.active_schema,
+    )
     model_def = model_by_key(loaded_key)
     if not sql_text:
+        rejection_reason = model_core.model_sql_rejection_reason(generated_text, state.active_schema)
+        rejection_detail = (
+            f"The fine-tuned model output was rejected because {rejection_reason}."
+            if rejection_reason
+            else "The fine-tuned model output was rejected by SQL/schema guardrails."
+        )
         return _response_tuple(
             chat_history,
             message,
             state,
+            rejection_detail,
+            f"{SOURCE_FINE_TUNED_MODEL}. Rejected non-SELECT/WITH model output or schema-invalid model output from {model_def['model_id']} in {elapsed}s.",
             sql_text="",
             validator=validator,
             status_kind="error",
     return sql_core.normalize_sql_question_to_english(message, schema)
def format_generation_result(text, schema=""):
    """Forward to ``model_core.format_generation_result``.

    Args:
        text: Raw model output to format.
        schema: Schema text passed through unchanged (empty string by default).

    Returns:
        Whatever ``model_core.format_generation_result(text, schema)`` returns.
    """
    formatted = model_core.format_generation_result(text, schema)
    return formatted
def validate_sql(sql_text, schema=""):
    """Forward to ``sql_core.validate_sql``.

    Args:
        sql_text: SQL text to validate.
        schema: Schema text passed through unchanged (empty string by default).

    Returns:
        Whatever ``sql_core.validate_sql(sql_text, schema)`` returns.
    """
    badge = sql_core.validate_sql(sql_text, schema)
    return badge
 def create_table_from_message(message):
 CSS = """
 @import url('https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;500;700&display=swap');
+/* Keep app contrast stable regardless of Spaces light/dark host theme. */
 [class*="badge"],
 [class*="validator-"],
 [class*="model-tag"],
   --amber-text: #854F0B;
 }
+html,
+body,
+:root {
+  color-scheme: dark !important;
+}
 * {
   box-sizing: border-box;
 }
   background: var(--bg-base) !important;
   color: var(--text-primary) !important;
   font-family: Space Mono, ui-monospace, SFMono-Regular, Menlo, Consolas, monospace !important;
+  --body-text-color: var(--text-primary) !important;
+  --body-text-color-subdued: var(--text-secondary) !important;
+  --block-title-text-color: var(--text-secondary) !important;
+  --block-label-text-color: var(--text-secondary) !important;
+  --input-placeholder-color: var(--text-muted) !important;
+}
+.top-panel h1,
+.model-card h3,
+.model-score span,
+.evidence-copy h2,
+.evidence-card strong,
+.loading-title {
+  color: var(--text-primary) !important;
+}
+.top-panel p,
+.step-title,
+.model-card code,
+.model-score small,
+.model-card-footer,
+.evidence-copy p,
+.evidence-card span,
+.evidence-card small,
+.status-pill,
+.schema-context,
+.field-label,
+.preset-label,
+.message-box {
+  color: var(--text-secondary) !important;
 }
 .app-shell {
 }
 .model-card.selected .model-score span {
+  color: var(--teal) !important;
 }
 .model-score small,
 }
 .schema-context span {
+  color: var(--teal) !important;
   font-size: 11px;
   font-weight: 500;
 }

model_io.py CHANGED Viewed

@@ -59,33 +59,48 @@ def is_sql_like(text):
     return sql_tools.is_sql_like(text)
-def is_model_sql_allowed(text):
     text = (text or "").strip()
     if not text:
-        return False
     try:
         statements = [statement for statement in sqlparse.parse(text) if str(statement).strip()]
     except Exception:
-        return False
     if len(statements) != 1:
-        return False
     statement = statements[0]
     first_token = statement.token_first(skip_cm=True)
     starter = first_token.value.strip().upper() if first_token is not None else ""
     if starter not in MODEL_SQL_STARTERS or statement.get_type().upper() != "SELECT":
-        return False
     for token in statement.flatten():
         keyword = token.value.strip().upper()
         if token.ttype in (sql_tokens.Keyword.DDL, sql_tokens.Keyword.DML):
             if keyword in MODEL_SQL_FORBIDDEN_KEYWORDS:
-                return False
-    return "validator-ok" in sql_tools.validate_sql(text)
-def format_generation_result(text):
     cleaned = extract_sql_candidate(text)
-    if is_model_sql_allowed(cleaned):
-        return str(cleaned), "", sql_tools.validate_sql(cleaned)
     return "", "", sql_tools.validate_sql("")

     return sql_tools.is_sql_like(text)
def model_sql_validation_issue(text, schema=""):
    """Return the reason model SQL is unacceptable, or "" when it passes.

    Checks run in this order, and the first failure wins:
    non-empty text, parseable by sqlparse, exactly one statement, an allowed
    starter with statement type SELECT, no forbidden DDL/DML keyword, the
    schema check, and finally the validator badge.

    Args:
        text: Candidate SQL (``None`` is treated as empty).
        schema: Schema text forwarded to the ``sql_tools`` checks.

    Returns:
        A human-readable reason string, or "" when every check passes.
    """
    candidate = (text or "").strip()
    if not candidate:
        return "empty model output"

    try:
        parsed = [stmt for stmt in sqlparse.parse(candidate) if str(stmt).strip()]
    except Exception:
        return "sqlparse could not parse model output"
    if len(parsed) != 1:
        return "model output contains multiple SQL statements"

    statement = parsed[0]
    lead = statement.token_first(skip_cm=True)
    starter = "" if lead is None else lead.value.strip().upper()
    # Short-circuits exactly like the original: get_type() is only consulted
    # when the starter itself is allowed.
    if not (starter in MODEL_SQL_STARTERS and statement.get_type().upper() == "SELECT"):
        return "model output is not SELECT/WITH SQL"

    guarded_types = (sql_tokens.Keyword.DDL, sql_tokens.Keyword.DML)
    for token in statement.flatten():
        if token.ttype not in guarded_types:
            continue
        keyword = token.value.strip().upper()
        if keyword in MODEL_SQL_FORBIDDEN_KEYWORDS:
            return f"model output contains unsupported SQL keyword: {keyword}"

    schema_issue = sql_tools.sql_schema_validation_issue(candidate, schema)
    if schema_issue:
        return schema_issue
    if "validator-ok" not in sql_tools.validate_sql(candidate, schema):
        return "model output failed SQL/schema validation"
    return ""
def is_model_sql_allowed(text, schema=""):
    """Return True when ``model_sql_validation_issue`` reports no issue."""
    issue = model_sql_validation_issue(text, schema)
    return not issue
def model_sql_rejection_reason(text, schema=""):
    """Extract the SQL candidate from ``text`` and return its validation issue.

    Returns "" when the extracted candidate passes validation.
    """
    return model_sql_validation_issue(extract_sql_candidate(text), schema)
def format_generation_result(text, schema=""):
    """Turn raw model output into a ``(sql, chat_text, validator_html)`` triple.

    The SQL candidate is extracted from ``text``. If it is rejected, the SQL
    slot is empty and the validator is the badge for empty SQL. Otherwise the
    candidate is returned with its validator badge. The chat slot is always "".
    """
    candidate = extract_sql_candidate(text)
    if not is_model_sql_allowed(candidate, schema):
        return "", "", sql_tools.validate_sql("")
    return str(candidate), "", sql_tools.validate_sql(candidate, schema)

sql_tools.py CHANGED Viewed

@@ -198,7 +198,133 @@ def is_sql_intent(message, schema=""):
     return bool(schema and is_sql_like(message))
-def validate_sql(sql_text):
     sql_text = (sql_text or "").strip()
     if not sql_text:
         return '<span class="validator-badge validator-empty">No SQL yet</span>'
@@ -224,7 +350,7 @@ def validate_sql(sql_text):
             f'<span class="validator-detail">First token: {escaped_token}</span>'
         )
     trailing_keyword = re.search(
-        r"\b(AND|ASC|BY|DESC|FROM|GROUP|HAVING|JOIN|LIMIT|NOT|ON|OR|ORDER|SELECT|WHERE)\s*;?\s*$",
         sql_text,
         flags=re.IGNORECASE,
     )
@@ -245,6 +371,13 @@ def validate_sql(sql_text):
             '<span class="validator-badge validator-warn">Check syntax</span>'
             f'<span class="validator-detail">Incomplete negated predicate: NOT {escaped_token}</span>'
         )
     return '<span class="validator-badge validator-ok">Valid SQL</span>'

     return bool(schema and is_sql_like(message))
# Upper-cased names that the schema column check skips instead of treating
# them as column references.
SQL_SCHEMA_FUNCTION_NAMES = {
    "AVG", "COALESCE", "COUNT",
    "LOWER", "MAX", "MIN",
    "ROUND", "SUM", "UPPER",
}
# Upper-cased keywords that may directly follow a table name and therefore
# must not be recorded as that table's alias.
SQL_ALIAS_STOPWORDS = {
    "FULL", "GROUP", "HAVING", "INNER",
    "JOIN", "LEFT", "LIMIT", "ON",
    "ORDER", "RIGHT", "WHERE",
}
+def _without_sql_literals(sql_text):
+    return re.sub(
+        r"'(?:''|[^'])*'|\"(?:\"\"|[^\"])*\"|\b\d+(?:\.\d+)?\b",
+        " ",
+        sql_text or "",
+    )
def _identifier_names(sql_text):
    """Collect every Name token in ``sql_text`` with its nearest neighbours.

    Args:
        sql_text: SQL text handed to ``sqlparse.parse``.

    Returns:
        A list of ``(name, previous_value, next_value)`` tuples, one per token
        whose type falls under ``sqlparse.tokens.Name``. The neighbour values
        are the text of the closest non-whitespace token on each side, or ""
        when there is none. Returns ``[]`` if parsing raises.
    """
    try:
        statements = [stmt for stmt in sqlparse.parse(sql_text) if str(stmt).strip()]
    except Exception:
        # Best effort: an unparseable statement simply yields no names.
        return []
    names = []
    for statement in statements:
        # Drop whitespace once so each neighbour is a direct index lookup,
        # instead of re-slicing and re-scanning the token list per name.
        significant = [tok for tok in statement.flatten() if not tok.is_whitespace]
        last_position = len(significant) - 1
        for position, token in enumerate(significant):
            if token.ttype not in sqlparse.tokens.Name:
                continue
            previous_value = significant[position - 1].value if position > 0 else ""
            next_value = significant[position + 1].value if position < last_position else ""
            names.append((token.value, previous_value, next_value))
    return names
def sql_schema_validation_issue(sql_text, schema):
    """Return the first mismatch between ``sql_text`` and ``schema``, or "".

    The schema is read with ``parse_create_table_schema``. When it yields no
    table name or no columns, nothing is checked and "" is returned.

    Args:
        sql_text: SQL statement to check.
        schema: Schema text handed to ``parse_create_table_schema``.

    Returns:
        "" when no issue is found; otherwise a message naming the missing
        table reference, the unknown table, or the unknown column.
    """
    table_name, columns = parse_create_table_schema(schema)
    if not table_name or not columns:
        return ""
    expected_table = table_name.lower()
    allowed_columns = {name.lower() for name, _column_type in columns}
    scrubbed_sql = _without_sql_literals(sql_text)

    # CTE names: "WITH name AS (", "WITH RECURSIVE name AS (" and any
    # following ", name AS (" (whitespace after the comma is optional).
    cte_aliases = {
        match.group(1).lower()
        for match in re.finditer(
            r"(?:\bWITH\s+(?:RECURSIVE\s+)?|,\s*)([A-Za-z_]\w*)\s+AS\s*\(",
            scrubbed_sql,
            flags=re.IGNORECASE,
        )
    }
    known_tables = {expected_table, *cte_aliases}

    table_refs = [
        match.group(1).lower()
        for match in re.finditer(
            r"\b(?:FROM|JOIN)\s+([A-Za-z_][\w]*)",
            scrubbed_sql,
            flags=re.IGNORECASE,
        )
    ]
    if not table_refs:
        return f"Model SQL does not reference active table: {table_name}"
    for table_ref in table_refs:
        if table_ref not in known_tables:
            return f"Unknown table for active schema: {table_ref}"

    # Aliases given to the active table ("FROM employees e" / "... AS e").
    # A keyword that merely follows the table name is not an alias.
    table_aliases = set()
    table_alias_pattern = (
        r"\b(?:FROM|JOIN)\s+"
        rf"({re.escape(table_name)})"
        r"\s+(?:AS\s+)?([A-Za-z_][\w]*)"
    )
    for match in re.finditer(table_alias_pattern, scrubbed_sql, flags=re.IGNORECASE):
        alias = match.group(2)
        if alias.upper() not in SQL_ALIAS_STOPWORDS:
            table_aliases.add(alias.lower())

    # Every "AS name" introduces a name that is not a schema column.
    output_aliases = {
        match.group(1).lower()
        for match in re.finditer(r"\bAS\s+([A-Za-z_][\w]*)", scrubbed_sql, flags=re.IGNORECASE)
    }

    allowed_names = known_tables | table_aliases | output_aliases | allowed_columns
    for name, _previous_value, next_value in _identifier_names(sql_text):
        # NOTE(review): a qualifier in front of "." is skipped without being
        # checked against the known tables/aliases; only the part after the
        # dot is validated.
        if next_value == ".":
            continue
        if name.lower() in allowed_names or name.upper() in SQL_SCHEMA_FUNCTION_NAMES:
            continue
        return f"Unknown column for active schema: {name}"
    return ""
+def validate_sql(sql_text, schema=""):
     sql_text = (sql_text or "").strip()
     if not sql_text:
         return '<span class="validator-badge validator-empty">No SQL yet</span>'
             f'<span class="validator-detail">First token: {escaped_token}</span>'
         )
     trailing_keyword = re.search(
+        r"\b(AND|BY|FROM|GROUP|HAVING|JOIN|LIMIT|NOT|ON|OR|ORDER|SELECT|WHERE)\s*;?\s*$",
         sql_text,
         flags=re.IGNORECASE,
     )
             '<span class="validator-badge validator-warn">Check syntax</span>'
             f'<span class="validator-detail">Incomplete negated predicate: NOT {escaped_token}</span>'
         )
+    schema_issue = sql_schema_validation_issue(sql_text, schema)
+    if schema_issue:
+        escaped_issue = html.escape(schema_issue)
+        return (
+            '<span class="validator-badge validator-warn">Check schema</span>'
+            f'<span class="validator-detail">{escaped_issue}</span>'
+        )
     return '<span class="validator-badge validator-ok">Valid SQL</span>'

tests/test_chatbot_behavior.py CHANGED Viewed

@@ -500,6 +500,55 @@ def test_sql_model_rejects_non_sql_output_as_chat_capability(monkeypatch):
     assert "Rejected non-SELECT/WITH model output" in status_html(result)
 def test_sql_intent_detected():
     assert app.is_sql_intent("What is the average salary per department?", app.PRESETS["employees"])
     assert app.is_sql_intent("what is the most expensive product?", app.PRESETS["products"])
@@ -595,6 +644,16 @@ def test_format_generation_result_accepts_with_query():
     assert "validator-ok" in validator
 # ---------------------------------------------------------------------------
 # validate_sql — starters beyond SELECT
 # ---------------------------------------------------------------------------
@@ -625,6 +684,34 @@ def test_validate_sql_bare_negated_predicate_returns_warn():
     )
 def test_validate_sql_empty_returns_empty_badge():
     assert app.validate_sql("") == app.EMPTY_VALIDATOR

     assert "Rejected non-SELECT/WITH model output" in status_html(result)
def test_sql_model_rejects_hallucinated_schema_column(monkeypatch):
    """Model SQL selecting ``email`` against the employees preset is rejected."""
    generation_config = types.SimpleNamespace(eos_token_id=0)
    app._model = types.SimpleNamespace(generation_config=generation_config)
    app._tokenizer = types.SimpleNamespace(eos_token_id=0, pad_token_id=0)
    app._current_model_id = app.FINE_TUNED_MODEL_ID

    def fake_generate(*args, **kwargs):
        return "SELECT email FROM employees WHERE name = 'Alice';", 1

    monkeypatch.setattr(app, "_generate_model_text", fake_generate)

    schema = app.PRESETS["employees"]
    result = app.generate_response(
        "find employees named Alice", [], schema, app.FINE_TUNED_MODEL_KEY, None
    )

    assert sql_output(result) == ""
    assert "email" in assistant_text(result)
    assert "schema-invalid model output" in status_html(result)
    assert app.SOURCE_FINE_TUNED_MODEL in status_html(result)
def test_sql_model_accepts_valid_schema_column_output(monkeypatch):
    """Model SQL that stays within the employees preset is shown as valid."""
    generation_config = types.SimpleNamespace(eos_token_id=0)
    app._model = types.SimpleNamespace(generation_config=generation_config)
    app._tokenizer = types.SimpleNamespace(eos_token_id=0, pad_token_id=0)
    app._current_model_id = app.FINE_TUNED_MODEL_ID

    def fake_generate(*args, **kwargs):
        return "SELECT * FROM employees WHERE name = 'Alice';", 1

    monkeypatch.setattr(app, "_generate_model_text", fake_generate)

    schema = app.PRESETS["employees"]
    result = app.generate_response(
        "find employees named Alice", [], schema, app.FINE_TUNED_MODEL_KEY, None
    )

    assert "SELECT * FROM employees WHERE name = 'Alice';" in sql_output(result)
    assert "validator-ok" in result[5]
    assert app.SOURCE_FINE_TUNED_MODEL in status_html(result)
 def test_sql_intent_detected():
     assert app.is_sql_intent("What is the average salary per department?", app.PRESETS["employees"])
     assert app.is_sql_intent("what is the most expensive product?", app.PRESETS["products"])
     assert "validator-ok" in validator
def test_format_generation_result_rejects_unknown_column_against_schema():
    """An unknown column yields empty SQL, empty chat text and the empty badge."""
    schema = app.PRESETS["employees"]
    sql, chat, validator = app.format_generation_result("SELECT email FROM employees;", schema)
    assert (sql, chat, validator) == ("", "", app.EMPTY_VALIDATOR)
 # ---------------------------------------------------------------------------
 # validate_sql — starters beyond SELECT
 # ---------------------------------------------------------------------------
     )
def test_validate_sql_rejects_unknown_schema_column():
    """Selecting ``email`` against the employees preset produces a warning badge."""
    badge = app.validate_sql("SELECT email FROM employees;", app.PRESETS["employees"])
    assert "validator-warn" in badge
def test_validate_sql_rejects_unknown_schema_table():
    """Querying ``departments`` against the employees preset produces a warning badge."""
    badge = app.validate_sql("SELECT name FROM departments;", app.PRESETS["employees"])
    assert "validator-warn" in badge
def test_validate_sql_rejects_date_when_not_in_schema():
    """Selecting ``date`` against the employees preset produces a warning badge."""
    badge = app.validate_sql("SELECT date FROM employees;", app.PRESETS["employees"])
    assert "validator-warn" in badge
def test_validate_sql_accepts_schema_alias_and_output_alias():
    """A table alias plus an ``AS`` output alias still validates as OK."""
    query = "SELECT e.name, COUNT(*) AS total FROM employees e GROUP BY e.name ORDER BY total DESC;"
    badge = app.validate_sql(query, app.PRESETS["employees"])
    assert "validator-ok" in badge
 def test_validate_sql_empty_returns_empty_badge():
     assert app.validate_sql("") == app.EMPTY_VALIDATOR

tests/test_chatbot_core.py CHANGED Viewed

@@ -100,7 +100,7 @@ def test_zoologico_transcript_with_mocked_sql_model(monkeypatch):
     def fake_generate(prompt, generation_kind):
         assert generation_kind == app.model_core.SQL_GENERATION
         assert "CREATE TABLE zoologico" in prompt
-        return "SELECT * FROM zoologico WHERE cidade = 'Sao Paulo';", 1
     monkeypatch.setattr(app, "_generate_model_text", fake_generate)

     def fake_generate(prompt, generation_kind):
         assert generation_kind == app.model_core.SQL_GENERATION
         assert "CREATE TABLE zoologico" in prompt
+        return "SELECT * FROM zoologico WHERE city = 'Sao Paulo';", 1
     monkeypatch.setattr(app, "_generate_model_text", fake_generate)