tjhalanigrid commited on
Commit
862cf68
·
1 Parent(s): 14b487f
README.md CHANGED
@@ -1,13 +1,14 @@
1
  ---
2
  title: Text2sql Demo
3
- emoji: 📊
4
- colorFrom: blue
5
- colorTo: green
6
  sdk: gradio
7
- sdk_version: 5.8.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
- python_version: 3.10.13
12
- short_description: 'Text to SQL with RLHF'
13
- ---
 
 
1
  ---
2
  title: Text2sql Demo
3
+ emoji: 🐨
4
+ colorFrom: yellow
5
+ colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 6.8.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
+ short_description: 'to show the gradio interface '
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,5 +1,5 @@
1
  """
2
- GRADIO DEMO UI - LAZY LOADING EDITION
3
  NL → SQL → Result Table
4
  """
5
 
@@ -7,563 +7,273 @@ import gradio as gr
7
  import pandas as pd
8
  import re
9
  import time
10
- import os
11
- import torch
12
- import sys
13
- import json
14
- import subprocess
15
- import base64
16
- import io
17
- from pathlib import Path
18
- from typing import Iterator
19
-
20
- # ==========================================
21
- # RELATIVE PATH RESOLUTION (GLOBAL)
22
- # ==========================================
23
- try:
24
- PROJECT_ROOT = Path(__file__).resolve().parent
25
- except NameError:
26
- PROJECT_ROOT = Path(".").resolve()
27
-
28
- if (PROJECT_ROOT / "data" / "database").exists():
29
- DB_ROOT = PROJECT_ROOT / "data" / "database"
30
- else:
31
- DB_ROOT = PROJECT_ROOT / "final_databases"
32
-
33
- def get_db_path(db_id: str) -> str:
34
- path1 = DB_ROOT / db_id / f"{db_id}.sqlite"
35
- path2 = DB_ROOT / f"{db_id}.sqlite"
36
- return str(path1) if path1.exists() else str(path2)
37
-
38
- # ==========================================
39
- # 🔥 CUDA MOCK PATCH FOR MAC (MPS) / CPU
40
- # ==========================================
41
- if not torch.cuda.is_available():
42
- class MockCUDAEvent:
43
- def __init__(self, enable_timing=False, blocking=False, interprocess=False):
44
- self.t = 0.0
45
- def record(self, stream=None):
46
- self.t = time.perf_counter()
47
- def elapsed_time(self, end_event):
48
- return (end_event.t - self.t) * 1000.0
49
-
50
- torch.cuda.Event = MockCUDAEvent
51
- if not hasattr(torch.cuda, 'synchronize'):
52
- torch.cuda.synchronize = lambda: None
53
-
54
- # ==========================================
55
- # IMPORTS & ENGINE SETUP
56
- # ==========================================
57
- from src.quantized_text2sql_engine import QuantizedText2SQLEngine
58
- from src.schema_encoder import SchemaEncoder
59
-
60
- DEFAULT_QUANT_ARTIFACT = str(PROJECT_ROOT / "int8_dynamic")
61
-
62
- _ENGINE_CACHE = {}
63
- _QUERY_LOG = []
64
- _PERF_LOG = []
65
- _SUCCESS_LOG = []
66
-
67
- _OP_STATS = {
68
- "SELECT": {"ok": 0, "fail": 0}, "WHERE": {"ok": 0, "fail": 0}, "JOIN": {"ok": 0, "fail": 0},
69
- "GROUP_BY": {"ok": 0, "fail": 0}, "ORDER_BY": {"ok": 0, "fail": 0}, "HAVING": {"ok": 0, "fail": 0}, "LIMIT": {"ok": 0, "fail": 0},
70
- }
71
-
72
- def get_quant_engine(artifact_dir: str, use_constrained: bool = False, exec_workers: int = 8, use_cache: bool = True):
73
- key = (artifact_dir, bool(use_constrained), int(exec_workers), bool(use_cache))
74
- if key not in _ENGINE_CACHE:
75
- try:
76
- _ENGINE_CACHE[key] = QuantizedText2SQLEngine(artifact_dir, device="cpu", use_constrained=bool(use_constrained), exec_workers=int(exec_workers), use_cache=bool(use_cache))
77
- except TypeError:
78
- _ENGINE_CACHE[key] = QuantizedText2SQLEngine(artifact_dir)
79
- return _ENGINE_CACHE[key]
80
-
81
- # 🚨 LAZY LOADING: We DO NOT load the model here! We only load the fast Schema Encoder.
82
- quant_engine = None
83
- try:
84
- schema_encoder = SchemaEncoder(DB_ROOT)
85
- except Exception as e:
86
- print(f"Warning: SchemaEncoder failed to load: {e}")
87
- schema_encoder = None
88
 
 
 
 
 
 
89
  SAMPLES = [
90
- ("Show 10 distinct employee first names.", "chinook_1"), ("Which artist has the most albums?", "chinook_1"),
91
- ("List all the tracks that belong to the 'Rock' genre.", "chinook_1"), ("What are the names of all the cities?", "flight_1"),
92
- ("Find the flight number and cost of the cheapest flight.", "flight_1"), ("List the airlines that fly out of New York.", "flight_1"),
93
- ("Which campus was opened between 1935 and 1939?", "csu_1"), ("Count the number of students in each department.", "college_2"),
94
- ("List the names of all clubs.", "club_1"), ("How many members does each club have?", "club_1"),
95
- ("Show the names of all cinemas.", "cinema"), ("Which cinema has the most screens?", "cinema")
 
 
 
 
 
 
96
  ]
 
97
  SAMPLE_QUESTIONS = [q[0] for q in SAMPLES]
98
 
 
 
 
99
  def explain_sql(sql):
100
- if not sql: return ""
101
  explanation = "This SQL query retrieves information from the database."
102
  sql_lower = sql.lower()
103
- if "join" in sql_lower: explanation += "\n• It combines data from multiple tables using JOIN."
104
- if "where" in sql_lower: explanation += "\n• It filters rows using a WHERE condition."
105
- if "group by" in sql_lower: explanation += "\n• It groups results using GROUP BY."
106
- if "order by" in sql_lower: explanation += "\n• It sorts the results using ORDER BY."
107
- if "limit" in sql_lower: explanation += "\n• It limits the number of returned rows."
 
 
 
 
 
 
 
108
  return explanation
109
 
110
- def sql_ops(sql: str) -> list[str]:
111
- s = (sql or "").lower()
112
- ops = ["SELECT"]
113
- if " where " in f" {s} ": ops.append("WHERE")
114
- if " join " in f" {s} ": ops.append("JOIN")
115
- if " group by " in f" {s} ": ops.append("GROUP_BY")
116
- if " order by " in f" {s} ": ops.append("ORDER_BY")
117
- if " having " in f" {s} ": ops.append("HAVING")
118
- if " limit " in f" {s} ": ops.append("LIMIT")
119
- return ops
120
-
121
- def classify_error(sql: str, error_msg: str | None = None, *, timed_out: bool = False):
122
- s = (sql or "").lower()
123
- m = (error_msg or "").lower()
124
- if timed_out or "interrupted" in m or "timeout" in m: return "timeout"
125
- if not s.strip().startswith(("select", "with")): return "syntax_error"
126
- if " join " in f" {s} " and " on " not in f" {s} ": return "missing_join"
127
- if " where " in f" {s} " and not any(op in s for op in ["=", ">", "<", " in ", " like ", " between ", " is null", " is not null"]): return "wrong_where"
128
- if ("is null" in s or "is not null" in s) and ("no such column" in m or "misuse" in m): return "null_handling"
129
- if "no such table" in m: return "missing_table"
130
- if "no such column" in m: return "missing_column"
131
- if "ambiguous column name" in m: return "ambiguous_column"
132
- if "datatype mismatch" in m or "type mismatch" in m: return "type_mismatch"
133
- if "misuse of aggregate" in m or "misuse of aggregate function" in m: return "wrong_aggregation"
134
- if "syntax error" in m: return "syntax_error"
135
- if "near" in m and "syntax error" in m: return "syntax_error"
136
- if "runtime" in m or "constraint failed" in m: return "runtime_error"
137
- return "other"
138
-
139
- def get_hint(error_type):
140
- hints = {
141
- "missing_join": "Check JOIN conditions between tables.", "wrong_aggregation": "Use proper aggregation like avg(column).",
142
- "wrong_where": "Check WHERE condition syntax.", "syntax_error": "Ensure SQL starts with SELECT.",
143
- "missing_table": "Use only tables from the provided schema.", "missing_column": "Use only columns from the provided schema.",
144
- "ambiguous_column": "Disambiguate by using table.column.", "timeout": "Query took too long; simplify joins.", "other": "Review SQL logic."
145
- }
146
- return hints.get(error_type, "Review query.")
147
-
148
- def is_relevant_to_schema(question, db_id):
149
- if schema_encoder is None: return True
150
- try: raw_schema = schema_encoder.structured_schema(db_id).lower()
151
- except: return True
152
- schema_words = set(re.findall(r'[a-z0-9_]+', raw_schema))
153
- q_words = re.findall(r'[a-z0-9_]+', question.lower())
154
- stop_words = {"show", "list", "all", "what", "is", "the", "how", "many", "count", "find", "get", "me", "a", "an", "of", "in", "for", "from", "with", "which", "are", "there", "give", "tell", "details", "info", "data", "everything"}
155
- meaningful_q_words = [w for w in q_words if w not in stop_words and not w.isdigit()]
156
- if not meaningful_q_words: return True
157
- for word in meaningful_q_words:
158
- singular_word = word[:-1] if word.endswith('s') else word
159
- if word in schema_words or singular_word in schema_words: return True
160
- return False
161
 
 
 
 
162
  def run_query(method, sample_q, custom_q, db_id):
163
- global quant_engine
164
 
165
- # 🚨 LAZY LOADING: We load the heavy AI model ONLY when the button is clicked.
166
- if quant_engine is None:
167
- print(f"First request detected! Loading AI model from {DEFAULT_QUANT_ARTIFACT}...", flush=True)
168
- try:
169
- quant_engine = get_quant_engine(DEFAULT_QUANT_ARTIFACT, use_constrained=False, exec_workers=8, use_cache=True)
170
- if quant_engine is None:
171
- return "-- ❌ ENGINE CRASH", pd.DataFrame(columns=["Error"]), "Failed to load model. Did you move the tokenizer files and add config.json to int8_dynamic/?"
172
- except Exception as e:
173
- return f"-- ❌ ENGINE CRASH\n-- {str(e)}", pd.DataFrame(columns=["Error Status"]), f"Critical failure loading model: {e}"
174
-
175
- def _log(error_type: str, *, question: str, db_id_val: str, sql: str = "", error_msg: str = "") -> None:
176
- _QUERY_LOG.append({"t": time.time(), "db_id": str(db_id_val), "question": str(question), "sql": str(sql), "error_type": str(error_type), "error_msg": str(error_msg)})
177
-
178
- def _perf_log(payload: dict) -> None:
179
- _PERF_LOG.append(payload)
180
- if len(_PERF_LOG) > 1000: del _PERF_LOG[:200]
181
-
182
- raw_question = sample_q if method == "💡 Pick a Sample" else custom_q
183
-
184
- if not raw_question or str(raw_question).strip() == "":
185
- return "-- No input provided", pd.DataFrame(columns=["Warning"]), "⚠️ Please enter a question."
186
- if not db_id or str(db_id).strip() == "":
187
- return "-- No database selected", pd.DataFrame(columns=["Warning"]), "⚠️ Please select a database."
188
-
189
- typo_corrections = [(r'\bshaw\b', 'show'), (r'\bshw\b', 'show'), (r'\bsho\b', 'show'), (r'\blsit\b', 'list'), (r'\blis\b', 'list'), (r'\bfidn\b', 'find'), (r'\bfnd\b', 'find'), (r'\bgte\b', 'get')]
190
- question = str(raw_question)
191
- for bad, good in typo_corrections: question = re.sub(bad, good, question, flags=re.IGNORECASE)
192
- q_lower = question.strip().lower()
193
 
194
- if len(q_lower.split()) < 2:
195
- _log("gibberish", question=question, db_id_val=str(db_id), error_msg="gibberish filtered")
196
- return "-- Input Blocked", pd.DataFrame(columns=["Warning"]), "⚠️ Please enter a clear, meaningful natural language question (more than one word)."
197
-
198
- if re.search(r'\b(delete|update|insert|drop|alter|truncate)\b', q_lower):
199
- _log("blocked_dml", question=question, db_id_val=str(db_id), error_msg="DML blocked")
200
- return "-- ❌ BLOCKED: Data Modification", pd.DataFrame(columns=["Security Alert"]), "🛑 Security Alert: Modifying or deleting data is strictly prohibited."
201
-
202
- if not is_relevant_to_schema(question, db_id):
203
- _log("out_of_domain", question=question, db_id_val=str(db_id), error_msg="out of domain")
204
- return "-- ❌ BLOCKED: Out of Domain", pd.DataFrame(columns=["Domain Alert"]), f"🛑 Relevance Alert: I don't see anything related to your question in the '{db_id}' schema."
205
 
206
  start_time = time.time()
207
- t0 = time.perf_counter()
208
- ui_warnings = ""
209
 
 
210
  try:
211
- try:
212
- result = quant_engine.ask(question, str(db_id), num_beams=4, max_new_tokens=120, timeout_s=2.0)
213
- except TypeError:
214
- result = quant_engine.ask(question, str(db_id))
215
  except Exception as e:
216
- _log("backend_crash", question=question, db_id_val=str(db_id), error_msg=str(e))
217
- return f"-- ❌ BACKEND CRASH\n-- {str(e)}", pd.DataFrame(columns=["Error Status"]), f"❌ CRITICAL BACKEND CRASH:\n{str(e)}"
218
-
219
- final_sql = str(result.get("sql", ""))
220
- model_sql = final_sql
221
-
222
- num_match = re.search(r'\b(?:show|list|top|limit|get|first|last|sample|of)\s+(?:[a-zA-Z_]+\s+)?(\d+)\b', q_lower)
223
- if not num_match and q_lower.startswith(("show", "list", "get")):
224
- num_match = re.search(r'\b(\d+)\b', q_lower)
225
-
226
- if num_match and final_sql:
227
- limit_val = num_match.group(1)
228
- final_sql = re.sub(rf"(?i)\s*(?:where|having|and)?\s*count\s*\(\s*\*\s*\)\s*=\s*{limit_val}", "", final_sql)
229
- final_sql = re.sub(rf"(?i)\s*(?:where|and)\s+[a-zA-Z0-9_.]+\s*=\s*['\"]?{limit_val}['\"]?", "", final_sql)
230
- final_sql = re.sub(r"(?i)\s*where\s*$", "", final_sql)
231
- final_sql = re.sub(r"(?i)\s*where\s+(group by|order by|limit)", r" \1", final_sql)
232
-
233
- agg_kws = ["most", "top", "highest", "lowest", "count", "many", "group", "frequent", "popular"]
234
- if not any(k in q_lower for k in agg_kws):
235
- final_sql = re.sub(r"(?i)\s*group by\s+[a-zA-Z0-9_.]+\s*order by\s+count\(\*\)\s*(?:desc|asc)?", "", final_sql)
236
- final_sql = re.sub(r"(?i)\s*order by\s+count\(\*\)\s*(?:desc|asc)?", "", final_sql)
237
- final_sql = re.sub(r"(?i),\s*count\(\*\)", "", final_sql)
238
- final_sql = re.sub(r"(?i)count\(\*\)\s*,", "", final_sql)
239
-
240
- if "group by" in final_sql.lower() and not re.search(r'(?i)\b(count|sum|avg|max|min)\b\(', final_sql):
241
- final_sql = re.sub(r"(?i)\s*group by\s+[a-zA-Z0-9_.]+", "", final_sql)
242
-
243
- if "limit" not in final_sql.lower():
244
- final_sql = f"{final_sql.strip().rstrip(';')} LIMIT {limit_val}"
245
-
246
- # Execution
247
- from src.sql_validator import validate_sql_schema
248
- db_path = get_db_path(str(db_id))
249
-
250
- try: strict_valid, _ = validate_sql_schema(final_sql, db_path)
251
- except Exception: strict_valid = False
252
 
253
- error_msg = None
254
- rows, cols = [], []
255
- sqlite_success = False
 
256
 
257
- try:
258
- rows, cols = quant_engine._execute_one(final_sql, db_path, timeout_s=2.0)
259
- sqlite_success = True
260
- except Exception as e:
261
- error_msg = str(e)
262
- sqlite_success = False
263
-
264
- if not sqlite_success and model_sql and model_sql != final_sql:
265
- try:
266
- alt_rows, alt_cols = quant_engine._execute_one(model_sql, db_path, timeout_s=2.0)
267
- final_sql = model_sql
268
- rows, cols = alt_rows, alt_cols
269
- sqlite_success = True
270
- error_msg = None
271
- except Exception: pass
272
-
273
- valid = sqlite_success
274
-
275
- if error_msg or not valid:
276
- et = classify_error(final_sql, str(error_msg or ""), timed_out=("interrupted" in str(error_msg or "").lower()))
277
- _log(et, question=str(question), db_id_val=str(db_id), sql=str(final_sql), error_msg=str(error_msg or "Execution failed"))
278
-
279
- latency = round(time.time() - start_time, 3)
280
- t1 = time.perf_counter()
281
-
282
- engine_stats_after = quant_engine.stats() if hasattr(quant_engine, 'stats') else {}
283
-
284
- perf = {
285
- "db_id": str(db_id), "use_constrained_decoding": False, "num_beams": 4,
286
- "latency_total_ms": round((t1 - t0) * 1000.0, 2), "constraint_ok": bool(strict_valid), "has_error": bool(error_msg),
287
- "exec_cache_hit_rate": float(engine_stats_after.get("exec_cache_hit_rate", 0.0) or 0.0),
288
- }
289
- _perf_log(perf)
290
-
291
- window = _PERF_LOG[-50:]
292
- avg_ms = sum(float(x.get("latency_total_ms", 0.0) or 0.0) for x in window) / len(window) if window else 0.0
293
- constraint_rate = sum(1 for x in window if x.get("constraint_ok")) / len(window) if window else 0.0
294
-
295
- perf_block = (
296
- "\n\n---\nPerformance (task impact)\n"
297
- f"- Total latency (ms): {perf['latency_total_ms']}\n"
298
- f"- Strict Python Validator OK (Task 3): {perf['constraint_ok']}\n"
299
- f"- Exec cache hit-rate (Task 1/5): {round(perf['exec_cache_hit_rate'], 3)}\n"
300
- f"- Rolling avg latency last 50 (ms): {round(avg_ms, 2)}\n"
301
- f"- Rolling constraint rate last 50: {round(constraint_rate, 3)}\n"
302
- )
303
 
304
- if error_msg or not valid:
305
- display_sql = final_sql if final_sql.strip() else "-- ❌ INVALID SQL"
306
- explanation = f"{ui_warnings}Error Details:\n\n"
307
- if error_msg: explanation += f"{error_msg}\n\n"
308
-
309
- error_type = classify_error(final_sql, str(error_msg or ""))
310
- explanation += f"Error Type: {error_type}\nHint: {get_hint(error_type)}"
311
- explanation += perf_block
312
- ops = sql_ops(final_sql)
313
- for op in ops:
314
- if op in _OP_STATS: _OP_STATS[op]["fail"] += 1
315
- return display_sql, pd.DataFrame(columns=["Execution Notice"]), explanation
316
-
317
- safe_cols = cols if cols else ["Result"]
318
- explanation = f"{ui_warnings}✅ Query executed successfully\n\nRows returned: {len(rows)}\nExecution Time: {latency} sec\n\n{explain_sql(final_sql)}{perf_block}"
319
-
320
- ops = sql_ops(final_sql)
321
- for op in ops:
322
- if op in _OP_STATS: _OP_STATS[op]["ok"] += 1
323
- _SUCCESS_LOG.append({"t": time.time(), "db_id": str(db_id), "question": question, "sql": final_sql, "ops": ops})
324
 
325
  limit_match = re.search(r'LIMIT\s+(\d+)', final_sql, re.IGNORECASE)
326
- if limit_match and len(rows) < int(limit_match.group(1)):
327
- explanation += f"\n\nℹ️ Query allowed up to {int(limit_match.group(1))} rows but only {len(rows)} matched."
328
-
329
- return final_sql, pd.DataFrame(rows, columns=safe_cols), explanation
330
-
331
- def task1_benchmark(n_rollouts: int, max_workers: int) -> Iterator[tuple[str, str]]:
332
- project_root = str(PROJECT_ROOT)
333
- env = os.environ.copy()
334
- env["PYTHONPATH"] = project_root + (os.pathsep + env["PYTHONPATH"] if env.get("PYTHONPATH") else "")
335
- env.setdefault("MPLBACKEND", "Agg")
336
- env.setdefault("MPLCONFIGDIR", "/tmp/mplconfig")
337
- try: os.makedirs(env["MPLCONFIGDIR"], exist_ok=True)
338
- except Exception: pass
339
-
340
- cmd = [sys.executable, "-u", "scripts/benchmark_parallel_reward.py", "--n", str(int(n_rollouts)), "--max-workers", str(int(max_workers)), "--skip-profile"]
341
- proc = subprocess.Popen(cmd, cwd=project_root, env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1)
342
- last_yield = time.perf_counter()
343
- lines: list[str] = []
344
- yield "Running Task 1 benchmark...\n", "<i>Running...</i>"
345
-
346
- assert proc.stdout is not None
347
- for line in proc.stdout:
348
- lines.append(line)
349
- now = time.perf_counter()
350
- if now - last_yield >= 0.5:
351
- last_yield = now
352
- yield "".join(lines[-200:]).strip(), "<i>Running...</i>"
353
-
354
- proc.wait()
355
- out = "".join(lines).strip()
356
-
357
- plot_path = str(PROJECT_ROOT / "results" / "task1_plot.png")
358
- if os.path.exists(plot_path):
359
- try:
360
- b64 = base64.b64encode(Path(plot_path).read_bytes()).decode("ascii")
361
- yield out, f"<img src='data:image/png;base64,{b64}' style='max-width: 100%; border: 1px solid #e2e8f0; border-radius: 8px;' />"
362
- return
363
- except Exception:
364
- yield out, f"<pre>{plot_path}</pre>"
365
- return
366
-
367
- yield out, "<i>No plot generated</i>"
368
-
369
- def task2_dashboard_structured():
370
- if not _QUERY_LOG:
371
- empty_counts = pd.DataFrame(columns=["error_type", "count", "hint"])
372
- empty_recent = pd.DataFrame(columns=["time", "db_id", "error_type", "question", "error_msg"])
373
- return empty_counts, empty_recent, gr.update(choices=[], value=None)
374
-
375
- counts = {}
376
- for r in _QUERY_LOG[-1000:]:
377
- k = r.get("error_type") or "other"
378
- counts[k] = counts.get(k, 0) + 1
379
- rows = [{"error_type": k, "count": int(v), "hint": get_hint(k)} for k, v in sorted(counts.items(), key=lambda x: (-x[1], x[0]))]
380
- counts_df = pd.DataFrame(rows)
381
-
382
- recent = []
383
- for r in _QUERY_LOG[-100:]:
384
- ts = r.get("t")
385
- try: ts_s = time.strftime("%H:%M:%S", time.localtime(float(ts))) if ts else ""
386
- except Exception: ts_s = ""
387
- recent.append({"time": ts_s, "db_id": r.get("db_id", ""), "error_type": r.get("error_type", ""), "question": r.get("question", ""), "error_msg": r.get("error_msg", "")})
388
- recent_df = pd.DataFrame(recent)
389
-
390
- choices = [str(x["error_type"]) for x in rows]
391
- default = choices[0] if choices else None
392
- return counts_df, recent_df, gr.update(choices=choices, value=default)
393
-
394
- def task2_error_examples(error_type: str) -> str:
395
- if not error_type: return ""
396
- hint = get_hint(error_type)
397
- matches = [r for r in reversed(_QUERY_LOG) if (r.get("error_type") or "") == str(error_type)][:3]
398
- if not matches: return f"Error type: {error_type}\nHint: {hint}\n\nNo examples yet."
399
- out = [f"Error type: {error_type}", f"Hint: {hint}", ""]
400
- for i, r in enumerate(matches, 1):
401
- out.extend([f"Example {i}", f"DB: {r.get('db_id','')}", f"Q: {r.get('question','')}", f"SQL: {r.get('sql','')}", f"Msg: {r.get('error_msg','')}", ""])
402
- return "\n".join(out).strip()
403
-
404
- def _plot_op_stats_html() -> str:
405
- try:
406
- import matplotlib.pyplot as plt
407
- labels = list(_OP_STATS.keys())
408
- oks = [int(_OP_STATS[k]["ok"]) for k in labels]
409
- fails = [int(_OP_STATS[k]["fail"]) for k in labels]
410
-
411
- fig, ax = plt.subplots(figsize=(9, 3.5))
412
- x = list(range(len(labels)))
413
- ax.bar(x, oks, label="ok", color="#16a34a")
414
- ax.bar(x, fails, bottom=oks, label="fail", color="#dc2626")
415
- ax.set_xticks(x)
416
- ax.set_xticklabels(labels, rotation=30, ha="right")
417
- ax.set_title("Success/Failure by SQL operation")
418
- ax.legend()
419
- fig.tight_layout()
420
-
421
- buf = io.BytesIO()
422
- fig.savefig(buf, format="png", dpi=160)
423
- plt.close(fig)
424
- b64 = base64.b64encode(buf.getvalue()).decode("ascii")
425
- return f"<img src='data:image/png;base64,{b64}' style='max-width: 100%; border: 1px solid #e2e8f0; border-radius: 8px;' />"
426
- except Exception as e: return f"<pre>Plot error: {e}</pre>"
427
-
428
- def task2_ops_table():
429
- rows = []
430
- for op, d in _OP_STATS.items():
431
- ok = int(d.get("ok", 0))
432
- fail = int(d.get("fail", 0))
433
- total = ok + fail
434
- rows.append({"op": op, "ok": ok, "fail": fail, "total": total, "success_rate": (ok / total) if total else 0.0})
435
- return pd.DataFrame(rows), _plot_op_stats_html()
436
 
437
  def toggle_input_method(method, current_sample):
438
  if method == "💡 Pick a Sample":
 
439
  db = next((db for q, db in SAMPLES if q == current_sample), "chinook_1")
440
- return (gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(value=db, interactive=False))
441
- return (gr.update(visible=False), gr.update(visible=True), gr.update(visible=True), gr.update(interactive=True))
 
 
 
 
 
 
 
 
 
 
 
 
442
 
443
  def load_sample(selected_question):
444
- if not selected_question: return gr.update()
445
- return gr.update(value=next((db for q, db in SAMPLES if q == selected_question), "chinook_1"))
 
 
 
446
 
447
  def clear_inputs():
448
- return (gr.update(value="💡 Pick a Sample"), gr.update(value=SAMPLE_QUESTIONS[0], visible=True), gr.update(visible=False), gr.update(value="", visible=False), gr.update(value="chinook_1", interactive=False), "", pd.DataFrame(), "")
 
 
 
 
 
 
 
449
 
450
  def update_schema(db_id):
451
- if not db_id or schema_encoder is None: return ""
 
452
  try:
453
- raw_schema = schema_encoder.structured_schema(db_id)
454
  html_output = "<div style='max-height: 250px; overflow-y: auto; background: #f8fafc; padding: 12px; border-radius: 8px; border: 1px solid #e2e8f0; font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace; font-size: 0.9em; line-height: 1.6;'>"
455
  for line in raw_schema.strip().split('\n'):
456
  line = line.strip()
457
  if not line: continue
458
  match = re.search(r'^([a-zA-Z0-9_]+)\s*\((.*)\)', line)
459
- if match: html_output += f"<div style='margin-bottom: 8px;'><strong style='color: #0f172a; font-size: 1.05em; font-weight: 800;'>{match.group(1).upper()}</strong> <span style='color: #64748b;'>( {match.group(2).lower()} )</span></div>"
460
- else: html_output += f"<div style='color: #475569;'>{line}</div>"
 
 
 
 
461
  html_output += "</div>"
462
  return html_output
463
- except Exception as e: return f"<div style='color: red;'>Error loading schema: {str(e)}</div>"
 
 
464
 
465
  # =========================
466
  # UI LAYOUT
467
  # =========================
468
- with gr.Blocks(title="Text-to-SQL RLHF") as demo:
469
- gr.HTML("""
 
 
470
  <div style="text-align: center; background-color: #e0e7ff; padding: 20px; border-radius: 10px; margin-bottom: 20px; border: 1px solid #c7d2fe;">
471
  <h1 style="color: #3730a3; margin-top: 0; margin-bottom: 10px; font-size: 2.2em;"> Text-to-SQL using RLHF + Execution Reward</h1>
472
  <p style="color: #4f46e5; font-size: 1.1em; margin: 0;">Convert Natural Language to SQL, strictly validated and safely executed on local SQLite databases.</p>
473
  </div>
474
- """)
 
475
 
476
- DBS = sorted(["flight_1", "student_assessment", "store_1", "bike_1", "book_2", "chinook_1", "academic", "aircraft", "car_1", "cinema", "club_1", "csu_1", "college_1", "college_2", "company_1", "company_employee", "customer_complaints", "department_store", "employee_hire_evaluation", "museum_visit", "products_for_hire", "restaurant_1", "school_finance", "shop_membership", "small_bank_1", "student_1", "tvshow", "voter_1", "world_1"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
477
 
478
- with gr.Tabs():
479
- with gr.Tab("Inference"):
480
  with gr.Row():
481
- with gr.Column(scale=1):
482
- gr.Markdown("### 1. Configuration & Input")
483
- input_method = gr.Radio(choices=["💡 Pick a Sample", "✍️ Type my own"], value="💡 Pick a Sample", label="How do you want to ask?")
484
- sample_dropdown = gr.Dropdown(choices=SAMPLE_QUESTIONS, value=SAMPLE_QUESTIONS[0], label="Select a Sample Question", info="The database will be selected automatically.", visible=True)
485
- type_own_warning = gr.Markdown("**⚠️ Please select a Database first, then type your custom question below:**", visible=False)
486
- gr.Markdown("---")
487
- db_id = gr.Dropdown(choices=DBS, value="chinook_1", label="Select Database", interactive=False)
488
- custom_question = gr.Textbox(label="Ask your Custom Question", placeholder="Type your own question here...", lines=3, visible=False)
489
-
490
- gr.Markdown("#### 📋 Database Structure")
491
- gr.HTML("<p style='font-size: 0.85em; color: #64748b; margin-top: -10px; margin-bottom: 5px;'>Use these exact names! Table names are <strong>Dark</strong>, Column names are <span style='color: #94a3b8;'>Light</span>.</p>")
492
- schema_display = gr.HTML(value=update_schema("chinook_1"))
493
-
494
- with gr.Row():
495
- clear_btn = gr.Button("🗑️ Clear", variant="secondary")
496
- run_btn = gr.Button(" Generate & Run SQL", variant="primary")
497
-
498
- with gr.Column(scale=2):
499
- gr.Markdown("### 2. Execution Results")
500
- final_sql = gr.Code(language="sql", label="Final Executed SQL")
501
- result_table = gr.Dataframe(label="Query Result Table", interactive=False, wrap=True)
502
- explanation = gr.Textbox(label="AI Explanation + Execution Details", lines=8)
503
-
504
- with gr.Tab("Diagnostics"):
505
- gr.Markdown("## Diagnostics & Telemetry")
506
-
507
- with gr.Accordion("Task 1: Parallel Reward Benchmark", open=False):
508
- gr.Markdown("*(Simulates the heavy RLHF training workload by running hundreds of complex SQL queries concurrently to test SQLite multi-threading performance.)*")
509
- t1_n = gr.Number(value=20, precision=0, label="Rollouts (n)")
510
- t1_workers = gr.Number(value=10, precision=0, label="Max workers")
511
- t1_run = gr.Button("Run Task 1 benchmark")
512
- t1_out = gr.Textbox(label="Output", lines=12)
513
- t1_plot = gr.HTML(label="Plot (if generated)")
514
- t1_run.click(fn=task1_benchmark, inputs=[t1_n, t1_workers], outputs=[t1_out, t1_plot])
515
-
516
- with gr.Accordion("Task 2: Error Dashboard", open=True):
517
- gr.Markdown("*(Live telemetry tracking the most common SQL failures. Populates automatically when queries fail in the Inference tab.)*")
518
- t2_refresh = gr.Button("Refresh dashboard")
519
- t2_counts = gr.Dataframe(label="Error counts", interactive=False, wrap=True)
520
- t2_recent = gr.Dataframe(label="Recent errors", interactive=False, wrap=True)
521
- t2_type = gr.Dropdown(choices=[], value=None, label="Select error type")
522
- t2_examples = gr.Textbox(label="Examples + hint", lines=10)
523
-
524
- t2_refresh.click(fn=task2_dashboard_structured, inputs=[], outputs=[t2_counts, t2_recent, t2_type])
525
- t2_type.change(fn=task2_error_examples, inputs=[t2_type], outputs=[t2_examples])
526
-
527
- with gr.Accordion("Task 2: Clause Telemetry", open=False):
528
- gr.Markdown("*(Analyzes which specific SQL clauses—SELECT, WHERE, JOIN, etc.—are most prone to errors during natural language generation.)*")
529
- t2_ops_refresh = gr.Button("Refresh SQL-op stats")
530
- t2_ops_tbl = gr.Dataframe(label="Success/failure by op", interactive=False, wrap=True)
531
- t2_ops_plot = gr.HTML(label="Op plot")
532
- t2_ops_refresh.click(fn=task2_ops_table, inputs=[], outputs=[t2_ops_tbl, t2_ops_plot])
533
-
534
- # EVENT BINDING: The .then() forces the diagnostic tab to update live in the background!
535
- input_method.change(fn=toggle_input_method, inputs=[input_method, sample_dropdown], outputs=[sample_dropdown, type_own_warning, custom_question, db_id])
536
  sample_dropdown.change(fn=load_sample, inputs=[sample_dropdown], outputs=[db_id])
 
537
  db_id.change(fn=update_schema, inputs=[db_id], outputs=[schema_display])
538
 
539
  run_btn.click(
540
- fn=run_query,
541
- inputs=[input_method, sample_dropdown, custom_question, db_id],
542
  outputs=[final_sql, result_table, explanation]
543
- ).then(
544
- fn=task2_dashboard_structured, inputs=[], outputs=[t2_counts, t2_recent, t2_type]
545
- ).then(
546
- fn=task2_ops_table, inputs=[], outputs=[t2_ops_tbl, t2_ops_plot]
547
  )
548
 
549
- clear_btn.click(fn=clear_inputs, inputs=[], outputs=[input_method, sample_dropdown, type_own_warning, custom_question, db_id, final_sql, result_table, explanation])
 
 
 
 
 
550
 
551
  if __name__ == "__main__":
552
- server_name = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0")
553
- base_port = int(os.environ.get("GRADIO_SERVER_PORT", 7860))
554
- max_retries = 10
555
-
556
- for port in range(base_port, base_port + max_retries):
557
- try:
558
- print(f"Attempting to start Gradio UI on {server_name}:{port}...", flush=True)
559
- demo.launch(server_name=server_name, server_port=port)
560
- break # If successful, exit the loop
561
- except OSError as e:
562
- if "Cannot find empty port" in str(e) or "Address already in use" in str(e):
563
- print(f"⚠️ Port {port} is in use, trying next port...")
564
- continue
565
- else:
566
- # If it's a different OSError, raise it normally
567
- raise e
568
- else:
569
- print(f"❌ Could not find an open port between {base_port} and {base_port + max_retries - 1}.")
 
1
  """
2
+ GRADIO DEMO UI
3
  NL → SQL → Result Table
4
  """
5
 
 
7
  import pandas as pd
8
  import re
9
  import time
10
+ from src.text2sql_engine import get_engine
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
+ engine = get_engine()
13
+
14
+ # =========================
15
+ # SAMPLE QUESTIONS DATA
16
+ # =========================
17
  SAMPLES = [
18
+ ("Show 10 distinct employee first names.", "chinook_1"),
19
+ ("Which artist has the most albums?", "chinook_1"),
20
+ ("List all the tracks that belong to the 'Rock' genre.", "chinook_1"),
21
+ ("What are the names of all the cities?", "flight_1"),
22
+ ("Find the flight number and cost of the cheapest flight.", "flight_1"),
23
+ ("List the airlines that fly out of New York.", "flight_1"),
24
+ ("Which campus was opened between 1935 and 1939?", "csu_1"),
25
+ ("Count the number of students in each department.", "college_2"),
26
+ ("List the names of all clubs.", "club_1"),
27
+ ("How many members does each club have?", "club_1"),
28
+ ("Show the names of all cinemas.", "cinema"),
29
+ ("Which cinema has the most screens?", "cinema")
30
  ]
31
+
32
  SAMPLE_QUESTIONS = [q[0] for q in SAMPLES]
33
 
34
+ # =========================
35
+ # SQL EXPLAINER
36
+ # =========================
37
  def explain_sql(sql):
 
38
  explanation = "This SQL query retrieves information from the database."
39
  sql_lower = sql.lower()
40
+
41
+ if "join" in sql_lower:
42
+ explanation += "\n• It combines data from multiple tables using JOIN."
43
+ if "where" in sql_lower:
44
+ explanation += "\n• It filters rows using a WHERE condition."
45
+ if "group by" in sql_lower:
46
+ explanation += "\n• It groups results using GROUP BY."
47
+ if "order by" in sql_lower:
48
+ explanation += "\n• It sorts the results using ORDER BY."
49
+ if "limit" in sql_lower:
50
+ explanation += "\n• It limits the number of returned rows."
51
+
52
  return explanation
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
+ # =========================
56
+ # CORE FUNCTIONS
57
+ # =========================
58
  def run_query(method, sample_q, custom_q, db_id):
 
59
 
60
+ # 1. Safely determine the question
61
+ question = sample_q if method == "💡 Pick a Sample" else custom_q
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
+ # 2. Validate inputs before hitting the engine
64
+ if not question or str(question).strip() == "":
65
+ return "", pd.DataFrame(), "⚠️ Please enter a question."
66
+
67
+ if not db_id or str(db_id).strip() == "":
68
+ return "", pd.DataFrame(), "⚠️ Please select a database."
 
 
 
 
 
69
 
70
  start_time = time.time()
 
 
71
 
72
+ # 3. GIANT SAFETY NET to prevent infinite loading spinners
73
  try:
74
+ result = engine.ask(str(question), str(db_id))
 
 
 
75
  except Exception as e:
76
+ return "", pd.DataFrame(), f"❌ CRITICAL BACKEND CRASH:\n{str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
+ final_sql = result.get("sql", "")
79
+ error_msg = result.get("error", None)
80
+ rows = result.get("rows", [])
81
+ cols = result.get("columns", [])
82
 
83
+ end_time = time.time()
84
+ latency = round(end_time - start_time, 3)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
+ # 4. Handle SQL generation/execution errors
87
+ if error_msg:
88
+ return final_sql, pd.DataFrame(), f"❌ SQL Error:\n{error_msg}"
89
+
90
+ # 5. Handle Zero Rows gracefully
91
+ if not rows:
92
+ df = pd.DataFrame(columns=cols if cols else [])
93
+ explanation = f"✅ Query executed successfully\n\nRows returned: 0\nExecution Time: {latency} sec\n\n{explain_sql(final_sql)}"
94
+ return final_sql, df, explanation
95
+
96
+ # 6. Handle successful execution
97
+ df = pd.DataFrame(rows, columns=cols)
98
+ actual_rows = len(rows)
99
+
100
+ explanation = f"✅ Query executed successfully\n\nRows returned: {actual_rows}\nExecution Time: {latency} sec\n\n{explain_sql(final_sql)}"
 
 
 
 
 
101
 
102
  limit_match = re.search(r'LIMIT\s+(\d+)', final_sql, re.IGNORECASE)
103
+ if limit_match:
104
+ requested_limit = int(limit_match.group(1))
105
+ if actual_rows < requested_limit:
106
+ explanation += f"\n\nℹ️ Query allowed up to {requested_limit} rows but only {actual_rows} matched."
107
+
108
+ return final_sql, df, explanation
109
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
  def toggle_input_method(method, current_sample):
112
  if method == "💡 Pick a Sample":
113
+ # Find the DB matching the current sample (fallback to 'chinook_1')
114
  db = next((db for q, db in SAMPLES if q == current_sample), "chinook_1")
115
+ return (
116
+ gr.update(visible=True), # Show sample_dropdown
117
+ gr.update(visible=False), # Hide type_own_warning
118
+ gr.update(visible=False), # Hide custom_question
119
+ gr.update(value=db, interactive=False) # Lock and reset db_id
120
+ )
121
+ else:
122
+ return (
123
+ gr.update(visible=False), # Hide sample_dropdown
124
+ gr.update(visible=True), # Show type_own_warning
125
+ gr.update(visible=True), # Show custom_question
126
+ gr.update(interactive=True) # Unlock db_id
127
+ )
128
+
129
 
130
  def load_sample(selected_question):
131
+ if not selected_question:
132
+ return gr.update()
133
+ db = next((db for q, db in SAMPLES if q == selected_question), "chinook_1")
134
+ return gr.update(value=db)
135
+
136
 
137
  def clear_inputs():
138
+ return (
139
+ gr.update(value="💡 Pick a Sample"),
140
+ gr.update(value=SAMPLE_QUESTIONS[0], visible=True), # sample_dropdown
141
+ gr.update(visible=False), # type_own_warning
142
+ gr.update(value="", visible=False), # custom_question
143
+ gr.update(value="chinook_1", interactive=False), # db_id
144
+ "", pd.DataFrame(), "" # Outputs (SQL, Table, Explanation)
145
+ )
146
 
147
  def update_schema(db_id):
148
+ if not db_id:
149
+ return ""
150
  try:
151
+ raw_schema = engine.get_schema(db_id)
152
  html_output = "<div style='max-height: 250px; overflow-y: auto; background: #f8fafc; padding: 12px; border-radius: 8px; border: 1px solid #e2e8f0; font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace; font-size: 0.9em; line-height: 1.6;'>"
153
  for line in raw_schema.strip().split('\n'):
154
  line = line.strip()
155
  if not line: continue
156
  match = re.search(r'^([a-zA-Z0-9_]+)\s*\((.*)\)', line)
157
+ if match:
158
+ table_name = match.group(1).upper()
159
+ columns = match.group(2).lower()
160
+ html_output += f"<div style='margin-bottom: 8px;'><strong style='color: #0f172a; font-size: 1.05em; font-weight: 800;'>{table_name}</strong> <span style='color: #64748b;'>( {columns} )</span></div>"
161
+ else:
162
+ html_output += f"<div style='color: #475569;'>{line}</div>"
163
  html_output += "</div>"
164
  return html_output
165
+ except Exception as e:
166
+ return f"<div style='color: red;'>Error loading schema: {str(e)}</div>"
167
+
168
 
169
  # =========================
170
  # UI LAYOUT
171
  # =========================
172
+ with gr.Blocks(theme=gr.themes.Soft(), title="Text-to-SQL RLHF") as demo:
173
+
174
+ gr.HTML(
175
+ """
176
  <div style="text-align: center; background-color: #e0e7ff; padding: 20px; border-radius: 10px; margin-bottom: 20px; border: 1px solid #c7d2fe;">
177
  <h1 style="color: #3730a3; margin-top: 0; margin-bottom: 10px; font-size: 2.2em;"> Text-to-SQL using RLHF + Execution Reward</h1>
178
  <p style="color: #4f46e5; font-size: 1.1em; margin: 0;">Convert Natural Language to SQL, strictly validated and safely executed on local SQLite databases.</p>
179
  </div>
180
+ """
181
+ )
182
 
183
+ DBS = sorted([
184
+ "flight_1", "student_assessment", "store_1", "bike_1", "book_2", "chinook_1",
185
+ "academic", "aircraft", "car_1", "cinema", "club_1", "csu_1",
186
+ "college_1", "college_2", "company_1", "company_employee",
187
+ "customer_complaints", "department_store", "employee_hire_evaluation",
188
+ "museum_visit", "products_for_hire", "restaurant_1",
189
+ "school_finance", "shop_membership", "small_bank_1",
190
+ "soccer_1", "student_1", "tvshow", "voter_1", "world_1"
191
+ ])
192
+
193
+ with gr.Row():
194
+ with gr.Column(scale=1):
195
+ gr.Markdown("### 1. Configuration & Input")
196
+
197
+ input_method = gr.Radio(
198
+ choices=["💡 Pick a Sample", "✍️ Type my own"],
199
+ value="💡 Pick a Sample",
200
+ label="How do you want to ask?"
201
+ )
202
+
203
+ # --- SAMPLE SECTION ---
204
+ sample_dropdown = gr.Dropdown(
205
+ choices=SAMPLE_QUESTIONS,
206
+ value=SAMPLE_QUESTIONS[0],
207
+ label="Select a Sample Question",
208
+ info="The database will be selected automatically.",
209
+ visible=True
210
+ )
211
+
212
+ # --- CUSTOM TYPE WARNING ---
213
+ type_own_warning = gr.Markdown(
214
+ "**⚠️ Please select a Database first, then type your custom question below:**",
215
+ visible=False
216
+ )
217
+
218
+ gr.Markdown("---")
219
+
220
+ # --- DATABASE SELECTION (Moved Up) ---
221
+ db_id = gr.Dropdown(
222
+ choices=DBS,
223
+ value="chinook_1",
224
+ label="Select Database",
225
+ interactive=False
226
+ )
227
+
228
+ # --- CUSTOM QUESTION BOX ---
229
+ custom_question = gr.Textbox(
230
+ label="Ask your Custom Question",
231
+ placeholder="Type your own question here...",
232
+ lines=3,
233
+ visible=False
234
+ )
235
+
236
+ gr.Markdown("#### 📋 Database Structure")
237
+ gr.HTML("<p style='font-size: 0.85em; color: #64748b; margin-top: -10px; margin-bottom: 5px;'>Use these exact names! Table names are <strong>Dark</strong>, Column names are <span style='color: #94a3b8;'>Light</span>.</p>")
238
+ schema_display = gr.HTML(value=update_schema("chinook_1"))
239
 
 
 
240
  with gr.Row():
241
+ clear_btn = gr.Button("🗑️ Clear", variant="secondary")
242
+ run_btn = gr.Button(" Generate & Run SQL", variant="primary")
243
+
244
+ with gr.Column(scale=2):
245
+ gr.Markdown("### 2. Execution Results")
246
+ final_sql = gr.Code(language="sql", label="Final Executed SQL")
247
+ result_table = gr.Dataframe(label="Query Result Table", interactive=False, wrap=True)
248
+ explanation = gr.Textbox(label="AI Explanation + Execution Details", lines=8)
249
+
250
+ # =========================
251
+ # EVENT LISTENERS
252
+ # =========================
253
+
254
+ # Updated to handle the new Markdown warning toggle
255
+ input_method.change(
256
+ fn=toggle_input_method,
257
+ inputs=[input_method, sample_dropdown],
258
+ outputs=[sample_dropdown, type_own_warning, custom_question, db_id]
259
+ )
260
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
  sample_dropdown.change(fn=load_sample, inputs=[sample_dropdown], outputs=[db_id])
262
+
263
  db_id.change(fn=update_schema, inputs=[db_id], outputs=[schema_display])
264
 
265
  run_btn.click(
266
+ fn=run_query,
267
+ inputs=[input_method, sample_dropdown, custom_question, db_id],
268
  outputs=[final_sql, result_table, explanation]
 
 
 
 
269
  )
270
 
271
+ clear_btn.click(
272
+ fn=clear_inputs,
273
+ inputs=[],
274
+ # Output list matches the updated clear_inputs() return values
275
+ outputs=[input_method, sample_dropdown, type_own_warning, custom_question, db_id, final_sql, result_table, explanation]
276
+ )
277
 
278
  if __name__ == "__main__":
279
+ demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
best_rlhf_model/README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+ ### Framework versions
7
+
8
+
9
+ - PEFT 0.4.0
best_rlhf_model/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "Salesforce/codet5-base",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 32,
11
+ "lora_dropout": 0.05,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 16,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q",
18
+ "v"
19
+ ],
20
+ "task_type": "SEQ_2_SEQ_LM"
21
+ }
{int8_dynamic/tokenizer → best_rlhf_model}/merges.txt RENAMED
File without changes
{int8_dynamic/tokenizer → best_rlhf_model}/special_tokens_map.json RENAMED
File without changes
{int8_dynamic/tokenizer → best_rlhf_model}/tokenizer_config.json RENAMED
@@ -954,6 +954,5 @@
954
  "pad_token": "<pad>",
955
  "sep_token": "</s>",
956
  "tokenizer_class": "RobertaTokenizer",
957
- "trim_offsets": true,
958
  "unk_token": "<unk>"
959
  }
 
954
  "pad_token": "<pad>",
955
  "sep_token": "</s>",
956
  "tokenizer_class": "RobertaTokenizer",
 
957
  "unk_token": "<unk>"
958
  }
best_rlhf_model/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
int8_dynamic/meta.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "mode": "int8_dynamic",
3
- "base_model": "Salesforce/codet5-base",
4
- "adapter_path": "checkpoints/best_rlhf_model_2",
5
- "created_at_s": 1774418718.320342,
6
- "estimated_model_bytes": 98804736
7
- }
 
 
 
 
 
 
 
 
int8_dynamic/model.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f398e044cd49fc84553b746d26ad79beb1dd565d90cf8f6f5e50d27f48d08228
3
- size 322871519
 
 
 
 
int8_dynamic/tokenizer/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
int8_dynamic/tokenizer/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -1,10 +1,8 @@
1
- gradio==5.8.0
2
  pandas
3
  sqlparse
4
  transformers
5
- torch --index-url https://download.pytorch.org/whl/cpu
6
  peft
7
  trl
8
- sentencepiece
9
- matplotlib
10
- huggingface_hub
 
1
+ gradio
2
  pandas
3
  sqlparse
4
  transformers
5
+ torch
6
  peft
7
  trl
8
+ sentencepiece
 
 
scripts/benchmark_parallel_reward.py DELETED
@@ -1,202 +0,0 @@
1
- import os
2
- # Ensure headless-safe matplotlib + writable cache when called from Gradio/subprocess.
3
- os.environ.setdefault("MPLBACKEND", "Agg")
4
- os.environ.setdefault("MPLCONFIGDIR", os.environ.get("MPLCONFIGDIR", "/tmp/mplconfig"))
5
- import time
6
- import json
7
- import argparse
8
- import matplotlib.pyplot as plt
9
- import numpy as np
10
- import sys
11
- from pathlib import Path
12
-
13
- # ==========================================
14
- # RELATIVE PATH RESOLUTION
15
- # ==========================================
16
- PROJECT_ROOT = Path(__file__).resolve().parent.parent
17
- sys.path.append(str(PROJECT_ROOT))
18
-
19
- # Dynamically resolve where the databases are kept
20
- if (PROJECT_ROOT / "data" / "database").exists() and list((PROJECT_ROOT / "data" / "database").rglob("*.sqlite")):
21
- DB_ROOT = PROJECT_ROOT / "data" / "database"
22
- else:
23
- DB_ROOT = PROJECT_ROOT / "final_databases"
24
-
25
- from src.execution_reward import (
26
- execution_reward_batch_sequential,
27
- execution_reward_batch_parallel,
28
- execution_reward_batch_parallel_by_db,
29
- execution_reward_timed,
30
- set_use_cache,
31
- set_use_schema_validation,
32
- clear_result_cache
33
- )
34
-
35
- def generate_mock_rollouts(num_rollouts: int = 100, heavy_n: int = 500_000):
36
- """Generates heavy queries across multiple databases to properly test true concurrency."""
37
- print(f"\nGenerating {num_rollouts} heavy rollouts to simulate RLHF query workload...", flush=True)
38
-
39
- # Smart search for real databases
40
- real_dbs = [str(p) for p in DB_ROOT.rglob("*.sqlite")]
41
-
42
- if real_dbs:
43
- print(f"Found {len(real_dbs)} real SQLite databases in {DB_ROOT}. Distributing workload...", flush=True)
44
- else:
45
- print(f"❌ CRITICAL ERROR: No real databases found in {DB_ROOT}. Cannot run benchmark.", flush=True)
46
- sys.exit(1)
47
-
48
- rollouts = []
49
- for i in range(num_rollouts):
50
- db_path = real_dbs[i % len(real_dbs)]
51
-
52
- # Heavy deterministic CPU-ish query (may be cut off by the 2s timeout depending on machine).
53
- heavy_sql = f"""
54
- WITH RECURSIVE cnt(x) AS (
55
- SELECT 1
56
- UNION ALL
57
- SELECT x+1 FROM cnt WHERE x < {heavy_n + (i % 10_000)}
58
- )
59
- SELECT sum(x) FROM cnt;
60
- """
61
- clean_sql = heavy_sql.replace("\n", " ").strip()
62
- rollouts.append((clean_sql, db_path, clean_sql))
63
- if num_rollouts >= 500 and (i + 1) % 250 == 0:
64
- print(f" generated {i + 1}/{num_rollouts}...", flush=True)
65
-
66
- return rollouts
67
-
68
- def profile_bottlenecks(rollouts, sample_size: int = 20, print_every: int = 5):
69
- """Profiles CPU usage to identify time spent in parsing, planning, and execution."""
70
- print("\n" + "="*65)
71
- print(" 🔍 CPU PROFILING: IDENTIFYING BOTTLENECKS (100 Rollouts)")
72
- print("="*65)
73
-
74
- clear_result_cache()
75
- set_use_cache(False) # Disable cache to force real work
76
- set_use_schema_validation(False) # CTE-heavy benchmark queries may fail schema validation
77
-
78
- total_parse = 0.0
79
- total_plan = 0.0
80
- total_exec = 0.0
81
-
82
- # Profile a small subset by default so the script prints quickly.
83
- sample_size = min(int(sample_size), len(rollouts))
84
- sample_rollouts = rollouts[:sample_size]
85
-
86
- for i, (pred, db, gold) in enumerate(sample_rollouts, 1):
87
- _, timings = execution_reward_timed(pred, db, gold, measure_plan=True)
88
- total_parse += timings['parse_s']
89
- total_plan += timings['plan_s']
90
- total_exec += timings['exec_s']
91
- if print_every and (i % int(print_every) == 0 or i == sample_size):
92
- print(f" profiled {i}/{sample_size}...", flush=True)
93
-
94
- total_time = total_parse + total_plan + total_exec
95
- if total_time == 0: total_time = 0.0001 # Prevent div by zero
96
-
97
- print(f"{'Phase':<15} | {'Avg Time (ms)':<15} | {'% of Total CPU':<15}")
98
- print("-" * 65)
99
- print(f"{'Regex Parsing':<15} | {(total_parse/sample_size)*1000:<15.2f} | {(total_parse/total_time)*100:<14.1f}%")
100
- print(f"{'Query Planning':<15} | {(total_plan/sample_size)*1000:<15.2f} | {(total_plan/total_time)*100:<14.1f}%")
101
- print(f"{'DB Execution':<15} | {(total_exec/sample_size)*1000:<15.2f} | {(total_exec/total_time)*100:<14.1f}%")
102
- print("="*65 + "\n")
103
-
104
- def run_benchmark_for_setting(rollouts, use_cache: bool, max_workers: int):
105
- set_use_cache(use_cache)
106
- set_use_schema_validation(False) # benchmark focuses on execution speed
107
-
108
- # Sequential
109
- clear_result_cache()
110
- start_time = time.perf_counter()
111
- execution_reward_batch_sequential(rollouts)
112
- sequential_s = time.perf_counter() - start_time
113
-
114
- # Parallel
115
- clear_result_cache()
116
- start_time = time.perf_counter()
117
- # 1 thread per DB (recommended)
118
- execution_reward_batch_parallel_by_db(rollouts, max_workers=max_workers)
119
- parallel_s = time.perf_counter() - start_time
120
-
121
- speedup = sequential_s / parallel_s if parallel_s > 0 else 0
122
-
123
- return {
124
- "sequential_s": sequential_s,
125
- "parallel_s": parallel_s,
126
- "speedup": speedup
127
- }
128
-
129
- def print_comparison_table(results):
130
- print("="*65)
131
- print(f"{'Setting':<16} | {'Sequential (s)':<14} | {'Parallel (s)':<14} | {'Speedup':<10}")
132
- print("-" * 65)
133
- for setting, key in [("With Cache", "with_cache"), ("Without Cache", "without_cache")]:
134
- seq = results[key]['sequential_s']
135
- par = results[key]['parallel_s']
136
- spd = results[key]['speedup']
137
- print(f"{setting:<16} | {seq:<14.4f} | {par:<14.4f} | {spd:<9.2f}x")
138
- print("="*65 + "\n")
139
-
140
- def plot_results(results, output_path: str):
141
- labels = ['With Cache', 'Without Cache']
142
- seq_times = [results['with_cache']['sequential_s'], results['without_cache']['sequential_s']]
143
- par_times = [results['with_cache']['parallel_s'], results['without_cache']['parallel_s']]
144
-
145
- x = np.arange(len(labels))
146
- width = 0.35
147
-
148
- fig, ax = plt.subplots(figsize=(8, 6))
149
- ax.bar(x - width/2, seq_times, width, label='Sequential', color='#4C72B0')
150
- ax.bar(x + width/2, par_times, width, label='Parallel', color='#DD8452')
151
-
152
- ax.set_ylabel('Execution Time (seconds)')
153
- ax.set_title('Text2SQL Reward Execution: Sequential vs Parallel')
154
- ax.set_xticks(x)
155
- ax.set_xticklabels(labels)
156
- ax.legend()
157
-
158
- for container in ax.containers:
159
- ax.bar_label(container, fmt='%.2f', padding=3)
160
-
161
- fig.tight_layout()
162
- plt.savefig(output_path, dpi=300)
163
- plt.close()
164
-
165
- def main():
166
- parser = argparse.ArgumentParser(description="Benchmark SQL Execution Reward")
167
- parser.add_argument("--n", type=int, default=1000, help="Number of rollouts to benchmark")
168
- parser.add_argument("--max-workers", type=int, default=20, help="Max workers for parallel execution")
169
- parser.add_argument("--heavy-n", type=int, default=200_000, help="Recursive CTE upper bound (controls heaviness)")
170
- parser.add_argument("--skip-profile", action="store_true", help="Skip the CPU profiling section for faster startup")
171
- parser.add_argument("--profile-n", type=int, default=20, help="Number of rollouts to use for CPU profiling")
172
- args = parser.parse_args()
173
-
174
- os.makedirs(str(PROJECT_ROOT / "results"), exist_ok=True)
175
-
176
- rollouts = generate_mock_rollouts(args.n, heavy_n=args.heavy_n)
177
-
178
- if not args.skip_profile:
179
- profile_bottlenecks(rollouts, sample_size=args.profile_n)
180
-
181
- print("Starting Main Scalability Benchmarks...")
182
-
183
- print("Running Experiment A: Cache ENABLED...")
184
- results_with_cache = run_benchmark_for_setting(rollouts, use_cache=True, max_workers=args.max_workers)
185
-
186
- print("Running Experiment B: Cache DISABLED...")
187
- results_without_cache = run_benchmark_for_setting(rollouts, use_cache=False, max_workers=args.max_workers)
188
-
189
- final_results = {
190
- "with_cache": results_with_cache,
191
- "without_cache": results_without_cache
192
- }
193
-
194
- json_path = str(PROJECT_ROOT / "results" / "task1_results.json")
195
- with open(json_path, 'w') as f:
196
- json.dump(final_results, f, indent=4)
197
-
198
- print_comparison_table(final_results)
199
- plot_results(final_results, str(PROJECT_ROOT / "results" / "task1_plot.png"))
200
-
201
- if __name__ == "__main__":
202
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/benchmark_quantization.py DELETED
@@ -1,108 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import argparse
4
- import json
5
- import os
6
- import time
7
- from pathlib import Path
8
- from typing import Dict, List, Tuple
9
-
10
- import numpy as np
11
- import torch
12
-
13
- from src.execution_reward import execution_reward
14
- from src.prompting import encode_prompt
15
- from src.quantization_utils import load_fp32_model, load_quant_artifact
16
-
17
-
18
- def _load_dev_items(root: Path, n: int, seed: int = 42) -> List[dict]:
19
- data = json.loads((root / "data" / "dev.json").read_text())
20
- if n >= len(data):
21
- return data
22
- rng = np.random.default_rng(seed)
23
- idxs = rng.choice(len(data), size=n, replace=False)
24
- return [data[int(i)] for i in idxs]
25
-
26
-
27
- def _bench_variant(name: str, tok, model, items: List[dict], device: str) -> Dict[str, float]:
28
- latencies: List[float] = []
29
- ex = 0
30
-
31
- # Warmup (1 item)
32
- if items:
33
- it = items[0]
34
- _ = encode_prompt(tok, it["question"], it["db_id"], device=device, max_input_tokens=512).unsqueeze(0)
35
-
36
- for it in items:
37
- db_id = it["db_id"]
38
- q = it["question"]
39
- gold = it["query"]
40
- db_path = str(Path("data") / "database" / db_id / f"{db_id}.sqlite")
41
-
42
- input_ids = encode_prompt(tok, q, db_id, device=device, max_input_tokens=512).unsqueeze(0)
43
- t0 = time.perf_counter()
44
- out = model.generate(input_ids=input_ids, max_new_tokens=120, num_beams=8, repetition_penalty=1.2)
45
- dt = time.perf_counter() - t0
46
- latencies.append(dt)
47
-
48
- pred = tok.decode(out[0], skip_special_tokens=True).strip()
49
- r = execution_reward(pred, db_path, gold)
50
- if float(r) >= 1.0:
51
- ex += 1
52
-
53
- p50 = float(np.percentile(latencies, 50)) if latencies else 0.0
54
- p90 = float(np.percentile(latencies, 90)) if latencies else 0.0
55
- mean = float(np.mean(latencies)) if latencies else 0.0
56
- return {
57
- "n": float(len(items)),
58
- "ex": float(ex / max(len(items), 1)),
59
- "lat_mean_s": mean,
60
- "lat_p50_s": p50,
61
- "lat_p90_s": p90,
62
- }
63
-
64
-
65
- def main() -> None:
66
- p = argparse.ArgumentParser(description="Benchmark fp32 vs quantized artifacts (CPU-focused).")
67
- p.add_argument("--base_model", default=os.environ.get("BASE_MODEL", "Salesforce/codet5-base"))
68
- p.add_argument("--adapter", default="", help="Optional adapter for fp32 baseline.")
69
- p.add_argument("--artifact_int8", default="", help="Artifact dir exported by scripts/quantize_export.py")
70
- p.add_argument("--artifact_int8_decoder", default="", help="Artifact dir for decoder-only int8")
71
- p.add_argument("--num_samples", type=int, default=100)
72
- p.add_argument("--seed", type=int, default=42)
73
- p.add_argument("--out", default="results/task5_quant_bench.json")
74
- p.add_argument("--local_only", action="store_true")
75
- args = p.parse_args()
76
-
77
- device = "cpu"
78
- root = Path(".")
79
- items = _load_dev_items(root, args.num_samples, args.seed)
80
-
81
- report: Dict[str, Dict[str, float]] = {}
82
-
83
- tok, fp32 = load_fp32_model(
84
- args.base_model,
85
- adapter_path=args.adapter.strip() or None,
86
- device=device,
87
- local_only=args.local_only,
88
- )
89
- report["fp32"] = _bench_variant("fp32", tok, fp32, items, device)
90
-
91
- if args.artifact_int8:
92
- tok8, m8, _meta = load_quant_artifact(args.artifact_int8, device=device, local_only=True)
93
- report["int8_dynamic"] = _bench_variant("int8_dynamic", tok8, m8, items, device)
94
-
95
- if args.artifact_int8_decoder:
96
- tokd, md, _meta = load_quant_artifact(args.artifact_int8_decoder, device=device, local_only=True)
97
- report["int8_decoder_dynamic"] = _bench_variant("int8_decoder_dynamic", tokd, md, items, device)
98
-
99
- out_path = Path(args.out)
100
- out_path.parent.mkdir(parents=True, exist_ok=True)
101
- out_path.write_text(json.dumps(report, indent=2))
102
- print(json.dumps(report, indent=2))
103
-
104
-
105
- if __name__ == "__main__":
106
- torch.set_grad_enabled(False)
107
- main()
108
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/benchmark_rollout_generation.py DELETED
@@ -1,66 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import argparse
4
- import json
5
- import os
6
- import time
7
- from pathlib import Path
8
- from typing import List
9
-
10
- import numpy as np
11
- import torch
12
-
13
- from src.prompting import encode_prompt
14
- from src.quantization_utils import load_fp32_model, load_quant_artifact
15
-
16
-
17
- def _load_items(root: Path, n: int, seed: int = 42) -> List[dict]:
18
- data = json.loads((root / "data" / "dev.json").read_text())
19
- if n >= len(data):
20
- return data
21
- rng = np.random.default_rng(seed)
22
- idxs = rng.choice(len(data), size=n, replace=False)
23
- return [data[int(i)] for i in idxs]
24
-
25
-
26
- def _bench_generate(tok, model, items: List[dict], device: str) -> float:
27
- t0 = time.perf_counter()
28
- for it in items:
29
- input_ids = encode_prompt(tok, it["question"], it["db_id"], device=device, max_input_tokens=512).unsqueeze(0)
30
- _ = model.generate(input_ids=input_ids, max_new_tokens=64, num_beams=4)
31
- return time.perf_counter() - t0
32
-
33
-
34
- def main() -> None:
35
- p = argparse.ArgumentParser(description="Benchmark rollout generation latency for RL loops.")
36
- p.add_argument("--base_model", default=os.environ.get("BASE_MODEL", "Salesforce/codet5-base"))
37
- p.add_argument("--adapter", default="")
38
- p.add_argument("--artifact", default="", help="Quantized artifact dir (optional).")
39
- p.add_argument("--num_rollouts", type=int, default=128)
40
- p.add_argument("--seed", type=int, default=42)
41
- p.add_argument("--local_only", action="store_true")
42
- args = p.parse_args()
43
-
44
- device = "cpu"
45
- root = Path(".")
46
- items = _load_items(root, args.num_rollouts, args.seed)
47
-
48
- tok, fp32 = load_fp32_model(
49
- args.base_model,
50
- adapter_path=args.adapter.strip() or None,
51
- device=device,
52
- local_only=args.local_only,
53
- )
54
- t_fp32 = _bench_generate(tok, fp32, items, device)
55
- print(f"fp32: {t_fp32:.2f}s for {len(items)} rollouts ({len(items)/max(t_fp32,1e-9):.2f} rollouts/s)")
56
-
57
- if args.artifact:
58
- tokq, mq, meta = load_quant_artifact(args.artifact, device=device, local_only=True)
59
- t_q = _bench_generate(tokq, mq, items, device)
60
- mode = meta.get("mode", "quant")
61
- print(f"{mode}: {t_q:.2f}s for {len(items)} rollouts ({len(items)/max(t_q,1e-9):.2f} rollouts/s)")
62
-
63
-
64
- if __name__ == "__main__":
65
- torch.set_grad_enabled(False)
66
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/error_dashboard.py DELETED
@@ -1,99 +0,0 @@
1
-
2
- import json
3
- from collections import Counter
4
-
5
- # ==============================
6
- # LOAD LOGS
7
- # ==============================
8
- with open("results/error_logs.json") as f:
9
- logs = json.load(f)
10
-
11
- total_errors = len(logs)
12
-
13
- # ==============================
14
- # ERROR DISTRIBUTION
15
- # ==============================
16
- error_counts = Counter([e["error_type"] for e in logs])
17
-
18
- print("\n" + "="*50)
19
- print("📊 TEXT-to-SQL ERROR DASHBOARD")
20
- print("="*50)
21
-
22
- print(f"\n🔢 Total Errors Logged: {total_errors}")
23
-
24
- print("\n📊 ERROR DISTRIBUTION:")
25
- print("-"*30)
26
- for k, v in error_counts.items():
27
- percent = (v / total_errors) * 100
28
- print(f"{k:<20} : {v:>4} ({percent:.1f}%)")
29
-
30
- # ==============================
31
- # TOP ERROR
32
- # ==============================
33
- top_error = error_counts.most_common(1)[0]
34
-
35
- print("\n🔥 MOST COMMON ERROR:")
36
- print("-"*30)
37
- print(f"{top_error[0]} ({top_error[1]} times)")
38
-
39
- # ==============================
40
- # SQL OPERATION ANALYSIS
41
- # ==============================
42
- join_count = 0
43
- where_count = 0
44
- group_count = 0
45
- order_count = 0
46
-
47
- for e in logs:
48
- sql = e["sql"].lower()
49
-
50
- if "join" in sql:
51
- join_count += 1
52
- if "where" in sql:
53
- where_count += 1
54
- if "group by" in sql:
55
- group_count += 1
56
- if "order by" in sql:
57
- order_count += 1
58
-
59
- print("\n🧠 SQL OPERATION ANALYSIS:")
60
- print("-"*30)
61
- print(f"JOIN used in : {join_count} queries")
62
- print(f"WHERE used in : {where_count} queries")
63
- print(f"GROUP BY used in : {group_count} queries")
64
- print(f"ORDER BY used in : {order_count} queries")
65
-
66
- # ==============================
67
- # SAMPLE ERRORS
68
- # ==============================
69
- print("\n🧪 SAMPLE ERROR CASES:")
70
- print("-"*50)
71
-
72
- for i, e in enumerate(logs[:3], 1):
73
- print(f"\nCase {i}:")
74
- print(f"Q : {e['question']}")
75
- print(f"SQL : {e['sql']}")
76
- print(f"Type: {e['error_type']}")
77
-
78
- # ==============================
79
- # FINAL INSIGHT
80
- # ==============================
81
- print("\n📌 FINAL INSIGHT:")
82
- print("-"*30)
83
-
84
- if top_error[0] == "wrong_column":
85
- print("⚠️ Model struggles with column selection (schema understanding issue).")
86
-
87
- elif top_error[0] == "wrong_table":
88
- print("⚠️ Model struggles with correct table mapping.")
89
-
90
- elif top_error[0] == "syntax_error":
91
- print("⚠️ Model generates invalid SQL syntax.")
92
-
93
- else:
94
- print("⚠️ Mixed errors — needs general improvement.")
95
-
96
- print("\n" + "="*50)
97
- print("✅ DASHBOARD COMPLETE")
98
- print("="*50)
99
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/evaluate.py DELETED
@@ -1,170 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import os
4
- import sqlite3
5
- from contextlib import closing
6
- from typing import Dict, List
7
-
8
- import torch
9
- from datasets import load_dataset
10
- from peft import PeftModel
11
- from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
12
- from trl import AutoModelForSeq2SeqLMWithValueHead
13
-
14
- import sys
15
-
16
- PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
17
- sys.path.append(PROJECT_ROOT)
18
- from src.execution_reward import execution_reward # noqa: E402
19
-
20
-
21
- BASE_MODEL = os.environ.get("BASE_MODEL", "t5-small")
22
- DB_ROOT = os.path.join(PROJECT_ROOT, "data", "database")
23
-
24
- # Prefer RL best model if present; otherwise fall back.
25
- RL_DIR = os.path.join(PROJECT_ROOT, "outputs", "rlhf_text2sql", "best_model")
26
- if not os.path.isdir(RL_DIR):
27
- RL_DIR = os.path.join(PROJECT_ROOT, "outputs", "rlhf_text2sql")
28
-
29
- SPLIT = "train[:100]" # quick sanity check
30
- MAX_NEW_TOKENS = 128
31
-
32
- PREFIX = "translate English to SQL:"
33
- MAX_SCHEMA_CHARS = 1500
34
- MAX_INPUT_TOKENS = 512
35
-
36
-
37
- os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
38
- device = "mps" if torch.backends.mps.is_available() else "cpu"
39
- print("Using device:", device)
40
-
41
-
42
- def get_db_path(db_id: str) -> str:
43
- return os.path.join(DB_ROOT, db_id, f"{db_id}.sqlite")
44
-
45
-
46
- _SCHEMA_CACHE: Dict[str, str] = {}
47
-
48
-
49
- def get_db_schema_text(db_path: str) -> str:
50
- if db_path in _SCHEMA_CACHE:
51
- return _SCHEMA_CACHE[db_path]
52
- schema_text = ""
53
- try:
54
- with closing(sqlite3.connect(db_path)) as conn:
55
- cur = conn.cursor()
56
- tables = cur.execute(
57
- "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';"
58
- ).fetchall()
59
- for (tname,) in tables:
60
- cols = cur.execute(f'PRAGMA table_info(\"{tname}\")').fetchall()
61
- col_names = [c[1] for c in cols if c and isinstance(c[1], str)]
62
- schema_text += f"{tname}({', '.join(col_names)}) "
63
- except Exception:
64
- schema_text = ""
65
- if len(schema_text) > MAX_SCHEMA_CHARS:
66
- schema_text = schema_text[:MAX_SCHEMA_CHARS]
67
- _SCHEMA_CACHE[db_path] = schema_text
68
- return schema_text
69
-
70
-
71
- def encode_prompt(tokenizer, question: str, schema: str) -> torch.Tensor:
72
- schema = (schema or "")[:MAX_SCHEMA_CHARS]
73
- prefix_schema = f"{PREFIX}\n\nSchema:\n"
74
- mid = "\n\nQuestion:\n"
75
- suffix = f"{question}\n\nSQL:"
76
-
77
- prefix_ids = tokenizer.encode(prefix_schema, add_special_tokens=False)
78
- schema_ids = tokenizer.encode(schema, add_special_tokens=False)
79
- mid_ids = tokenizer.encode(mid, add_special_tokens=False)
80
- suffix_ids = tokenizer.encode(suffix, add_special_tokens=False)
81
-
82
- eos_id = tokenizer.eos_token_id
83
- max_without_eos = MAX_INPUT_TOKENS - (1 if eos_id is not None else 0)
84
-
85
- fixed_len = len(prefix_ids) + len(mid_ids) + len(suffix_ids)
86
- if fixed_len > max_without_eos:
87
- keep = max(0, max_without_eos - (len(prefix_ids) + len(mid_ids)))
88
- suffix_ids = suffix_ids[:keep]
89
- fixed_len = len(prefix_ids) + len(mid_ids) + len(suffix_ids)
90
-
91
- remaining_for_schema = max_without_eos - fixed_len
92
- if remaining_for_schema < 0:
93
- remaining_for_schema = 0
94
- schema_ids = schema_ids[:remaining_for_schema]
95
-
96
- ids = (prefix_ids + schema_ids + mid_ids + suffix_ids)[:max_without_eos]
97
- if eos_id is not None:
98
- ids = ids + [eos_id]
99
-
100
- return torch.tensor(ids, dtype=torch.long).to(device)
101
-
102
-
103
- def load_model_and_tokenizer():
104
- # Try loading the PPO-saved value-head model directly.
105
- try:
106
- tok = AutoTokenizer.from_pretrained(RL_DIR)
107
- mdl = AutoModelForSeq2SeqLMWithValueHead.from_pretrained(RL_DIR).to(device)
108
- return tok, mdl
109
- except Exception:
110
- pass
111
-
112
- # Fallback: treat RL_DIR as a LoRA adapter directory.
113
- tok = AutoTokenizer.from_pretrained(BASE_MODEL)
114
- if tok.pad_token_id is None:
115
- tok.pad_token = tok.eos_token
116
- base = AutoModelForSeq2SeqLM.from_pretrained(BASE_MODEL).to(device)
117
- try:
118
- base = PeftModel.from_pretrained(base, RL_DIR)
119
- except Exception:
120
- # Final fallback: use SFT adapter (if RL adapter not found)
121
- sft_dir = os.path.join(PROJECT_ROOT, "checkpoints", "sft_adapter")
122
- base = PeftModel.from_pretrained(base, sft_dir)
123
- return tok, base
124
-
125
-
126
- def main() -> None:
127
- tokenizer, model = load_model_and_tokenizer()
128
- model.eval()
129
-
130
- ds = load_dataset("spider", split=SPLIT)
131
-
132
- correct = 0
133
- valid = 0
134
-
135
- for i, ex in enumerate(ds, start=1):
136
- question = ex["question"]
137
- gold_sql = ex["query"]
138
- db_id = ex["db_id"]
139
- db_path = get_db_path(db_id)
140
- schema = get_db_schema_text(db_path)
141
-
142
- inp = encode_prompt(tokenizer, question, schema)
143
- with torch.no_grad():
144
- out = model.generate(
145
- input_ids=inp.unsqueeze(0),
146
- max_new_tokens=MAX_NEW_TOKENS,
147
- do_sample=False,
148
- num_beams=1,
149
- pad_token_id=tokenizer.pad_token_id,
150
- eos_token_id=tokenizer.eos_token_id,
151
- )
152
- pred_sql = tokenizer.decode(out[0], skip_special_tokens=True)
153
- r = execution_reward(pred_sql, db_path, gold_sql)
154
- if r > -1.0:
155
- valid += 1
156
- if r >= 1.0:
157
- correct += 1
158
-
159
- if i % 25 == 0:
160
- print(f"Evaluated {i}/{len(ds)}")
161
-
162
- n = len(ds)
163
- print("\nRESULTS")
164
- print(f"examples: {n}")
165
- print(f"execution_accuracy: {correct/n:.3f}")
166
- print(f"valid_sql_rate: {valid/n:.3f}")
167
-
168
-
169
- if __name__ == "__main__":
170
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/plot_task2.py DELETED
@@ -1,58 +0,0 @@
1
- import matplotlib.pyplot as plt
2
- import seaborn as sns
3
-
4
- # ==========================================
5
- # 1. EXTRACTED DATA FROM TERMINAL
6
- # ==========================================
7
- # Error Distribution Data
8
- error_types = ['wrong_column', 'wrong_table', 'ambiguous_column', 'other']
9
- error_counts = [61, 11, 4, 1]
10
-
11
- # SQL Operation Analysis Data
12
- sql_ops = ['WHERE', 'JOIN', 'ORDER BY', 'GROUP BY']
13
- op_counts = [55, 36, 20, 14]
14
-
15
- # ==========================================
16
- # 2. SET UP THE DASHBOARD LAYOUT
17
- # ==========================================
18
- # Use a clean, modern aesthetic
19
- sns.set_theme(style="whitegrid")
20
- fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
21
-
22
- # ==========================================
23
- # 3. PLOT 1: ERROR DISTRIBUTION (Horizontal Bar)
24
- # ==========================================
25
- sns.barplot(x=error_counts, y=error_types, ax=ax1, palette="flare")
26
- ax1.set_title('Primary Cause of Failure (Total: 77 Errors)', fontsize=14, pad=15, fontweight='bold')
27
- ax1.set_xlabel('Number of Queries')
28
- ax1.set_ylabel('')
29
-
30
- # Add actual numbers next to the bars
31
- for i, v in enumerate(error_counts):
32
- ax1.text(v + 1.5, i, f"{v}", color='#333333', va='center', fontweight='bold')
33
-
34
- # ==========================================
35
- # 4. PLOT 2: SQL OPERATIONS (Vertical Bar)
36
- # ==========================================
37
- sns.barplot(x=sql_ops, y=op_counts, ax=ax2, palette="crest")
38
- ax2.set_title('Clauses Present in Failed Queries', fontsize=14, pad=15, fontweight='bold')
39
- ax2.set_ylabel('Frequency')
40
- ax2.set_xlabel('')
41
-
42
- # Add actual numbers on top of the bars
43
- for i, v in enumerate(op_counts):
44
- ax2.text(i, v + 1, str(v), color='#333333', ha='center', fontweight='bold')
45
-
46
- # ==========================================
47
- # 5. RENDER AND SAVE
48
- # ==========================================
49
- plt.suptitle('Text-to-SQL Error Diagnostic Dashboard', fontsize=18, fontweight='heavy', y=1.05)
50
- sns.despine(left=True, bottom=True) # Removes clunky borders
51
- plt.tight_layout()
52
-
53
- # Save the plot as a high-res image for your report!
54
- plt.savefig('error_diagnostic_plot.png', dpi=300, bbox_inches='tight')
55
- print("✅ Plot successfully saved as 'error_diagnostic_plot.png'")
56
-
57
- # Display the plot
58
- plt.show()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/plot_task3.py DELETED
@@ -1,15 +0,0 @@
1
- import matplotlib.pyplot as plt
2
-
3
- labels = ["Without", "With"]
4
- constraint = [0, 88]
5
-
6
- plt.figure()
7
- plt.bar(labels, constraint)
8
-
9
- plt.title("Constraint Satisfaction (Task 3)")
10
- plt.ylabel("Percentage")
11
-
12
- plt.savefig("task3_constraint.png")
13
- plt.show()
14
-
15
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/plot_task3_plotly.py DELETED
@@ -1,103 +0,0 @@
1
- import plotly.graph_objects as go
2
- from plotly.subplots import make_subplots
3
-
4
- # ==========================================
5
- # 1. YOUR DATA
6
- # ==========================================
7
- models = ['FP32 (Base)', 'INT8 Dynamic', 'INT8 Decoder-Only']
8
-
9
- # Accuracy (multiplied by 100 for percentage)
10
- accuracy = [36.0, 36.0, 38.0]
11
-
12
- # Latency metrics
13
- lat_mean = [3.11, 1.65, 1.66]
14
- lat_p50 = [2.94, 1.54, 1.56]
15
- lat_p90 = [4.64, 2.44, 2.48]
16
-
17
- # ==========================================
18
- # 2. SET UP THE SIDE-BY-SIDE LAYOUT
19
- # ==========================================
20
- fig = make_subplots(
21
- rows=1, cols=2,
22
- subplot_titles=(
23
- "<b>Model Accuracy (Execution)</b>",
24
- "<b>Inference Latency Profile</b>"
25
- ),
26
- horizontal_spacing=0.1
27
- )
28
-
29
- # ==========================================
30
- # 3. LEFT CHART: ACCURACY
31
- # ==========================================
32
- fig.add_trace(go.Bar(
33
- x=models,
34
- y=accuracy,
35
- name="Execution Accuracy",
36
- marker_color=['#94a3b8', '#38bdf8', '#10b981'], # Gray, Blue, Green
37
- text=[f"{val:.1f}%" for val in accuracy],
38
- textposition='auto',
39
- textfont=dict(size=14, color='white', family="Arial Black"),
40
- showlegend=False
41
- ), row=1, col=1)
42
-
43
- # ==========================================
44
- # 4. RIGHT CHART: LATENCY PROFILE
45
- # ==========================================
46
- # P50 Latency
47
- fig.add_trace(go.Bar(
48
- x=models, y=lat_p50,
49
- name="Median (P50)",
50
- marker_color="#ece80a" # Light Blue
51
- ), row=1, col=2)
52
-
53
- # Mean Latency
54
- fig.add_trace(go.Bar(
55
- x=models, y=lat_mean,
56
- name="Mean Latency",
57
- marker_color="#3b4da9" # Standard Blue
58
- ), row=1, col=2)
59
-
60
- # P90 Latency
61
- fig.add_trace(go.Bar(
62
- x=models, y=lat_p90,
63
- name="90th Percentile (P90)",
64
- marker_color="#d974e2" # Dark Blue
65
- ), row=1, col=2)
66
-
67
- # ==========================================
68
- # 5. APPLY ULTRA-MODERN STYLING
69
- # ==========================================
70
- fig.update_layout(
71
- title=dict(
72
- text="<b>Task 5: FP32 vs. INT8 Quantization Performance</b>",
73
- font=dict(size=22, color='#1e293b'),
74
- x=0.5
75
- ),
76
- plot_bgcolor='white',
77
- paper_bgcolor='white',
78
- barmode='group',
79
- legend=dict(
80
- orientation="h",
81
- yanchor="bottom", y=1.05,
82
- xanchor="center", x=0.8,
83
- bgcolor='rgba(255,255,255,0.8)'
84
- ),
85
- font=dict(family="-apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif"),
86
- margin=dict(t=120, b=60, l=60, r=40)
87
- )
88
-
89
- # Style Left Axes
90
- fig.update_yaxes(title_text="<b>Accuracy (%)</b>", range=[0, 45], gridcolor='#f1f5f9', row=1, col=1)
91
- fig.update_xaxes(tickfont=dict(weight='bold'), row=1, col=1)
92
-
93
- # Style Right Axes
94
- fig.update_yaxes(title_text="<b>Seconds per Query</b>", gridcolor='#f1f5f9', row=1, col=2)
95
- fig.update_xaxes(tickfont=dict(weight='bold'), row=1, col=2)
96
-
97
- # ==========================================
98
- # 6. RENDER AND SAVE
99
- # ==========================================
100
- html_file = "task5_quantization_dashboard.html"
101
- fig.write_html(html_file)
102
- print(f"✅ Interactive Plotly Dashboard saved to: {html_file}")
103
- fig.show()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/quantize_export.py DELETED
@@ -1,86 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import argparse
4
- import os
5
- from pathlib import Path
6
-
7
- import torch
8
-
9
- from src.quantization_utils import (
10
- load_bnb_quantized_model,
11
- load_fp32_model,
12
- quantize_dynamic_int8,
13
- quantize_dynamic_int8_decoder_only,
14
- save_quant_artifact,
15
- )
16
-
17
-
18
- def main() -> None:
19
- p = argparse.ArgumentParser(description="Export quantized Seq2Seq model artifacts for CPU inference.")
20
- p.add_argument("--base_model", default=os.environ.get("BASE_MODEL", "Salesforce/codet5-base"))
21
- p.add_argument("--adapter", default="", help="Optional LoRA adapter directory.")
22
- p.add_argument("--out_dir", required=True, help="Output directory for artifact.")
23
- p.add_argument(
24
- "--mode",
25
- required=True,
26
- choices=["fp32", "int8_dynamic", "int8_decoder_dynamic", "int8_bnb", "int4_bnb"],
27
- )
28
- p.add_argument("--device", default="cpu", help="cpu|cuda (bnb requires cuda)")
29
- p.add_argument("--local_only", action="store_true", help="Do not hit network; use HF cache only.")
30
- args = p.parse_args()
31
-
32
- adapter = args.adapter.strip() or None
33
- out_dir = Path(args.out_dir)
34
-
35
- if args.mode == "fp32":
36
- tok, model = load_fp32_model(args.base_model, adapter_path=adapter, device=args.device, local_only=args.local_only)
37
- save_quant_artifact(out_dir, mode="fp32", base_model=args.base_model, adapter_path=adapter, tokenizer=tok, model=model)
38
- return
39
-
40
- if args.mode == "int8_dynamic":
41
- tok, model = load_fp32_model(args.base_model, adapter_path=adapter, device="cpu", local_only=args.local_only)
42
- model = quantize_dynamic_int8(model)
43
- save_quant_artifact(out_dir, mode="int8_dynamic", base_model=args.base_model, adapter_path=adapter, tokenizer=tok, model=model)
44
- return
45
-
46
- if args.mode == "int8_decoder_dynamic":
47
- tok, model = load_fp32_model(args.base_model, adapter_path=adapter, device="cpu", local_only=args.local_only)
48
- model = quantize_dynamic_int8_decoder_only(model)
49
- save_quant_artifact(
50
- out_dir,
51
- mode="int8_decoder_dynamic",
52
- base_model=args.base_model,
53
- adapter_path=adapter,
54
- tokenizer=tok,
55
- model=model,
56
- )
57
- return
58
-
59
- if args.mode == "int8_bnb":
60
- tok, model = load_bnb_quantized_model(
61
- args.base_model,
62
- adapter_path=adapter,
63
- device=args.device,
64
- local_only=args.local_only,
65
- load_in_8bit=True,
66
- )
67
- # Note: saving bnb quantized weights in a portable way is non-trivial; we still save state_dict for reference.
68
- save_quant_artifact(out_dir, mode="int8_bnb", base_model=args.base_model, adapter_path=adapter, tokenizer=tok, model=model)
69
- return
70
-
71
- if args.mode == "int4_bnb":
72
- tok, model = load_bnb_quantized_model(
73
- args.base_model,
74
- adapter_path=adapter,
75
- device=args.device,
76
- local_only=args.local_only,
77
- load_in_4bit=True,
78
- )
79
- save_quant_artifact(out_dir, mode="int4_bnb", base_model=args.base_model, adapter_path=adapter, tokenizer=tok, model=model)
80
- return
81
-
82
-
83
- if __name__ == "__main__":
84
- torch.set_grad_enabled(False)
85
- main()
86
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/quantized_infer_harness.py DELETED
@@ -1,46 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import argparse
4
- import json
5
- import time
6
- from pathlib import Path
7
-
8
- from src.quantized_text2sql_engine import QuantizedText2SQLEngine
9
-
10
-
11
- def main() -> None:
12
- p = argparse.ArgumentParser(description="Production-style inference harness for quantized artifacts.")
13
- p.add_argument("--artifact", required=True, help="Quant artifact dir from scripts/quantize_export.py")
14
- p.add_argument("--num_samples", type=int, default=128)
15
- p.add_argument("--out", default="results/task5_quant_infer.json")
16
- args = p.parse_args()
17
-
18
- root = Path(".")
19
- dev = json.loads((root / "data" / "dev.json").read_text())
20
- dev = dev[: args.num_samples]
21
-
22
- engine = QuantizedText2SQLEngine(args.artifact, device="cpu")
23
- pairs = [(x["question"], x["db_id"]) for x in dev]
24
-
25
- t0 = time.perf_counter()
26
- results = engine.ask_batch_execute(pairs)
27
- dt = time.perf_counter() - t0
28
-
29
- out = {
30
- "n": len(results),
31
- "seconds": dt,
32
- "qps": len(results) / max(dt, 1e-9),
33
- "artifact": args.artifact,
34
- "meta": engine.meta,
35
- "results": results[:10], # sample
36
- }
37
-
38
- out_path = Path(args.out)
39
- out_path.parent.mkdir(parents=True, exist_ok=True)
40
- out_path.write_text(json.dumps(out, indent=2))
41
- print(json.dumps(out, indent=2))
42
-
43
-
44
- if __name__ == "__main__":
45
- main()
46
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/constrained_decoding.py DELETED
@@ -1,1058 +0,0 @@
1
- # from __future__ import annotations
2
-
3
- # import re
4
- # import threading
5
- # from dataclasses import dataclass
6
- # from typing import Dict, Iterable, List, Optional, Sequence, Set
7
-
8
- # import torch
9
- # from transformers.generation.logits_process import LogitsProcessor
10
-
11
- # from schema_constraints import ConstraintGraph, build_constraint_graph
12
-
13
-
14
- # def _infer_expected_identifier(prefix_text: str) -> Optional[str]:
15
- # s = re.sub(r"\s+", " ", prefix_text.lower())
16
- # last_from = s.rfind(" from ")
17
- # last_join = s.rfind(" join ")
18
- # last_select = s.rfind(" select ")
19
- # last_where = s.rfind(" where ")
20
- # last_on = s.rfind(" on ")
21
- # last_group = s.rfind(" group by ")
22
- # last_order = s.rfind(" order by ")
23
- # last_having = s.rfind(" having ")
24
-
25
- # last_table_kw = max(last_from, last_join)
26
- # last_col_kw = max(last_select, last_where, last_on, last_group, last_order, last_having)
27
-
28
- # if last_table_kw < 0 and last_col_kw < 0:
29
- # return None
30
- # if last_table_kw > last_col_kw:
31
- # return "table"
32
- # if last_col_kw > last_table_kw:
33
- # return "column"
34
- # return None
35
-
36
-
37
- # class _TrieNode:
38
- # __slots__ = ("children", "terminal")
39
-
40
- # def __init__(self) -> None:
41
- # self.children: Dict[int, _TrieNode] = {}
42
- # self.terminal: bool = False
43
-
44
- # def insert(self, token_ids: Sequence[int]) -> None:
45
- # node: _TrieNode = self
46
- # for tid in token_ids:
47
- # tid_i = int(tid)
48
- # nxt = node.children.get(tid_i)
49
- # if nxt is None:
50
- # nxt = _TrieNode()
51
- # node.children[tid_i] = nxt
52
- # node = nxt
53
- # node.terminal = True
54
-
55
- # def walk(self, prefix: Sequence[int]) -> Optional["_TrieNode"]:
56
- # node: _TrieNode = self
57
- # for tid in prefix:
58
- # node = node.children.get(int(tid)) # type: ignore[assignment]
59
- # if node is None:
60
- # return None
61
- # return node
62
-
63
-
64
- # def _encode_identifier(tokenizer, name: str) -> List[int]:
65
- # # Leading space encourages word-start markers (e.g. "Ġ" in RoBERTa BPE).
66
- # return tokenizer.encode(" " + name, add_special_tokens=False)
67
-
68
-
69
- # def _build_trie(tokenizer, names: Iterable[str]) -> _TrieNode:
70
- # trie = _TrieNode()
71
- # for n in names:
72
- # if not n:
73
- # continue
74
- # try:
75
- # ids = _encode_identifier(tokenizer, n)
76
- # except Exception:
77
- # continue
78
- # if ids:
79
- # trie.insert(ids)
80
- # return trie
81
-
82
-
83
- # def _allow_always_token_ids(tokenizer) -> torch.Tensor:
84
- # # Allow common delimiters so the model can end an identifier.
85
- # toks = [",", ")", "(", "\n", ".", ";"]
86
- # ids: Set[int] = set()
87
- # for t in toks:
88
- # try:
89
- # for tid in tokenizer.encode(t, add_special_tokens=False):
90
- # ids.add(int(tid))
91
- # except Exception:
92
- # continue
93
- # return torch.tensor(sorted(ids), dtype=torch.long)
94
-
95
-
96
- # @dataclass
97
- # class _PerDbTokenSets:
98
- # fp: str
99
- # table_trie: _TrieNode
100
- # column_trie: _TrieNode
101
- # allow_always: torch.Tensor
102
-
103
-
104
- # _DB_TOKENSET_LOCK = threading.Lock()
105
- # _DB_TOKENSETS: Dict[str, _PerDbTokenSets] = {}
106
-
107
-
108
- # def _per_db_tokensets(tokenizer, graph: ConstraintGraph) -> _PerDbTokenSets:
109
- # with _DB_TOKENSET_LOCK:
110
- # cached = _DB_TOKENSETS.get(graph.db_path)
111
- # if cached is not None and cached.fp == graph.fingerprint:
112
- # return cached
113
-
114
- # out = _PerDbTokenSets(
115
- # fp=graph.fingerprint,
116
- # table_trie=_build_trie(tokenizer, graph.tables),
117
- # column_trie=_build_trie(tokenizer, graph.all_columns),
118
- # allow_always=_allow_always_token_ids(tokenizer),
119
- # )
120
- # with _DB_TOKENSET_LOCK:
121
- # _DB_TOKENSETS[graph.db_path] = out
122
- # return out
123
-
124
-
125
- # class BatchSchemaConstrainedLogitsProcessor(LogitsProcessor):
126
- # """
127
- # Schema-aware constrained decoding per item in the generation batch.
128
- # Uses a tokenizer-based trie so multi-token identifiers can be constrained.
129
- # """
130
-
131
- # def __init__(self, tokenizer, db_paths: Sequence[str], *, max_prefix_tokens: int = 48):
132
- # self.tokenizer = tokenizer
133
- # self.db_paths = list(db_paths)
134
- # self.max_prefix_tokens = int(max_prefix_tokens)
135
-
136
- # self._graphs = [build_constraint_graph(p) for p in self.db_paths]
137
- # self._token_sets = [_per_db_tokensets(tokenizer, g) for g in self._graphs]
138
-
139
- # def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
140
- # if input_ids.dim() != 2 or scores.dim() != 2:
141
- # return scores
142
-
143
- # batch = input_ids.size(0)
144
- # if batch != len(self._graphs):
145
- # return scores
146
-
147
- # for i in range(batch):
148
- # tail_ids = input_ids[i, -self.max_prefix_tokens :].tolist()
149
- # prefix_text = self.tokenizer.decode(tail_ids, skip_special_tokens=True)
150
- # expected = _infer_expected_identifier(prefix_text)
151
- # if expected is None:
152
- # continue
153
-
154
- # if expected == "table":
155
- # m = re.search(r"(?:from|join)\s+([A-Za-z_][A-Za-z0-9_]*)$", prefix_text, flags=re.I)
156
- # partial = m.group(1) if m else None
157
- # if partial is None and not re.search(r"(?:from|join)\s*$", prefix_text, flags=re.I):
158
- # continue
159
- # trie = self._token_sets[i].table_trie
160
- # else:
161
- # m = re.search(
162
- # r"(?:select|where|on|group by|order by|having)\s+([A-Za-z_][A-Za-z0-9_]*(?:\.[A-Za-z_][A-Za-z0-9_]*)?)$",
163
- # prefix_text,
164
- # flags=re.I,
165
- # )
166
- # partial = m.group(1) if m else None
167
- # if partial is None and not re.search(
168
- # r"(?:select|where|on|group by|order by|having)\s*$", prefix_text, flags=re.I
169
- # ):
170
- # continue
171
- # trie = self._token_sets[i].column_trie
172
-
173
- # if not partial:
174
- # prefix_token_ids: List[int] = []
175
- # else:
176
- # try:
177
- # prefix_token_ids = _encode_identifier(self.tokenizer, partial)
178
- # except Exception:
179
- # continue
180
-
181
- # node = trie.walk(prefix_token_ids)
182
- # if node is None or node.terminal:
183
- # continue
184
-
185
- # allowed_next = sorted(node.children.keys())
186
- # if not allowed_next:
187
- # continue
188
-
189
- # allowed_next_t = torch.tensor(allowed_next, dtype=torch.long, device=scores.device)
190
- # allow_always = self._token_sets[i].allow_always.to(scores.device)
191
- # keep = torch.cat([allowed_next_t, allow_always]) if allow_always.numel() else allowed_next_t
192
-
193
- # kept_scores = scores[i, keep].clone()
194
- # scores[i, :] = -float("inf")
195
- # scores[i, keep] = kept_scores
196
-
197
- # return scores
198
-
199
-
200
- # # Backwards-compatible names used elsewhere in the repo.
201
- # class SchemaConstraintGraph:
202
- # def __init__(self, db_path: str):
203
- # self._graph = build_constraint_graph(db_path)
204
- # self.tables = sorted(self._graph.tables)
205
- # self.columns = sorted(self._graph.all_columns)
206
-
207
-
208
- # class SchemaConstrainedLogitsProcessor(LogitsProcessor):
209
- # def __init__(self, tokenizer, schema_graph: SchemaConstraintGraph):
210
- # self._proc = BatchSchemaConstrainedLogitsProcessor(tokenizer, [schema_graph._graph.db_path])
211
-
212
- # def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
213
- # return self._proc(input_ids, scores)
214
-
215
-
216
-
217
-
218
- # from __future__ import annotations
219
-
220
- # import re
221
- # import threading
222
- # from dataclasses import dataclass
223
- # from typing import Dict, Iterable, List, Optional, Sequence, Set
224
-
225
- # import torch
226
- # from transformers.generation.logits_process import LogitsProcessor
227
-
228
- # from schema_constraints import ConstraintGraph, build_constraint_graph
229
-
230
-
231
- # # =========================================================
232
- # # 🔍 IDENTIFIER TYPE DETECTION
233
- # # =========================================================
234
- # def _infer_expected_identifier(prefix_text: str) -> Optional[str]:
235
- # s = re.sub(r"\s+", " ", prefix_text.lower())
236
-
237
- # last_from = s.rfind(" from ")
238
- # last_join = s.rfind(" join ")
239
- # last_select = s.rfind(" select ")
240
- # last_where = s.rfind(" where ")
241
- # last_on = s.rfind(" on ")
242
- # last_group = s.rfind(" group by ")
243
- # last_order = s.rfind(" order by ")
244
- # last_having = s.rfind(" having ")
245
-
246
- # last_table_kw = max(last_from, last_join)
247
- # last_col_kw = max(last_select, last_where, last_on, last_group, last_order, last_having)
248
-
249
- # if last_table_kw < 0 and last_col_kw < 0:
250
- # return None
251
- # if last_table_kw > last_col_kw:
252
- # return "table"
253
- # if last_col_kw > last_table_kw:
254
- # return "column"
255
- # return None
256
-
257
-
258
- # # =========================================================
259
- # # 🌳 TRIE STRUCTURE
260
- # # =========================================================
261
- # class _TrieNode:
262
- # __slots__ = ("children", "terminal")
263
-
264
- # def __init__(self) -> None:
265
- # self.children: Dict[int, _TrieNode] = {}
266
- # self.terminal: bool = False
267
-
268
- # def insert(self, token_ids: Sequence[int]) -> None:
269
- # node = self
270
- # for tid in token_ids:
271
- # tid = int(tid)
272
- # if tid not in node.children:
273
- # node.children[tid] = _TrieNode()
274
- # node = node.children[tid]
275
- # node.terminal = True
276
-
277
- # def walk(self, prefix: Sequence[int]) -> Optional["_TrieNode"]:
278
- # node = self
279
- # for tid in prefix:
280
- # node = node.children.get(int(tid))
281
- # if node is None:
282
- # return None
283
- # return node
284
-
285
-
286
- # # =========================================================
287
- # # 🔤 TOKEN ENCODING
288
- # # =========================================================
289
- # def _encode_identifier(tokenizer, name: str) -> List[int]:
290
- # return tokenizer.encode(" " + name, add_special_tokens=False)
291
-
292
-
293
- # def _build_trie(tokenizer, names: Iterable[str]) -> _TrieNode:
294
- # trie = _TrieNode()
295
- # for name in names:
296
- # try:
297
- # ids = _encode_identifier(tokenizer, name)
298
- # if ids:
299
- # trie.insert(ids)
300
- # except Exception:
301
- # continue
302
- # return trie
303
-
304
-
305
- # def _allow_always_token_ids(tokenizer) -> torch.Tensor:
306
- # tokens = [",", ")", "(", ".", ";", "\n"]
307
- # ids: Set[int] = set()
308
-
309
- # for t in tokens:
310
- # try:
311
- # ids.update(tokenizer.encode(t, add_special_tokens=False))
312
- # except:
313
- # pass
314
-
315
- # return torch.tensor(sorted(ids), dtype=torch.long)
316
-
317
-
318
- # # =========================================================
319
- # # 📦 PER-DB CACHE
320
- # # =========================================================
321
- # @dataclass
322
- # class _PerDbTokenSets:
323
- # fp: str
324
- # table_trie: _TrieNode
325
- # column_trie: _TrieNode
326
- # allow_always: torch.Tensor
327
-
328
-
329
- # _DB_CACHE: Dict[str, _PerDbTokenSets] = {}
330
- # _DB_LOCK = threading.Lock()
331
-
332
-
333
- # def _per_db_tokensets(tokenizer, graph: ConstraintGraph) -> _PerDbTokenSets:
334
- # with _DB_LOCK:
335
- # cached = _DB_CACHE.get(graph.db_path)
336
- # if cached and cached.fp == graph.fingerprint:
337
- # return cached
338
-
339
- # obj = _PerDbTokenSets(
340
- # fp=graph.fingerprint,
341
- # table_trie=_build_trie(tokenizer, graph.tables),
342
- # column_trie=_build_trie(tokenizer, graph.all_columns),
343
- # allow_always=_allow_always_token_ids(tokenizer),
344
- # )
345
-
346
- # with _DB_LOCK:
347
- # _DB_CACHE[graph.db_path] = obj
348
-
349
- # return obj
350
-
351
-
352
- # # =========================================================
353
- # # 🚀 MAIN LOGITS PROCESSOR
354
- # # =========================================================
355
- # class BatchSchemaConstrainedLogitsProcessor(LogitsProcessor):
356
- # def __init__(self, tokenizer, db_paths: Sequence[str], max_prefix_tokens: int = 48):
357
- # self.tokenizer = tokenizer
358
- # self.db_paths = list(db_paths)
359
- # self.max_prefix_tokens = max_prefix_tokens
360
-
361
- # self._graphs = [build_constraint_graph(p) for p in db_paths]
362
- # self._token_sets = [_per_db_tokensets(tokenizer, g) for g in self._graphs]
363
-
364
- # # 📊 Metrics (IMPORTANT FOR REPORT)
365
- # self.total_steps = 0
366
- # self.constrained_steps = 0
367
-
368
- # def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor):
369
- # batch = input_ids.size(0)
370
-
371
- # for i in range(batch):
372
- # self.total_steps += 1
373
-
374
- # tail_ids = input_ids[i, -self.max_prefix_tokens:].tolist()
375
- # prefix_text = self.tokenizer.decode(tail_ids, skip_special_tokens=True)
376
-
377
- # expected = _infer_expected_identifier(prefix_text)
378
- # if expected is None:
379
- # continue
380
-
381
- # self.constrained_steps += 1
382
-
383
- # # =========================
384
- # # SELECT TRIE
385
- # # =========================
386
- # if expected == "table":
387
- # trie = self._token_sets[i].table_trie
388
- # else:
389
- # trie = self._token_sets[i].column_trie
390
-
391
- # # =========================
392
- # # PARTIAL TOKEN MATCH
393
- # # =========================
394
- # match = re.search(r"([A-Za-z_][A-Za-z0-9_]*)$", prefix_text)
395
- # partial = match.group(1) if match else ""
396
-
397
- # try:
398
- # prefix_ids = _encode_identifier(self.tokenizer, partial) if partial else []
399
- # except:
400
- # continue
401
-
402
- # node = trie.walk(prefix_ids)
403
- # if node is None or node.terminal:
404
- # continue
405
-
406
- # allowed_next = list(node.children.keys())
407
- # if not allowed_next:
408
- # continue
409
-
410
- # allowed_next = torch.tensor(allowed_next, device=scores.device)
411
- # allow_always = self._token_sets[i].allow_always.to(scores.device)
412
-
413
- # keep = torch.cat([allowed_next, allow_always])
414
-
415
- # kept_scores = scores[i, keep].clone()
416
- # scores[i, :] = -float("inf")
417
- # scores[i, keep] = kept_scores
418
-
419
- # return scores
420
-
421
- # # =========================================================
422
- # # 📊 METRICS FOR REPORT
423
- # # =========================================================
424
- # def get_constraint_stats(self):
425
- # if self.total_steps == 0:
426
- # return 0
427
- # return self.constrained_steps / self.total_steps
428
-
429
-
430
- # # =========================================================
431
- # # 🔁 BACKWARD COMPATIBILITY
432
- # # =========================================================
433
- # class SchemaConstraintGraph:
434
- # def __init__(self, db_path: str):
435
- # self._graph = build_constraint_graph(db_path)
436
- # self.tables = sorted(self._graph.tables)
437
- # self.columns = sorted(self._graph.all_columns)
438
-
439
-
440
- # class SchemaConstrainedLogitsProcessor(LogitsProcessor):
441
- # def __init__(self, tokenizer, schema_graph: SchemaConstraintGraph):
442
- # self.proc = BatchSchemaConstrainedLogitsProcessor(
443
- # tokenizer, [schema_graph._graph.db_path]
444
- # )
445
-
446
- # def __call__(self, input_ids, scores):
447
- # return self.proc(input_ids, scores)
448
-
449
-
450
-
451
-
452
-
453
-
454
- # from __future__ import annotations
455
-
456
- # import re
457
- # import threading
458
- # from dataclasses import dataclass
459
- # from typing import Dict, Iterable, List, Optional, Sequence, Set
460
-
461
- # import torch
462
- # from transformers.generation.logits_process import LogitsProcessor
463
-
464
- # from schema_constraints import ConstraintGraph, build_constraint_graph
465
-
466
-
467
- # def _infer_expected_identifier(prefix_text: str) -> Optional[str]:
468
- # s = re.sub(r"\s+", " ", prefix_text.lower())
469
- # last_from = s.rfind(" from ")
470
- # last_join = s.rfind(" join ")
471
- # last_select = s.rfind(" select ")
472
- # last_where = s.rfind(" where ")
473
- # last_on = s.rfind(" on ")
474
- # last_group = s.rfind(" group by ")
475
- # last_order = s.rfind(" order by ")
476
- # last_having = s.rfind(" having ")
477
-
478
- # last_table_kw = max(last_from, last_join)
479
- # last_col_kw = max(last_select, last_where, last_on, last_group, last_order, last_having)
480
-
481
- # if last_table_kw < 0 and last_col_kw < 0:
482
- # return None
483
- # if last_table_kw > last_col_kw:
484
- # return "table"
485
- # if last_col_kw > last_table_kw:
486
- # return "column"
487
- # return None
488
-
489
-
490
- # class _TrieNode:
491
- # __slots__ = ("children", "terminal")
492
-
493
- # def __init__(self) -> None:
494
- # self.children: Dict[int, _TrieNode] = {}
495
- # self.terminal: bool = False
496
-
497
- # def insert(self, token_ids: Sequence[int]) -> None:
498
- # node: _TrieNode = self
499
- # for tid in token_ids:
500
- # tid_i = int(tid)
501
- # nxt = node.children.get(tid_i)
502
- # if nxt is None:
503
- # nxt = _TrieNode()
504
- # node.children[tid_i] = nxt
505
- # node = nxt
506
- # node.terminal = True
507
-
508
- # def walk(self, prefix: Sequence[int]) -> Optional["_TrieNode"]:
509
- # node: _TrieNode = self
510
- # for tid in prefix:
511
- # node = node.children.get(int(tid)) # type: ignore[assignment]
512
- # if node is None:
513
- # return None
514
- # return node
515
-
516
-
517
- # def _encode_identifier(tokenizer, name: str) -> List[int]:
518
- # # Leading space encourages word-start markers (e.g. "Ġ" in RoBERTa BPE).
519
- # return tokenizer.encode(" " + name, add_special_tokens=False)
520
-
521
-
522
- # def _build_trie(tokenizer, names: Iterable[str]) -> _TrieNode:
523
- # trie = _TrieNode()
524
- # for n in names:
525
- # if not n:
526
- # continue
527
- # try:
528
- # ids = _encode_identifier(tokenizer, n)
529
- # except Exception:
530
- # continue
531
- # if ids:
532
- # trie.insert(ids)
533
- # return trie
534
-
535
-
536
- # def _allow_always_token_ids(tokenizer) -> torch.Tensor:
537
- # # Allow common delimiters so the model can end an identifier.
538
- # toks = [",", ")", "(", "\n", ".", ";"]
539
- # ids: Set[int] = set()
540
- # for t in toks:
541
- # try:
542
- # for tid in tokenizer.encode(t, add_special_tokens=False):
543
- # ids.add(int(tid))
544
- # except Exception:
545
- # continue
546
- # return torch.tensor(sorted(ids), dtype=torch.long)
547
-
548
-
549
- # @dataclass
550
- # class _PerDbTokenSets:
551
- # fp: str
552
- # table_trie: _TrieNode
553
- # column_trie: _TrieNode
554
- # allow_always: torch.Tensor
555
-
556
-
557
- # _DB_TOKENSET_LOCK = threading.Lock()
558
- # _DB_TOKENSETS: Dict[str, _PerDbTokenSets] = {}
559
-
560
-
561
- # def _per_db_tokensets(tokenizer, graph: ConstraintGraph) -> _PerDbTokenSets:
562
- # with _DB_TOKENSET_LOCK:
563
- # cached = _DB_TOKENSETS.get(graph.db_path)
564
- # if cached is not None and cached.fp == graph.fingerprint:
565
- # return cached
566
-
567
- # out = _PerDbTokenSets(
568
- # fp=graph.fingerprint,
569
- # table_trie=_build_trie(tokenizer, graph.tables),
570
- # column_trie=_build_trie(tokenizer, graph.all_columns),
571
- # allow_always=_allow_always_token_ids(tokenizer),
572
- # )
573
- # with _DB_TOKENSET_LOCK:
574
- # _DB_TOKENSETS[graph.db_path] = out
575
- # return out
576
-
577
-
578
- # class BatchSchemaConstrainedLogitsProcessor(LogitsProcessor):
579
- # """
580
- # Schema-aware constrained decoding per item in the generation batch.
581
- # Uses a tokenizer-based trie so multi-token identifiers can be constrained.
582
- # """
583
-
584
- # def __init__(self, tokenizer, db_paths: Sequence[str], *, max_prefix_tokens: int = 48):
585
- # self.tokenizer = tokenizer
586
- # self.db_paths = list(db_paths)
587
- # self.max_prefix_tokens = int(max_prefix_tokens)
588
-
589
- # self._graphs = [build_constraint_graph(p) for p in self.db_paths]
590
- # self._token_sets = [_per_db_tokensets(tokenizer, g) for g in self._graphs]
591
-
592
- # def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
593
- # if input_ids.dim() != 2 or scores.dim() != 2:
594
- # return scores
595
-
596
- # batch = input_ids.size(0)
597
- # if batch != len(self._graphs):
598
- # return scores
599
-
600
- # for i in range(batch):
601
- # tail_ids = input_ids[i, -self.max_prefix_tokens :].tolist()
602
- # prefix_text = self.tokenizer.decode(tail_ids, skip_special_tokens=True)
603
- # expected = _infer_expected_identifier(prefix_text)
604
- # if expected is None:
605
- # continue
606
-
607
- # if expected == "table":
608
- # m = re.search(r"(?:from|join)\s+([A-Za-z_][A-Za-z0-9_]*)$", prefix_text, flags=re.I)
609
- # partial = m.group(1) if m else None
610
- # if partial is None and not re.search(r"(?:from|join)\s*$", prefix_text, flags=re.I):
611
- # continue
612
- # trie = self._token_sets[i].table_trie
613
- # else:
614
- # m = re.search(
615
- # r"(?:select|where|on|group by|order by|having)\s+([A-Za-z_][A-Za-z0-9_]*(?:\.[A-Za-z_][A-Za-z0-9_]*)?)$",
616
- # prefix_text,
617
- # flags=re.I,
618
- # )
619
- # partial = m.group(1) if m else None
620
- # if partial is None and not re.search(
621
- # r"(?:select|where|on|group by|order by|having)\s*$", prefix_text, flags=re.I
622
- # ):
623
- # continue
624
- # trie = self._token_sets[i].column_trie
625
-
626
- # if not partial:
627
- # prefix_token_ids: List[int] = []
628
- # else:
629
- # try:
630
- # prefix_token_ids = _encode_identifier(self.tokenizer, partial)
631
- # except Exception:
632
- # continue
633
-
634
- # node = trie.walk(prefix_token_ids)
635
- # if node is None or node.terminal:
636
- # continue
637
-
638
- # allowed_next = sorted(node.children.keys())
639
- # if not allowed_next:
640
- # continue
641
-
642
- # allowed_next_t = torch.tensor(allowed_next, dtype=torch.long, device=scores.device)
643
- # allow_always = self._token_sets[i].allow_always.to(scores.device)
644
- # keep = torch.cat([allowed_next_t, allow_always]) if allow_always.numel() else allowed_next_t
645
-
646
- # kept_scores = scores[i, keep].clone()
647
- # scores[i, :] = -float("inf")
648
- # scores[i, keep] = kept_scores
649
-
650
- # return scores
651
-
652
-
653
- # # Backwards-compatible names used elsewhere in the repo.
654
- # class SchemaConstraintGraph:
655
- # def __init__(self, db_path: str):
656
- # self._graph = build_constraint_graph(db_path)
657
- # self.tables = sorted(self._graph.tables)
658
- # self.columns = sorted(self._graph.all_columns)
659
-
660
-
661
- # class SchemaConstrainedLogitsProcessor(LogitsProcessor):
662
- # def __init__(self, tokenizer, schema_graph: SchemaConstraintGraph):
663
- # self._proc = BatchSchemaConstrainedLogitsProcessor(tokenizer, [schema_graph._graph.db_path])
664
-
665
- # def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
666
- # return self._proc(input_ids, scores)
667
-
668
-
669
-
670
-
671
- # from __future__ import annotations
672
-
673
- # import re
674
- # import threading
675
- # from dataclasses import dataclass
676
- # from typing import Dict, Iterable, List, Optional, Sequence, Set
677
-
678
- # import torch
679
- # from transformers.generation.logits_process import LogitsProcessor
680
-
681
- # from schema_constraints import ConstraintGraph, build_constraint_graph
682
-
683
-
684
- # # =========================================================
685
- # # 🔍 IDENTIFIER TYPE DETECTION
686
- # # =========================================================
687
- # def _infer_expected_identifier(prefix_text: str) -> Optional[str]:
688
- # s = re.sub(r"\s+", " ", prefix_text.lower())
689
-
690
- # last_from = s.rfind(" from ")
691
- # last_join = s.rfind(" join ")
692
- # last_select = s.rfind(" select ")
693
- # last_where = s.rfind(" where ")
694
- # last_on = s.rfind(" on ")
695
- # last_group = s.rfind(" group by ")
696
- # last_order = s.rfind(" order by ")
697
- # last_having = s.rfind(" having ")
698
-
699
- # last_table_kw = max(last_from, last_join)
700
- # last_col_kw = max(last_select, last_where, last_on, last_group, last_order, last_having)
701
-
702
- # if last_table_kw < 0 and last_col_kw < 0:
703
- # return None
704
- # if last_table_kw > last_col_kw:
705
- # return "table"
706
- # if last_col_kw > last_table_kw:
707
- # return "column"
708
- # return None
709
-
710
-
711
- # # =========================================================
712
- # # 🌳 TRIE STRUCTURE
713
- # # =========================================================
714
- # class _TrieNode:
715
- # __slots__ = ("children", "terminal")
716
-
717
- # def __init__(self) -> None:
718
- # self.children: Dict[int, _TrieNode] = {}
719
- # self.terminal: bool = False
720
-
721
- # def insert(self, token_ids: Sequence[int]) -> None:
722
- # node = self
723
- # for tid in token_ids:
724
- # tid = int(tid)
725
- # if tid not in node.children:
726
- # node.children[tid] = _TrieNode()
727
- # node = node.children[tid]
728
- # node.terminal = True
729
-
730
- # def walk(self, prefix: Sequence[int]) -> Optional["_TrieNode"]:
731
- # node = self
732
- # for tid in prefix:
733
- # node = node.children.get(int(tid))
734
- # if node is None:
735
- # return None
736
- # return node
737
-
738
-
739
- # # =========================================================
740
- # # 🔤 TOKEN ENCODING
741
- # # =========================================================
742
- # def _encode_identifier(tokenizer, name: str) -> List[int]:
743
- # return tokenizer.encode(" " + name, add_special_tokens=False)
744
-
745
-
746
- # def _build_trie(tokenizer, names: Iterable[str]) -> _TrieNode:
747
- # trie = _TrieNode()
748
- # for name in names:
749
- # try:
750
- # ids = _encode_identifier(tokenizer, name)
751
- # if ids:
752
- # trie.insert(ids)
753
- # except Exception:
754
- # continue
755
- # return trie
756
-
757
-
758
- # def _allow_always_token_ids(tokenizer) -> torch.Tensor:
759
- # tokens = [",", ")", "(", ".", ";", "\n"]
760
- # ids: Set[int] = set()
761
-
762
- # for t in tokens:
763
- # try:
764
- # ids.update(tokenizer.encode(t, add_special_tokens=False))
765
- # except:
766
- # pass
767
-
768
- # return torch.tensor(sorted(ids), dtype=torch.long)
769
-
770
-
771
- # # =========================================================
772
- # # 📦 PER-DB CACHE
773
- # # =========================================================
774
- # @dataclass
775
- # class _PerDbTokenSets:
776
- # fp: str
777
- # table_trie: _TrieNode
778
- # column_trie: _TrieNode
779
- # allow_always: torch.Tensor
780
-
781
-
782
- # _DB_CACHE: Dict[str, _PerDbTokenSets] = {}
783
- # _DB_LOCK = threading.Lock()
784
-
785
-
786
- # def _per_db_tokensets(tokenizer, graph: ConstraintGraph) -> _PerDbTokenSets:
787
- # with _DB_LOCK:
788
- # cached = _DB_CACHE.get(graph.db_path)
789
- # if cached and cached.fp == graph.fingerprint:
790
- # return cached
791
-
792
- # obj = _PerDbTokenSets(
793
- # fp=graph.fingerprint,
794
- # table_trie=_build_trie(tokenizer, graph.tables),
795
- # column_trie=_build_trie(tokenizer, graph.all_columns),
796
- # allow_always=_allow_always_token_ids(tokenizer),
797
- # )
798
-
799
- # with _DB_LOCK:
800
- # _DB_CACHE[graph.db_path] = obj
801
-
802
- # return obj
803
-
804
-
805
- # # =========================================================
806
- # # 🚀 MAIN LOGITS PROCESSOR
807
- # # =========================================================
808
- # class BatchSchemaConstrainedLogitsProcessor(LogitsProcessor):
809
- # def __init__(self, tokenizer, db_paths: Sequence[str], max_prefix_tokens: int = 48):
810
- # self.tokenizer = tokenizer
811
- # self.db_paths = list(db_paths)
812
- # self.max_prefix_tokens = max_prefix_tokens
813
-
814
- # self._graphs = [build_constraint_graph(p) for p in db_paths]
815
- # self._token_sets = [_per_db_tokensets(tokenizer, g) for g in self._graphs]
816
-
817
- # # 📊 Metrics (IMPORTANT FOR REPORT)
818
- # self.total_steps = 0
819
- # self.constrained_steps = 0
820
-
821
- # def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor):
822
- # batch = input_ids.size(0)
823
-
824
- # for i in range(batch):
825
- # self.total_steps += 1
826
-
827
- # tail_ids = input_ids[i, -self.max_prefix_tokens:].tolist()
828
- # prefix_text = self.tokenizer.decode(tail_ids, skip_special_tokens=True)
829
-
830
- # expected = _infer_expected_identifier(prefix_text)
831
- # if expected is None:
832
- # continue
833
-
834
- # self.constrained_steps += 1
835
-
836
- # # =========================
837
- # # SELECT TRIE
838
- # # =========================
839
- # if expected == "table":
840
- # trie = self._token_sets[i].table_trie
841
- # else:
842
- # trie = self._token_sets[i].column_trie
843
-
844
- # # =========================
845
- # # PARTIAL TOKEN MATCH
846
- # # =========================
847
- # match = re.search(r"([A-Za-z_][A-Za-z0-9_]*)$", prefix_text)
848
- # partial = match.group(1) if match else ""
849
-
850
- # try:
851
- # prefix_ids = _encode_identifier(self.tokenizer, partial) if partial else []
852
- # except:
853
- # continue
854
-
855
- # node = trie.walk(prefix_ids)
856
- # if node is None or node.terminal:
857
- # continue
858
-
859
- # allowed_next = list(node.children.keys())
860
- # if not allowed_next:
861
- # continue
862
-
863
- # allowed_next = torch.tensor(allowed_next, device=scores.device)
864
- # allow_always = self._token_sets[i].allow_always.to(scores.device)
865
-
866
- # keep = torch.cat([allowed_next, allow_always])
867
-
868
- # kept_scores = scores[i, keep].clone()
869
- # scores[i, :] = -float("inf")
870
- # scores[i, keep] = kept_scores
871
-
872
- # return scores
873
-
874
- # # =========================================================
875
- # # 📊 METRICS FOR REPORT
876
- # # =========================================================
877
- # def get_constraint_stats(self):
878
- # if self.total_steps == 0:
879
- # return 0
880
- # return self.constrained_steps / self.total_steps
881
-
882
-
883
- # # =========================================================
884
- # # 🔁 BACKWARD COMPATIBILITY
885
- # # =========================================================
886
- # class SchemaConstraintGraph:
887
- # def __init__(self, db_path: str):
888
- # self._graph = build_constraint_graph(db_path)
889
- # self.tables = sorted(self._graph.tables)
890
- # self.columns = sorted(self._graph.all_columns)
891
-
892
-
893
- # class SchemaConstrainedLogitsProcessor(LogitsProcessor):
894
- # def __init__(self, tokenizer, schema_graph: SchemaConstraintGraph):
895
- # self.proc = BatchSchemaConstrainedLogitsProcessor(
896
- # tokenizer, [schema_graph._graph.db_path]
897
- # )
898
-
899
- # def __call__(self, input_ids, scores):
900
- # return self.proc(input_ids, scores)
901
-
902
-
903
-
904
-
905
-
906
-
907
-
908
-
909
- # ********* after task 3
910
-
911
- import re
912
- import threading
913
- from functools import lru_cache
914
-
915
- import torch
916
- from transformers import LogitsProcessor
917
-
918
- from src.schema_utils import get_constraint_graph
919
-
920
-
921
- _TOKEN_CACHE_LOCK = threading.Lock()
922
- _TOKEN_ID_CACHE = {} # (id(tokenizer), db_path) -> (allowed_ids_tensor, always_allow_ids_tensor)
923
-
924
-
925
- def _encode_variants(tokenizer, text: str) -> list[int]:
926
- ids: list[int] = []
927
- for variant in (text, " " + text):
928
- try:
929
- ids.extend(tokenizer.encode(variant, add_special_tokens=False))
930
- except Exception:
931
- continue
932
- # de-dup while keeping order
933
- seen = set()
934
- out = []
935
- for i in ids:
936
- if int(i) not in seen:
937
- seen.add(int(i))
938
- out.append(int(i))
939
- return out
940
-
941
-
942
- def _always_allow_ids(tokenizer) -> list[int]:
943
- """
944
- Tokens we should never block, otherwise decoding can get stuck or generate garbage:
945
- - EOS/PAD
946
- - punctuation/operators needed for SQL formatting
947
- - digits/quotes
948
- """
949
- ids: list[int] = []
950
- for special in [getattr(tokenizer, "eos_token_id", None), getattr(tokenizer, "pad_token_id", None)]:
951
- if special is not None:
952
- ids.append(int(special))
953
-
954
- # Common SQL punctuation/operators
955
- pieces = [
956
- " ", "\n", "\t",
957
- ",", ".", "(", ")", ";",
958
- "=", "!=", "<>", "<", ">", "<=", ">=",
959
- "*", "+", "-", "/", "%",
960
- "'", '"',
961
- ]
962
- for p in pieces:
963
- ids.extend(_encode_variants(tokenizer, p))
964
-
965
- # digits
966
- for d in "0123456789":
967
- ids.extend(_encode_variants(tokenizer, d))
968
-
969
- seen = set()
970
- out = []
971
- for i in ids:
972
- if int(i) not in seen:
973
- seen.add(int(i))
974
- out.append(int(i))
975
- return out
976
-
977
-
978
- def _infer_expected_identifier_tail(tail_text: str):
979
- """
980
- Returns ("table"|"column", partial_or_empty) if the tail looks like it's currently
981
- emitting a table/column identifier. Otherwise returns None.
982
- """
983
- t = re.sub(r"\s+", " ", (tail_text or "")).lower()
984
-
985
- m = re.search(r"(?:from|join)\s+([a-z_][a-z0-9_]*)?$", t)
986
- if m:
987
- partial = m.group(1) or ""
988
- # ensure we are actually after keyword (not elsewhere)
989
- if re.search(r"(?:from|join)\s*$", t) or partial:
990
- return "table", partial
991
-
992
- m = re.search(
993
- r"(?:select|where|on|group by|order by|having)\s+([a-z_][a-z0-9_]*(?:\.[a-z_][a-z0-9_]*)?)?$",
994
- t,
995
- )
996
- if m:
997
- partial = m.group(1) or ""
998
- if re.search(r"(?:select|where|on|group by|order by|having)\s*$", t) or partial:
999
- return "column", partial
1000
-
1001
- return None
1002
-
1003
-
1004
- class SchemaConstrainedLogitsProcessor(LogitsProcessor):
1005
- def __init__(self, tokenizer, db_path):
1006
- self.tokenizer = tokenizer
1007
-
1008
- graph = get_constraint_graph(db_path)
1009
-
1010
- key = (id(tokenizer), str(db_path))
1011
- with _TOKEN_CACHE_LOCK:
1012
- cached = _TOKEN_ID_CACHE.get(key)
1013
- if cached is None:
1014
- allowed_tokens = set(graph.get("tables", set())) | set(graph.get("columns", set()))
1015
-
1016
- sql_keywords = {
1017
- "select", "from", "where", "join", "on",
1018
- "group", "by", "order", "limit", "having",
1019
- "and", "or", "desc", "asc",
1020
- "count", "avg", "min", "max", "sum",
1021
- "distinct", "as", "in", "like", "between",
1022
- "is", "null",
1023
- }
1024
- allowed_tokens |= sql_keywords
1025
-
1026
- allowed_ids: list[int] = []
1027
- for tok in sorted(allowed_tokens):
1028
- allowed_ids.extend(_encode_variants(tokenizer, tok))
1029
- always_ids = _always_allow_ids(tokenizer)
1030
-
1031
- allowed_ids_t = torch.tensor(sorted(set(allowed_ids)), dtype=torch.long)
1032
- always_ids_t = torch.tensor(sorted(set(always_ids)), dtype=torch.long)
1033
- cached = (allowed_ids_t, always_ids_t)
1034
- with _TOKEN_CACHE_LOCK:
1035
- _TOKEN_ID_CACHE[key] = cached
1036
-
1037
- self._allowed_ids_t, self._always_ids_t = cached
1038
-
1039
- def __call__(self, input_ids, scores):
1040
- # Decode only a tail window for speed (beam search calls this a lot).
1041
- try:
1042
- tail_ids = input_ids[0][-128:]
1043
- except Exception:
1044
- tail_ids = input_ids[0]
1045
- tail = self.tokenizer.decode(tail_ids, skip_special_tokens=True)
1046
-
1047
- inferred = _infer_expected_identifier_tail(tail)
1048
- if inferred is None:
1049
- return scores
1050
-
1051
- keep = torch.cat([self._allowed_ids_t.to(scores.device), self._always_ids_t.to(scores.device)])
1052
- if keep.numel() == 0:
1053
- return scores
1054
-
1055
- kept_scores = scores[:, keep].clone()
1056
- scores[:] = -float("inf")
1057
- scores[:, keep] = kept_scores
1058
- return scores
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/constrained_decoding_sample.py DELETED
@@ -1,516 +0,0 @@
1
- # from __future__ import annotations
2
-
3
- # import re
4
- # import threading
5
- # from dataclasses import dataclass
6
- # from typing import Dict, Iterable, List, Optional, Sequence, Set
7
-
8
- # import torch
9
- # from transformers.generation.logits_process import LogitsProcessor
10
-
11
- # from schema_constraints import ConstraintGraph, build_constraint_graph
12
-
13
-
14
- # def _infer_expected_identifier(prefix_text: str) -> Optional[str]:
15
- # s = re.sub(r"\s+", " ", prefix_text.lower())
16
- # last_from = s.rfind(" from ")
17
- # last_join = s.rfind(" join ")
18
- # last_select = s.rfind(" select ")
19
- # last_where = s.rfind(" where ")
20
- # last_on = s.rfind(" on ")
21
- # last_group = s.rfind(" group by ")
22
- # last_order = s.rfind(" order by ")
23
- # last_having = s.rfind(" having ")
24
-
25
- # last_table_kw = max(last_from, last_join)
26
- # last_col_kw = max(last_select, last_where, last_on, last_group, last_order, last_having)
27
-
28
- # if last_table_kw < 0 and last_col_kw < 0:
29
- # return None
30
- # if last_table_kw > last_col_kw:
31
- # return "table"
32
- # if last_col_kw > last_table_kw:
33
- # return "column"
34
- # return None
35
-
36
-
37
- # class _TrieNode:
38
- # __slots__ = ("children", "terminal")
39
-
40
- # def __init__(self) -> None:
41
- # self.children: Dict[int, _TrieNode] = {}
42
- # self.terminal: bool = False
43
-
44
- # def insert(self, token_ids: Sequence[int]) -> None:
45
- # node: _TrieNode = self
46
- # for tid in token_ids:
47
- # tid_i = int(tid)
48
- # nxt = node.children.get(tid_i)
49
- # if nxt is None:
50
- # nxt = _TrieNode()
51
- # node.children[tid_i] = nxt
52
- # node = nxt
53
- # node.terminal = True
54
-
55
- # def walk(self, prefix: Sequence[int]) -> Optional["_TrieNode"]:
56
- # node: _TrieNode = self
57
- # for tid in prefix:
58
- # node = node.children.get(int(tid)) # type: ignore[assignment]
59
- # if node is None:
60
- # return None
61
- # return node
62
-
63
-
64
- # def _encode_identifier(tokenizer, name: str) -> List[int]:
65
- # # Leading space encourages word-start markers (e.g. "Ġ" in RoBERTa BPE).
66
- # return tokenizer.encode(" " + name, add_special_tokens=False)
67
-
68
-
69
- # def _build_trie(tokenizer, names: Iterable[str]) -> _TrieNode:
70
- # trie = _TrieNode()
71
- # for n in names:
72
- # if not n:
73
- # continue
74
- # try:
75
- # ids = _encode_identifier(tokenizer, n)
76
- # except Exception:
77
- # continue
78
- # if ids:
79
- # trie.insert(ids)
80
- # return trie
81
-
82
-
83
- # def _allow_always_token_ids(tokenizer) -> torch.Tensor:
84
- # # Allow common delimiters so the model can end an identifier.
85
- # toks = [",", ")", "(", "\n", ".", ";"]
86
- # ids: Set[int] = set()
87
- # for t in toks:
88
- # try:
89
- # for tid in tokenizer.encode(t, add_special_tokens=False):
90
- # ids.add(int(tid))
91
- # except Exception:
92
- # continue
93
- # return torch.tensor(sorted(ids), dtype=torch.long)
94
-
95
-
96
- # @dataclass
97
- # class _PerDbTokenSets:
98
- # fp: str
99
- # table_trie: _TrieNode
100
- # column_trie: _TrieNode
101
- # allow_always: torch.Tensor
102
-
103
-
104
- # _DB_TOKENSET_LOCK = threading.Lock()
105
- # _DB_TOKENSETS: Dict[str, _PerDbTokenSets] = {}
106
-
107
-
108
- # def _per_db_tokensets(tokenizer, graph: ConstraintGraph) -> _PerDbTokenSets:
109
- # with _DB_TOKENSET_LOCK:
110
- # cached = _DB_TOKENSETS.get(graph.db_path)
111
- # if cached is not None and cached.fp == graph.fingerprint:
112
- # return cached
113
-
114
- # out = _PerDbTokenSets(
115
- # fp=graph.fingerprint,
116
- # table_trie=_build_trie(tokenizer, graph.tables),
117
- # column_trie=_build_trie(tokenizer, graph.all_columns),
118
- # allow_always=_allow_always_token_ids(tokenizer),
119
- # )
120
- # with _DB_TOKENSET_LOCK:
121
- # _DB_TOKENSETS[graph.db_path] = out
122
- # return out
123
-
124
-
125
- # class BatchSchemaConstrainedLogitsProcessor(LogitsProcessor):
126
- # """
127
- # Schema-aware constrained decoding per item in the generation batch.
128
- # Uses a tokenizer-based trie so multi-token identifiers can be constrained.
129
- # """
130
-
131
- # def __init__(self, tokenizer, db_paths: Sequence[str], *, max_prefix_tokens: int = 48):
132
- # self.tokenizer = tokenizer
133
- # self.db_paths = list(db_paths)
134
- # self.max_prefix_tokens = int(max_prefix_tokens)
135
-
136
- # self._graphs = [build_constraint_graph(p) for p in self.db_paths]
137
- # self._token_sets = [_per_db_tokensets(tokenizer, g) for g in self._graphs]
138
-
139
- # def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
140
- # if input_ids.dim() != 2 or scores.dim() != 2:
141
- # return scores
142
-
143
- # batch = input_ids.size(0)
144
- # if batch != len(self._graphs):
145
- # return scores
146
-
147
- # for i in range(batch):
148
- # tail_ids = input_ids[i, -self.max_prefix_tokens :].tolist()
149
- # prefix_text = self.tokenizer.decode(tail_ids, skip_special_tokens=True)
150
- # expected = _infer_expected_identifier(prefix_text)
151
- # if expected is None:
152
- # continue
153
-
154
- # if expected == "table":
155
- # m = re.search(r"(?:from|join)\s+([A-Za-z_][A-Za-z0-9_]*)$", prefix_text, flags=re.I)
156
- # partial = m.group(1) if m else None
157
- # if partial is None and not re.search(r"(?:from|join)\s*$", prefix_text, flags=re.I):
158
- # continue
159
- # trie = self._token_sets[i].table_trie
160
- # else:
161
- # m = re.search(
162
- # r"(?:select|where|on|group by|order by|having)\s+([A-Za-z_][A-Za-z0-9_]*(?:\.[A-Za-z_][A-Za-z0-9_]*)?)$",
163
- # prefix_text,
164
- # flags=re.I,
165
- # )
166
- # partial = m.group(1) if m else None
167
- # if partial is None and not re.search(
168
- # r"(?:select|where|on|group by|order by|having)\s*$", prefix_text, flags=re.I
169
- # ):
170
- # continue
171
- # trie = self._token_sets[i].column_trie
172
-
173
- # if not partial:
174
- # prefix_token_ids: List[int] = []
175
- # else:
176
- # try:
177
- # prefix_token_ids = _encode_identifier(self.tokenizer, partial)
178
- # except Exception:
179
- # continue
180
-
181
- # node = trie.walk(prefix_token_ids)
182
- # if node is None or node.terminal:
183
- # continue
184
-
185
- # allowed_next = sorted(node.children.keys())
186
- # if not allowed_next:
187
- # continue
188
-
189
- # allowed_next_t = torch.tensor(allowed_next, dtype=torch.long, device=scores.device)
190
- # allow_always = self._token_sets[i].allow_always.to(scores.device)
191
- # keep = torch.cat([allowed_next_t, allow_always]) if allow_always.numel() else allowed_next_t
192
-
193
- # kept_scores = scores[i, keep].clone()
194
- # scores[i, :] = -float("inf")
195
- # scores[i, keep] = kept_scores
196
-
197
- # return scores
198
-
199
-
200
- # # Backwards-compatible names used elsewhere in the repo.
201
- # class SchemaConstraintGraph:
202
- # def __init__(self, db_path: str):
203
- # self._graph = build_constraint_graph(db_path)
204
- # self.tables = sorted(self._graph.tables)
205
- # self.columns = sorted(self._graph.all_columns)
206
-
207
-
208
- # class SchemaConstrainedLogitsProcessor(LogitsProcessor):
209
- # def __init__(self, tokenizer, schema_graph: SchemaConstraintGraph):
210
- # self._proc = BatchSchemaConstrainedLogitsProcessor(tokenizer, [schema_graph._graph.db_path])
211
-
212
- # def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
213
- # return self._proc(input_ids, scores)
214
-
215
-
216
-
217
-
218
- # from __future__ import annotations
219
-
220
- # import re
221
- # import threading
222
- # from dataclasses import dataclass
223
- # from typing import Dict, Iterable, List, Optional, Sequence, Set
224
-
225
- # import torch
226
- # from transformers.generation.logits_process import LogitsProcessor
227
-
228
- # from schema_constraints import ConstraintGraph, build_constraint_graph
229
-
230
-
231
- # # =========================================================
232
- # # 🔍 IDENTIFIER TYPE DETECTION
233
- # # =========================================================
234
- # def _infer_expected_identifier(prefix_text: str) -> Optional[str]:
235
- # s = re.sub(r"\s+", " ", prefix_text.lower())
236
-
237
- # last_from = s.rfind(" from ")
238
- # last_join = s.rfind(" join ")
239
- # last_select = s.rfind(" select ")
240
- # last_where = s.rfind(" where ")
241
- # last_on = s.rfind(" on ")
242
- # last_group = s.rfind(" group by ")
243
- # last_order = s.rfind(" order by ")
244
- # last_having = s.rfind(" having ")
245
-
246
- # last_table_kw = max(last_from, last_join)
247
- # last_col_kw = max(last_select, last_where, last_on, last_group, last_order, last_having)
248
-
249
- # if last_table_kw < 0 and last_col_kw < 0:
250
- # return None
251
- # if last_table_kw > last_col_kw:
252
- # return "table"
253
- # if last_col_kw > last_table_kw:
254
- # return "column"
255
- # return None
256
-
257
-
258
- # # =========================================================
259
- # # 🌳 TRIE STRUCTURE
260
- # # =========================================================
261
- # class _TrieNode:
262
- # __slots__ = ("children", "terminal")
263
-
264
- # def __init__(self) -> None:
265
- # self.children: Dict[int, _TrieNode] = {}
266
- # self.terminal: bool = False
267
-
268
- # def insert(self, token_ids: Sequence[int]) -> None:
269
- # node = self
270
- # for tid in token_ids:
271
- # tid = int(tid)
272
- # if tid not in node.children:
273
- # node.children[tid] = _TrieNode()
274
- # node = node.children[tid]
275
- # node.terminal = True
276
-
277
- # def walk(self, prefix: Sequence[int]) -> Optional["_TrieNode"]:
278
- # node = self
279
- # for tid in prefix:
280
- # node = node.children.get(int(tid))
281
- # if node is None:
282
- # return None
283
- # return node
284
-
285
-
286
- # # =========================================================
287
- # # 🔤 TOKEN ENCODING
288
- # # =========================================================
289
- # def _encode_identifier(tokenizer, name: str) -> List[int]:
290
- # return tokenizer.encode(" " + name, add_special_tokens=False)
291
-
292
-
293
- # def _build_trie(tokenizer, names: Iterable[str]) -> _TrieNode:
294
- # trie = _TrieNode()
295
- # for name in names:
296
- # try:
297
- # ids = _encode_identifier(tokenizer, name)
298
- # if ids:
299
- # trie.insert(ids)
300
- # except Exception:
301
- # continue
302
- # return trie
303
-
304
-
305
- # def _allow_always_token_ids(tokenizer) -> torch.Tensor:
306
- # tokens = [",", ")", "(", ".", ";", "\n"]
307
- # ids: Set[int] = set()
308
-
309
- # for t in tokens:
310
- # try:
311
- # ids.update(tokenizer.encode(t, add_special_tokens=False))
312
- # except:
313
- # pass
314
-
315
- # return torch.tensor(sorted(ids), dtype=torch.long)
316
-
317
-
318
- # # =========================================================
319
- # # 📦 PER-DB CACHE
320
- # # =========================================================
321
- # @dataclass
322
- # class _PerDbTokenSets:
323
- # fp: str
324
- # table_trie: _TrieNode
325
- # column_trie: _TrieNode
326
- # allow_always: torch.Tensor
327
-
328
-
329
- # _DB_CACHE: Dict[str, _PerDbTokenSets] = {}
330
- # _DB_LOCK = threading.Lock()
331
-
332
-
333
- # def _per_db_tokensets(tokenizer, graph: ConstraintGraph) -> _PerDbTokenSets:
334
- # with _DB_LOCK:
335
- # cached = _DB_CACHE.get(graph.db_path)
336
- # if cached and cached.fp == graph.fingerprint:
337
- # return cached
338
-
339
- # obj = _PerDbTokenSets(
340
- # fp=graph.fingerprint,
341
- # table_trie=_build_trie(tokenizer, graph.tables),
342
- # column_trie=_build_trie(tokenizer, graph.all_columns),
343
- # allow_always=_allow_always_token_ids(tokenizer),
344
- # )
345
-
346
- # with _DB_LOCK:
347
- # _DB_CACHE[graph.db_path] = obj
348
-
349
- # return obj
350
-
351
-
352
- # # =========================================================
353
- # # 🚀 MAIN LOGITS PROCESSOR
354
- # # =========================================================
355
- # class BatchSchemaConstrainedLogitsProcessor(LogitsProcessor):
356
- # def __init__(self, tokenizer, db_paths: Sequence[str], max_prefix_tokens: int = 48):
357
- # self.tokenizer = tokenizer
358
- # self.db_paths = list(db_paths)
359
- # self.max_prefix_tokens = max_prefix_tokens
360
-
361
- # self._graphs = [build_constraint_graph(p) for p in db_paths]
362
- # self._token_sets = [_per_db_tokensets(tokenizer, g) for g in self._graphs]
363
-
364
- # # 📊 Metrics (IMPORTANT FOR REPORT)
365
- # self.total_steps = 0
366
- # self.constrained_steps = 0
367
-
368
- # def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor):
369
- # batch = input_ids.size(0)
370
-
371
- # for i in range(batch):
372
- # self.total_steps += 1
373
-
374
- # tail_ids = input_ids[i, -self.max_prefix_tokens:].tolist()
375
- # prefix_text = self.tokenizer.decode(tail_ids, skip_special_tokens=True)
376
-
377
- # expected = _infer_expected_identifier(prefix_text)
378
- # if expected is None:
379
- # continue
380
-
381
- # self.constrained_steps += 1
382
-
383
- # # =========================
384
- # # SELECT TRIE
385
- # # =========================
386
- # if expected == "table":
387
- # trie = self._token_sets[i].table_trie
388
- # else:
389
- # trie = self._token_sets[i].column_trie
390
-
391
- # # =========================
392
- # # PARTIAL TOKEN MATCH
393
- # # =========================
394
- # match = re.search(r"([A-Za-z_][A-Za-z0-9_]*)$", prefix_text)
395
- # partial = match.group(1) if match else ""
396
-
397
- # try:
398
- # prefix_ids = _encode_identifier(self.tokenizer, partial) if partial else []
399
- # except:
400
- # continue
401
-
402
- # node = trie.walk(prefix_ids)
403
- # if node is None or node.terminal:
404
- # continue
405
-
406
- # allowed_next = list(node.children.keys())
407
- # if not allowed_next:
408
- # continue
409
-
410
- # allowed_next = torch.tensor(allowed_next, device=scores.device)
411
- # allow_always = self._token_sets[i].allow_always.to(scores.device)
412
-
413
- # keep = torch.cat([allowed_next, allow_always])
414
-
415
- # kept_scores = scores[i, keep].clone()
416
- # scores[i, :] = -float("inf")
417
- # scores[i, keep] = kept_scores
418
-
419
- # return scores
420
-
421
- # # =========================================================
422
- # # 📊 METRICS FOR REPORT
423
- # # =========================================================
424
- # def get_constraint_stats(self):
425
- # if self.total_steps == 0:
426
- # return 0
427
- # return self.constrained_steps / self.total_steps
428
-
429
-
430
- # # =========================================================
431
- # # 🔁 BACKWARD COMPATIBILITY
432
- # # =========================================================
433
- # class SchemaConstraintGraph:
434
- # def __init__(self, db_path: str):
435
- # self._graph = build_constraint_graph(db_path)
436
- # self.tables = sorted(self._graph.tables)
437
- # self.columns = sorted(self._graph.all_columns)
438
-
439
-
440
- # class SchemaConstrainedLogitsProcessor(LogitsProcessor):
441
- # def __init__(self, tokenizer, schema_graph: SchemaConstraintGraph):
442
- # self.proc = BatchSchemaConstrainedLogitsProcessor(
443
- # tokenizer, [schema_graph._graph.db_path]
444
- # )
445
-
446
- # def __call__(self, input_ids, scores):
447
- # return self.proc(input_ids, scores)
448
-
449
-
450
-
451
-
452
-
453
-
454
-
455
-
456
- # ********* after task 3
457
-
458
- import re
459
- import torch
460
- from transformers import LogitsProcessor
461
- from src.schema_utils import get_constraint_graph
462
-
463
-
464
- def _infer_expected_identifier(prefix_text: str):
465
- s = prefix_text.lower()
466
-
467
- if " from " in s or " join " in s:
468
- return "table"
469
- if any(k in s for k in ["select", "where", "on", "group by", "order by"]):
470
- return "column"
471
-
472
- return None
473
-
474
-
475
- class SchemaConstrainedLogitsProcessor(LogitsProcessor):
476
- def __init__(self, tokenizer, db_path):
477
- self.tokenizer = tokenizer
478
-
479
- graph = get_constraint_graph(db_path)
480
-
481
- self.allowed_tokens = set(graph["tables"]) | set(graph["columns"])
482
-
483
- self.sql_keywords = {
484
- "select", "from", "where", "join", "on",
485
- "group", "by", "order", "limit",
486
- "and", "or", "desc", "asc",
487
- "count", "avg", "min", "max", "sum", "*"
488
- }
489
-
490
- self.allowed_tokens |= self.sql_keywords
491
-
492
- self.allowed_token_ids = set()
493
- for token in self.allowed_tokens:
494
- ids = tokenizer.encode(token, add_special_tokens=False)
495
- for i in ids:
496
- self.allowed_token_ids.add(i)
497
-
498
- def __call__(self, input_ids, scores):
499
-
500
- prefix = self.tokenizer.decode(input_ids[0], skip_special_tokens=True)
501
-
502
- # 🔥 SOFT CONSTRAINT (FIX)
503
- if len(prefix.strip()) < 10:
504
- return scores
505
-
506
- expected = _infer_expected_identifier(prefix)
507
-
508
- if expected not in ["table", "column"]:
509
- return scores
510
-
511
- mask = torch.full_like(scores, float("-inf"))
512
-
513
- for token_id in self.allowed_token_ids:
514
- mask[:, token_id] = scores[:, token_id]
515
-
516
- return mask
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/eval_rl_fixed.py CHANGED
@@ -1,756 +1,466 @@
1
  # import json
2
- # import subprocess
3
- # import sys
4
- # import argparse
5
- # import random
6
  # import sqlite3
7
- # import time
8
- # import re
9
- # import os
10
  # from pathlib import Path
11
-
12
  # import torch
13
  # from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
14
  # from peft import PeftModel
15
 
16
- # from prompting import encode_prompt
 
 
 
 
17
 
18
- # # -------------------------------
19
- # # NORMALIZATION
20
- # # -------------------------------
21
- # def normalize_sql(sql):
22
- # sql = sql.replace('"', "'")
23
- # sql = re.sub(r"\s+", " ", sql)
24
- # return sql.strip().lower().rstrip(";")
25
-
26
-
27
- # # -------------------------------
28
- # # 🔥 SAFE RESULT NORMALIZATION (FIX)
29
- # # -------------------------------
30
- # def normalize_result(res):
31
- # try:
32
- # return sorted([str(r) for r in res])
33
- # except:
34
- # return []
35
 
 
 
 
 
36
 
37
- # # -------------------------------
38
- # # EXECUTION CHECK (FIXED)
39
- # # -------------------------------
40
- # def check_execution(pred_sql, gold_sql, db_path):
41
- # try:
42
- # conn = sqlite3.connect(db_path)
43
- # conn.text_factory = lambda b: b.decode(errors='ignore')
44
 
45
- # start_time = time.monotonic()
 
 
 
 
46
 
47
- # def timeout_handler():
48
- # return 1 if (time.monotonic() - start_time) > 2.0 else 0
49
 
50
- # conn.set_progress_handler(timeout_handler, 10000)
51
 
52
- # cursor = conn.cursor()
 
 
 
 
53
 
54
- # cursor.execute(pred_sql)
55
- # pred_res = cursor.fetchall()
56
 
57
- # cursor.execute(gold_sql)
58
- # gold_res = cursor.fetchall()
59
 
60
  # conn.close()
61
-
62
- # # 🔥 FIXED COMPARISON
63
- # return normalize_result(pred_res) == normalize_result(gold_res)
64
 
65
  # except Exception:
66
  # return False
67
 
68
 
69
- # # -------------------------------
70
- # # SPIDER PARSER
71
- # # -------------------------------
72
- # def _parse_spider_accuracy(stdout: str, metric_type: str):
73
- # for line in stdout.splitlines():
74
- # if metric_type == "exec" and line.strip().startswith("execution"):
75
- # try:
76
- # return float(line.split()[-1])
77
- # except:
78
- # pass
79
- # elif metric_type == "match" and line.strip().startswith("exact"):
80
- # try:
81
- # return float(line.split()[-1])
82
- # except:
83
- # pass
84
- # return None
85
-
86
-
87
- # # -------------------------------
88
- # # MAIN
89
- # # -------------------------------
90
  # def main():
91
  # parser = argparse.ArgumentParser()
92
  # parser.add_argument("--adapter", type=str, required=True)
93
- # parser.add_argument("--num_samples", type=int, default=700)
94
- # parser.add_argument("--shuffle_dev", action="store_true")
95
- # parser.add_argument("--shuffle_seed", type=int, default=42)
96
  # args = parser.parse_args()
97
 
98
  # project_root = Path(__file__).resolve().parents[1]
99
- # adapter_dir = project_root / args.adapter
100
 
101
- # db_root = project_root / "data" / "database"
102
- # table_json = project_root / "data" / "tables.json"
103
  # dev_json = project_root / "data" / "dev.json"
 
104
 
105
- # pred_path = project_root / "temp_predictions.txt"
106
- # temp_gold_path = project_root / "temp_gold.sql"
107
 
108
- # if not adapter_dir.exists():
109
- # raise FileNotFoundError(f"Missing adapter dir: {adapter_dir}")
 
 
 
 
110
 
111
- # device = "mps" if torch.backends.mps.is_available() else (
112
- # "cuda" if torch.cuda.is_available() else "cpu"
113
- # )
114
- # print(f"Using device: {device}")
115
 
116
- # BASE_MODEL = "Salesforce/codet5-base"
117
- # tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
118
 
119
- # if tokenizer.pad_token is None:
120
- # tokenizer.pad_token = tokenizer.eos_token
121
 
122
- # print(f"\n📦 Loading Model: {args.adapter}")
 
 
 
123
 
124
- # base = AutoModelForSeq2SeqLM.from_pretrained(BASE_MODEL).to(device)
 
125
 
126
- # adapter_for_peft = os.path.relpath(adapter_dir, project_root)
127
 
128
- # model = PeftModel.from_pretrained(
129
- # base,
130
- # adapter_for_peft,
131
- # local_files_only=True
132
- # ).to(device)
133
-
134
- # model = model.merge_and_unload()
135
- # model.eval()
136
-
137
- # # -------------------------------
138
- # # LOAD DATA
139
- # # -------------------------------
140
- # with dev_json.open() as f:
141
- # dev = json.load(f)
142
-
143
- # if args.shuffle_dev:
144
- # rng = random.Random(args.shuffle_seed)
145
- # rng.shuffle(dev)
146
-
147
- # dev = dev[: args.num_samples]
148
- # total = len(dev)
149
-
150
- # gen_kwargs = dict(
151
- # max_new_tokens=160,
152
- # num_beams=8,
153
- # length_penalty=0.8,
154
- # do_sample=False,
155
- # early_stopping=True,
156
- # pad_token_id=tokenizer.pad_token_id,
157
- # eos_token_id=tokenizer.eos_token_id,
158
- # )
159
-
160
- # print(f"\n🚀 Evaluating {total} samples...\n")
161
-
162
- # em_correct = 0
163
- # ex_correct = 0
164
-
165
- # with pred_path.open("w") as out_pred, temp_gold_path.open("w") as out_gold, torch.no_grad():
166
- # for i, ex in enumerate(dev, start=1):
167
-
168
- # db_id = ex["db_id"]
169
- # question = ex["question"]
170
- # gold_query = ex["query"]
171
- # db_path = db_root / db_id / f"{db_id}.sqlite"
172
-
173
- # # -------------------------------
174
- # # GENERATE SQL
175
- # # -------------------------------
176
- # input_ids = encode_prompt(
177
- # tokenizer,
178
- # question,
179
- # db_id,
180
- # device=device,
181
- # max_input_tokens=512
182
- # )
183
-
184
- # input_ids = input_ids.unsqueeze(0).to(device)
185
- # attention_mask = (input_ids != tokenizer.pad_token_id).long().to(device)
186
 
 
187
  # outputs = model.generate(
188
- # input_ids=input_ids,
189
- # attention_mask=attention_mask,
190
- # **gen_kwargs
 
191
  # )
192
 
193
- # pred_sql = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
194
-
195
- # # -------------------------------
196
- # # SAVE FOR SPIDER EVAL
197
- # # -------------------------------
198
- # out_pred.write(f"{pred_sql}\n")
199
- # out_gold.write(f"{gold_query}\t{db_id}\n")
200
-
201
- # # -------------------------------
202
- # # LIVE METRICS
203
- # # -------------------------------
204
- # if normalize_sql(pred_sql) == normalize_sql(gold_query):
205
- # em_correct += 1
206
-
207
- # if check_execution(pred_sql, gold_query, db_path):
208
- # ex_correct += 1
209
-
210
- # if i % 20 == 0 or i == total:
211
- # print(
212
- # f"Progress: {i}/{total} | "
213
- # f"EM: {(em_correct/i)*100:.2f}% | "
214
- # f"EX: {(ex_correct/i)*100:.2f}%"
215
- # )
216
 
217
- # print("\n🚀 Running Official Spider Evaluation...\n")
 
218
 
219
- # eval_script = project_root / "spider_eval" / "evaluation.py"
220
 
221
- # # EXACT MATCH
222
- # cmd_match = [
223
- # sys.executable, str(eval_script),
224
- # "--gold", str(temp_gold_path),
225
- # "--pred", str(pred_path),
226
- # "--etype", "match",
227
- # "--db", str(db_root),
228
- # "--table", str(table_json),
229
- # ]
230
 
231
- # proc_match = subprocess.run(cmd_match, capture_output=True, text=True)
232
- # exact_acc = _parse_spider_accuracy(proc_match.stdout, "match")
233
-
234
- # # EXECUTION
235
- # cmd_exec = [
236
- # sys.executable, str(eval_script),
237
- # "--gold", str(temp_gold_path),
238
- # "--pred", str(pred_path),
239
- # "--etype", "exec",
240
- # "--db", str(db_root),
241
- # "--table", str(table_json),
242
- # ]
243
-
244
- # proc_exec = subprocess.run(cmd_exec, capture_output=True, text=True)
245
- # exec_acc = _parse_spider_accuracy(proc_exec.stdout, "exec")
246
-
247
- # print("==========================================")
248
- # print(f"🎯 OFFICIAL SPIDER RESULTS FOR: {args.adapter}")
249
- # print("==========================================")
250
-
251
- # print(f"Exact Match Accuracy : {exact_acc*100:.2f}%" if exact_acc else "EM parsing failed")
252
- # print(f"Execution Accuracy : {exec_acc*100:.2f}%" if exec_acc else "EX parsing failed")
253
 
254
- # print("==========================================\n")
 
 
255
 
256
 
257
  # if __name__ == "__main__":
258
  # main()
259
 
260
 
261
-
262
-
263
-
264
-
265
  # import json
266
  # import sqlite3
267
- # import re
268
- # import time
269
- # import sys
270
  # import argparse
 
271
  # from pathlib import Path
 
 
 
272
 
273
- # # ==========================================
274
- # # PATH SETUP
275
- # # ==========================================
276
- # PROJECT_ROOT = Path(__file__).resolve().parents[1]
277
- # if str(PROJECT_ROOT) not in sys.path:
278
- # sys.path.insert(0, str(PROJECT_ROOT))
279
-
280
- # from src.text2sql_engine import get_engine
281
- # from src.sql_validator import validate_sql_schema
282
-
283
- # # ==========================================
284
- # # CONFIG
285
- # # ==========================================
286
- # DATA_PATH = PROJECT_ROOT / "data" / "dev.json"
287
- # DB_ROOT = PROJECT_ROOT / "data" / "database"
288
-
289
- # # ==========================================
290
- # # NORMALIZATION
291
- # # ==========================================
292
- # def normalize_sql(sql):
293
- # if not isinstance(sql, str):
294
- # return ""
295
- # sql = sql.replace('"', "'")
296
- # sql = re.sub(r"\s+", " ", sql)
297
- # return sql.strip().lower().rstrip(";")
298
-
299
- # def normalize_result(res):
300
- # try:
301
- # return sorted([tuple(map(str, r)) for r in res])
302
- # except:
303
- # return []
304
-
305
- # # ==========================================
306
- # # EXECUTION
307
- # # ==========================================
308
- # def execute_sql(db_path, sql):
309
- # try:
310
- # conn = sqlite3.connect(db_path)
311
-
312
- # start = time.time()
313
- # def timeout():
314
- # return 1 if (time.time() - start) > 2 else 0
315
-
316
- # conn.set_progress_handler(timeout, 10000)
317
-
318
- # cur = conn.cursor()
319
- # cur.execute(sql)
320
- # res = cur.fetchall()
321
-
322
- # conn.close()
323
- # return res
324
-
325
- # except Exception:
326
- # return None
327
-
328
- # # ==========================================
329
- # # EVALUATION
330
- # # ==========================================
331
- # def evaluate(engine, data, is_constrained=False, debug=False):
332
-
333
- # attempted = 0
334
- # total = 0
335
- # exact_match = 0
336
- # execution_match = 0
337
- # constraint_ok = 0
338
-
339
- # skipped_missing_db = 0
340
- # skipped_exception = 0
341
- # skipped_no_sql = 0
342
-
343
- # total_time = 0
344
 
345
- # for i, item in enumerate(data, 1):
 
 
 
346
 
347
- # question = item.get("question", "")
348
- # gold_sql = item.get("query", "")
349
- # db_id = item.get("db_id", "")
 
350
 
351
- # db_path = DB_ROOT / db_id / f"{db_id}.sqlite"
 
 
352
 
353
- # if not db_path.exists():
354
- # skipped_missing_db += 1
355
- # continue
 
 
356
 
357
- # try:
358
- # start = time.time()
359
- # result = engine.ask(question, db_id)
360
- # total_time += (time.time() - start)
361
- # except Exception:
362
- # skipped_exception += 1
363
- # continue
364
 
365
- # if not isinstance(result, dict):
366
- # continue
367
 
368
- # pred_sql = result.get("sql", "")
 
 
 
 
 
 
 
 
 
369
 
370
- # # DEBUG
371
- # if debug:
372
- # print(f"\nQ: {question}")
373
- # print(f"PRED: {pred_sql}")
374
- # print(f"GOLD: {gold_sql}")
375
 
376
- # if not pred_sql:
377
- # skipped_no_sql += 1
378
- # continue
379
 
380
- # attempted += 1
381
- # total += 1
382
 
383
- # # CONSTRAINT CHECK
384
- # if is_constrained:
385
- # try:
386
- # is_valid, _ = validate_sql_schema(pred_sql, str(db_path))
387
- # if is_valid:
388
- # constraint_ok += 1
389
- # except:
390
- # pass
391
 
392
- # # EXACT MATCH
393
- # if normalize_sql(pred_sql) == normalize_sql(gold_sql):
394
- # exact_match += 1
395
 
396
- # # EXECUTION MATCH
397
- # pred_res = execute_sql(str(db_path), pred_sql)
398
- # gold_res = execute_sql(str(db_path), gold_sql)
399
 
400
- # if pred_res is not None and gold_res is not None:
401
- # if normalize_result(pred_res) == normalize_result(gold_res):
402
- # execution_match += 1
 
 
 
403
 
404
- # # PROGRESS
405
- # if i % 10 == 0:
406
- # print(
407
- # f"[{i}/{len(data)}] "
408
- # f"EM: {exact_match/max(total,1):.3f} | "
409
- # f"EX: {execution_match/max(total,1):.3f} | "
410
- # f"Constraint: {(constraint_ok/max(total,1)) if is_constrained else 0:.3f}"
411
- # )
412
 
413
- # avg_latency = total_time / max(attempted, 1)
414
-
415
- # return {
416
- # "exact_match": exact_match / total if total > 0 else 0,
417
- # "execution_accuracy": execution_match / total if total > 0 else 0,
418
- # "constraint_rate": (constraint_ok / total if (is_constrained and total > 0) else 0),
419
- # "avg_latency": avg_latency,
420
- # "total": total,
421
- # "attempted": attempted,
422
- # "skipped_missing_db": skipped_missing_db,
423
- # "skipped_exception": skipped_exception,
424
- # "skipped_no_sql": skipped_no_sql,
425
- # }
426
-
427
- # # ==========================================
428
- # # MAIN
429
- # # ==========================================
430
- # if __name__ == "__main__":
431
 
432
- # ap = argparse.ArgumentParser()
433
- # ap.add_argument("--num-samples", type=int, default=100)
434
- # ap.add_argument("--adapter", type=str, default="checkpoints/best_rlhf_model")
435
- # ap.add_argument("--debug", action="store_true")
436
- # args = ap.parse_args()
 
 
 
 
 
 
 
437
 
438
- # print(f"\n📥 Loading dataset from {DATA_PATH}...")
 
439
 
440
- # with open(str(DATA_PATH)) as f:
441
- # data = json.load(f)[: args.num_samples]
442
 
443
- # # ==========================================
444
- # # 🔴 BASE MODEL
445
- # # ==========================================
446
- # print("\n🚀 Running BASE MODEL...\n")
447
 
448
- # engine_base = get_engine(
449
- # adapter_path="checkpoints/sft_adapter_codet5" , # 🔥 change this
450
- # use_lora=True,
451
- # use_constrained=False
452
- # )
453
 
454
- # res_base = evaluate(engine_base, data, is_constrained=False, debug=args.debug)
 
455
 
456
- # # ==========================================
457
- # # 🟡 RLHF (NO CONSTRAINT)
458
- # # ==========================================
459
- # print("\n🚀 Running RLHF (NO CONSTRAINT)...\n")
460
 
461
- # engine_rlhf = get_engine(
462
- # adapter_path="checkpoints/best_rlhf_model",
463
- # use_lora=True,
464
- # use_constrained=False
465
- # )
466
 
467
- # res_rlhf = evaluate(engine_rlhf, data, is_constrained=False, debug=args.debug)
 
 
 
 
 
 
468
 
469
- # # ==========================================
470
- # # 🟢 RLHF + CONSTRAINT
471
- # # ==========================================
472
- # print("\n🚀 Running RLHF + CONSTRAINED...\n")
473
 
474
- # engine_const = get_engine(
475
- # adapter_path="checkpoints/best_rlhf_model_2",
476
- # use_lora=True,
477
- # use_constrained=True
478
- # )
479
 
480
- # res_const = evaluate(engine_const, data, is_constrained=True, debug=args.debug)
481
 
482
- # # ==========================================
483
- # # FINAL RESULTS
484
- # # ==========================================
485
- # print("\n==========================================")
486
- # print("🎯 FINAL RESULTS (3-WAY COMPARISON)")
487
- # print("==========================================")
488
 
489
- # print(f"Base Model → EM: {res_base['exact_match']*100:.2f}% | "
490
- # f"EX: {res_base['execution_accuracy']*100:.2f}%")
491
 
492
- # print(f"RLHF → EM: {res_rlhf['exact_match']*100:.2f}% | "
493
- # f"EX: {res_rlhf['execution_accuracy']*100:.2f}%")
 
494
 
495
- # print(f"RLHF + Constrain → EM: {res_const['exact_match']*100:.2f}% | "
496
- # f"EX: {res_const['execution_accuracy']*100:.2f}% | "
497
- # f"Constraint: {res_const['constraint_rate']*100:.2f}%")
498
 
499
- # print("==========================================\n")
 
500
 
501
 
502
  import json
 
 
503
  import argparse
 
504
  import sqlite3
505
  import time
506
  import re
507
- import os
508
  from pathlib import Path
509
 
510
  import torch
511
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
512
  from peft import PeftModel
513
 
514
- # Import handling
515
- try:
516
- from prompting import encode_prompt
517
- from src.sql_validator import validate_sql_schema
518
- except ImportError:
519
- import sys
520
- sys.path.append(str(Path(__file__).resolve().parents[1]))
521
- from src.prompting import encode_prompt
522
- from src.sql_validator import validate_sql_schema
523
-
524
- # =========================================================
525
- # ERROR LOGGING
526
- # =========================================================
527
- ERROR_LOG_FILE = "results/error_logs.json"
528
-
529
- def classify_error(sql, error_msg=""):
530
- sql = sql.lower()
531
- error_msg = str(error_msg).lower()
532
-
533
- if "no such column" in error_msg:
534
- return "wrong_column"
535
- if "no such table" in error_msg:
536
- return "wrong_table"
537
- if "syntax error" in error_msg:
538
- return "syntax_error"
539
- if "ambiguous column" in error_msg:
540
- return "ambiguous_column"
541
- if "join" in sql and " on " not in sql:
542
- return "missing_join"
543
-
544
- return "other"
545
-
546
- def log_error(question, sql, error, error_type):
547
- os.makedirs(os.path.dirname(ERROR_LOG_FILE), exist_ok=True)
548
-
549
- entry = {
550
- "question": question,
551
- "sql": sql,
552
- "error": str(error),
553
- "error_type": error_type,
554
- "timestamp": time.time()
555
- }
556
-
557
- logs = []
558
- if os.path.exists(ERROR_LOG_FILE):
559
- try:
560
- with open(ERROR_LOG_FILE, "r") as f:
561
- content = f.read().strip()
562
- if content:
563
- logs = json.loads(content)
564
- except:
565
- logs = []
566
-
567
- logs.append(entry)
568
-
569
- with open(ERROR_LOG_FILE, "w") as f:
570
- json.dump(logs, f, indent=2)
571
-
572
- # =========================================================
573
- # 🔥 FINAL FIX_SQL (BALANCED VERSION)
574
- # =========================================================
575
- def fix_sql(sql):
576
- if not sql:
577
- return "SELECT 1"
578
-
579
- s = str(sql).strip()
580
-
581
- # Extract SQL only
582
- match = re.search(r"(?i)(select|with)[\s\S]*", s)
583
- if match:
584
- s = match.group(0)
585
-
586
- s = s.split(";")[0].strip()
587
-
588
- # NULL fixes
589
- s = re.sub(r'(?i)=\s*null', 'IS NULL', s)
590
- s = re.sub(r'(?i)!=\s*null', 'IS NOT NULL', s)
591
-
592
- # Fix commas
593
- s = re.sub(r',\s*,+', ',', s)
594
- s = re.sub(r'(?i),\s*from', ' FROM', s)
595
-
596
- # 🔥 LIGHT COLUMN SAFETY (main improvement)
597
- if "select" in s.lower():
598
- if len(re.findall(r'\w+\.\w+', s)) > 3:
599
- s = re.sub(r'(?i)select\s+.*?\s+from', 'SELECT * FROM', s)
600
-
601
- # 🔥 JOIN fix
602
- if "join" in s.lower() and " on " not in s.lower():
603
- s = re.sub(r'join\s+(\w+)', r'JOIN \1 ON 1=1', s, flags=re.I)
604
-
605
- # Ensure valid SQL
606
- if not s.lower().startswith(("select", "with")):
607
- return "SELECT 1"
608
-
609
- return s.strip()
610
-
611
- # =========================================================
612
- # NORMALIZATION
613
- # =========================================================
614
- def normalize_sql(sql):
615
- if not sql:
616
- return ""
617
- return re.sub(r"\s+", " ", str(sql)).strip().lower()
618
 
619
- def normalize_result(res):
620
- if not res:
621
- return []
622
- try:
623
- normalized = [tuple(sorted(str(x) for x in row)) for row in res]
624
- return sorted(normalized)
625
- except:
626
- return sorted([str(r) for r in res])
627
-
628
- # =========================================================
629
- # EXECUTION HELPERS
630
- # =========================================================
631
- def is_executable(sql, db_path):
632
- try:
633
- conn = sqlite3.connect(db_path)
634
- cur = conn.cursor()
635
- cur.execute(sql)
636
- conn.close()
637
- return True
638
- except:
639
- return False
640
 
641
- def check_execution(pred_sql, gold_sql, db_path, question):
 
642
  try:
643
  conn = sqlite3.connect(db_path)
644
  conn.text_factory = lambda b: b.decode(errors='ignore')
645
- cur = conn.cursor()
646
-
647
- cur.execute(gold_sql)
648
- gold_res = cur.fetchall()
649
-
650
- cur.execute(pred_sql)
651
- pred_res = cur.fetchall()
652
-
 
 
 
 
 
653
  conn.close()
654
-
655
- return normalize_result(pred_res) == normalize_result(gold_res)
656
-
657
- except Exception as e:
658
- error_type = classify_error(pred_sql, str(e))
659
- log_error(question, pred_sql, str(e), error_type)
660
  return False
661
 
662
- # =========================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
663
  # MAIN
664
- # =========================================================
665
  def main():
666
  parser = argparse.ArgumentParser()
667
- parser.add_argument("--adapter", type=str, required=True)
668
- parser.add_argument("--num_samples", type=int, default=700)
 
 
669
  args = parser.parse_args()
670
 
671
- project_root = Path(__file__).resolve().parent
672
- if project_root.name in ["scripts", "src"]:
673
- project_root = project_root.parent
674
 
675
  db_root = project_root / "data" / "database"
 
676
  dev_json = project_root / "data" / "dev.json"
 
 
 
677
 
678
- device = "mps" if torch.backends.mps.is_available() else "cpu"
 
679
 
680
- print(f"Loading model on {device}...")
 
681
 
682
- tokenizer = AutoTokenizer.from_pretrained("Salesforce/codet5-base")
683
- base_model = AutoModelForSeq2SeqLM.from_pretrained("Salesforce/codet5-base").to(device)
 
 
684
 
685
- model = PeftModel.from_pretrained(base_model, args.adapter).to(device)
 
 
686
  model = model.merge_and_unload()
687
  model.eval()
688
 
689
- with open(dev_json, "r") as f:
690
- dev_data = json.load(f)[:args.num_samples]
691
-
692
- em_correct = 0
693
- ex_correct = 0
694
- constraint_ok = 0
695
-
696
- print(f"\n🚀 Evaluating {len(dev_data)} samples...\n")
697
-
698
- for i, ex in enumerate(dev_data, 1):
699
- db_id = ex["db_id"]
700
- question = ex["question"]
701
- gold_query = ex["query"]
702
-
703
- db_path = db_root / db_id / f"{db_id}.sqlite"
704
-
705
- input_tensor = encode_prompt(tokenizer, question, db_id, device=device).unsqueeze(0)
706
-
707
- with torch.no_grad():
708
- outputs = model.generate(
709
- input_ids=input_tensor,
710
- max_new_tokens=128,
711
- num_beams=8,
712
- num_return_sequences=8
713
- )
714
 
715
- best_sql = ""
 
 
716
 
717
- # 🔥 EXECUTION-GUIDED SELECTION
718
- for out in outputs:
719
- raw_pred = tokenizer.decode(out, skip_special_tokens=True)
720
- candidate_sql = fix_sql(raw_pred)
721
 
722
- if is_executable(candidate_sql, str(db_path)):
723
- best_sql = candidate_sql
724
- break
 
 
 
 
 
725
 
726
- if not best_sql:
727
- best_sql = fix_sql(tokenizer.decode(outputs[0], skip_special_tokens=True))
728
 
729
- try:
730
- is_valid, _ = validate_sql_schema(best_sql, str(db_path))
731
- except:
732
- is_valid = False
733
-
734
- if is_valid:
735
- constraint_ok += 1
736
-
737
- if normalize_sql(best_sql) == normalize_sql(gold_query):
738
- em_correct += 1
739
-
740
- if check_execution(best_sql, gold_query, str(db_path), question):
741
- ex_correct += 1
742
-
743
- if i % 50 == 0:
744
- print(f"{i}/{len(dev_data)} done")
745
 
746
- print("\n========================================")
747
- print("🎯 FINAL EVALUATION RESULTS")
748
- print("========================================")
749
- print(f"Exact Match (EM): {(em_correct/len(dev_data))*100:.2f}%")
750
- print(f"Execution Acc (EX): {(ex_correct/len(dev_data))*100:.2f}%")
751
- print(f"Constraint Rate: {(constraint_ok/len(dev_data))*100:.2f}%")
752
- print("========================================")
753
- print(f"Errors logged to: {ERROR_LOG_FILE}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
754
 
755
  if __name__ == "__main__":
756
- main()
 
1
  # import json
 
 
 
 
2
  # import sqlite3
3
+ # import argparse
 
 
4
  # from pathlib import Path
 
5
  # import torch
6
  # from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
7
  # from peft import PeftModel
8
 
9
+ # # ---------------- PROMPT (IDENTICAL TO TRAINING) ----------------
10
+ # def build_prompt(question, schema):
11
+ # return f"""
12
+ # Database Schema:
13
+ # {schema}
14
 
15
+ # Translate English to SQL:
16
+ # {question}
17
+ # SQL:
18
+ # """
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
+ # # ---------------- LOAD SCHEMA ----------------
21
+ # def load_schema(db_path):
22
+ # conn = sqlite3.connect(db_path)
23
+ # cursor = conn.cursor()
24
 
25
+ # tables = cursor.execute(
26
+ # "SELECT name FROM sqlite_master WHERE type='table';"
27
+ # ).fetchall()
 
 
 
 
28
 
29
+ # schema = ""
30
+ # for (table,) in tables:
31
+ # cols = cursor.execute(f"PRAGMA table_info({table});").fetchall()
32
+ # col_names = [c[1] for c in cols]
33
+ # schema += f"{table}({', '.join(col_names)})\n"
34
 
35
+ # conn.close()
36
+ # return schema
37
 
 
38
 
39
+ # # ---------------- EXECUTION CHECK ----------------
40
+ # def execution_match(pred_sql, gold_sql, db_path):
41
+ # try:
42
+ # conn = sqlite3.connect(db_path)
43
+ # cur = conn.cursor()
44
 
45
+ # cur.execute(pred_sql)
46
+ # pred = cur.fetchall()
47
 
48
+ # cur.execute(gold_sql)
49
+ # gold = cur.fetchall()
50
 
51
  # conn.close()
52
+ # return pred == gold
 
 
53
 
54
  # except Exception:
55
  # return False
56
 
57
 
58
+ # # ---------------- MAIN ----------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  # def main():
60
  # parser = argparse.ArgumentParser()
61
  # parser.add_argument("--adapter", type=str, required=True)
62
+ # parser.add_argument("--num_samples", type=int, default=1034)
 
 
63
  # args = parser.parse_args()
64
 
65
  # project_root = Path(__file__).resolve().parents[1]
 
66
 
 
 
67
  # dev_json = project_root / "data" / "dev.json"
68
+ # db_root = project_root / "data" / "database"
69
 
70
+ # device = "mps" if torch.backends.mps.is_available() else "cpu"
 
71
 
72
+ # # load model
73
+ # base_model = "Salesforce/codet5-base"
74
+ # tokenizer = AutoTokenizer.from_pretrained(args.adapter)
75
+ # base = AutoModelForSeq2SeqLM.from_pretrained(base_model).to(device)
76
+ # model = PeftModel.from_pretrained(base, args.adapter).to(device)
77
+ # model = model.merge_and_unload()
78
 
79
+ # with open(dev_json) as f:
80
+ # dev = json.load(f)[: args.num_samples]
 
 
81
 
82
+ # correct = 0
 
83
 
84
+ # print(f"Evaluating {len(dev)} examples...\n")
 
85
 
86
+ # for i, ex in enumerate(dev, 1):
87
+ # question = ex["question"]
88
+ # db_id = ex["db_id"]
89
+ # gold_sql = ex["query"]
90
 
91
+ # db_path = db_root / db_id / f"{db_id}.sqlite"
92
+ # schema = load_schema(db_path)
93
 
94
+ # prompt = build_prompt(question, schema)
95
 
96
+ # inputs = tokenizer(prompt, return_tensors="pt").to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
+ # with torch.no_grad():
99
  # outputs = model.generate(
100
+ # **inputs,
101
+ # max_new_tokens=80,
102
+ # do_sample=False,
103
+ # num_beams=4,
104
  # )
105
 
106
+ # pred_sql = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
+ # if "SQL:" in pred_sql:
109
+ # pred_sql = pred_sql.split("SQL:")[-1].strip()
110
 
111
+ # match = execution_match(pred_sql, gold_sql, db_path)
112
 
113
+ # if match:
114
+ # correct += 1
 
 
 
 
 
 
 
115
 
116
+ # if i % 10 == 0:
117
+ # print(f"{i}/{len(dev)} | Acc: {correct/i:.3f}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
+ # print("\n=============================")
120
+ # print(f"FINAL EXECUTION ACCURACY: {correct/len(dev)*100:.2f}%")
121
+ # print("=============================")
122
 
123
 
124
  # if __name__ == "__main__":
125
  # main()
126
 
127
 
 
 
 
 
128
  # import json
129
  # import sqlite3
 
 
 
130
  # import argparse
131
+ # import time
132
  # from pathlib import Path
133
+ # import torch
134
+ # from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
135
+ # from peft import PeftModel
136
 
137
+ # # ---------------- PROMPT (IDENTICAL TO TRAINING) ----------------
138
+ # def build_prompt(question, schema):
139
+ # return f"""
140
+ # Database Schema:
141
+ # {schema}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
 
143
+ # Translate English to SQL:
144
+ # {question}
145
+ # SQL:
146
+ # """
147
 
148
+ # # ---------------- LOAD SCHEMA ----------------
149
+ # def load_schema(db_path):
150
+ # conn = sqlite3.connect(db_path)
151
+ # cursor = conn.cursor()
152
 
153
+ # tables = cursor.execute(
154
+ # "SELECT name FROM sqlite_master WHERE type='table';"
155
+ # ).fetchall()
156
 
157
+ # schema = ""
158
+ # for (table,) in tables:
159
+ # cols = cursor.execute(f"PRAGMA table_info({table});").fetchall()
160
+ # col_names = [c[1] for c in cols]
161
+ # schema += f"{table}({', '.join(col_names)})\n"
162
 
163
+ # conn.close()
164
+ # return schema
 
 
 
 
 
165
 
 
 
166
 
167
+ # # ---------------- EXECUTION CHECK WITH TIMEOUT ----------------
168
+ # def execution_match(pred_sql, gold_sql, db_path):
169
+ # try:
170
+ # conn = sqlite3.connect(db_path)
171
+
172
+ # # --- 5-SECOND TIMEOUT SO EVALUATION DOESN'T FREEZE ---
173
+ # start_time = time.monotonic()
174
+ # def timeout_handler():
175
+ # return 1 if (time.monotonic() - start_time) > 5.0 else 0
176
+ # conn.set_progress_handler(timeout_handler, 10000)
177
 
178
+ # cur = conn.cursor()
 
 
 
 
179
 
180
+ # cur.execute(pred_sql)
181
+ # pred = cur.fetchall()
 
182
 
183
+ # cur.execute(gold_sql)
184
+ # gold = cur.fetchall()
185
 
186
+ # conn.close()
187
+ # return pred == gold
 
 
 
 
 
 
188
 
189
+ # except Exception:
190
+ # return False
 
191
 
 
 
 
192
 
193
+ # # ---------------- MAIN ----------------
194
+ # def main():
195
+ # parser = argparse.ArgumentParser()
196
+ # parser.add_argument("--adapter", type=str, required=True)
197
+ # parser.add_argument("--num_samples", type=int, default=1034)
198
+ # args = parser.parse_args()
199
 
200
+ # project_root = Path(__file__).resolve().parents[1]
 
 
 
 
 
 
 
201
 
202
+ # dev_json = project_root / "data" / "dev.json"
203
+ # db_root = project_root / "data" / "database"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
 
205
+ # # 🎯 Added CUDA support for Nvidia GPUs
206
+ # device = "mps" if torch.backends.mps.is_available() else ("cuda" if torch.cuda.is_available() else "cpu")
207
+
208
+ # # load model
209
+ # base_model = "Salesforce/codet5-base"
210
+ # print(f"Loading Base: {base_model}")
211
+ # print(f"Loading Adapter: {args.adapter}")
212
+
213
+ # tokenizer = AutoTokenizer.from_pretrained(args.adapter)
214
+ # base = AutoModelForSeq2SeqLM.from_pretrained(base_model).to(device)
215
+ # model = PeftModel.from_pretrained(base, args.adapter).to(device)
216
+ # model = model.merge_and_unload()
217
 
218
+ # with open(dev_json) as f:
219
+ # dev = json.load(f)[: args.num_samples]
220
 
221
+ # correct = 0
 
222
 
223
+ # print(f"Evaluating {len(dev)} examples...\n")
 
 
 
224
 
225
+ # for i, ex in enumerate(dev, 1):
226
+ # question = ex["question"]
227
+ # db_id = ex["db_id"]
228
+ # gold_sql = ex["query"]
 
229
 
230
+ # db_path = db_root / db_id / f"{db_id}.sqlite"
231
+ # schema = load_schema(db_path)
232
 
233
+ # prompt = build_prompt(question, schema)
 
 
 
234
 
235
+ # inputs = tokenizer(prompt, return_tensors="pt").to(device)
 
 
 
 
236
 
237
+ # with torch.no_grad():
238
+ # outputs = model.generate(
239
+ # **inputs,
240
+ # max_new_tokens=80,
241
+ # do_sample=False,
242
+ # num_beams=4,
243
+ # )
244
 
245
+ # pred_sql = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
 
246
 
247
+ # if "SQL:" in pred_sql:
248
+ # pred_sql = pred_sql.split("SQL:")[-1].strip()
 
 
 
249
 
250
+ # match = execution_match(pred_sql, gold_sql, db_path)
251
 
252
+ # if match:
253
+ # correct += 1
 
 
 
 
254
 
255
+ # if i % 10 == 0:
256
+ # print(f"{i}/{len(dev)} | Acc: {correct/i:.3f}")
257
 
258
+ # print("\n=============================")
259
+ # print(f"FINAL EXECUTION ACCURACY: {correct/len(dev)*100:.2f}%")
260
+ # print("=============================")
261
 
 
 
 
262
 
263
+ # if __name__ == "__main__":
264
+ # main()
265
 
266
 
267
  import json
268
+ import subprocess
269
+ import sys
270
  import argparse
271
+ import random
272
  import sqlite3
273
  import time
274
  import re
 
275
  from pathlib import Path
276
 
277
  import torch
278
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
279
  from peft import PeftModel
280
 
281
+ # Assuming you have a prompting.py that has encode_prompt
282
+ from prompting import encode_prompt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
 
284
+ # -------------------------------
285
+ # LIVE CHECK HELPERS
286
+ # -------------------------------
287
+ def normalize_sql(sql):
288
+ """Basic normalization for the live progress bar."""
289
+ sql = sql.replace('"', "'")
290
+ sql = re.sub(r"\s+", " ", sql)
291
+ return sql.strip().lower().rstrip(";")
 
 
 
 
 
 
 
 
 
 
 
 
 
292
 
293
+ def check_execution(pred_sql, gold_sql, db_path):
294
+ """Basic execution check for the live progress bar."""
295
  try:
296
  conn = sqlite3.connect(db_path)
297
  conn.text_factory = lambda b: b.decode(errors='ignore')
298
+
299
+ # 2-second timeout so the live tracker doesn't freeze forever
300
+ start_time = time.monotonic()
301
+ def timeout_handler():
302
+ return 1 if (time.monotonic() - start_time) > 2.0 else 0
303
+ conn.set_progress_handler(timeout_handler, 10000)
304
+
305
+ cursor = conn.cursor()
306
+ cursor.execute(pred_sql)
307
+ pred_res = cursor.fetchall()
308
+
309
+ cursor.execute(gold_sql)
310
+ gold_res = cursor.fetchall()
311
  conn.close()
312
+
313
+ # Simple sorted check for the live tracker
314
+ return sorted(pred_res) == sorted(gold_res)
315
+ except Exception:
 
 
316
  return False
317
 
318
+ # -------------------------------
319
+ # SPIDER PARSER
320
+ # -------------------------------
321
+ def _parse_spider_accuracy(stdout: str, metric_type: str) -> float | None:
322
+ for line in stdout.splitlines():
323
+ if metric_type == "exec" and line.strip().startswith("execution"):
324
+ try: return float(line.split()[-1])
325
+ except: pass
326
+ elif metric_type == "match" and line.strip().startswith("exact"):
327
+ try: return float(line.split()[-1])
328
+ except: pass
329
+ return None
330
+
331
+ # -------------------------------
332
  # MAIN
333
+ # -------------------------------
334
  def main():
335
  parser = argparse.ArgumentParser()
336
+ parser.add_argument("--adapter", type=str, required=True, help="Path to your SFT or RLHF checkpoint")
337
+ parser.add_argument("--num_samples", type=int, default=700, help="Number of samples to evaluate")
338
+ parser.add_argument("--shuffle_dev", action="store_true")
339
+ parser.add_argument("--shuffle_seed", type=int, default=42)
340
  args = parser.parse_args()
341
 
342
+ project_root = Path(__file__).resolve().parents[1]
343
+ adapter_dir = project_root / args.adapter
 
344
 
345
  db_root = project_root / "data" / "database"
346
+ table_json = project_root / "data" / "tables.json"
347
  dev_json = project_root / "data" / "dev.json"
348
+
349
+ pred_path = project_root / "temp_predictions.txt"
350
+ temp_gold_path = project_root / "temp_gold.sql"
351
 
352
+ if not adapter_dir.exists():
353
+ raise FileNotFoundError(f"Missing adapter dir: {adapter_dir}")
354
 
355
+ device = "mps" if torch.backends.mps.is_available() else ("cuda" if torch.cuda.is_available() else "cpu")
356
+ print(f"Using device: {device}")
357
 
358
+ BASE_MODEL = "Salesforce/codet5-base"
359
+ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
360
+ if tokenizer.pad_token is None:
361
+ tokenizer.pad_token = tokenizer.eos_token
362
 
363
+ print(f"Loading Model: {args.adapter}...")
364
+ base = AutoModelForSeq2SeqLM.from_pretrained(BASE_MODEL).to(device)
365
+ model = PeftModel.from_pretrained(base, str(adapter_dir)).to(device)
366
  model = model.merge_and_unload()
367
  model.eval()
368
 
369
+ with dev_json.open() as f:
370
+ dev = json.load(f)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
371
 
372
+ if args.shuffle_dev:
373
+ rng = random.Random(args.shuffle_seed)
374
+ rng.shuffle(dev)
375
 
376
+ dev = dev[: args.num_samples]
377
+ total = len(dev)
 
 
378
 
379
+ gen_kwargs = dict(
380
+ max_new_tokens=160,
381
+ num_beams=4,
382
+ do_sample=False,
383
+ early_stopping=True,
384
+ pad_token_id=tokenizer.pad_token_id,
385
+ eos_token_id=tokenizer.eos_token_id,
386
+ )
387
 
388
+ print(f"\n🚀 Generating and live-tracking {total} samples...\n")
 
389
 
390
+ em_correct = 0
391
+ ex_correct = 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
392
 
393
+ with pred_path.open("w") as out_pred, temp_gold_path.open("w") as out_gold, torch.no_grad():
394
+ for i, ex in enumerate(dev, start=1):
395
+ db_id = ex["db_id"]
396
+ question = ex["question"]
397
+ gold_query = ex["query"]
398
+ db_path = db_root / db_id / f"{db_id}.sqlite"
399
+
400
+ # Generate
401
+ input_ids = encode_prompt(tokenizer, question, db_id, device=device, max_input_tokens=512)
402
+ input_ids = input_ids.unsqueeze(0).to(device)
403
+ attention_mask = (input_ids != tokenizer.pad_token_id).long().to(device)
404
+
405
+ outputs = model.generate(input_ids=input_ids, attention_mask=attention_mask, **gen_kwargs)
406
+ pred_sql = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
407
+
408
+ # Write to files for official spider eval later
409
+ out_pred.write(f"{pred_sql}\n")
410
+ out_gold.write(f"{gold_query}\t{db_id}\n")
411
+
412
+ # --- LIVE TRACKING CHECKS ---
413
+ if normalize_sql(pred_sql) == normalize_sql(gold_query):
414
+ em_correct += 1
415
+ if check_execution(pred_sql, gold_query, db_path):
416
+ ex_correct += 1
417
+
418
+ # Print progress every 50 loops
419
+ if i % 10 == 0 or i == total:
420
+ print(f"Progress: {i}/{total} | Current EM: {(em_correct/i)*100:.2f}% | Current EX: {(ex_correct/i)*100:.2f}%")
421
+
422
+ print("\nGeneration finished. Running Official Spider Evaluations for final numbers...\n")
423
+
424
+ eval_script = project_root / "spider_eval" / "evaluation.py"
425
+
426
+ # 1. RUN EXACT MATCH EVAL
427
+ cmd_match = [
428
+ sys.executable, str(eval_script),
429
+ "--gold", str(temp_gold_path),
430
+ "--pred", str(pred_path),
431
+ "--etype", "match",
432
+ "--db", str(db_root),
433
+ "--table", str(table_json),
434
+ ]
435
+ proc_match = subprocess.run(cmd_match, capture_output=True, text=True)
436
+ exact_acc = _parse_spider_accuracy(proc_match.stdout, "match")
437
+
438
+ # 2. RUN EXECUTION EVAL
439
+ cmd_exec = [
440
+ sys.executable, str(eval_script),
441
+ "--gold", str(temp_gold_path),
442
+ "--pred", str(pred_path),
443
+ "--etype", "exec",
444
+ "--db", str(db_root),
445
+ "--table", str(table_json),
446
+ ]
447
+ proc_exec = subprocess.run(cmd_exec, capture_output=True, text=True)
448
+ exec_acc = _parse_spider_accuracy(proc_exec.stdout, "exec")
449
+
450
+ print("==========================================")
451
+ print(f"🎯 OFFICIAL SPIDER RESULTS FOR: {args.adapter}")
452
+ print("==========================================")
453
+
454
+ if exact_acc is not None:
455
+ print(f"Exact Set Match Accuracy : {exact_acc*100:.2f}%")
456
+ else:
457
+ print("Exact Set Match Accuracy : Could not parse output")
458
+
459
+ if exec_acc is not None:
460
+ print(f"Execution Accuracy : {exec_acc*100:.2f}%")
461
+ else:
462
+ print("Execution Accuracy : Could not parse output")
463
+ print("==========================================\n")
464
 
465
  if __name__ == "__main__":
466
+ main()
src/evaluate_without_constraied.py DELETED
@@ -1,503 +0,0 @@
1
-
2
- # *********** code till task 3 ************
3
-
4
- # import json
5
- # import subprocess
6
- # import sys
7
- # import argparse
8
- # import random
9
- # import sqlite3
10
- # import time
11
- # import re
12
- # import os
13
- # from pathlib import Path
14
-
15
- # import torch
16
- # from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
17
- # from peft import PeftModel
18
-
19
- # from prompting import encode_prompt
20
-
21
- # # -------------------------------
22
- # # NORMALIZATION
23
- # # -------------------------------
24
- # def normalize_sql(sql):
25
- # sql = sql.replace('"', "'")
26
- # sql = re.sub(r"\s+", " ", sql)
27
- # return sql.strip().lower().rstrip(";")
28
-
29
-
30
- # # -------------------------------
31
- # # 🔥 SAFE RESULT NORMALIZATION (FIX)
32
- # # -------------------------------
33
- # def normalize_result(res):
34
- # try:
35
- # return sorted([str(r) for r in res])
36
- # except:
37
- # return []
38
-
39
-
40
- # # -------------------------------
41
- # # EXECUTION CHECK (FIXED)
42
- # # -------------------------------
43
- # def check_execution(pred_sql, gold_sql, db_path):
44
- # try:
45
- # conn = sqlite3.connect(db_path)
46
- # conn.text_factory = lambda b: b.decode(errors='ignore')
47
-
48
- # start_time = time.monotonic()
49
-
50
- # def timeout_handler():
51
- # return 1 if (time.monotonic() - start_time) > 2.0 else 0
52
-
53
- # conn.set_progress_handler(timeout_handler, 10000)
54
-
55
- # cursor = conn.cursor()
56
-
57
- # cursor.execute(pred_sql)
58
- # pred_res = cursor.fetchall()
59
-
60
- # cursor.execute(gold_sql)
61
- # gold_res = cursor.fetchall()
62
-
63
- # conn.close()
64
-
65
- # # 🔥 FIXED COMPARISON
66
- # return normalize_result(pred_res) == normalize_result(gold_res)
67
-
68
- # except Exception:
69
- # return False
70
-
71
-
72
- # # -------------------------------
73
- # # SPIDER PARSER
74
- # # -------------------------------
75
- # def _parse_spider_accuracy(stdout: str, metric_type: str):
76
- # for line in stdout.splitlines():
77
- # if metric_type == "exec" and line.strip().startswith("execution"):
78
- # try:
79
- # return float(line.split()[-1])
80
- # except:
81
- # pass
82
- # elif metric_type == "match" and line.strip().startswith("exact"):
83
- # try:
84
- # return float(line.split()[-1])
85
- # except:
86
- # pass
87
- # return None
88
-
89
-
90
- # # -------------------------------
91
- # # MAIN
92
- # # -------------------------------
93
- # def main():
94
- # parser = argparse.ArgumentParser()
95
- # parser.add_argument("--adapter", type=str, required=True)
96
- # parser.add_argument("--num_samples", type=int, default= 500)
97
- # parser.add_argument("--shuffle_dev", action="store_true")
98
- # parser.add_argument("--shuffle_seed", type=int, default=42)
99
- # args = parser.parse_args()
100
-
101
- # project_root = Path(__file__).resolve().parents[1]
102
- # adapter_dir = project_root / args.adapter
103
-
104
- # db_root = project_root / "data" / "database"
105
- # table_json = project_root / "data" / "tables.json"
106
- # dev_json = project_root / "data" / "dev.json"
107
-
108
- # pred_path = project_root / "temp_predictions.txt"
109
- # temp_gold_path = project_root / "temp_gold.sql"
110
-
111
- # if not adapter_dir.exists():
112
- # raise FileNotFoundError(f"Missing adapter dir: {adapter_dir}")
113
-
114
- # device = "mps" if torch.backends.mps.is_available() else (
115
- # "cuda" if torch.cuda.is_available() else "cpu"
116
- # )
117
- # print(f"Using device: {device}")
118
-
119
- # BASE_MODEL = "Salesforce/codet5-base"
120
- # tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
121
-
122
- # if tokenizer.pad_token is None:
123
- # tokenizer.pad_token = tokenizer.eos_token
124
-
125
- # print(f"\n📦 Loading Model: {args.adapter}")
126
-
127
- # base = AutoModelForSeq2SeqLM.from_pretrained(BASE_MODEL).to(device)
128
-
129
- # adapter_for_peft = os.path.relpath(adapter_dir, project_root)
130
-
131
- # model = PeftModel.from_pretrained(
132
- # base,
133
- # adapter_for_peft,
134
- # local_files_only=True
135
- # ).to(device)
136
-
137
- # model = model.merge_and_unload()
138
- # model.eval()
139
-
140
- # # -------------------------------
141
- # # LOAD DATA
142
- # # -------------------------------
143
- # with dev_json.open() as f:
144
- # dev = json.load(f)
145
-
146
- # if args.shuffle_dev:
147
- # rng = random.Random(args.shuffle_seed)
148
- # rng.shuffle(dev)
149
-
150
- # dev = dev[: args.num_samples]
151
- # total = len(dev)
152
-
153
- # gen_kwargs = dict(
154
- # max_new_tokens=160,
155
- # num_beams=8,
156
- # length_penalty=0.8,
157
- # do_sample=False,
158
- # early_stopping=True,
159
- # pad_token_id=tokenizer.pad_token_id,
160
- # eos_token_id=tokenizer.eos_token_id,
161
- # )
162
-
163
- # print(f"\n🚀 Evaluating {total} samples...\n")
164
-
165
- # em_correct = 0
166
- # ex_correct = 0
167
-
168
- # with pred_path.open("w") as out_pred, temp_gold_path.open("w") as out_gold, torch.no_grad():
169
- # for i, ex in enumerate(dev, start=1):
170
-
171
- # db_id = ex["db_id"]
172
- # question = ex["question"]
173
- # gold_query = ex["query"]
174
- # db_path = db_root / db_id / f"{db_id}.sqlite"
175
-
176
- # # -------------------------------
177
- # # GENERATE SQL
178
- # # -------------------------------
179
- # input_ids = encode_prompt(
180
- # tokenizer,
181
- # question,
182
- # db_id,
183
- # device=device,
184
- # max_input_tokens=512
185
- # )
186
-
187
- # input_ids = input_ids.unsqueeze(0).to(device)
188
- # attention_mask = (input_ids != tokenizer.pad_token_id).long().to(device)
189
-
190
- # outputs = model.generate(
191
- # input_ids=input_ids,
192
- # attention_mask=attention_mask,
193
- # **gen_kwargs
194
- # )
195
-
196
- # pred_sql = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
197
-
198
- # # -------------------------------
199
- # # SAVE FOR SPIDER EVAL
200
- # # -------------------------------
201
- # out_pred.write(f"{pred_sql}\n")
202
- # out_gold.write(f"{gold_query}\t{db_id}\n")
203
-
204
- # # -------------------------------
205
- # # LIVE METRICS
206
- # # -------------------------------
207
- # if normalize_sql(pred_sql) == normalize_sql(gold_query):
208
- # em_correct += 1
209
-
210
- # if check_execution(pred_sql, gold_query, db_path):
211
- # ex_correct += 1
212
-
213
- # if i % 20 == 0 or i == total:
214
- # print(
215
- # f"Progress: {i}/{total} | "
216
- # f"EM: {(em_correct/i)*100:.2f}% | "
217
- # f"EX: {(ex_correct/i)*100:.2f}%"
218
- # )
219
-
220
- # print("\n🚀 Running Official Spider Evaluation...\n")
221
-
222
- # eval_script = project_root / "spider_eval" / "evaluation.py"
223
-
224
- # # EXACT MATCH
225
- # cmd_match = [
226
- # sys.executable, str(eval_script),
227
- # "--gold", str(temp_gold_path),
228
- # "--pred", str(pred_path),
229
- # "--etype", "match",
230
- # "--db", str(db_root),
231
- # "--table", str(table_json),
232
- # ]
233
-
234
- # proc_match = subprocess.run(cmd_match, capture_output=True, text=True)
235
- # exact_acc = _parse_spider_accuracy(proc_match.stdout, "match")
236
-
237
- # # EXECUTION
238
- # cmd_exec = [
239
- # sys.executable, str(eval_script),
240
- # "--gold", str(temp_gold_path),
241
- # "--pred", str(pred_path),
242
- # "--etype", "exec",
243
- # "--db", str(db_root),
244
- # "--table", str(table_json),
245
- # ]
246
-
247
- # proc_exec = subprocess.run(cmd_exec, capture_output=True, text=True)
248
- # exec_acc = _parse_spider_accuracy(proc_exec.stdout, "exec")
249
-
250
- # print("==========================================")
251
- # print(f"🎯 OFFICIAL SPIDER RESULTS FOR: {args.adapter}")
252
- # print("==========================================")
253
-
254
- # print(f"Exact Match Accuracy : {exact_acc*100:.2f}%" if exact_acc else "EM parsing failed")
255
- # print(f"Execution Accuracy : {exec_acc*100:.2f}%" if exec_acc else "EX parsing failed")
256
-
257
- # print("==========================================\n")
258
-
259
-
260
- # if __name__ == "__main__":
261
- # main()
262
-
263
-
264
-
265
-
266
- # *********** for task 2 ****************************************
267
- import json
268
- import argparse
269
- import random
270
- import sqlite3
271
- import re
272
- import os
273
- from pathlib import Path
274
- from collections import defaultdict
275
-
276
- import torch
277
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
278
- from peft import PeftModel
279
-
280
- from prompting import encode_prompt
281
-
282
- # -------------------------------
283
- # NORMALIZATION
284
- # -------------------------------
285
- def normalize_sql(sql):
286
- sql = sql.replace('"', "'")
287
- sql = re.sub(r"\s+", " ", sql)
288
- return sql.strip().lower().rstrip(";")
289
-
290
- def normalize_result(res):
291
- try:
292
- return sorted([str(r) for r in res])
293
- except:
294
- return []
295
-
296
- # -------------------------------
297
- # STEP 1: EXECUTION
298
- # -------------------------------
299
- def execute_with_error(sql, db_path):
300
- try:
301
- conn = sqlite3.connect(db_path)
302
- cur = conn.cursor()
303
- cur.execute(sql)
304
- res = cur.fetchall()
305
- conn.close()
306
- return res, None
307
- except Exception as e:
308
- return None, str(e)
309
-
310
- # -------------------------------
311
- # STEP 2: ERROR CLASSIFICATION
312
- # -------------------------------
313
- def classify_error(sql, error_msg):
314
- if error_msg is None:
315
- return "correct"
316
-
317
- err = error_msg.lower()
318
- sql_l = sql.lower()
319
-
320
- if "syntax" in err:
321
- return "syntax_error"
322
- if "no such table" in err:
323
- return "wrong_table"
324
- if "no such column" in err:
325
- return "wrong_column"
326
- if "ambiguous" in err:
327
- return "missing_join"
328
- if "datatype mismatch" in err:
329
- return "type_error"
330
- if "where" not in sql_l and any(x in sql_l for x in ["=", ">", "<"]):
331
- return "missing_where"
332
-
333
- return "other"
334
-
335
- # -------------------------------
336
- # STEP 4: HINTS
337
- # -------------------------------
338
- def generate_hint(error_type):
339
- hints = {
340
- "missing_join": "Try using JOIN between related tables.",
341
- "wrong_column": "Check column names in schema.",
342
- "missing_where": "Add WHERE condition.",
343
- "syntax_error": "Fix SQL syntax.",
344
- "wrong_table": "Verify table names.",
345
- "type_error": "Check data types.",
346
- "other": "Review SQL logic."
347
- }
348
- return hints.get(error_type, "")
349
-
350
- # -------------------------------
351
- # STEP 2 EXTRA: LIGHT ATTRIBUTION
352
- # -------------------------------
353
- def extract_keywords(question):
354
- return [w for w in re.findall(r"\w+", question.lower()) if len(w) > 3]
355
-
356
- # -------------------------------
357
- # MAIN
358
- # -------------------------------
359
- def main():
360
- parser = argparse.ArgumentParser()
361
- parser.add_argument("--adapter", type=str, required=True)
362
- parser.add_argument("--num_samples", type=int, default=200)
363
- args = parser.parse_args()
364
-
365
- project_root = Path(__file__).resolve().parents[1]
366
- db_root = project_root / "data" / "database"
367
- dev_json = project_root / "data" / "dev.json"
368
-
369
- device = "mps" if torch.backends.mps.is_available() else "cpu"
370
-
371
- tokenizer = AutoTokenizer.from_pretrained("Salesforce/codet5-base")
372
- base = AutoModelForSeq2SeqLM.from_pretrained("Salesforce/codet5-base").to(device)
373
-
374
- model = PeftModel.from_pretrained(
375
- base,
376
- os.path.relpath(project_root / args.adapter, project_root),
377
- local_files_only=True
378
- ).to(device)
379
-
380
- model = model.merge_and_unload()
381
- model.eval()
382
-
383
- with open(dev_json) as f:
384
- dev = json.load(f)
385
-
386
- dev = dev[:args.num_samples]
387
-
388
- # STORAGE
389
- error_counter = defaultdict(int)
390
- error_examples = defaultdict(list)
391
- success_examples = []
392
- hint_examples = defaultdict(list)
393
- operation_counter = defaultdict(int)
394
- attribution_map = defaultdict(list)
395
-
396
- em, ex = 0, 0
397
-
398
- print(f"\n🚀 Evaluating {len(dev)} samples...\n")
399
-
400
- for i, sample in enumerate(dev, 1):
401
-
402
- db_id = sample["db_id"]
403
- q = sample["question"]
404
- gold = sample["query"]
405
- db_path = db_root / db_id / f"{db_id}.sqlite"
406
-
407
- input_ids = encode_prompt(tokenizer, q, db_id, device=device).unsqueeze(0)
408
-
409
- out = model.generate(input_ids=input_ids, max_new_tokens=120, num_beams=8)
410
- pred = tokenizer.decode(out[0], skip_special_tokens=True).strip()
411
-
412
- # operation analysis
413
- s = pred.lower()
414
- if "select" in s: operation_counter["SELECT"] += 1
415
- if "where" in s: operation_counter["WHERE"] += 1
416
- if "join" in s: operation_counter["JOIN"] += 1
417
- if "group by" in s: operation_counter["GROUP_BY"] += 1
418
- if "order by" in s: operation_counter["ORDER_BY"] += 1
419
-
420
- pred_res, err = execute_with_error(pred, db_path)
421
- gold_res, _ = execute_with_error(gold, db_path)
422
-
423
- error_type = classify_error(pred, err)
424
- error_counter[error_type] += 1
425
-
426
- # attribution
427
- if err:
428
- attribution_map[error_type].append(extract_keywords(q))
429
-
430
- # examples
431
- if len(error_examples[error_type]) < 3:
432
- error_examples[error_type].append(pred)
433
-
434
- # hints
435
- if error_type != "correct":
436
- hint = generate_hint(error_type)
437
- if len(hint_examples[error_type]) < 3:
438
- hint_examples[error_type].append((pred, hint))
439
-
440
- # metrics
441
- if normalize_sql(pred) == normalize_sql(gold):
442
- em += 1
443
-
444
- if pred_res and gold_res and normalize_result(pred_res) == normalize_result(gold_res):
445
- ex += 1
446
- if len(success_examples) < 5:
447
- success_examples.append(pred)
448
-
449
- if i % 20 == 0:
450
- print(f"[{i}] EM: {em/i:.2f} | EX: {ex/i:.2f}")
451
-
452
- # -------------------------------
453
- # OUTPUT
454
- # -------------------------------
455
- print("\n🎯 FINAL RESULTS")
456
- print(f"EM: {em/len(dev)*100:.2f}%")
457
- print(f"EX: {ex/len(dev)*100:.2f}%")
458
-
459
- print("\n🔥 ERROR SUMMARY")
460
- for k, v in error_counter.items():
461
- print(k, ":", v)
462
-
463
- print("\n🔥 ERROR EXAMPLES")
464
- for k in error_examples:
465
- print("\n", k)
466
- for e in error_examples[k]:
467
- print(" ", e)
468
-
469
- print("\n🔥 HINTS")
470
- for k in hint_examples:
471
- print("\n", k)
472
- for sql, h in hint_examples[k]:
473
- print(" ", sql)
474
- print(" →", h)
475
-
476
- print("\n🔥 ATTRIBUTION (KEYWORDS)")
477
- for k in attribution_map:
478
- print(k, ":", attribution_map[k][:3])
479
-
480
- print("\n🔥 SQL OPERATIONS")
481
- for k, v in operation_counter.items():
482
- print(k, ":", v)
483
-
484
- # -------------------------------
485
- # ADVERSARIAL
486
- # -------------------------------
487
- print("\n🔥 ADVERSARIAL TESTS")
488
-
489
- adv = [
490
- "Find most expensive product",
491
- "Top 3 students by marks",
492
- "Average salary per department"
493
- ]
494
-
495
- for q in adv:
496
- inp = encode_prompt(tokenizer, q, dev[0]["db_id"], device=device).unsqueeze(0)
497
- out = model.generate(input_ids=inp, max_new_tokens=120)
498
- print("\nQ:", q)
499
- print("SQL:", tokenizer.decode(out[0], skip_special_tokens=True))
500
-
501
-
502
- if __name__ == "__main__":
503
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/execution_reward copy.py DELETED
@@ -1,831 +0,0 @@
1
-
2
-
3
- # from __future__ import annotations
4
-
5
- # import hashlib
6
- # import os
7
- # import queue
8
- # import re
9
- # import sqlite3
10
- # import threading
11
- # import time
12
- # from concurrent.futures import ThreadPoolExecutor, as_completed
13
- # from dataclasses import dataclass
14
- # from typing import Dict, List, Optional, Sequence, Set, Tuple, Union
15
-
16
- # # --- CACHE CONTROL ---
17
- # USE_CACHE = True
18
- # _REWARD_CACHE: Dict[str, float] = {}
19
-
20
- # def set_use_cache(enabled: bool):
21
- # """Dynamically toggle the reward cache for benchmarks."""
22
- # global USE_CACHE
23
- # USE_CACHE = enabled
24
-
25
- # def _normalize_sql(sql: str) -> str:
26
- # if not isinstance(sql, str):
27
- # return ""
28
- # s = sql.strip()
29
- # if s.startswith("```"):
30
- # s = re.sub(r"^```[a-zA-Z0-9_+-]*\n?", "", s).strip()
31
- # s = re.sub(r"\n?```$", "", s).strip()
32
- # if s.lower().startswith("sql:"):
33
- # s = s[4:].strip()
34
- # if ";" in s:
35
- # s = s.split(";", 1)[0].strip()
36
- # return s
37
-
38
- # def _connect_readonly(db_path: str) -> sqlite3.Connection:
39
- # uri = f"file:{os.path.abspath(db_path)}?mode=ro"
40
- # conn = sqlite3.connect(uri, uri=True, check_same_thread=False)
41
- # conn.execute("PRAGMA query_only = ON;")
42
- # conn.execute("PRAGMA foreign_keys = ON;")
43
- # return conn
44
-
45
- # DEFAULT_QUERY_TIMEOUT_S = 2.0
46
-
47
- # def _with_timeout(conn: sqlite3.Connection, timeout_s: float = DEFAULT_QUERY_TIMEOUT_S) -> None:
48
- # start = time.monotonic()
49
- # def _handler() -> int:
50
- # return 1 if (time.monotonic() - start) > timeout_s else 0
51
- # conn.set_progress_handler(_handler, 10_000)
52
-
53
- # def _list_tables(conn: sqlite3.Connection) -> List[str]:
54
- # try:
55
- # cur = conn.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';")
56
- # return [r[0] for r in cur.fetchall() if r and isinstance(r[0], str)]
57
- # except sqlite3.Error:
58
- # return []
59
-
60
- # def _contains_table_name(sql: str, table_names: Sequence[str]) -> bool:
61
- # s = sql.lower()
62
- # for t in table_names:
63
- # tl = t.lower()
64
- # if not tl:
65
- # continue
66
- # if re.search(rf"\b{re.escape(tl)}\b", s):
67
- # return True
68
- # return False
69
-
70
- # def _explain_query_plan(conn: sqlite3.Connection, sql: str) -> bool:
71
- # try:
72
- # _with_timeout(conn, timeout_s=DEFAULT_QUERY_TIMEOUT_S)
73
- # conn.execute(f"EXPLAIN QUERY PLAN {sql}")
74
- # return True
75
- # except sqlite3.Error:
76
- # return False
77
-
78
- # def _execute(conn: sqlite3.Connection, sql: str, max_rows: int = 1000) -> Tuple[bool, List[Tuple], Optional[str]]:
79
- # try:
80
- # _with_timeout(conn, timeout_s=DEFAULT_QUERY_TIMEOUT_S)
81
- # cur = conn.execute(sql)
82
- # rows = cur.fetchmany(max_rows)
83
- # norm_rows = [tuple(r) for r in rows]
84
- # return True, norm_rows, None
85
- # except sqlite3.Error as e:
86
- # return False, [], str(e)
87
-
88
- # _SQL_KEYWORDS_TO_IGNORE = {
89
- # "select", "from", "where", "join", "inner", "left", "right", "full", "outer",
90
- # "on", "group", "by", "order", "limit", "having", "distinct", "union", "intersect",
91
- # "except", "as", "and", "or", "not", "in", "is", "null", "like", "between", "case",
92
- # "when", "then", "else", "end", "asc", "desc"
93
- # }
94
-
95
- # _SQL_FUNCTIONS_TO_IGNORE = {
96
- # "count", "avg", "min", "max", "sum", "lower", "upper", "substr", "coalesce",
97
- # "round", "date", "datetime", "strftime"
98
- # }
99
-
100
- # # --- LIGHTWEIGHT PARSING ---
101
- # def is_valid_select(sql: str):
102
- # sql = sql.strip().lower()
103
- # return sql.startswith("select") or sql.startswith("with")
104
-
105
- # def extract_tables(sql: str) -> List[str]:
106
- # sql = sql.lower()
107
- # if "join" not in sql:
108
- # tables = re.findall(r'from\s+(\w+)', sql)
109
- # return list(set(tables))
110
-
111
- # tables = re.findall(r'from\s+([a-zA-Z_][a-zA-Z0-9_]*)', sql)
112
- # joins = re.findall(r'join\s+([a-zA-Z_][a-zA-Z0-9_]*)', sql)
113
- # return list(set(tables + joins))
114
-
115
- # def extract_columns(sql: str) -> List[str]:
116
- # sql = sql.lower()
117
- # match = re.search(r'select\s+(.*?)\s+from', sql)
118
- # if not match:
119
- # return []
120
- # cols = match.group(1)
121
- # if cols.strip() == "*":
122
- # return ["*"]
123
- # return [c.strip() for c in cols.split(",")]
124
-
125
- # def _get_db_tables_and_columns(conn: sqlite3.Connection) -> Tuple[Set[str], Set[str]]:
126
- # tables = set()
127
- # columns = set()
128
- # for t in _list_tables(conn):
129
- # tl = t.lower()
130
- # if not tl:
131
- # continue
132
- # tables.add(tl)
133
- # try:
134
- # cur = conn.execute(f'PRAGMA table_info("{t}")')
135
- # for row in cur.fetchall():
136
- # if row and isinstance(row[1], str):
137
- # columns.add(row[1].lower())
138
- # except sqlite3.Error:
139
- # continue
140
- # return tables, columns
141
-
142
- # def _safe_results_equal(a: List[Tuple], b: List[Tuple]) -> bool:
143
- # return a == b
144
-
145
- # @dataclass
146
- # class RewardDebugStats:
147
- # total: int = 0
148
- # parsed_ok: int = 0
149
- # table_match: int = 0
150
- # column_match: int = 0
151
- # executed_ok: int = 0
152
- # exact_match: int = 0
153
-
154
- # _DEBUG = RewardDebugStats()
155
-
156
- # def reset_debug_metrics() -> None:
157
- # global _DEBUG
158
- # _DEBUG = RewardDebugStats()
159
-
160
- # def get_debug_metrics() -> dict:
161
- # denom = max(_DEBUG.total, 1)
162
- # return {
163
- # "valid_sql_rate": _DEBUG.parsed_ok / denom,
164
- # "table_match_rate": _DEBUG.table_match / denom,
165
- # "column_match_rate": _DEBUG.column_match / denom,
166
- # "execution_accuracy": _DEBUG.exact_match / denom,
167
- # }
168
-
169
- # EXECUTION_ERROR = "EXECUTION_ERROR"
170
-
171
- # _RESULT_CACHE_LOCK = threading.Lock()
172
- # _RESULT_CACHE: "Dict[str, Union[List[Tuple], str]]" = {}
173
- # _RESULT_CACHE_MAX = 100_000
174
-
175
- # def clear_result_cache() -> None:
176
- # """Clear both DB query cache and reward cache."""
177
- # with _RESULT_CACHE_LOCK:
178
- # _RESULT_CACHE.clear()
179
- # _REWARD_CACHE.clear()
180
-
181
- # def _db_state_fingerprint(db_path: str) -> str:
182
- # try:
183
- # st = os.stat(db_path)
184
- # return f"{st.st_mtime_ns}:{st.st_size}"
185
- # except OSError:
186
- # return "missing"
187
-
188
- # def _result_cache_key(db_path: str, sql: str) -> str:
189
- # fp = _db_state_fingerprint(db_path)
190
- # payload = f"{fp}\0{sql}".encode("utf-8", errors="ignore")
191
- # return hashlib.sha256(payload).hexdigest()
192
-
193
- # class _ConnectionPool:
194
- # def __init__(self, db_path: str, maxsize: int = 1) -> None:
195
- # self.db_path = db_path
196
- # self.pool = queue.LifoQueue(maxsize=maxsize)
197
- # self.lock = threading.Lock()
198
-
199
- # def acquire(self) -> sqlite3.Connection:
200
- # try:
201
- # return self.pool.get_nowait()
202
- # except queue.Empty:
203
- # with self.lock:
204
- # try:
205
- # return self.pool.get_nowait()
206
- # except queue.Empty:
207
- # return _connect_readonly(self.db_path)
208
-
209
- # def release(self, conn: sqlite3.Connection) -> None:
210
- # try:
211
- # self.pool.put_nowait(conn)
212
- # except queue.Full:
213
- # try:
214
- # conn.close()
215
- # except Exception:
216
- # pass
217
-
218
- # _POOL_LOCK = threading.Lock()
219
- # _POOLS: Dict[str, _ConnectionPool] = {}
220
-
221
- # def _get_pool(db_path: str) -> _ConnectionPool:
222
- # with _POOL_LOCK:
223
- # pool = _POOLS.get(db_path)
224
- # if pool is None:
225
- # pool = _ConnectionPool(db_path=db_path, maxsize=1)
226
- # _POOLS[db_path] = pool
227
- # return pool
228
-
229
- # class _PooledConnection:
230
- # def __init__(self, db_path: str) -> None:
231
- # self.db_path = db_path
232
- # self.pool = _get_pool(db_path)
233
- # self.conn: Optional[sqlite3.Connection] = None
234
-
235
- # def __enter__(self) -> sqlite3.Connection:
236
- # self.conn = self.pool.acquire()
237
- # return self.conn
238
-
239
- # def __exit__(self, exc_type, exc, tb) -> None:
240
- # if self.conn is not None:
241
- # self.pool.release(self.conn)
242
- # self.conn = None
243
-
244
- # def _cache_get(key: str) -> Optional[Union[List[Tuple], str]]:
245
- # with _RESULT_CACHE_LOCK:
246
- # return _RESULT_CACHE.get(key)
247
-
248
- # def _cache_put(key: str, value: Union[List[Tuple], str]) -> None:
249
- # with _RESULT_CACHE_LOCK:
250
- # if len(_RESULT_CACHE) >= _RESULT_CACHE_MAX:
251
- # _RESULT_CACHE.clear()
252
- # _RESULT_CACHE[key] = value
253
-
254
- # def execute_sql(conn: sqlite3.Connection, sql: str, *, max_rows: int = 1000) -> Union[List[Tuple], str]:
255
- # try:
256
- # _with_timeout(conn, timeout_s=DEFAULT_QUERY_TIMEOUT_S)
257
- # cur = conn.execute(sql)
258
- # rows = cur.fetchmany(max_rows)
259
- # return [tuple(r) for r in rows]
260
- # except Exception:
261
- # return EXECUTION_ERROR
262
-
263
- # def execute_sql_cached(db_path: str, sql: str, *, max_rows: int = 1000) -> Union[List[Tuple], str]:
264
- # if not USE_CACHE:
265
- # with _PooledConnection(db_path) as conn:
266
- # return execute_sql(conn, sql, max_rows=max_rows)
267
-
268
- # key = _result_cache_key(db_path, sql)
269
- # cached = _cache_get(key)
270
- # if cached is not None:
271
- # return cached
272
- # with _PooledConnection(db_path) as conn:
273
- # res = execute_sql(conn, sql, max_rows=max_rows)
274
- # _cache_put(key, res)
275
- # return res
276
-
277
- # def execution_reward_timed(
278
- # pred_sql: str, db_path: str, gold_sql: str, *, measure_plan: bool = False,
279
- # ) -> Tuple[float, Dict[str, float]]:
280
- # timings = {"parse_s": 0.0, "plan_s": 0.0, "exec_s": 0.0}
281
- # t0 = time.perf_counter()
282
- # sql = _normalize_sql(pred_sql)
283
- # gold = _normalize_sql(gold_sql)
284
-
285
- # if not is_valid_select(sql):
286
- # timings["parse_s"] = time.perf_counter() - t0
287
- # return 0.0, timings
288
-
289
- # t1 = time.perf_counter()
290
- # timings["parse_s"] = t1 - t0
291
-
292
- # if measure_plan:
293
- # with _PooledConnection(db_path) as conn:
294
- # p0 = time.perf_counter()
295
- # _explain_query_plan(conn, sql)
296
- # _explain_query_plan(conn, gold)
297
- # timings["plan_s"] = time.perf_counter() - p0
298
-
299
- # e0 = time.perf_counter()
300
- # pred_res = execute_sql_cached(db_path, sql)
301
- # if pred_res == EXECUTION_ERROR:
302
- # timings["exec_s"] = time.perf_counter() - e0
303
- # return 0.0, timings
304
- # gold_res = execute_sql_cached(db_path, gold)
305
- # timings["exec_s"] = time.perf_counter() - e0
306
- # if gold_res == EXECUTION_ERROR:
307
- # return 0.0, timings
308
-
309
- # reward = -0.2
310
- # reward += 0.2
311
- # if _safe_results_equal(pred_res, gold_res):
312
- # return 1.0, timings
313
- # return max(-1.0, min(1.0, reward)), timings
314
-
315
- # def execution_reward(pred_sql: str, db_path: str, gold_sql: str) -> float:
316
- # try:
317
- # sql = _normalize_sql(pred_sql)
318
- # gold = _normalize_sql(gold_sql)
319
-
320
- # if not is_valid_select(sql):
321
- # return -1.0
322
-
323
- # reward = -0.2
324
-
325
- # pred_tables = set(extract_tables(sql))
326
- # gold_tables = set(extract_tables(gold))
327
-
328
- # if pred_tables == gold_tables and len(gold_tables) > 0:
329
- # reward += 0.3
330
-
331
- # pred_cols = set(extract_columns(sql))
332
- # gold_cols = set(extract_columns(gold))
333
-
334
- # if gold_cols:
335
- # overlap = len(pred_cols & gold_cols) / len(gold_cols)
336
- # reward += 0.3 * overlap
337
-
338
- # pred_res = execute_sql_cached(db_path, sql)
339
- # if pred_res == EXECUTION_ERROR:
340
- # return 0.0
341
- # reward += 0.2
342
-
343
- # gold_res = execute_sql_cached(db_path, gold)
344
- # if gold_res == EXECUTION_ERROR:
345
- # return 0.0
346
- # if _safe_results_equal(pred_res, gold_res):
347
- # return 1.0
348
-
349
- # return max(-1.0, min(1.0, reward))
350
-
351
- # except Exception:
352
- # return 0.0
353
-
354
- # def cached_execution_reward(pred_sql: str, db_path: str, gold_sql: str) -> float:
355
- # if not USE_CACHE:
356
- # return execution_reward(pred_sql, db_path, gold_sql)
357
-
358
- # key = f"{db_path}|{pred_sql}|{gold_sql}"
359
- # if key not in _REWARD_CACHE:
360
- # _REWARD_CACHE[key] = execution_reward(pred_sql, db_path, gold_sql)
361
- # return _REWARD_CACHE[key]
362
-
363
- # def execution_reward_batch_sequential(rollouts: Sequence[Tuple[str, str, str]]) -> List[float]:
364
- # return [cached_execution_reward(pred_sql, db_path, gold_sql) for pred_sql, db_path, gold_sql in rollouts]
365
-
366
- # def execution_reward_batch_parallel(rollouts: Sequence[Tuple[str, str, str]], *, max_workers: int = 20) -> List[float]:
367
- # if not rollouts:
368
- # return []
369
-
370
- # unique_dbs = {db_path for _, db_path, _ in rollouts}
371
- # worker_count = max(1, min(max_workers, len(unique_dbs)))
372
- # results: List[Optional[float]] = [None] * len(rollouts)
373
-
374
- # with ThreadPoolExecutor(max_workers=worker_count) as executor:
375
- # futures = {
376
- # executor.submit(cached_execution_reward, pred_sql, db_path, gold_sql): i
377
- # for i, (pred_sql, db_path, gold_sql) in enumerate(rollouts)
378
- # }
379
- # for fut in as_completed(futures):
380
- # idx = futures[fut]
381
- # try:
382
- # results[idx] = float(fut.result())
383
- # except Exception:
384
- # results[idx] = 0.0
385
-
386
- # return [r if r is not None else 0.0 for r in results]
387
-
388
- from __future__ import annotations
389
-
390
- import os
391
- import re
392
- import sqlite3
393
- import threading
394
- import time
395
- import json
396
- from concurrent.futures import ThreadPoolExecutor, as_completed
397
- from dataclasses import dataclass
398
- from typing import Dict, List
399
-
400
- from src.sql_validator import validate_sql_schema
401
-
402
- # =========================================================
403
- # 🔥 CONFIG FLAGS
404
- # =========================================================
405
- USE_SCHEMA_VALIDATION = True
406
- USE_CACHE = True
407
- DEFAULT_QUERY_TIMEOUT_S = 2.0
408
-
409
- EXECUTION_ERROR = "EXECUTION_ERROR"
410
-
411
- _REWARD_CACHE: Dict[str, float] = {}
412
-
413
- # =========================================================
414
- # 🔥 TASK 2: ERROR ANALYSIS + LOGGING
415
- # =========================================================
416
- ERROR_LOG_FILE = "results/error_logs.json"
417
-
418
-
419
- def classify_error(sql: str) -> str:
420
- sql = sql.lower()
421
-
422
- if "join" in sql and " on " not in sql:
423
- return "missing_join"
424
-
425
- if "where" in sql and "=" not in sql and ">" not in sql and "<" not in sql:
426
- return "wrong_where"
427
-
428
- if "null" in sql:
429
- return "null_handling"
430
-
431
- if "group by" in sql and "count" not in sql:
432
- return "wrong_groupby"
433
-
434
- return "other"
435
-
436
-
437
- def get_hint(error_type: str) -> str:
438
- hints = {
439
- "missing_join": "Add proper JOIN condition using ON.",
440
- "wrong_where": "Check WHERE clause conditions.",
441
- "null_handling": "Handle NULL values using IS NULL.",
442
- "wrong_groupby": "Use aggregation functions with GROUP BY.",
443
- "other": "Check SQL syntax and logic."
444
- }
445
- return hints.get(error_type, "Check query.")
446
-
447
-
448
def log_error(question: str, sql: str, error: str, error_type: str):
    """Append one classified failure to ERROR_LOG_FILE (results/error_logs.json).

    Creates the results directory on demand. A missing, truncated, or
    otherwise corrupted log file is replaced with a fresh list instead of
    raising — this helper runs inside the reward pipeline and must never
    crash a training step.
    """
    os.makedirs("results", exist_ok=True)

    entry = {
        "question": question,
        "sql": sql,
        "error": error,
        "error_type": error_type,
        "timestamp": time.time(),
    }

    logs = []
    if os.path.exists(ERROR_LOG_FILE):
        try:
            with open(ERROR_LOG_FILE, "r") as f:
                logs = json.load(f)
            # Guard against a hand-edited file that holds a non-list payload.
            if not isinstance(logs, list):
                logs = []
        except (json.JSONDecodeError, OSError):
            # Corrupt/unreadable log: start over rather than crash.
            logs = []

    logs.append(entry)

    with open(ERROR_LOG_FILE, "w") as f:
        json.dump(logs, f, indent=2)
469
-
470
- # =========================================================
471
- # CACHE/VALIDATION TOGGLES (Task 1)
472
- # =========================================================
473
def set_use_cache(enabled: bool) -> None:
    """Toggle the result/reward caches on or off (benchmark hook)."""
    global USE_CACHE
    USE_CACHE = True if enabled else False
476
-
477
-
478
def set_use_schema_validation(enabled: bool) -> None:
    """Toggle the pre-execution schema validation pass (benchmark hook)."""
    global USE_SCHEMA_VALIDATION
    USE_SCHEMA_VALIDATION = True if enabled else False
481
-
482
-
483
- # =========================================================
484
- # SQL CLEANING
485
- # =========================================================
486
- def _normalize_sql(sql: str) -> str:
487
- if not isinstance(sql, str):
488
- return ""
489
- s = sql.strip()
490
-
491
- if s.startswith("```"):
492
- s = re.sub(r"^```[a-zA-Z0-9_+-]*\n?", "", s).strip()
493
- s = re.sub(r"\n?```$", "", s).strip()
494
-
495
- if s.lower().startswith("sql:"):
496
- s = s[4:].strip()
497
-
498
- if ";" in s:
499
- s = s.split(";", 1)[0].strip()
500
-
501
- return s
502
-
503
-
504
- # =========================================================
505
- # DB EXECUTION
506
- # =========================================================
507
- def _connect_readonly(db_path: str):
508
- uri = f"file:{os.path.abspath(db_path)}?mode=ro"
509
- conn = sqlite3.connect(uri, uri=True, check_same_thread=False)
510
- conn.execute("PRAGMA query_only = ON;")
511
- conn.execute("PRAGMA foreign_keys = ON;")
512
- return conn
513
-
514
-
515
def _with_timeout(conn: sqlite3.Connection, timeout_s: float = DEFAULT_QUERY_TIMEOUT_S):
    """Install a progress handler that aborts queries after timeout_s seconds.

    sqlite invokes the handler every 10k VM instructions; a non-zero
    return value interrupts the running statement (OperationalError).
    """
    deadline = time.monotonic() + timeout_s

    def handler():
        # Non-zero => abort the current query.
        return 1 if time.monotonic() > deadline else 0

    conn.set_progress_handler(handler, 10_000)
522
-
523
-
524
def execute_sql(conn, sql):
    """Run sql on conn under the default timeout.

    Returns the fetched rows, or the EXECUTION_ERROR sentinel on any
    failure (syntax error, missing table, timeout, ...).
    """
    try:
        _with_timeout(conn, timeout_s=DEFAULT_QUERY_TIMEOUT_S)
        return conn.execute(sql).fetchall()
    except Exception:
        # Callers score failures via the sentinel instead of exceptions.
        return EXECUTION_ERROR
531
-
532
-
533
# Cross-call cache of raw query results, keyed by "db_path|sql".
_RESULT_CACHE = {}
# Guards _RESULT_CACHE against concurrent access from the batch executors.
_RESULT_LOCK = threading.Lock()
535
-
536
-
537
def execute_sql_cached(db_path, sql):
    """Execute sql against db_path, consulting the shared result cache.

    Opens (and closes) a fresh readonly connection per call; when
    USE_CACHE is on, identical (db_path, sql) pairs hit the cache.
    """
    key = f"{db_path}|{sql}"

    if USE_CACHE:
        with _RESULT_LOCK:
            try:
                return _RESULT_CACHE[key]
            except KeyError:
                pass

    conn = _connect_readonly(db_path)
    result = execute_sql(conn, sql)
    conn.close()

    if USE_CACHE:
        with _RESULT_LOCK:
            _RESULT_CACHE[key] = result

    return result
554
-
555
-
556
def execute_sql_cached_conn(conn: sqlite3.Connection, db_path: str, sql: str):
    """
    Cached execution that reuses an already-open connection.

    Same caching contract as execute_sql_cached(); meant for the
    one-thread-per-DB batch path (Task 1) where each worker owns one
    readonly connection.
    """
    key = f"{db_path}|{sql}"

    if USE_CACHE:
        with _RESULT_LOCK:
            hit = _RESULT_CACHE.get(key)
        # Cached values are row lists or the sentinel string, never None.
        if hit is not None:
            return hit

    result = execute_sql(conn, sql)

    if USE_CACHE:
        with _RESULT_LOCK:
            _RESULT_CACHE[key] = result

    return result
574
-
575
-
576
def clear_result_cache() -> None:
    """Empty both the raw-result cache and the reward cache."""
    with _RESULT_LOCK:
        for cache in (_RESULT_CACHE, _REWARD_CACHE):
            cache.clear()
581
-
582
-
583
- # =========================================================
584
- # SQL PARSING
585
- # =========================================================
586
def is_valid_select(sql):
    """Return True if sql looks like a read-only query (SELECT or WITH ...).

    Leading whitespace is ignored so un-normalized model output is not
    rejected spuriously; previously "  SELECT ..." was treated as invalid.
    """
    head = sql.strip().lower()
    return head.startswith(("select", "with"))
588
-
589
-
590
def extract_tables(sql):
    """Return table names referenced in FROM and JOIN clauses (lowercased).

    Regex heuristic: previously only FROM targets were found, so every
    JOINed table was silently dropped. FROM tables come first, then JOIN
    tables; duplicates are removed preserving first occurrence.
    """
    lowered = sql.lower()
    names = re.findall(r'from\s+(\w+)', lowered)
    names += re.findall(r'join\s+(\w+)', lowered)
    # dict.fromkeys dedupes while keeping insertion order.
    return list(dict.fromkeys(names))
592
-
593
-
594
def extract_columns(sql):
    """Return the projected column expressions between SELECT and FROM.

    Yields ["*"] for a star projection and [] when no SELECT ... FROM
    pattern is present.
    """
    match = re.search(r'select\s+(.*?)\s+from', sql.lower())
    if match is None:
        return []
    projection = match.group(1).strip()
    if projection == "*":
        return ["*"]
    return [col.strip() for col in projection.split(",")]
600
-
601
-
602
def get_sql_operations(sql: str):
    """List the coarse SQL operations present in sql (substring heuristic)."""
    lowered = sql.lower()
    checks = [
        ("select", "SELECT"),
        ("where", "WHERE"),
        ("join", "JOIN"),
        ("group by", "GROUP_BY"),
        ("order by", "ORDER_BY"),
    ]
    return [label for needle, label in checks if needle in lowered]
613
-
614
-
615
def _explain_query_plan(conn: sqlite3.Connection, sql: str) -> bool:
    """Return True if sqlite can build a query plan for sql.

    Acts as a cheap compile check: the query is planned, not executed.
    """
    try:
        _with_timeout(conn, timeout_s=DEFAULT_QUERY_TIMEOUT_S)
        conn.execute(f"EXPLAIN QUERY PLAN {sql}")
    except Exception:
        return False
    return True
622
-
623
-
624
def execution_reward_timed(pred_sql: str, db_path: str, gold_sql: str, measure_plan: bool = False):
    """
    Profile-friendly variant of execution_reward().

    Returns (reward, timings) where timings has keys parse_s, plan_s and
    exec_s (seconds). Used by the Task-1 benchmark to locate bottlenecks;
    invalid or failing queries score 0.0 here rather than the training
    penalties.
    """
    timings = {"parse_s": 0.0, "plan_s": 0.0, "exec_s": 0.0}

    parse_start = time.perf_counter()
    sql = _normalize_sql(pred_sql)
    gold = _normalize_sql(gold_sql)
    valid = is_valid_select(sql)
    timings["parse_s"] = time.perf_counter() - parse_start

    if not valid:
        return 0.0, timings

    conn = _connect_readonly(db_path)
    try:
        if measure_plan:
            plan_start = time.perf_counter()
            _explain_query_plan(conn, sql)
            _explain_query_plan(conn, gold)
            timings["plan_s"] = time.perf_counter() - plan_start

        exec_start = time.perf_counter()
        pred_res = execute_sql_cached_conn(conn, db_path, sql)
        if pred_res == EXECUTION_ERROR:
            timings["exec_s"] = time.perf_counter() - exec_start
            return 0.0, timings

        gold_res = execute_sql_cached_conn(conn, db_path, gold)
        timings["exec_s"] = time.perf_counter() - exec_start
        if gold_res == EXECUTION_ERROR:
            return 0.0, timings

        # Exact result match scores 1.0; otherwise the base reward terms
        # (-0.2 invalid-penalty + 0.2 executed-bonus) cancel out to 0.0.
        if pred_res == gold_res:
            return 1.0, timings
        return 0.0, timings
    finally:
        try:
            conn.close()
        except Exception:
            pass
669
-
670
-
671
- # =========================================================
672
- # 🔥 FINAL REWARD FUNCTION (TASK 2 INTEGRATED)
673
- # =========================================================
674
def execution_reward(pred_sql: str, db_path: str, gold_sql: str) -> float:
    """
    Score a predicted SQL query against a gold query by executing both.

    Reward shape:
      -1.0  prediction is not a SELECT/WITH query
       0.1  schema-invalid, execution error, or gold query itself fails
       1.0  result sets match exactly
       0.0  prediction runs but results differ (clamped base reward)

    Failures are classified (Task 2) and appended to the error log.
    Any unexpected exception is logged and scored 0.0.
    """
    try:
        sql = _normalize_sql(pred_sql)
        gold = _normalize_sql(gold_sql)

        if not is_valid_select(sql):
            return -1.0

        reward = -0.2

        # =========================
        # SCHEMA VALIDATION (Task 3)
        # =========================
        if USE_SCHEMA_VALIDATION:
            valid, _ = validate_sql_schema(sql, db_path)
            if not valid:
                error_type = classify_error(sql)
                log_error("UNKNOWN", sql, "schema_invalid", error_type)
                return 0.1

        # =========================
        # EXECUTION
        # =========================
        pred_res = execute_sql_cached(db_path, sql)

        # Compare against the shared sentinel constant, not a re-spelled
        # string literal, so the sentinel value stays defined in one place.
        if pred_res == EXECUTION_ERROR:
            error_type = classify_error(sql)

            log_error(
                question="UNKNOWN",
                sql=sql,
                error="execution_error",
                error_type=error_type,
            )

            print(f"[ERROR] {error_type}")
            print(f"[HINT] {get_hint(error_type)}")

            return 0.1

        reward += 0.2  # prediction executed cleanly

        gold_res = execute_sql_cached(db_path, gold)

        if gold_res == EXECUTION_ERROR:
            # Broken gold query: neutral-ish score, not the model's fault.
            return 0.1

        if pred_res == gold_res:
            return 1.0

        return max(-1.0, min(1.0, reward))

    except Exception as e:
        log_error("UNKNOWN", pred_sql, str(e), "runtime_error")
        return 0.0
729
-
730
-
731
- # =========================================================
732
- # BATCH EXECUTION (Task 1)
733
- # =========================================================
734
def cached_execution_reward(pred_sql: str, db_path: str, gold_sql: str) -> float:
    """Memoized wrapper around execution_reward(), keyed on all three args.

    Cache access is guarded by _RESULT_LOCK: this function is submitted to
    ThreadPoolExecutor workers, and the previous unsynchronized dict access
    was inconsistent with the lock-guarded result cache. The reward itself
    is computed outside the lock (execution_reward() re-acquires it via
    execute_sql_cached, and the lock is not reentrant).
    """
    if not USE_CACHE:
        return float(execution_reward(pred_sql, db_path, gold_sql))

    key = f"{db_path}|{pred_sql}|{gold_sql}"
    with _RESULT_LOCK:
        if key in _REWARD_CACHE:
            return float(_REWARD_CACHE[key])

    r = float(execution_reward(pred_sql, db_path, gold_sql))

    with _RESULT_LOCK:
        _REWARD_CACHE[key] = r
    return r
743
-
744
-
745
def execution_reward_batch_sequential(rollouts):
    """Score rollouts one by one; each rollout is (pred_sql, db_path, gold_sql)."""
    rewards = []
    for pred_sql, db_path, gold_sql in rollouts:
        rewards.append(cached_execution_reward(pred_sql, db_path, gold_sql))
    return rewards
747
-
748
-
749
def execution_reward_batch_parallel(rollouts, max_workers=10):
    """Score rollouts with a thread pool; output order matches input order.

    A worker that raises degrades to a neutral 0.0 reward instead of
    failing the whole batch.
    """
    rewards = [0.0] * len(rollouts)

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        pending = {}
        for position, (pred_sql, db_path, gold_sql) in enumerate(rollouts):
            fut = pool.submit(cached_execution_reward, pred_sql, db_path, gold_sql)
            pending[fut] = position

        for fut in as_completed(pending):
            position = pending[fut]
            try:
                rewards[position] = fut.result()
            except Exception:
                rewards[position] = 0.0

    return rewards
766
-
767
-
768
def execution_reward_batch_parallel_by_db(rollouts, max_workers: int = 20):
    """
    1 thread per DB path. Reuses a single readonly connection per DB worker.
    Preserves input order.

    rollouts: sequence of (pred_sql, db_path, gold_sql) triples.
    Returns a list of float rewards aligned with the input order.
    """
    if not rollouts:
        return []

    # Group rollout indices by database so each worker thread can keep a
    # single readonly connection open for all of its queries.
    by_db = {}
    for idx, (pred_sql, db_path, gold_sql) in enumerate(rollouts):
        by_db.setdefault(db_path, []).append((idx, pred_sql, gold_sql))

    results = [0.0 for _ in range(len(rollouts))]

    def _reward_with_conn(conn: sqlite3.Connection, pred_sql: str, db_path: str, gold_sql: str) -> float:
        # Connection-reusing twin of execution_reward(): same reward shape
        # (-1.0 invalid / 0.1 failed / 1.0 exact match / clamped base otherwise).
        try:
            sql = _normalize_sql(pred_sql)
            gold = _normalize_sql(gold_sql)

            if not is_valid_select(sql):
                return -1.0

            reward = -0.2

            if USE_SCHEMA_VALIDATION:
                valid, _ = validate_sql_schema(sql, db_path)
                if not valid:
                    error_type = classify_error(sql)
                    log_error("UNKNOWN", sql, "schema_invalid", error_type)
                    return 0.1

            pred_res = execute_sql_cached_conn(conn, db_path, sql)
            if pred_res == EXECUTION_ERROR:
                error_type = classify_error(sql)
                log_error("UNKNOWN", sql, "execution_error", error_type)
                return 0.1

            reward += 0.2  # prediction executed cleanly
            gold_res = execute_sql_cached_conn(conn, db_path, gold)
            if gold_res == EXECUTION_ERROR:
                # Broken gold query: neutral-ish score, not the model's fault.
                return 0.1
            if pred_res == gold_res:
                return 1.0
            return max(-1.0, min(1.0, reward))
        except Exception:
            # Any unexpected failure scores 0.0 rather than killing the batch.
            return 0.0

    def _worker(db_path: str, items):
        # One worker per DB: open one readonly connection, score all items,
        # and close the connection even if scoring raises.
        conn = _connect_readonly(db_path)
        try:
            for idx, pred, gold in items:
                results[idx] = _reward_with_conn(conn, pred, db_path, gold)
        finally:
            try:
                conn.close()
            except Exception:
                pass

    with ThreadPoolExecutor(max_workers=int(max_workers)) as ex:
        futures = [ex.submit(_worker, db_path, items) for db_path, items in by_db.items()]
        for fut in as_completed(futures):
            # Propagate worker exceptions (none expected; rewards swallow
            # their own errors above).
            fut.result()

    return results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/execution_reward.py CHANGED
@@ -1,510 +1,41 @@
1
-
2
-
3
- # from __future__ import annotations
4
-
5
- # import hashlib
6
- # import os
7
- # import queue
8
- # import re
9
- # import sqlite3
10
- # import threading
11
- # import time
12
- # from concurrent.futures import ThreadPoolExecutor, as_completed
13
- # from dataclasses import dataclass
14
- # from typing import Dict, List, Optional, Sequence, Set, Tuple, Union
15
-
16
- # # --- CACHE CONTROL ---
17
- # USE_CACHE = True
18
- # _REWARD_CACHE: Dict[str, float] = {}
19
-
20
- # def set_use_cache(enabled: bool):
21
- # """Dynamically toggle the reward cache for benchmarks."""
22
- # global USE_CACHE
23
- # USE_CACHE = enabled
24
-
25
- # def _normalize_sql(sql: str) -> str:
26
- # if not isinstance(sql, str):
27
- # return ""
28
- # s = sql.strip()
29
- # if s.startswith("```"):
30
- # s = re.sub(r"^```[a-zA-Z0-9_+-]*\n?", "", s).strip()
31
- # s = re.sub(r"\n?```$", "", s).strip()
32
- # if s.lower().startswith("sql:"):
33
- # s = s[4:].strip()
34
- # if ";" in s:
35
- # s = s.split(";", 1)[0].strip()
36
- # return s
37
-
38
- # def _connect_readonly(db_path: str) -> sqlite3.Connection:
39
- # uri = f"file:{os.path.abspath(db_path)}?mode=ro"
40
- # conn = sqlite3.connect(uri, uri=True, check_same_thread=False)
41
- # conn.execute("PRAGMA query_only = ON;")
42
- # conn.execute("PRAGMA foreign_keys = ON;")
43
- # return conn
44
-
45
- # DEFAULT_QUERY_TIMEOUT_S = 2.0
46
-
47
- # def _with_timeout(conn: sqlite3.Connection, timeout_s: float = DEFAULT_QUERY_TIMEOUT_S) -> None:
48
- # start = time.monotonic()
49
- # def _handler() -> int:
50
- # return 1 if (time.monotonic() - start) > timeout_s else 0
51
- # conn.set_progress_handler(_handler, 10_000)
52
-
53
- # def _list_tables(conn: sqlite3.Connection) -> List[str]:
54
- # try:
55
- # cur = conn.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';")
56
- # return [r[0] for r in cur.fetchall() if r and isinstance(r[0], str)]
57
- # except sqlite3.Error:
58
- # return []
59
-
60
- # def _contains_table_name(sql: str, table_names: Sequence[str]) -> bool:
61
- # s = sql.lower()
62
- # for t in table_names:
63
- # tl = t.lower()
64
- # if not tl:
65
- # continue
66
- # if re.search(rf"\b{re.escape(tl)}\b", s):
67
- # return True
68
- # return False
69
-
70
- # def _explain_query_plan(conn: sqlite3.Connection, sql: str) -> bool:
71
- # try:
72
- # _with_timeout(conn, timeout_s=DEFAULT_QUERY_TIMEOUT_S)
73
- # conn.execute(f"EXPLAIN QUERY PLAN {sql}")
74
- # return True
75
- # except sqlite3.Error:
76
- # return False
77
-
78
- # def _execute(conn: sqlite3.Connection, sql: str, max_rows: int = 1000) -> Tuple[bool, List[Tuple], Optional[str]]:
79
- # try:
80
- # _with_timeout(conn, timeout_s=DEFAULT_QUERY_TIMEOUT_S)
81
- # cur = conn.execute(sql)
82
- # rows = cur.fetchmany(max_rows)
83
- # norm_rows = [tuple(r) for r in rows]
84
- # return True, norm_rows, None
85
- # except sqlite3.Error as e:
86
- # return False, [], str(e)
87
-
88
- # _SQL_KEYWORDS_TO_IGNORE = {
89
- # "select", "from", "where", "join", "inner", "left", "right", "full", "outer",
90
- # "on", "group", "by", "order", "limit", "having", "distinct", "union", "intersect",
91
- # "except", "as", "and", "or", "not", "in", "is", "null", "like", "between", "case",
92
- # "when", "then", "else", "end", "asc", "desc"
93
- # }
94
-
95
- # _SQL_FUNCTIONS_TO_IGNORE = {
96
- # "count", "avg", "min", "max", "sum", "lower", "upper", "substr", "coalesce",
97
- # "round", "date", "datetime", "strftime"
98
- # }
99
-
100
- # # --- LIGHTWEIGHT PARSING ---
101
- # def is_valid_select(sql: str):
102
- # sql = sql.strip().lower()
103
- # return sql.startswith("select") or sql.startswith("with")
104
-
105
- # def extract_tables(sql: str) -> List[str]:
106
- # sql = sql.lower()
107
- # if "join" not in sql:
108
- # tables = re.findall(r'from\s+(\w+)', sql)
109
- # return list(set(tables))
110
-
111
- # tables = re.findall(r'from\s+([a-zA-Z_][a-zA-Z0-9_]*)', sql)
112
- # joins = re.findall(r'join\s+([a-zA-Z_][a-zA-Z0-9_]*)', sql)
113
- # return list(set(tables + joins))
114
-
115
- # def extract_columns(sql: str) -> List[str]:
116
- # sql = sql.lower()
117
- # match = re.search(r'select\s+(.*?)\s+from', sql)
118
- # if not match:
119
- # return []
120
- # cols = match.group(1)
121
- # if cols.strip() == "*":
122
- # return ["*"]
123
- # return [c.strip() for c in cols.split(",")]
124
-
125
- # def _get_db_tables_and_columns(conn: sqlite3.Connection) -> Tuple[Set[str], Set[str]]:
126
- # tables = set()
127
- # columns = set()
128
- # for t in _list_tables(conn):
129
- # tl = t.lower()
130
- # if not tl:
131
- # continue
132
- # tables.add(tl)
133
- # try:
134
- # cur = conn.execute(f'PRAGMA table_info("{t}")')
135
- # for row in cur.fetchall():
136
- # if row and isinstance(row[1], str):
137
- # columns.add(row[1].lower())
138
- # except sqlite3.Error:
139
- # continue
140
- # return tables, columns
141
-
142
- # def _safe_results_equal(a: List[Tuple], b: List[Tuple]) -> bool:
143
- # return a == b
144
-
145
- # @dataclass
146
- # class RewardDebugStats:
147
- # total: int = 0
148
- # parsed_ok: int = 0
149
- # table_match: int = 0
150
- # column_match: int = 0
151
- # executed_ok: int = 0
152
- # exact_match: int = 0
153
-
154
- # _DEBUG = RewardDebugStats()
155
-
156
- # def reset_debug_metrics() -> None:
157
- # global _DEBUG
158
- # _DEBUG = RewardDebugStats()
159
-
160
- # def get_debug_metrics() -> dict:
161
- # denom = max(_DEBUG.total, 1)
162
- # return {
163
- # "valid_sql_rate": _DEBUG.parsed_ok / denom,
164
- # "table_match_rate": _DEBUG.table_match / denom,
165
- # "column_match_rate": _DEBUG.column_match / denom,
166
- # "execution_accuracy": _DEBUG.exact_match / denom,
167
- # }
168
-
169
- # EXECUTION_ERROR = "EXECUTION_ERROR"
170
-
171
- # _RESULT_CACHE_LOCK = threading.Lock()
172
- # _RESULT_CACHE: "Dict[str, Union[List[Tuple], str]]" = {}
173
- # _RESULT_CACHE_MAX = 100_000
174
-
175
- # def clear_result_cache() -> None:
176
- # """Clear both DB query cache and reward cache."""
177
- # with _RESULT_CACHE_LOCK:
178
- # _RESULT_CACHE.clear()
179
- # _REWARD_CACHE.clear()
180
-
181
- # def _db_state_fingerprint(db_path: str) -> str:
182
- # try:
183
- # st = os.stat(db_path)
184
- # return f"{st.st_mtime_ns}:{st.st_size}"
185
- # except OSError:
186
- # return "missing"
187
-
188
- # def _result_cache_key(db_path: str, sql: str) -> str:
189
- # fp = _db_state_fingerprint(db_path)
190
- # payload = f"{fp}\0{sql}".encode("utf-8", errors="ignore")
191
- # return hashlib.sha256(payload).hexdigest()
192
-
193
- # class _ConnectionPool:
194
- # def __init__(self, db_path: str, maxsize: int = 1) -> None:
195
- # self.db_path = db_path
196
- # self.pool = queue.LifoQueue(maxsize=maxsize)
197
- # self.lock = threading.Lock()
198
-
199
- # def acquire(self) -> sqlite3.Connection:
200
- # try:
201
- # return self.pool.get_nowait()
202
- # except queue.Empty:
203
- # with self.lock:
204
- # try:
205
- # return self.pool.get_nowait()
206
- # except queue.Empty:
207
- # return _connect_readonly(self.db_path)
208
-
209
- # def release(self, conn: sqlite3.Connection) -> None:
210
- # try:
211
- # self.pool.put_nowait(conn)
212
- # except queue.Full:
213
- # try:
214
- # conn.close()
215
- # except Exception:
216
- # pass
217
-
218
- # _POOL_LOCK = threading.Lock()
219
- # _POOLS: Dict[str, _ConnectionPool] = {}
220
-
221
- # def _get_pool(db_path: str) -> _ConnectionPool:
222
- # with _POOL_LOCK:
223
- # pool = _POOLS.get(db_path)
224
- # if pool is None:
225
- # pool = _ConnectionPool(db_path=db_path, maxsize=1)
226
- # _POOLS[db_path] = pool
227
- # return pool
228
-
229
- # class _PooledConnection:
230
- # def __init__(self, db_path: str) -> None:
231
- # self.db_path = db_path
232
- # self.pool = _get_pool(db_path)
233
- # self.conn: Optional[sqlite3.Connection] = None
234
-
235
- # def __enter__(self) -> sqlite3.Connection:
236
- # self.conn = self.pool.acquire()
237
- # return self.conn
238
-
239
- # def __exit__(self, exc_type, exc, tb) -> None:
240
- # if self.conn is not None:
241
- # self.pool.release(self.conn)
242
- # self.conn = None
243
-
244
- # def _cache_get(key: str) -> Optional[Union[List[Tuple], str]]:
245
- # with _RESULT_CACHE_LOCK:
246
- # return _RESULT_CACHE.get(key)
247
-
248
- # def _cache_put(key: str, value: Union[List[Tuple], str]) -> None:
249
- # with _RESULT_CACHE_LOCK:
250
- # if len(_RESULT_CACHE) >= _RESULT_CACHE_MAX:
251
- # _RESULT_CACHE.clear()
252
- # _RESULT_CACHE[key] = value
253
-
254
- # def execute_sql(conn: sqlite3.Connection, sql: str, *, max_rows: int = 1000) -> Union[List[Tuple], str]:
255
- # try:
256
- # _with_timeout(conn, timeout_s=DEFAULT_QUERY_TIMEOUT_S)
257
- # cur = conn.execute(sql)
258
- # rows = cur.fetchmany(max_rows)
259
- # return [tuple(r) for r in rows]
260
- # except Exception:
261
- # return EXECUTION_ERROR
262
-
263
- # def execute_sql_cached(db_path: str, sql: str, *, max_rows: int = 1000) -> Union[List[Tuple], str]:
264
- # if not USE_CACHE:
265
- # with _PooledConnection(db_path) as conn:
266
- # return execute_sql(conn, sql, max_rows=max_rows)
267
-
268
- # key = _result_cache_key(db_path, sql)
269
- # cached = _cache_get(key)
270
- # if cached is not None:
271
- # return cached
272
- # with _PooledConnection(db_path) as conn:
273
- # res = execute_sql(conn, sql, max_rows=max_rows)
274
- # _cache_put(key, res)
275
- # return res
276
-
277
- # def execution_reward_timed(
278
- # pred_sql: str, db_path: str, gold_sql: str, *, measure_plan: bool = False,
279
- # ) -> Tuple[float, Dict[str, float]]:
280
- # timings = {"parse_s": 0.0, "plan_s": 0.0, "exec_s": 0.0}
281
- # t0 = time.perf_counter()
282
- # sql = _normalize_sql(pred_sql)
283
- # gold = _normalize_sql(gold_sql)
284
-
285
- # if not is_valid_select(sql):
286
- # timings["parse_s"] = time.perf_counter() - t0
287
- # return 0.0, timings
288
-
289
- # t1 = time.perf_counter()
290
- # timings["parse_s"] = t1 - t0
291
-
292
- # if measure_plan:
293
- # with _PooledConnection(db_path) as conn:
294
- # p0 = time.perf_counter()
295
- # _explain_query_plan(conn, sql)
296
- # _explain_query_plan(conn, gold)
297
- # timings["plan_s"] = time.perf_counter() - p0
298
-
299
- # e0 = time.perf_counter()
300
- # pred_res = execute_sql_cached(db_path, sql)
301
- # if pred_res == EXECUTION_ERROR:
302
- # timings["exec_s"] = time.perf_counter() - e0
303
- # return 0.0, timings
304
- # gold_res = execute_sql_cached(db_path, gold)
305
- # timings["exec_s"] = time.perf_counter() - e0
306
- # if gold_res == EXECUTION_ERROR:
307
- # return 0.0, timings
308
-
309
- # reward = -0.2
310
- # reward += 0.2
311
- # if _safe_results_equal(pred_res, gold_res):
312
- # return 1.0, timings
313
- # return max(-1.0, min(1.0, reward)), timings
314
-
315
- # def execution_reward(pred_sql: str, db_path: str, gold_sql: str) -> float:
316
- # try:
317
- # sql = _normalize_sql(pred_sql)
318
- # gold = _normalize_sql(gold_sql)
319
-
320
- # if not is_valid_select(sql):
321
- # return -1.0
322
-
323
- # reward = -0.2
324
-
325
- # pred_tables = set(extract_tables(sql))
326
- # gold_tables = set(extract_tables(gold))
327
-
328
- # if pred_tables == gold_tables and len(gold_tables) > 0:
329
- # reward += 0.3
330
-
331
- # pred_cols = set(extract_columns(sql))
332
- # gold_cols = set(extract_columns(gold))
333
-
334
- # if gold_cols:
335
- # overlap = len(pred_cols & gold_cols) / len(gold_cols)
336
- # reward += 0.3 * overlap
337
-
338
- # pred_res = execute_sql_cached(db_path, sql)
339
- # if pred_res == EXECUTION_ERROR:
340
- # return 0.0
341
- # reward += 0.2
342
-
343
- # gold_res = execute_sql_cached(db_path, gold)
344
- # if gold_res == EXECUTION_ERROR:
345
- # return 0.0
346
- # if _safe_results_equal(pred_res, gold_res):
347
- # return 1.0
348
-
349
- # return max(-1.0, min(1.0, reward))
350
-
351
- # except Exception:
352
- # return 0.0
353
-
354
- # def cached_execution_reward(pred_sql: str, db_path: str, gold_sql: str) -> float:
355
- # if not USE_CACHE:
356
- # return execution_reward(pred_sql, db_path, gold_sql)
357
-
358
- # key = f"{db_path}|{pred_sql}|{gold_sql}"
359
- # if key not in _REWARD_CACHE:
360
- # _REWARD_CACHE[key] = execution_reward(pred_sql, db_path, gold_sql)
361
- # return _REWARD_CACHE[key]
362
-
363
- # def execution_reward_batch_sequential(rollouts: Sequence[Tuple[str, str, str]]) -> List[float]:
364
- # return [cached_execution_reward(pred_sql, db_path, gold_sql) for pred_sql, db_path, gold_sql in rollouts]
365
-
366
- # def execution_reward_batch_parallel(rollouts: Sequence[Tuple[str, str, str]], *, max_workers: int = 20) -> List[float]:
367
- # if not rollouts:
368
- # return []
369
-
370
- # unique_dbs = {db_path for _, db_path, _ in rollouts}
371
- # worker_count = max(1, min(max_workers, len(unique_dbs)))
372
- # results: List[Optional[float]] = [None] * len(rollouts)
373
-
374
- # with ThreadPoolExecutor(max_workers=worker_count) as executor:
375
- # futures = {
376
- # executor.submit(cached_execution_reward, pred_sql, db_path, gold_sql): i
377
- # for i, (pred_sql, db_path, gold_sql) in enumerate(rollouts)
378
- # }
379
- # for fut in as_completed(futures):
380
- # idx = futures[fut]
381
- # try:
382
- # results[idx] = float(fut.result())
383
- # except Exception:
384
- # results[idx] = 0.0
385
-
386
- # return [r if r is not None else 0.0 for r in results]
387
-
388
  from __future__ import annotations
389
 
390
  import os
391
  import re
392
  import sqlite3
393
- import threading
394
  import time
395
- import json
396
- from concurrent.futures import ThreadPoolExecutor, as_completed
397
  from dataclasses import dataclass
398
- from typing import Dict, List
399
-
400
- from src.sql_validator import validate_sql_schema
401
-
402
- # =========================================================
403
- # 🔥 CONFIG FLAGS
404
- # =========================================================
405
- USE_SCHEMA_VALIDATION = True
406
- USE_CACHE = True
407
- DEFAULT_QUERY_TIMEOUT_S = 2.0
408
-
409
- EXECUTION_ERROR = "EXECUTION_ERROR"
410
-
411
- _REWARD_CACHE: Dict[str, float] = {}
412
-
413
- # =========================================================
414
- # 🔥 TASK 2: ERROR ANALYSIS + LOGGING
415
- # =========================================================
416
- ERROR_LOG_FILE = "results/error_logs.json"
417
-
418
-
419
- def classify_error(sql: str) -> str:
420
- sql = sql.lower()
421
-
422
- if "join" in sql and " on " not in sql:
423
- return "missing_join"
424
-
425
- if "where" in sql and "=" not in sql and ">" not in sql and "<" not in sql:
426
- return "wrong_where"
427
-
428
- if "null" in sql:
429
- return "null_handling"
430
-
431
- if "group by" in sql and "count" not in sql:
432
- return "wrong_groupby"
433
 
434
- return "other"
 
 
 
 
 
 
 
435
 
436
 
437
- def get_hint(error_type: str) -> str:
438
- hints = {
439
- "missing_join": "Add proper JOIN condition using ON.",
440
- "wrong_where": "Check WHERE clause conditions.",
441
- "null_handling": "Handle NULL values using IS NULL.",
442
- "wrong_groupby": "Use aggregation functions with GROUP BY.",
443
- "other": "Check SQL syntax and logic."
444
- }
445
- return hints.get(error_type, "Check query.")
446
-
447
-
448
- def log_error(question: str, sql: str, error: str, error_type: str):
449
- os.makedirs("results", exist_ok=True)
450
-
451
- entry = {
452
- "question": question,
453
- "sql": sql,
454
- "error": error,
455
- "error_type": error_type,
456
- "timestamp": time.time()
457
- }
458
-
459
- if os.path.exists(ERROR_LOG_FILE):
460
- with open(ERROR_LOG_FILE, "r") as f:
461
- logs = json.load(f)
462
- else:
463
- logs = []
464
-
465
- logs.append(entry)
466
-
467
- with open(ERROR_LOG_FILE, "w") as f:
468
- json.dump(logs, f, indent=2)
469
-
470
- # =========================================================
471
- # CACHE/VALIDATION TOGGLES (Task 1)
472
- # =========================================================
473
- def set_use_cache(enabled: bool) -> None:
474
- global USE_CACHE
475
- USE_CACHE = bool(enabled)
476
-
477
-
478
- def set_use_schema_validation(enabled: bool) -> None:
479
- global USE_SCHEMA_VALIDATION
480
- USE_SCHEMA_VALIDATION = bool(enabled)
481
-
482
-
483
- # =========================================================
484
- # SQL CLEANING
485
- # =========================================================
486
  def _normalize_sql(sql: str) -> str:
487
  if not isinstance(sql, str):
488
  return ""
489
  s = sql.strip()
490
-
491
  if s.startswith("```"):
 
492
  s = re.sub(r"^```[a-zA-Z0-9_+-]*\n?", "", s).strip()
493
  s = re.sub(r"\n?```$", "", s).strip()
494
-
495
  if s.lower().startswith("sql:"):
496
  s = s[4:].strip()
497
-
498
  if ";" in s:
499
  s = s.split(";", 1)[0].strip()
500
-
501
  return s
502
 
503
 
504
- # =========================================================
505
- # DB EXECUTION
506
- # =========================================================
507
- def _connect_readonly(db_path: str):
508
  uri = f"file:{os.path.abspath(db_path)}?mode=ro"
509
  conn = sqlite3.connect(uri, uri=True, check_same_thread=False)
510
  conn.execute("PRAGMA query_only = ON;")
@@ -512,320 +43,367 @@ def _connect_readonly(db_path: str):
512
  return conn
513
 
514
 
515
- def _with_timeout(conn: sqlite3.Connection, timeout_s: float = DEFAULT_QUERY_TIMEOUT_S):
516
  start = time.monotonic()
517
 
518
- def handler():
519
  return 1 if (time.monotonic() - start) > timeout_s else 0
520
 
521
- conn.set_progress_handler(handler, 10_000)
 
522
 
523
 
524
- def execute_sql(conn, sql):
525
  try:
526
- _with_timeout(conn, timeout_s=DEFAULT_QUERY_TIMEOUT_S)
527
- cur = conn.execute(sql)
528
- return cur.fetchall()
529
- except Exception:
530
- return EXECUTION_ERROR
531
-
532
-
533
- _RESULT_CACHE = {}
534
- _RESULT_LOCK = threading.Lock()
535
-
536
-
537
- def execute_sql_cached(db_path, sql):
538
- key = f"{db_path}|{sql}"
539
 
540
- if USE_CACHE:
541
- with _RESULT_LOCK:
542
- if key in _RESULT_CACHE:
543
- return _RESULT_CACHE[key]
544
 
545
- conn = _connect_readonly(db_path)
546
- result = execute_sql(conn, sql)
547
- conn.close()
 
 
 
 
 
 
548
 
549
- if USE_CACHE:
550
- with _RESULT_LOCK:
551
- _RESULT_CACHE[key] = result
552
 
553
- return result
 
 
 
 
 
 
554
 
555
 
556
- def execute_sql_cached_conn(conn: sqlite3.Connection, db_path: str, sql: str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
557
  """
558
- Like execute_sql_cached(), but reuses an existing connection.
559
- Intended for 1-thread-per-DB workloads (Task 1).
560
  """
561
- key = f"{db_path}|{sql}"
562
- if USE_CACHE:
563
- with _RESULT_LOCK:
564
- if key in _RESULT_CACHE:
565
- return _RESULT_CACHE[key]
 
 
 
 
566
 
567
- result = execute_sql(conn, sql)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
568
 
569
- if USE_CACHE:
570
- with _RESULT_LOCK:
571
- _RESULT_CACHE[key] = result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
572
 
573
- return result
574
 
 
 
 
575
 
576
- def clear_result_cache() -> None:
577
- global _RESULT_CACHE, _REWARD_CACHE
578
- with _RESULT_LOCK:
579
- _RESULT_CACHE.clear()
580
- _REWARD_CACHE.clear()
581
 
 
 
 
 
 
 
 
 
582
 
583
- # =========================================================
584
- # SQL PARSING
585
- # =========================================================
586
- def is_valid_select(sql):
587
- return sql.lower().startswith("select") or sql.lower().startswith("with")
588
 
 
589
 
590
- def extract_tables(sql):
591
- return re.findall(r'from\s+(\w+)', sql.lower())
592
 
 
 
 
593
 
594
- def extract_columns(sql):
595
- match = re.search(r'select\s+(.*?)\s+from', sql.lower())
596
- if not match:
597
- return []
598
- cols = match.group(1)
599
- return ["*"] if cols.strip() == "*" else [c.strip() for c in cols.split(",")]
600
 
 
 
 
 
 
 
 
 
601
 
602
- def get_sql_operations(sql: str):
603
- sql = sql.lower()
604
- ops = []
605
-
606
- if "select" in sql: ops.append("SELECT")
607
- if "where" in sql: ops.append("WHERE")
608
- if "join" in sql: ops.append("JOIN")
609
- if "group by" in sql: ops.append("GROUP_BY")
610
- if "order by" in sql: ops.append("ORDER_BY")
611
 
612
- return ops
613
 
 
 
 
614
 
615
- def _explain_query_plan(conn: sqlite3.Connection, sql: str) -> bool:
 
616
  try:
617
- _with_timeout(conn, timeout_s=DEFAULT_QUERY_TIMEOUT_S)
618
- conn.execute(f"EXPLAIN QUERY PLAN {sql}")
619
- return True
 
620
  except Exception:
621
- return False
622
 
623
 
624
- def execution_reward_timed(pred_sql: str, db_path: str, gold_sql: str, measure_plan: bool = False):
625
  """
626
- Returns (reward, timings) where timings keys: parse_s, plan_s, exec_s.
627
- Used by Task-1 benchmark to profile bottlenecks.
 
628
  """
629
- timings = {"parse_s": 0.0, "plan_s": 0.0, "exec_s": 0.0}
630
- t0 = time.perf_counter()
631
-
632
- sql = _normalize_sql(pred_sql)
633
- gold = _normalize_sql(gold_sql)
634
-
635
- if not is_valid_select(sql):
636
- timings["parse_s"] = time.perf_counter() - t0
637
- return 0.0, timings
638
-
639
- t1 = time.perf_counter()
640
- timings["parse_s"] = t1 - t0
641
-
642
- conn = _connect_readonly(db_path)
643
  try:
644
- if measure_plan:
645
- p0 = time.perf_counter()
646
- _explain_query_plan(conn, sql)
647
- _explain_query_plan(conn, gold)
648
- timings["plan_s"] = time.perf_counter() - p0
649
-
650
- e0 = time.perf_counter()
651
- pred_res = execute_sql_cached_conn(conn, db_path, sql)
652
- if pred_res == EXECUTION_ERROR:
653
- timings["exec_s"] = time.perf_counter() - e0
654
- return 0.0, timings
655
- gold_res = execute_sql_cached_conn(conn, db_path, gold)
656
- timings["exec_s"] = time.perf_counter() - e0
657
- if gold_res == EXECUTION_ERROR:
658
- return 0.0, timings
659
-
660
- reward = -0.2 + 0.2
661
- if pred_res == gold_res:
662
- return 1.0, timings
663
- return max(-1.0, min(1.0, reward)), timings
664
- finally:
665
- try:
666
- conn.close()
667
- except Exception:
668
- pass
669
-
670
 
671
- # =========================================================
672
- # 🔥 FINAL REWARD FUNCTION (TASK 2 INTEGRATED)
673
- # =========================================================
674
  def execution_reward(pred_sql: str, db_path: str, gold_sql: str) -> float:
675
  try:
676
  sql = _normalize_sql(pred_sql)
677
  gold = _normalize_sql(gold_sql)
678
 
679
- if not is_valid_select(sql):
680
  return -1.0
681
 
682
- reward = -0.2
683
-
684
- # =========================
685
- # SCHEMA VALIDATION (Task 3)
686
- # =========================
687
- if USE_SCHEMA_VALIDATION:
688
- valid, _ = validate_sql_schema(sql, db_path)
689
- if not valid:
690
- error_type = classify_error(sql)
691
- log_error("UNKNOWN", sql, "schema_invalid", error_type)
692
- return 0.1
693
-
694
- # =========================
695
- # EXECUTION
696
- # =========================
697
- pred_res = execute_sql_cached(db_path, sql)
698
-
699
- if pred_res == "EXECUTION_ERROR":
700
- error_type = classify_error(sql)
701
-
702
- log_error(
703
- question="UNKNOWN",
704
- sql=sql,
705
- error="execution_error",
706
- error_type=error_type
707
- )
708
-
709
- print(f"[ERROR] {error_type}")
710
- print(f"[HINT] {get_hint(error_type)}")
711
-
712
- return 0.1
713
-
714
- reward += 0.2
715
-
716
- gold_res = execute_sql_cached(db_path, gold)
717
-
718
- if gold_res == "EXECUTION_ERROR":
719
- return 0.1
720
-
721
- if pred_res == gold_res:
722
- return 1.0
723
-
724
- return max(-1.0, min(1.0, reward))
725
-
726
- except Exception as e:
727
- log_error("UNKNOWN", pred_sql, str(e), "runtime_error")
728
- return 0.0
729
-
730
-
731
- # =========================================================
732
- # BATCH EXECUTION (Task 1)
733
- # =========================================================
734
- def cached_execution_reward(pred_sql: str, db_path: str, gold_sql: str) -> float:
735
- if not USE_CACHE:
736
- return float(execution_reward(pred_sql, db_path, gold_sql))
737
- key = f"{db_path}|{pred_sql}|{gold_sql}"
738
- if key in _REWARD_CACHE:
739
- return float(_REWARD_CACHE[key])
740
- r = float(execution_reward(pred_sql, db_path, gold_sql))
741
- _REWARD_CACHE[key] = r
742
- return r
743
-
744
-
745
- def execution_reward_batch_sequential(rollouts):
746
- return [cached_execution_reward(p, d, g) for (p, d, g) in rollouts]
747
-
748
-
749
- def execution_reward_batch_parallel(rollouts, max_workers=10):
750
- results = [0.0] * len(rollouts)
751
-
752
- with ThreadPoolExecutor(max_workers=max_workers) as executor:
753
- futures = {
754
- executor.submit(cached_execution_reward, p, d, g): i
755
- for i, (p, d, g) in enumerate(rollouts)
756
- }
757
 
758
- for fut in as_completed(futures):
759
- idx = futures[fut]
760
- try:
761
- results[idx] = fut.result()
762
- except Exception:
763
- results[idx] = 0.0
764
 
765
- return results
 
766
 
 
 
767
 
768
- def execution_reward_batch_parallel_by_db(rollouts, max_workers: int = 20):
769
- """
770
- 1 thread per DB path. Reuses a single readonly connection per DB worker.
771
- Preserves input order.
772
- """
773
- if not rollouts:
774
- return []
775
 
776
- by_db = {}
777
- for idx, (pred_sql, db_path, gold_sql) in enumerate(rollouts):
778
- by_db.setdefault(db_path, []).append((idx, pred_sql, gold_sql))
779
 
780
- results = [0.0 for _ in range(len(rollouts))]
 
 
 
781
 
782
- def _reward_with_conn(conn: sqlite3.Connection, pred_sql: str, db_path: str, gold_sql: str) -> float:
783
- try:
784
- sql = _normalize_sql(pred_sql)
785
- gold = _normalize_sql(gold_sql)
786
-
787
- if not is_valid_select(sql):
788
- return -1.0
789
-
790
- reward = -0.2
791
-
792
- if USE_SCHEMA_VALIDATION:
793
- valid, _ = validate_sql_schema(sql, db_path)
794
- if not valid:
795
- error_type = classify_error(sql)
796
- log_error("UNKNOWN", sql, "schema_invalid", error_type)
797
- return 0.1
798
-
799
- pred_res = execute_sql_cached_conn(conn, db_path, sql)
800
- if pred_res == EXECUTION_ERROR:
801
- error_type = classify_error(sql)
802
- log_error("UNKNOWN", sql, "execution_error", error_type)
803
- return 0.1
804
-
805
- reward += 0.2
806
- gold_res = execute_sql_cached_conn(conn, db_path, gold)
807
- if gold_res == EXECUTION_ERROR:
808
- return 0.1
809
- if pred_res == gold_res:
810
  return 1.0
811
- return max(-1.0, min(1.0, reward))
812
- except Exception:
813
- return 0.0
814
 
815
- def _worker(db_path: str, items):
816
- conn = _connect_readonly(db_path)
817
- try:
818
- for idx, pred, gold in items:
819
- results[idx] = _reward_with_conn(conn, pred, db_path, gold)
820
- finally:
821
- try:
822
- conn.close()
823
- except Exception:
824
- pass
825
-
826
- with ThreadPoolExecutor(max_workers=int(max_workers)) as ex:
827
- futures = [ex.submit(_worker, db_path, items) for db_path, items in by_db.items()]
828
- for fut in as_completed(futures):
829
- fut.result()
830
 
831
- return results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from __future__ import annotations
2
 
3
  import os
4
  import re
5
  import sqlite3
 
6
  import time
 
 
7
  from dataclasses import dataclass
8
+ from typing import List, Optional, Sequence, Set, Tuple, Union
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
+ try:
11
+ import sqlparse
12
+ from sqlparse.sql import Function, Identifier, IdentifierList, Statement, Token, Where
13
+ from sqlparse.tokens import DML, Keyword, Name, Number, Punctuation, String, Whitespace
14
+ except Exception: # pragma: no cover
15
+ sqlparse = None # type: ignore[assignment]
16
+ Statement = object # type: ignore[misc,assignment]
17
+ Token = object # type: ignore[misc,assignment]
18
 
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  def _normalize_sql(sql: str) -> str:
21
  if not isinstance(sql, str):
22
  return ""
23
  s = sql.strip()
 
24
  if s.startswith("```"):
25
+ # Strip markdown fences if present.
26
  s = re.sub(r"^```[a-zA-Z0-9_+-]*\n?", "", s).strip()
27
  s = re.sub(r"\n?```$", "", s).strip()
 
28
  if s.lower().startswith("sql:"):
29
  s = s[4:].strip()
30
+ # Keep only the first statement to avoid accidental multi-statement execution.
31
  if ";" in s:
32
  s = s.split(";", 1)[0].strip()
 
33
  return s
34
 
35
 
36
+ def _connect_readonly(db_path: str) -> sqlite3.Connection:
37
+ # Read-only prevents any accidental mutation during reward computation.
38
+ # Note: requires SQLite URI support (built-in).
 
39
  uri = f"file:{os.path.abspath(db_path)}?mode=ro"
40
  conn = sqlite3.connect(uri, uri=True, check_same_thread=False)
41
  conn.execute("PRAGMA query_only = ON;")
 
43
  return conn
44
 
45
 
46
+ def _with_timeout(conn: sqlite3.Connection, timeout_s: float = 1.0) -> None:
47
  start = time.monotonic()
48
 
49
+ def _handler() -> int:
50
  return 1 if (time.monotonic() - start) > timeout_s else 0
51
 
52
+ # Call handler every N VM opcodes.
53
+ conn.set_progress_handler(_handler, 10_000)
54
 
55
 
56
+ def _list_tables(conn: sqlite3.Connection) -> List[str]:
57
  try:
58
+ cur = conn.execute(
59
+ "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';"
60
+ )
61
+ return [r[0] for r in cur.fetchall() if r and isinstance(r[0], str)]
62
+ except sqlite3.Error:
63
+ return []
 
 
 
 
 
 
 
64
 
 
 
 
 
65
 
66
+ def _contains_table_name(sql: str, table_names: Sequence[str]) -> bool:
67
+ s = sql.lower()
68
+ for t in table_names:
69
+ tl = t.lower()
70
+ if not tl:
71
+ continue
72
+ if re.search(rf"\b{re.escape(tl)}\b", s):
73
+ return True
74
+ return False
75
 
 
 
 
76
 
77
+ def _explain_query_plan(conn: sqlite3.Connection, sql: str) -> bool:
78
+ try:
79
+ _with_timeout(conn, timeout_s=1.0)
80
+ conn.execute(f"EXPLAIN QUERY PLAN {sql}")
81
+ return True
82
+ except sqlite3.Error:
83
+ return False
84
 
85
 
86
+ def _execute(conn: sqlite3.Connection, sql: str, max_rows: int = 1000) -> Tuple[bool, List[Tuple], Optional[str]]:
87
+ try:
88
+ _with_timeout(conn, timeout_s=1.0)
89
+ cur = conn.execute(sql)
90
+ rows = cur.fetchmany(max_rows)
91
+ # Normalize to plain tuples for deterministic comparison.
92
+ norm_rows = [tuple(r) for r in rows]
93
+ return True, norm_rows, None
94
+ except sqlite3.Error as e:
95
+ return False, [], str(e)
96
+
97
+
98
+ _SQL_KEYWORDS_TO_IGNORE = {
99
+ "select",
100
+ "from",
101
+ "where",
102
+ "join",
103
+ "inner",
104
+ "left",
105
+ "right",
106
+ "full",
107
+ "outer",
108
+ "on",
109
+ "group",
110
+ "by",
111
+ "order",
112
+ "limit",
113
+ "having",
114
+ "distinct",
115
+ "union",
116
+ "intersect",
117
+ "except",
118
+ "as",
119
+ "and",
120
+ "or",
121
+ "not",
122
+ "in",
123
+ "is",
124
+ "null",
125
+ "like",
126
+ "between",
127
+ "case",
128
+ "when",
129
+ "then",
130
+ "else",
131
+ "end",
132
+ "asc",
133
+ "desc",
134
+ }
135
+
136
+ _SQL_FUNCTIONS_TO_IGNORE = {
137
+ "count",
138
+ "avg",
139
+ "min",
140
+ "max",
141
+ "sum",
142
+ "lower",
143
+ "upper",
144
+ "substr",
145
+ "coalesce",
146
+ "round",
147
+ "date",
148
+ "datetime",
149
+ "strftime",
150
+ }
151
+
152
+
153
+ def extract_tables(sql: str) -> Set[str]:
154
  """
155
+ Best-effort table extraction from SQL using sqlparse.
156
+ Returns lowercase table names (unqualified).
157
  """
158
+ sql = _normalize_sql(sql)
159
+ if not sql:
160
+ return set()
161
+ if sqlparse is None:
162
+ # Fallback: naive regex for FROM/JOIN.
163
+ found = set()
164
+ for m in re.finditer(r"\b(from|join)\s+([a-zA-Z_][\w$]*)", sql, flags=re.I):
165
+ found.add(m.group(2).lower())
166
+ return found
167
 
168
+ try:
169
+ statements = sqlparse.parse(sql)
170
+ except Exception:
171
+ return set()
172
+
173
+ tables: Set[str] = set()
174
+
175
+ def _add_identifier_as_table(ident: Identifier) -> None:
176
+ # Prefer real name over alias; strip any schema prefix.
177
+ name = ident.get_real_name() or ident.get_name()
178
+ if not name:
179
+ return
180
+ tables.add(name.lower())
181
+
182
+ for st in statements:
183
+ if not isinstance(st, Statement):
184
+ continue
185
+ seen_from = False
186
+ for tok in st.flatten():
187
+ if tok.ttype in Whitespace:
188
+ continue
189
+ if tok.ttype is Keyword and tok.value.upper() in {"FROM", "JOIN", "INNER JOIN", "LEFT JOIN", "RIGHT JOIN", "FULL JOIN"}:
190
+ seen_from = True
191
+ continue
192
+ if not seen_from:
193
+ continue
194
+
195
+ if isinstance(tok, Identifier):
196
+ _add_identifier_as_table(tok)
197
+ seen_from = False
198
+ elif tok.ttype is Name:
199
+ tables.add(tok.value.lower())
200
+ seen_from = False
201
+ elif tok.ttype is Keyword and tok.value.upper() in {"WHERE", "GROUP", "ORDER", "HAVING", "LIMIT"}:
202
+ seen_from = False
203
+
204
+ return tables
205
+
206
+
207
+ def extract_columns(sql: str) -> Set[str]:
208
+ """
209
+ Best-effort column extraction from SQL using sqlparse.
210
+ Returns lowercase column names (unqualified).
211
+ """
212
+ sql = _normalize_sql(sql)
213
+ if not sql:
214
+ return set()
215
+ if sqlparse is None:
216
+ # Fallback: naive dotted identifiers and bare names after SELECT/WHERE/etc.
217
+ cols = set()
218
+ for m in re.finditer(r"\b([a-zA-Z_][\w$]*)\b", sql):
219
+ w = m.group(1).lower()
220
+ if w in _SQL_KEYWORDS_TO_IGNORE or w in _SQL_FUNCTIONS_TO_IGNORE:
221
+ continue
222
+ cols.add(w)
223
+ return cols
224
 
225
+ try:
226
+ statements = sqlparse.parse(sql)
227
+ except Exception:
228
+ return set()
229
+
230
+ cols: Set[str] = set()
231
+
232
+ def _maybe_add_col(name: Optional[str]) -> None:
233
+ if not name:
234
+ return
235
+ n = name.strip().strip('"').strip("'").lower()
236
+ if not n or n == "*":
237
+ return
238
+ if n in _SQL_KEYWORDS_TO_IGNORE or n in _SQL_FUNCTIONS_TO_IGNORE:
239
+ return
240
+ cols.add(n)
241
+
242
+ def _handle_identifier(ident: Identifier) -> None:
243
+ # If qualified (t.col), keep only col for overlap/hallucination checks.
244
+ _maybe_add_col(ident.get_real_name() or ident.get_name())
245
+
246
+ for st in statements:
247
+ if not isinstance(st, Statement):
248
+ continue
249
+ for tok in st.flatten():
250
+ # Skip whitespace/punctuation/string literals/numbers.
251
+ if getattr(tok, "ttype", None) in (Whitespace, Punctuation, String, Number):
252
+ continue
253
+
254
+ if isinstance(tok, Function):
255
+ fname = tok.get_name()
256
+ if fname:
257
+ # Don't treat function name as a column.
258
+ pass
259
+ continue
260
+
261
+ if isinstance(tok, IdentifierList):
262
+ for ident in tok.get_identifiers():
263
+ if isinstance(ident, Identifier):
264
+ _handle_identifier(ident)
265
+ continue
266
+
267
+ if isinstance(tok, Identifier):
268
+ _handle_identifier(tok)
269
+ continue
270
+
271
+ if getattr(tok, "ttype", None) is Name:
272
+ _maybe_add_col(tok.value)
273
+
274
+ return cols
275
+
276
+
277
+ def _get_db_tables_and_columns(conn: sqlite3.Connection) -> Tuple[Set[str], Set[str]]:
278
+ """
279
+ Return (tables, columns) sets from SQLite schema; all lowercased.
280
+ Columns are returned as a global set (unqualified).
281
+ """
282
+ tables = set()
283
+ columns = set()
284
+ for t in _list_tables(conn):
285
+ tl = t.lower()
286
+ if not tl:
287
+ continue
288
+ tables.add(tl)
289
+ try:
290
+ cur = conn.execute(f'PRAGMA table_info("{t}")')
291
+ for row in cur.fetchall():
292
+ if row and isinstance(row[1], str):
293
+ columns.add(row[1].lower())
294
+ except sqlite3.Error:
295
+ continue
296
+ return tables, columns
297
 
 
298
 
299
+ def _safe_results_equal(a: List[Tuple], b: List[Tuple]) -> bool:
300
+ # Deterministic comparison: compare exact row tuples in order.
301
+ return a == b
302
 
 
 
 
 
 
303
 
304
+ @dataclass
305
+ class RewardDebugStats:
306
+ total: int = 0
307
+ parsed_ok: int = 0
308
+ table_match: int = 0
309
+ column_match: int = 0
310
+ executed_ok: int = 0
311
+ exact_match: int = 0
312
 
 
 
 
 
 
313
 
314
+ _DEBUG = RewardDebugStats()
315
 
 
 
316
 
317
+ def reset_debug_metrics() -> None:
318
+ global _DEBUG
319
+ _DEBUG = RewardDebugStats()
320
 
 
 
 
 
 
 
321
 
322
+ def get_debug_metrics() -> dict:
323
+ denom = max(_DEBUG.total, 1)
324
+ return {
325
+ "valid_sql_rate": _DEBUG.parsed_ok / denom,
326
+ "table_match_rate": _DEBUG.table_match / denom,
327
+ "column_match_rate": _DEBUG.column_match / denom,
328
+ "execution_accuracy": _DEBUG.exact_match / denom,
329
+ }
330
 
331
+ EXECUTION_ERROR = "EXECUTION_ERROR"
 
 
 
 
 
 
 
 
332
 
 
333
 
334
+ def execute_sql(conn: sqlite3.Connection, sql: str, *, max_rows: int = 1000) -> Union[List[Tuple], str]:
335
+ """
336
+ Execute SQL safely.
337
 
338
+ If sqlite raises ANY exception, return EXECUTION_ERROR (NOT empty list).
339
+ """
340
  try:
341
+ _with_timeout(conn, timeout_s=1.0)
342
+ cur = conn.execute(sql)
343
+ rows = cur.fetchmany(max_rows)
344
+ return [tuple(r) for r in rows]
345
  except Exception:
346
+ return EXECUTION_ERROR
347
 
348
 
349
+ def _sqlparse_valid_select(sql: str) -> bool:
350
  """
351
+ Parse validation using sqlparse:
352
+ - parse() non-empty
353
+ - contains a SELECT statement
354
  """
355
+ if sqlparse is None:
356
+ return False
 
 
 
 
 
 
 
 
 
 
 
 
357
  try:
358
+ stmts = sqlparse.parse(sql)
359
+ if not stmts:
360
+ return False
361
+ for st in stmts:
362
+ try:
363
+ if hasattr(st, "get_type") and st.get_type() == "SELECT":
364
+ return True
365
+ except Exception:
366
+ continue
367
+ return False
368
+ except Exception:
369
+ return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
 
 
 
 
371
  def execution_reward(pred_sql: str, db_path: str, gold_sql: str) -> float:
372
  try:
373
  sql = _normalize_sql(pred_sql)
374
  gold = _normalize_sql(gold_sql)
375
 
376
+ if not sql or "SELECT" not in sql.upper():
377
  return -1.0
378
 
379
+ if not _sqlparse_valid_select(sql):
380
+ return -1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
381
 
382
+ reward = -0.2 # valid SQL baseline
 
 
 
 
 
383
 
384
+ pred_tables = extract_tables(sql)
385
+ gold_tables = extract_tables(gold)
386
 
387
+ if pred_tables == gold_tables and len(gold_tables) > 0:
388
+ reward += 0.3
389
 
390
+ pred_cols = extract_columns(sql)
391
+ gold_cols = extract_columns(gold)
 
 
 
 
 
392
 
393
+ if gold_cols:
394
+ overlap = len(pred_cols & gold_cols) / len(gold_cols)
395
+ reward += 0.3 * overlap
396
 
397
+ with _connect_readonly(db_path) as conn:
398
+ pred_res = execute_sql(conn, sql)
399
+ if pred_res != EXECUTION_ERROR:
400
+ reward += 0.2
401
 
402
+ gold_res = execute_sql(conn, gold)
403
+ if pred_res != EXECUTION_ERROR and _safe_results_equal(pred_res, gold_res):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
404
  return 1.0
 
 
 
405
 
406
+ return max(-1.0, min(1.0, reward))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
407
 
408
+ except Exception:
409
+ return -1.0
src/execution_reward_soft.py DELETED
@@ -1,211 +0,0 @@
1
- import random
2
- import threading
3
- from collections import Counter
4
- from concurrent.futures import ThreadPoolExecutor, as_completed
5
- from src.execution_reward import (
6
- _normalize_sql,
7
- is_valid_select,
8
- execute_sql_cached,
9
- execute_sql_cached_conn,
10
- EXECUTION_ERROR,
11
- validate_sql_schema,
12
- USE_SCHEMA_VALIDATION,
13
- _connect_readonly,
14
- )
15
-
16
- # =========================================================
17
- # 🔥 SOFT REWARD CORE
18
- # =========================================================
19
- def compute_soft_reward(pred_res, gold_res, sample_k=10):
20
- try:
21
- # =================================================
22
- # 1. EDGE CASES
23
- # =================================================
24
- if not gold_res:
25
- return 1.0 if not pred_res else 0.3
26
-
27
- if not pred_res:
28
- return -0.05
29
-
30
- # =================================================
31
- # 2. SAFE HASHING
32
- # =================================================
33
- def make_hashable(row):
34
- return tuple(str(item) for item in row)
35
-
36
- pred_counter = Counter(make_hashable(r) for r in pred_res)
37
-
38
- # =================================================
39
- # 3. SAMPLING
40
- # =================================================
41
- k = min(sample_k, len(gold_res))
42
- sample = random.sample(gold_res, k)
43
-
44
- # =================================================
45
- # 4. MATCH COUNT
46
- # =================================================
47
- match = 0
48
- for row in sample:
49
- key = make_hashable(row)
50
- if pred_counter.get(key, 0) > 0:
51
- pred_counter[key] -= 1
52
- match += 1
53
-
54
- score = match / max(len(sample), 1)
55
-
56
- # =================================================
57
- # 5. 🔥 ANTI-CHEAT LENGTH PENALTY
58
- # =================================================
59
- len_ratio = len(pred_res) / max(len(gold_res), 1)
60
-
61
- if len_ratio > 1.5:
62
- score = score / (len_ratio ** 0.5) # 🔥 smoother penalty
63
-
64
- # =================================================
65
- # 6. CLAMP SCORE (IMPORTANT FOR STABILITY)
66
- # =================================================
67
- score = max(0.0, min(1.0, score))
68
-
69
- # =================================================
70
- # 7. FINAL REWARD
71
- # =================================================
72
- return 0.3 + 0.7 * score
73
-
74
- except Exception:
75
- return -0.05
76
-
77
-
78
- # =========================================================
79
- # 🔥 MAIN EXECUTION REWARD
80
- # =========================================================
81
- _TLS = threading.local()
82
-
83
-
84
- def _get_thread_conn(db_path: str):
85
- conns = getattr(_TLS, "conns", None)
86
- if conns is None:
87
- conns = {}
88
- _TLS.conns = conns
89
- conn = conns.get(db_path)
90
- if conn is None:
91
- conn = _connect_readonly(db_path)
92
- conns[db_path] = conn
93
- return conn
94
-
95
-
96
- def execution_reward_soft_pooled(pred_sql, db_path, gold_sql, *, sample_k: int = 10):
97
- """
98
- Soft execution reward, but reuses a per-thread read-only SQLite connection.
99
- This avoids connect/close overhead in RL loops.
100
- """
101
- try:
102
- sql = _normalize_sql(pred_sql)
103
- gold = _normalize_sql(gold_sql)
104
-
105
- if not is_valid_select(sql):
106
- return -0.05
107
-
108
- if USE_SCHEMA_VALIDATION:
109
- ok, _ = validate_sql_schema(sql, db_path)
110
- if not ok:
111
- return -0.05
112
-
113
- conn = _get_thread_conn(db_path)
114
- pred_res = execute_sql_cached_conn(conn, db_path, sql)
115
- if pred_res == EXECUTION_ERROR:
116
- return -0.05
117
-
118
- gold_res = execute_sql_cached_conn(conn, db_path, gold)
119
- if gold_res == EXECUTION_ERROR:
120
- return -0.05
121
-
122
- return compute_soft_reward(pred_res, gold_res, sample_k=int(sample_k))
123
- except Exception:
124
- return -0.05
125
-
126
-
127
- def execution_reward_soft(pred_sql, db_path, gold_sql):
128
- try:
129
- sql = _normalize_sql(pred_sql)
130
- gold = _normalize_sql(gold_sql)
131
-
132
- # =================================================
133
- # BASIC VALIDATION
134
- # =================================================
135
- if not is_valid_select(sql):
136
- return -0.05
137
-
138
- if USE_SCHEMA_VALIDATION:
139
- ok, _ = validate_sql_schema(sql, db_path)
140
- if not ok:
141
- return -0.05
142
-
143
- # =================================================
144
- # EXECUTION
145
- # =================================================
146
- pred_res = execute_sql_cached(db_path, sql)
147
- if pred_res == EXECUTION_ERROR:
148
- return -0.05
149
-
150
- gold_res = execute_sql_cached(db_path, gold)
151
- if gold_res == EXECUTION_ERROR:
152
- return -0.05
153
-
154
- return compute_soft_reward(pred_res, gold_res)
155
-
156
- except Exception:
157
- return -0.05
158
-
159
-
160
- def execution_reward_soft_batch_parallel_by_db(rollouts, *, max_workers: int = 20, sample_k: int = 10):
161
- """
162
- rollouts: Sequence[(pred_sql, db_path, gold_sql)]
163
- Executes with 1-thread-per-DB grouping for better connection reuse.
164
- Returns rewards in the same order as input.
165
- """
166
- if not rollouts:
167
- return []
168
-
169
- # Group by DB so each worker can hold a single connection and reuse it.
170
- by_db = {}
171
- for idx, (pred_sql, db_path, gold_sql) in enumerate(rollouts):
172
- by_db.setdefault(db_path, []).append((idx, pred_sql, gold_sql))
173
-
174
- out = [0.0 for _ in range(len(rollouts))]
175
-
176
- def _worker(db_path: str, items):
177
- conn = _connect_readonly(db_path)
178
- try:
179
- for idx, pred_sql, gold_sql in items:
180
- # Do NOT use the global thread-local here; this worker owns the connection.
181
- try:
182
- sql = _normalize_sql(pred_sql)
183
- gold = _normalize_sql(gold_sql)
184
- if not is_valid_select(sql):
185
- out[idx] = -0.05
186
- continue
187
- if USE_SCHEMA_VALIDATION:
188
- ok, _ = validate_sql_schema(sql, db_path)
189
- if not ok:
190
- out[idx] = -0.05
191
- continue
192
- pred_res = execute_sql_cached_conn(conn, db_path, sql)
193
- if pred_res == EXECUTION_ERROR:
194
- out[idx] = -0.05
195
- continue
196
- gold_res = execute_sql_cached_conn(conn, db_path, gold)
197
- if gold_res == EXECUTION_ERROR:
198
- out[idx] = -0.05
199
- continue
200
- out[idx] = float(compute_soft_reward(pred_res, gold_res, sample_k=int(sample_k)))
201
- except Exception:
202
- out[idx] = -0.05
203
- finally:
204
- conn.close()
205
-
206
- with ThreadPoolExecutor(max_workers=int(max_workers)) as ex:
207
- futures = [ex.submit(_worker, db_path, items) for db_path, items in by_db.items()]
208
- for fut in as_completed(futures):
209
- fut.result()
210
-
211
- return out
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/load_lora_model.py CHANGED
@@ -1,84 +1,21 @@
1
- # import torch
2
- # from transformers import T5ForConditionalGeneration, T5Tokenizer
3
- # from peft import LoraConfig, get_peft_model, TaskType
4
-
5
- # device = "mps" if torch.backends.mps.is_available() else "cpu"
6
-
7
- # MODEL_PATH = "../outputs/model" # your supervised trained model
8
-
9
- # print("Loading base model...")
10
- # model = T5ForConditionalGeneration.from_pretrained(MODEL_PATH).to(device)
11
-
12
- # tokenizer = T5Tokenizer.from_pretrained("t5-small")
13
-
14
- # # ---------------- LoRA CONFIG ----------------
15
- # lora_config = LoraConfig(
16
- # r=8, # rank (small brain attachment)
17
- # lora_alpha=16,
18
- # target_modules=["q", "v"], # attention matrices only
19
- # lora_dropout=0.05,
20
- # bias="none",
21
- # task_type=TaskType.SEQ_2_SEQ_LM
22
- # )
23
-
24
- # print("Attaching LoRA adapters...")
25
- # model = get_peft_model(model, lora_config)
26
-
27
- # model.print_trainable_parameters()
28
-
29
- # print("READY ✔ LoRA model loaded")
30
-
31
- # ****************** task 5 @#$%^&*I(O)(*&^%$#$%^&*(*&^%$#$%^&*^%$#%^)
32
- # )
33
- #
34
- #
35
  import torch
36
  from transformers import T5ForConditionalGeneration, T5Tokenizer
37
  from peft import LoraConfig, get_peft_model, TaskType
38
 
39
- # ---------------- DEVICE SETUP ----------------
40
  device = "mps" if torch.backends.mps.is_available() else "cpu"
41
 
42
- MODEL_PATH = "../outputs/model"
43
-
44
- # ---------------- LOAD TOKENIZER ----------------
45
- tokenizer = T5Tokenizer.from_pretrained("t5-small")
46
-
47
- # ---------------- LOAD MODEL WITH QUANTIZATION ----------------
48
- def load_model(quantization=None):
49
- print(f"Loading model with quantization = {quantization}")
50
-
51
- if quantization == "int8":
52
- model = T5ForConditionalGeneration.from_pretrained(
53
- MODEL_PATH,
54
- load_in_8bit=True,
55
- device_map="auto"
56
- )
57
-
58
- elif quantization == "int4":
59
- model = T5ForConditionalGeneration.from_pretrained(
60
- MODEL_PATH,
61
- load_in_4bit=True,
62
- device_map="auto"
63
- )
64
-
65
- else: # fp32
66
- model = T5ForConditionalGeneration.from_pretrained(MODEL_PATH).to(device)
67
 
68
- return model
69
-
70
-
71
- # 👉 CHANGE THIS VALUE TO TEST
72
- QUANTIZATION = "int8" # options: None, "int8", "int4"
73
-
74
- model = load_model(QUANTIZATION)
75
 
 
76
 
77
  # ---------------- LoRA CONFIG ----------------
78
  lora_config = LoraConfig(
79
- r=8,
80
  lora_alpha=16,
81
- target_modules=["q", "v"],
82
  lora_dropout=0.05,
83
  bias="none",
84
  task_type=TaskType.SEQ_2_SEQ_LM
@@ -89,4 +26,5 @@ model = get_peft_model(model, lora_config)
89
 
90
  model.print_trainable_parameters()
91
 
92
- print("READY ✔ LoRA + Quantized model loaded")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import torch
2
  from transformers import T5ForConditionalGeneration, T5Tokenizer
3
  from peft import LoraConfig, get_peft_model, TaskType
4
 
 
5
  device = "mps" if torch.backends.mps.is_available() else "cpu"
6
 
7
+ MODEL_PATH = "../outputs/model" # your supervised trained model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
+ print("Loading base model...")
10
+ model = T5ForConditionalGeneration.from_pretrained(MODEL_PATH).to(device)
 
 
 
 
 
11
 
12
+ tokenizer = T5Tokenizer.from_pretrained("t5-small")
13
 
14
  # ---------------- LoRA CONFIG ----------------
15
  lora_config = LoraConfig(
16
+ r=8, # rank (small brain attachment)
17
  lora_alpha=16,
18
+ target_modules=["q", "v"], # attention matrices only
19
  lora_dropout=0.05,
20
  bias="none",
21
  task_type=TaskType.SEQ_2_SEQ_LM
 
26
 
27
  model.print_trainable_parameters()
28
 
29
+ print("READY ✔ LoRA model loaded")
30
+
src/quantization_utils.py DELETED
@@ -1,222 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import json
4
- import os
5
- import time
6
- from dataclasses import dataclass
7
- from pathlib import Path
8
- from typing import Any, Dict, Optional, Tuple
9
-
10
- import torch
11
- from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
12
-
13
- try:
14
- from transformers import BitsAndBytesConfig # type: ignore
15
- except Exception: # pragma: no cover
16
- BitsAndBytesConfig = None # type: ignore
17
-
18
- try:
19
- from peft import PeftModel
20
- except Exception as e: # pragma: no cover
21
- PeftModel = None # type: ignore
22
-
23
-
24
- @dataclass(frozen=True)
25
- class QuantArtifact:
26
- out_dir: Path
27
- mode: str # fp32 | int8_dynamic | int8_decoder_dynamic | int8_bnb | int4_bnb
28
- base_model: str
29
- adapter_path: Optional[str]
30
- created_at_s: float
31
-
32
-
33
- def _bool_env(name: str, default: str = "0") -> bool:
34
- return os.environ.get(name, default).strip() in {"1", "true", "True", "yes", "Y"}
35
-
36
-
37
- def estimate_model_bytes(model: torch.nn.Module) -> int:
38
- total = 0
39
- for p in model.parameters():
40
- total += p.numel() * p.element_size()
41
- for b in model.buffers():
42
- total += b.numel() * b.element_size()
43
- return int(total)
44
-
45
-
46
- def _load_tokenizer(base_model: str, *, local_only: bool) -> Any:
47
- tok = AutoTokenizer.from_pretrained(base_model, local_files_only=local_only)
48
- if tok.pad_token_id is None and getattr(tok, "eos_token_id", None) is not None:
49
- tok.pad_token = tok.eos_token
50
- return tok
51
-
52
-
53
- def load_fp32_model(
54
- base_model: str,
55
- *,
56
- adapter_path: Optional[str] = None,
57
- device: str = "cpu",
58
- local_only: bool = True,
59
- torch_dtype: torch.dtype = torch.float32,
60
- merge_lora: bool = True,
61
- ) -> Tuple[Any, torch.nn.Module]:
62
- tok = _load_tokenizer(base_model, local_only=local_only)
63
- model = AutoModelForSeq2SeqLM.from_pretrained(
64
- base_model,
65
- local_files_only=local_only,
66
- torch_dtype=torch_dtype,
67
- ).to(device)
68
-
69
- if adapter_path:
70
- if PeftModel is None:
71
- raise RuntimeError("peft is required to load adapters.")
72
- model = PeftModel.from_pretrained(model, adapter_path).to(device)
73
- if merge_lora and hasattr(model, "merge_and_unload"):
74
- model = model.merge_and_unload()
75
- model = model.to(device)
76
-
77
- model.eval()
78
- return tok, model
79
-
80
-
81
- def quantize_dynamic_int8(model: torch.nn.Module) -> torch.nn.Module:
82
- # CPU-only; quantized kernels run on CPU.
83
- # Ensure a quantization engine is selected (PyTorch may default to "none" on macOS).
84
- try:
85
- supported = list(getattr(torch.backends.quantized, "supported_engines", []))
86
- current = getattr(torch.backends.quantized, "engine", "none")
87
- if current in {"none", None, ""}:
88
- if "fbgemm" in supported:
89
- torch.backends.quantized.engine = "fbgemm"
90
- elif "qnnpack" in supported:
91
- torch.backends.quantized.engine = "qnnpack"
92
- except Exception: # pragma: no cover
93
- pass
94
- return torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
95
-
96
-
97
- def quantize_dynamic_int8_decoder_only(model: Any) -> Any:
98
- """
99
- Mixed-precision (Task 5): encoder fp32, decoder int8 dynamic quantized.
100
- """
101
- if not hasattr(model, "decoder"):
102
- raise ValueError("Model has no decoder attribute.")
103
- try:
104
- supported = list(getattr(torch.backends.quantized, "supported_engines", []))
105
- current = getattr(torch.backends.quantized, "engine", "none")
106
- if current in {"none", None, ""}:
107
- if "fbgemm" in supported:
108
- torch.backends.quantized.engine = "fbgemm"
109
- elif "qnnpack" in supported:
110
- torch.backends.quantized.engine = "qnnpack"
111
- except Exception: # pragma: no cover
112
- pass
113
- model.decoder = torch.quantization.quantize_dynamic(model.decoder, {torch.nn.Linear}, dtype=torch.qint8)
114
- return model
115
-
116
-
117
- def load_bnb_quantized_model(
118
- base_model: str,
119
- *,
120
- adapter_path: Optional[str],
121
- device: str,
122
- local_only: bool,
123
- load_in_8bit: bool = False,
124
- load_in_4bit: bool = False,
125
- ) -> Tuple[Any, torch.nn.Module]:
126
- """
127
- bitsandbytes int8/int4 (requires bitsandbytes + CUDA). Not supported on CPU/MPS.
128
- """
129
- if BitsAndBytesConfig is None:
130
- raise RuntimeError("transformers BitsAndBytesConfig not available; upgrade transformers or install extras.")
131
- if device != "cuda":
132
- raise RuntimeError("bitsandbytes quantization requires CUDA (device=cuda).")
133
- if not (load_in_8bit or load_in_4bit):
134
- raise ValueError("Specify load_in_8bit or load_in_4bit.")
135
-
136
- tok = _load_tokenizer(base_model, local_only=local_only)
137
- qconf = BitsAndBytesConfig(load_in_8bit=load_in_8bit, load_in_4bit=load_in_4bit)
138
- model = AutoModelForSeq2SeqLM.from_pretrained(
139
- base_model,
140
- local_files_only=local_only,
141
- quantization_config=qconf,
142
- device_map="auto",
143
- )
144
- if adapter_path:
145
- if PeftModel is None:
146
- raise RuntimeError("peft is required to load adapters.")
147
- model = PeftModel.from_pretrained(model, adapter_path)
148
- model.eval()
149
- return tok, model
150
-
151
-
152
- def save_quant_artifact(
153
- out_dir: str | Path,
154
- *,
155
- mode: str,
156
- base_model: str,
157
- adapter_path: Optional[str],
158
- tokenizer: Any,
159
- model: torch.nn.Module,
160
- ) -> QuantArtifact:
161
- out = Path(out_dir)
162
- out.mkdir(parents=True, exist_ok=True)
163
- (out / "tokenizer").mkdir(exist_ok=True)
164
-
165
- tokenizer.save_pretrained(out / "tokenizer")
166
- torch.save(model.state_dict(), out / "model.pt")
167
-
168
- meta: Dict[str, Any] = {
169
- "mode": mode,
170
- "base_model": base_model,
171
- "adapter_path": adapter_path,
172
- "created_at_s": time.time(),
173
- "estimated_model_bytes": estimate_model_bytes(model),
174
- }
175
- (out / "meta.json").write_text(json.dumps(meta, indent=2))
176
-
177
- return QuantArtifact(
178
- out_dir=out,
179
- mode=mode,
180
- base_model=base_model,
181
- adapter_path=adapter_path,
182
- created_at_s=float(meta["created_at_s"]),
183
- )
184
-
185
-
186
- def load_quant_artifact(
187
- artifact_dir: str | Path,
188
- *,
189
- device: str = "cpu",
190
- local_only: bool = True,
191
- ) -> Tuple[Any, torch.nn.Module, Dict[str, Any]]:
192
- """
193
- Loads a previously exported quant artifact.
194
- For dynamic quant modes, we reconstruct the architecture, apply the same quantization,
195
- then load the saved state_dict.
196
- """
197
- adir = Path(artifact_dir)
198
- meta = json.loads((adir / "meta.json").read_text())
199
- mode = meta["mode"]
200
- base_model = meta["base_model"]
201
-
202
- tok = AutoTokenizer.from_pretrained(adir / "tokenizer", local_files_only=True)
203
- if tok.pad_token_id is None and getattr(tok, "eos_token_id", None) is not None:
204
- tok.pad_token = tok.eos_token
205
-
206
- model = AutoModelForSeq2SeqLM.from_pretrained(base_model, local_files_only=local_only).to(device)
207
- model.eval()
208
-
209
- if mode == "int8_dynamic":
210
- model = quantize_dynamic_int8(model)
211
- elif mode == "int8_decoder_dynamic":
212
- model = quantize_dynamic_int8_decoder_only(model)
213
- elif mode in {"fp32"}:
214
- pass
215
- else:
216
- raise RuntimeError(f"Unsupported artifact mode for local loading: {mode}")
217
-
218
- state = torch.load(adir / "model.pt", map_location=device)
219
- model.load_state_dict(state, strict=False)
220
- model.to(device)
221
- model.eval()
222
- return tok, model, meta
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/quantized_text2sql_engine.py DELETED
@@ -1,243 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import os
4
- import sqlite3
5
- import threading
6
- import time
7
- from concurrent.futures import ThreadPoolExecutor, as_completed
8
- from collections import OrderedDict
9
- from pathlib import Path
10
- from typing import Any, Dict, List, Sequence, Tuple
11
-
12
- import torch
13
-
14
- from src.quantization_utils import load_quant_artifact
15
- from src.schema_encoder import SchemaEncoder
16
- from src.sql_validator import validate_sql_schema
17
-
18
- # ==========================================
19
- # RELATIVE PATH RESOLUTION (GLOBAL)
20
- # ==========================================
21
- PROJECT_ROOT = Path(__file__).resolve().parent.parent
22
-
23
- if (PROJECT_ROOT / "data" / "database").exists():
24
- DB_ROOT = PROJECT_ROOT / "data" / "database"
25
- else:
26
- DB_ROOT = PROJECT_ROOT / "final_databases"
27
-
28
-
29
- class QuantizedText2SQLEngine:
30
- def __init__(
31
- self,
32
- artifact_dir: str,
33
- *,
34
- device: str = "cpu",
35
- use_constrained: bool = False,
36
- exec_workers: int | None = None,
37
- default_timeout_s: float = 2.0,
38
- use_cache: bool = True,
39
- cache_max_entries: int = 50_000,
40
- ):
41
- self.device = device
42
- self.use_constrained = bool(use_constrained)
43
- self.tokenizer, self.model, self.meta = load_quant_artifact(artifact_dir, device=device, local_only=True)
44
- self.schema_encoder = SchemaEncoder(DB_ROOT)
45
-
46
- if exec_workers is None:
47
- exec_workers = int(os.environ.get("SQL_EXEC_WORKERS", "8"))
48
-
49
- self.exec_pool = ThreadPoolExecutor(max_workers=int(exec_workers))
50
- self.default_timeout_s = float(default_timeout_s)
51
- self.use_cache = bool(use_cache)
52
- self.cache_max_entries = int(cache_max_entries)
53
- self._cache: "OrderedDict[tuple[str, str], tuple[list, list]]" = OrderedDict()
54
- self._cache_lock = threading.Lock()
55
- self._stats_lock = threading.Lock()
56
- self._exec_cache_hits = 0
57
- self._exec_cache_misses = 0
58
- self._exec_calls = 0
59
- self._tls = threading.local()
60
-
61
- def _get_db_path(self, db_id: str) -> str:
62
- """Smart resolver for flat vs nested database folders"""
63
- path1 = DB_ROOT / db_id / f"{db_id}.sqlite"
64
- path2 = DB_ROOT / f"{db_id}.sqlite"
65
- return str(path1) if path1.exists() else str(path2)
66
-
67
- def build_prompt(self, question: str, db_id: str) -> str:
68
- schema = self.schema_encoder.structured_schema(db_id)
69
- return (
70
- "You are a SQLite expert.\n\n"
71
- f"Database: {db_id}\n\n"
72
- "Schema:\n"
73
- f"{schema}\n\n"
74
- "Question:\n"
75
- f"{question}\n\n"
76
- "SQL:"
77
- )
78
-
79
- def generate_sql_batch(
80
- self,
81
- pairs: Sequence[Tuple[str, str]],
82
- *,
83
- max_new_tokens: int = 120,
84
- num_beams: int = 8,
85
- repetition_penalty: float = 1.2,
86
- ) -> List[str]:
87
- prompts = [self.build_prompt(q, db_id) for q, db_id in pairs]
88
-
89
- if self.use_constrained:
90
- from transformers.generation.logits_process import LogitsProcessorList
91
- from src.constrained_decoding import SchemaConstrainedLogitsProcessor
92
-
93
- sqls: List[str] = []
94
- for (q, db_id), prompt in zip(pairs, prompts):
95
- db_path = self._get_db_path(db_id)
96
- enc = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(self.device)
97
- proc = LogitsProcessorList([SchemaConstrainedLogitsProcessor(self.tokenizer, db_path)])
98
-
99
- out = self.model.generate(
100
- **enc,
101
- max_new_tokens=int(max_new_tokens),
102
- num_beams=int(num_beams),
103
- repetition_penalty=float(repetition_penalty),
104
- logits_processor=proc,
105
- )
106
- sqls.append(self.tokenizer.decode(out[0], skip_special_tokens=True).strip())
107
- return sqls
108
-
109
- enc = self.tokenizer(prompts, return_tensors="pt", padding=True, truncation=True, max_length=512).to(self.device)
110
- out = self.model.generate(
111
- **enc,
112
- max_new_tokens=int(max_new_tokens),
113
- num_beams=int(num_beams),
114
- repetition_penalty=float(repetition_penalty),
115
- )
116
- return [self.tokenizer.decode(x, skip_special_tokens=True).strip() for x in out]
117
-
118
- def _get_thread_conn(self, db_path: str) -> sqlite3.Connection:
119
- conns = getattr(self._tls, "conns", None)
120
- if conns is None:
121
- conns = {}
122
- self._tls.conns = conns
123
- conn = conns.get(db_path)
124
- if conn is None:
125
- conn = sqlite3.connect(db_path)
126
- conn.text_factory = lambda b: b.decode(errors="ignore")
127
- conns[db_path] = conn
128
- return conn
129
-
130
- def _cache_get(self, key: tuple[str, str]) -> tuple[list, list] | None:
131
- if not self.use_cache: return None
132
- with self._cache_lock:
133
- hit = self._cache.get(key)
134
- if hit is None: return None
135
- self._cache.move_to_end(key)
136
- return hit
137
-
138
- def _cache_put(self, key: tuple[str, str], value: tuple[list, list]) -> None:
139
- if not self.use_cache: return
140
- with self._cache_lock:
141
- self._cache[key] = value
142
- self._cache.move_to_end(key)
143
- while len(self._cache) > self.cache_max_entries:
144
- self._cache.popitem(last=False)
145
-
146
- def _execute_one(self, sql: str, db_path: str, timeout_s: float | None = None):
147
- timeout_s = float(self.default_timeout_s if timeout_s is None else timeout_s)
148
- key = (db_path, sql)
149
- cached = self._cache_get(key)
150
-
151
- with self._stats_lock: self._exec_calls += 1
152
-
153
- if cached is not None:
154
- with self._stats_lock: self._exec_cache_hits += 1
155
- return cached
156
-
157
- with self._stats_lock: self._exec_cache_misses += 1
158
-
159
- conn = self._get_thread_conn(db_path)
160
- start_t = time.monotonic()
161
-
162
- def handler():
163
- return 1 if (time.monotonic() - start_t) > timeout_s else 0
164
-
165
- conn.set_progress_handler(handler, 10_000)
166
- cur = conn.cursor()
167
- cur.execute(sql)
168
- rows = cur.fetchall()
169
- cols = [d[0] for d in cur.description] if cur.description else []
170
- out = (rows, cols)
171
- self._cache_put(key, out)
172
- return out
173
-
174
- def stats(self) -> Dict[str, Any]:
175
- with self._stats_lock:
176
- calls, hits, misses = int(self._exec_calls), int(self._exec_cache_hits), int(self._exec_cache_misses)
177
-
178
- hit_rate = (hits / calls) if calls else 0.0
179
- return {
180
- "exec_calls": calls, "exec_cache_hits": hits, "exec_cache_misses": misses,
181
- "exec_cache_hit_rate": float(hit_rate), "use_cache": bool(self.use_cache),
182
- "exec_workers": int(getattr(self.exec_pool, "_max_workers", 0) or 0),
183
- }
184
-
185
- def reset_stats(self) -> None:
186
- with self._stats_lock:
187
- self._exec_calls = self._exec_cache_hits = self._exec_cache_misses = 0
188
-
189
- def execute_sql(self, sql: str, db_id: str, *, timeout_s: float | None = None, validate_schema: bool = True):
190
- db_path = self._get_db_path(db_id)
191
- if validate_schema:
192
- try: ok, _ = validate_sql_schema(sql, db_path)
193
- except Exception: ok = False
194
- if not ok: raise ValueError("Invalid schema")
195
- return self._execute_one(sql, db_path, timeout_s=timeout_s)
196
-
197
- def ask(
198
- self,
199
- question: str,
200
- db_id: str,
201
- *,
202
- max_new_tokens: int = 120,
203
- num_beams: int = 8,
204
- repetition_penalty: float = 1.2,
205
- timeout_s: float | None = None,
206
- ) -> Dict[str, Any]:
207
- sql = self.generate_sql_batch(
208
- [(question, db_id)],
209
- max_new_tokens=max_new_tokens,
210
- num_beams=num_beams,
211
- repetition_penalty=repetition_penalty,
212
- )[0]
213
-
214
- db_path = self._get_db_path(db_id)
215
-
216
- try: ok, _ = validate_sql_schema(sql, db_path)
217
- except Exception: ok = False
218
-
219
- if not ok: return {"sql": sql, "rows": [], "columns": [], "error": "Invalid schema"}
220
-
221
- try:
222
- rows, cols = self._execute_one(sql, db_path, timeout_s=timeout_s)
223
- return {"sql": sql, "rows": rows, "columns": cols, "error": None}
224
- except Exception as e:
225
- return {"sql": sql, "rows": [], "columns": [], "error": str(e)}
226
-
227
- def ask_batch_execute(self, pairs: Sequence[Tuple[str, str]]) -> List[Dict[str, Any]]:
228
- sqls = self.generate_sql_batch(pairs)
229
- results: List[Dict[str, Any]] = []
230
- futures = {}
231
- for (q, db_id), sql in zip(pairs, sqls):
232
- db_path = self._get_db_path(db_id)
233
- futures[self.exec_pool.submit(self._execute_one, sql, db_path)] = (sql, db_id)
234
-
235
- for fut in as_completed(futures):
236
- sql, db_id = futures[fut]
237
- try:
238
- rows, cols = fut.result()
239
- results.append({"db_id": db_id, "sql": sql, "rows": rows, "columns": cols, "error": None})
240
- except Exception as e:
241
- results.append({"db_id": db_id, "sql": sql, "rows": [], "columns": [], "error": str(e)})
242
-
243
- return results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/schema_encoder.py CHANGED
@@ -1,38 +1,54 @@
1
  import sqlite3
2
- from pathlib import Path
3
 
4
  class SchemaEncoder:
 
5
  def __init__(self, db_root):
6
- self.db_root = Path(db_root)
7
-
8
- def _get_db_path(self, db_id: str) -> Path:
9
- # Check standard Spider format (subfolder)
10
- path1 = self.db_root / db_id / f"{db_id}.sqlite"
11
- # Check flat format (no subfolder)
12
- path2 = self.db_root / f"{db_id}.sqlite"
13
-
14
- if path1.exists():
15
- return path1
16
- if path2.exists():
17
- return path2
18
-
19
- raise FileNotFoundError(f"unable to open database file. Looked in:\n1. {path1}\n2. {path2}")
20
-
21
- def structured_schema(self, db_id: str) -> str:
22
- db_path = self._get_db_path(db_id)
23
-
24
- conn = sqlite3.connect(str(db_path))
25
- cur = conn.cursor()
26
-
27
- # Get all tables
28
- cur.execute("SELECT name FROM sqlite_master WHERE type='table';")
29
- tables = [r[0] for r in cur.fetchall() if r[0] != "sqlite_sequence"]
30
-
31
- schema_str = ""
32
- for table in tables:
33
- cur.execute(f"PRAGMA table_info(`{table}`);")
34
- cols = [c[1] for c in cur.fetchall()]
35
- schema_str += f"{table} ({', '.join(cols)})\n"
36
-
37
  conn.close()
38
- return schema_str.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import sqlite3
2
+
3
 
4
  class SchemaEncoder:
5
+
6
  def __init__(self, db_root):
7
+ self.db_root = db_root
8
+
9
+ def get_tables_and_columns(self, db_id):
10
+
11
+ # FIXED PATH
12
+ db_path = self.db_root / f"{db_id}.sqlite"
13
+
14
+ conn = sqlite3.connect(db_path)
15
+ cursor = conn.cursor()
16
+
17
+ tables = cursor.execute(
18
+ "SELECT name FROM sqlite_master WHERE type='table';"
19
+ ).fetchall()
20
+
21
+ schema = {}
22
+
23
+ for (table,) in tables:
24
+ cols = cursor.execute(f"PRAGMA table_info({table});").fetchall()
25
+ col_names = [c[1] for c in cols]
26
+ schema[table] = col_names
27
+
 
 
 
 
 
 
 
 
 
 
28
  conn.close()
29
+ return schema
30
+
31
+ # -----------------------------------
32
+ # Strategy 1: Structured
33
+ # -----------------------------------
34
+ def structured_schema(self, db_id):
35
+ schema = self.get_tables_and_columns(db_id)
36
+
37
+ lines = []
38
+ for table, cols in schema.items():
39
+ lines.append(f"{table}({', '.join(cols)})")
40
+
41
+ return "\n".join(lines)
42
+
43
+ # -----------------------------------
44
+ # Strategy 2: Natural Language
45
+ # -----------------------------------
46
+ def natural_language_schema(self, db_id):
47
+ schema = self.get_tables_and_columns(db_id)
48
+
49
+ lines = []
50
+ for table, cols in schema.items():
51
+ col_text = ", ".join(cols)
52
+ lines.append(f"The table '{table}' contains the columns: {col_text}.")
53
+
54
+ return "\n".join(lines)
src/schema_utils.py DELETED
@@ -1,222 +0,0 @@
1
- # import os
2
- # import sqlite3
3
- # import threading
4
- # from typing import Dict, List, Set, Tuple
5
-
6
- # def get_schema(db_path):
7
- # schema_map = get_table_to_columns(db_path)
8
- # schema_text = ""
9
- # for table, col_names in schema_map.items():
10
- # schema_text += f"{table}({', '.join(col_names)})\n"
11
- # return schema_text
12
-
13
- # _SCHEMA_LOCK = threading.Lock()
14
- # _SCHEMA_CACHE: Dict[str, Tuple[str, Dict[str, List[str]]]] = {}
15
-
16
- # def _db_state_fingerprint(db_path: str) -> str:
17
- # try:
18
- # st = os.stat(db_path)
19
- # return f"{st.st_mtime_ns}:{st.st_size}"
20
- # except OSError:
21
- # return "missing"
22
-
23
- # def _connect_readonly(db_path: str) -> sqlite3.Connection:
24
- # uri = f"file:{os.path.abspath(db_path)}?mode=ro"
25
- # conn = sqlite3.connect(uri, uri=True, check_same_thread=False)
26
- # conn.execute("PRAGMA query_only = ON;")
27
- # conn.execute("PRAGMA foreign_keys = ON;")
28
- # return conn
29
-
30
- # def get_table_to_columns(db_path: str) -> Dict[str, List[str]]:
31
- # """
32
- # Return mapping of table -> column names for the SQLite DB at db_path.
33
- # Tables and columns are returned lowercased.
34
- # """
35
- # fp = _db_state_fingerprint(db_path)
36
- # with _SCHEMA_LOCK:
37
- # cached = _SCHEMA_CACHE.get(db_path)
38
- # if cached is not None and cached[0] == fp:
39
- # return cached[1]
40
-
41
- # schema: Dict[str, List[str]] = {}
42
- # with _connect_readonly(db_path) as conn:
43
- # cur = conn.execute(
44
- # "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';"
45
- # )
46
- # tables = [r[0] for r in cur.fetchall() if r and isinstance(r[0], str)]
47
- # for table in tables:
48
- # table_l = table.lower()
49
- # try:
50
- # cur = conn.execute(f'PRAGMA table_info("{table}")')
51
- # cols = [row[1].lower() for row in cur.fetchall() if row and isinstance(row[1], str)]
52
- # schema[table_l] = cols
53
- # except sqlite3.Error:
54
- # schema[table_l] = []
55
-
56
- # with _SCHEMA_LOCK:
57
- # _SCHEMA_CACHE[db_path] = (fp, schema)
58
- # return schema
59
-
60
- # def get_db_tables_and_columns(db_path: str) -> Tuple[Set[str], Set[str]]:
61
- # schema = get_table_to_columns(db_path)
62
- # tables = set(schema.keys())
63
- # columns: Set[str] = set()
64
- # for cols in schema.values():
65
- # columns.update(cols)
66
- # return tables, columns
67
-
68
-
69
- import os
70
- import sqlite3
71
- import threading
72
- from typing import Dict, List, Set, Tuple
73
-
74
-
75
- # ===============================
76
- # 🔥 SCHEMA TEXT (for prompting)
77
- # ===============================
78
- def get_schema(db_path: str) -> str:
79
- schema_map = get_table_to_columns(db_path)
80
- schema_text = ""
81
-
82
- for table, col_names in schema_map.items():
83
- schema_text += f"{table}({', '.join(col_names)})\n"
84
-
85
- return schema_text
86
-
87
-
88
- # ===============================
89
- # 🔥 CACHE + LOCK
90
- # ===============================
91
- _SCHEMA_LOCK = threading.Lock()
92
- _SCHEMA_CACHE: Dict[str, Tuple[str, Dict[str, List[str]]]] = {}
93
-
94
-
95
- def _db_state_fingerprint(db_path: str) -> str:
96
- try:
97
- st = os.stat(db_path)
98
- return f"{st.st_mtime_ns}:{st.st_size}"
99
- except OSError:
100
- return "missing"
101
-
102
-
103
- def _connect_readonly(db_path: str) -> sqlite3.Connection:
104
- uri = f"file:{os.path.abspath(db_path)}?mode=ro"
105
- conn = sqlite3.connect(uri, uri=True, check_same_thread=False)
106
- conn.execute("PRAGMA query_only = ON;")
107
- conn.execute("PRAGMA foreign_keys = ON;")
108
- return conn
109
-
110
-
111
- # ===============================
112
- # 🔥 CORE: TABLE → COLUMNS
113
- # ===============================
114
- def get_table_to_columns(db_path: str) -> Dict[str, List[str]]:
115
- """
116
- Return mapping of table -> column names (ONLY names, no types).
117
- """
118
- fp = _db_state_fingerprint(db_path)
119
-
120
- with _SCHEMA_LOCK:
121
- cached = _SCHEMA_CACHE.get(db_path)
122
- if cached is not None and cached[0] == fp:
123
- return cached[1]
124
-
125
- schema: Dict[str, List[str]] = {}
126
-
127
- with _connect_readonly(db_path) as conn:
128
- cur = conn.execute(
129
- "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';"
130
- )
131
-
132
- tables = [r[0] for r in cur.fetchall() if r and isinstance(r[0], str)]
133
-
134
- for table in tables:
135
- table_l = table.lower()
136
-
137
- try:
138
- cur = conn.execute(f'PRAGMA table_info("{table}")')
139
-
140
- cols = []
141
- for row in cur.fetchall():
142
- col_name = row[1].lower()
143
- cols.append(col_name)
144
-
145
- schema[table_l] = list(set(cols)) # remove duplicates
146
-
147
- except sqlite3.Error:
148
- schema[table_l] = []
149
-
150
- with _SCHEMA_LOCK:
151
- _SCHEMA_CACHE[db_path] = (fp, schema)
152
-
153
- return schema
154
-
155
-
156
- # ===============================
157
- # 🔥 TABLE + COLUMN SETS
158
- # ===============================
159
- def get_db_tables_and_columns(db_path: str) -> Tuple[Set[str], Set[str]]:
160
- schema = get_table_to_columns(db_path)
161
-
162
- tables = set(schema.keys())
163
- columns: Set[str] = set()
164
-
165
- for cols in schema.values():
166
- columns.update(cols)
167
-
168
- return tables, columns
169
-
170
-
171
- # ===============================
172
- # 🔥 FOREIGN KEYS (IMPORTANT)
173
- # ===============================
174
- def get_foreign_keys(db_path: str) -> List[Tuple[str, str, str, str]]:
175
- """
176
- Returns list of foreign key relations:
177
- (table, column, ref_table, ref_column)
178
- """
179
- fks = []
180
-
181
- with _connect_readonly(db_path) as conn:
182
- cur = conn.execute(
183
- "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';"
184
- )
185
- tables = [r[0] for r in cur.fetchall()]
186
-
187
- for table in tables:
188
- try:
189
- cur = conn.execute(f'PRAGMA foreign_key_list("{table}")')
190
-
191
- for row in cur.fetchall():
192
- fks.append((
193
- table.lower(),
194
- row[3].lower(), # column
195
- row[2].lower(), # referenced table
196
- row[4].lower() # referenced column
197
- ))
198
-
199
- except sqlite3.Error:
200
- continue
201
-
202
- return fks
203
-
204
-
205
- # ===============================
206
- # 🔥 FINAL: CONSTRAINT GRAPH
207
- # ===============================
208
- def get_constraint_graph(db_path: str):
209
- """
210
- Build schema constraint graph:
211
- - tables
212
- - columns
213
- - foreign key relations
214
- """
215
- tables, columns = get_db_tables_and_columns(db_path)
216
- fks = get_foreign_keys(db_path)
217
-
218
- return {
219
- "tables": tables,
220
- "columns": columns,
221
- "foreign_keys": fks
222
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/sql_validator.py CHANGED
@@ -1,209 +1,6 @@
1
- # import re
2
- # from pathlib import Path
3
- # from typing import Optional, Set, Tuple
4
-
5
- # from schema_utils import get_db_tables_and_columns, get_table_to_columns
6
-
7
- # class SQLValidator:
8
-
9
- # def __init__(self, db_root):
10
- # self.db_root = Path(db_root)
11
-
12
- # # ---------------------------
13
- # # Load schema
14
- # # ---------------------------
15
- # def load_schema(self, db_id):
16
- # db_path = self.db_root / db_id / f"{db_id}.sqlite"
17
- # return get_table_to_columns(str(db_path))
18
-
19
-
20
- # # ---------------------------
21
- # # Basic syntax check
22
- # # ---------------------------
23
- # def basic_structure_valid(self, sql):
24
- # s = sql.lower()
25
-
26
- # if "select" not in s or "from" not in s:
27
- # return False, "Missing SELECT or FROM"
28
-
29
- # if len(s.split()) < 4:
30
- # return False, "Too short to be SQL"
31
-
32
- # return True, None
33
-
34
-
35
- # # ---------------------------
36
- # # Extract identifiers
37
- # # ---------------------------
38
- # def extract_identifiers(self, sql):
39
- # tokens = re.findall(r"[A-Za-z_]+", sql.lower())
40
- # return set(tokens)
41
-
42
-
43
- # # ---------------------------
44
- # # Table validation
45
- # # ---------------------------
46
- # def validate_tables(self, sql, schema):
47
- # words = self.extract_identifiers(sql)
48
- # tables = set(schema.keys())
49
-
50
- # used_tables = [w for w in words if w in tables]
51
-
52
- # if not used_tables:
53
- # return False, "No valid table used"
54
-
55
- # return True, None
56
-
57
-
58
- # # ---------------------------
59
- # # Column validation
60
- # # ---------------------------
61
- # def validate_columns(self, sql, schema):
62
- # words = self.extract_identifiers(sql)
63
-
64
- # valid_columns = set()
65
- # for cols in schema.values():
66
- # valid_columns.update(cols)
67
-
68
- # # ignore SQL keywords
69
- # keywords = {
70
- # "select","from","where","join","on","group","by",
71
- # "order","limit","count","sum","avg","min","max",
72
- # "and","or","in","like","distinct","asc","desc"
73
- # }
74
-
75
- # invalid = []
76
- # for w in words:
77
- # if w not in valid_columns and w not in schema and w not in keywords:
78
- # if not w.isdigit():
79
- # invalid.append(w)
80
-
81
- # # allow small hallucinations but block many
82
- # if len(invalid) > 3:
83
- # return False, f"Too many unknown identifiers: {invalid[:5]}"
84
-
85
- # return True, None
86
-
87
-
88
- # # ---------------------------
89
- # # Dangerous query protection
90
- # # ---------------------------
91
- # def block_dangerous(self, sql):
92
- # bad = ["drop", "delete", "update", "insert", "alter"]
93
-
94
- # s = sql.lower()
95
- # for b in bad:
96
- # if b in s:
97
- # return False, f"Dangerous keyword detected: {b}"
98
-
99
- # return True, None
100
-
101
-
102
- # # ---------------------------
103
- # # Main validation
104
- # # ---------------------------
105
- # def validate(self, sql, db_id):
106
-
107
- # schema = self.load_schema(db_id)
108
-
109
- # checks = [
110
- # self.block_dangerous(sql),
111
- # self.basic_structure_valid(sql),
112
- # self.validate_tables(sql, schema),
113
- # self.validate_columns(sql, schema),
114
- # ]
115
-
116
- # for ok, msg in checks:
117
- # if not ok:
118
- # return False, msg
119
-
120
- # return True, None
121
-
122
-
123
- # _VALIDATION_CACHE = {}
124
- # _VALIDATION_CACHE_MAX = 100_000
125
-
126
-
127
- # def _db_state_fingerprint(db_path: str) -> str:
128
- # try:
129
- # st = Path(db_path).stat()
130
- # return f"{st.st_mtime_ns}:{st.st_size}"
131
- # except OSError:
132
- # return "missing"
133
-
134
-
135
- # def _extract_referenced_tables(sql: str) -> Set[str]:
136
- # # Best-effort: FROM/JOIN targets (unquoted identifiers).
137
- # tokens = re.findall(r"\b(from|join)\s+([a-zA-Z_][\w$]*)", sql, flags=re.I)
138
- # return {t[1].lower() for t in tokens if t and len(t) > 1}
139
-
140
-
141
- # def validate_sql_schema(sql: str, db_path: str) -> Tuple[bool, Optional[str]]:
142
- # """
143
- # Strict schema validation for reward computation.
144
- # - References must resolve to real tables/columns in the target DB.
145
- # - Returns (ok, message). On failure, message is a short reason.
146
- # """
147
- # fp = _db_state_fingerprint(db_path)
148
- # key = f"{fp}|{sql}"
149
- # cached = _VALIDATION_CACHE.get(key)
150
- # if cached is not None:
151
- # return cached
152
-
153
- # valid_tables, valid_columns = get_db_tables_and_columns(db_path)
154
-
155
- # referenced_tables = _extract_referenced_tables(sql)
156
- # unknown_tables = sorted(t for t in referenced_tables if t not in valid_tables)
157
- # if unknown_tables:
158
- # out = (False, f"Unknown table(s): {unknown_tables[:5]}")
159
- # if len(_VALIDATION_CACHE) >= _VALIDATION_CACHE_MAX:
160
- # _VALIDATION_CACHE.clear()
161
- # _VALIDATION_CACHE[key] = out
162
- # return out
163
-
164
- # # Column-level correctness is hard to do reliably with regex alone; rely on SQLite compilation.
165
- # # This does not execute the query, but will fail for unknown tables/columns.
166
- # try:
167
- # import sqlite3 # local import to keep module lightweight
168
-
169
- # uri = f"file:{Path(db_path).resolve()}?mode=ro"
170
- # conn = sqlite3.connect(uri, uri=True, check_same_thread=False)
171
- # try:
172
- # conn.execute("PRAGMA query_only = ON;")
173
- # conn.execute("PRAGMA foreign_keys = ON;")
174
- # conn.execute(f"EXPLAIN QUERY PLAN {sql}")
175
- # finally:
176
- # conn.close()
177
- # except Exception as e:
178
- # msg = str(e).lower()
179
- # if "no such table" in msg:
180
- # out = (False, "Unknown table")
181
- # elif "no such column" in msg:
182
- # out = (False, "Unknown column")
183
- # else:
184
- # out = (False, "Schema validation failed")
185
-
186
- # if len(_VALIDATION_CACHE) >= _VALIDATION_CACHE_MAX:
187
- # _VALIDATION_CACHE.clear()
188
- # _VALIDATION_CACHE[key] = out
189
- # return out
190
-
191
- # out = (True, None)
192
- # if len(_VALIDATION_CACHE) >= _VALIDATION_CACHE_MAX:
193
- # _VALIDATION_CACHE.clear()
194
- # _VALIDATION_CACHE[key] = out
195
- # return out
196
-
197
-
198
-
199
-
200
-
201
  import re
202
  from pathlib import Path
203
- from typing import Optional, Set, Tuple, Dict, List
204
-
205
- from src.schema_utils import get_db_tables_and_columns, get_table_to_columns, get_constraint_graph
206
-
207
 
208
  class SQLValidator:
209
 
@@ -215,7 +12,23 @@ class SQLValidator:
215
  # ---------------------------
216
  def load_schema(self, db_id):
217
  db_path = self.db_root / db_id / f"{db_id}.sqlite"
218
- return get_table_to_columns(str(db_path))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
 
220
  # ---------------------------
221
  # Basic syntax check
@@ -231,13 +44,15 @@ class SQLValidator:
231
 
232
  return True, None
233
 
 
234
  # ---------------------------
235
  # Extract identifiers
236
  # ---------------------------
237
  def extract_identifiers(self, sql):
238
- tokens = re.findall(r"[A-Za-z_][A-Za-z0-9_]*", sql.lower())
239
  return set(tokens)
240
 
 
241
  # ---------------------------
242
  # Table validation
243
  # ---------------------------
@@ -252,6 +67,7 @@ class SQLValidator:
252
 
253
  return True, None
254
 
 
255
  # ---------------------------
256
  # Column validation
257
  # ---------------------------
@@ -262,29 +78,26 @@ class SQLValidator:
262
  for cols in schema.values():
263
  valid_columns.update(cols)
264
 
 
265
  keywords = {
266
  "select","from","where","join","on","group","by",
267
  "order","limit","count","sum","avg","min","max",
268
- "and","or","in","like","distinct","asc","desc",
269
- "having","as","inner","left","right","outer"
270
  }
271
 
272
  invalid = []
273
  for w in words:
274
- if (
275
- w not in valid_columns
276
- and w not in schema
277
- and w not in keywords
278
- and not w.isdigit()
279
- ):
280
- invalid.append(w)
281
-
282
- # stricter than before
283
- if len(invalid) > 2:
284
- return False, f"Unknown identifiers: {invalid[:5]}"
285
 
286
  return True, None
287
 
 
288
  # ---------------------------
289
  # Dangerous query protection
290
  # ---------------------------
@@ -298,18 +111,6 @@ class SQLValidator:
298
 
299
  return True, None
300
 
301
- # ---------------------------
302
- # FK-aware JOIN validation (NEW 🔥)
303
- # ---------------------------
304
- def validate_joins(self, db_id):
305
- db_path = self.db_root / db_id / f"{db_id}.sqlite"
306
- graph = get_constraint_graph(str(db_path))
307
-
308
- # not strict enforcement, just check FK existence
309
- if len(graph["foreign_keys"]) == 0:
310
- return True, None
311
-
312
- return True, None # placeholder (safe for now)
313
 
314
  # ---------------------------
315
  # Main validation
@@ -330,86 +131,3 @@ class SQLValidator:
330
  return False, msg
331
 
332
  return True, None
333
-
334
-
335
- # ===============================
336
- # 🔥 FAST SCHEMA VALIDATION (REWARD)
337
- # ===============================
338
- _VALIDATION_CACHE = {}
339
- _VALIDATION_CACHE_MAX = 100_000
340
-
341
-
342
- def _db_state_fingerprint(db_path: str) -> str:
343
- try:
344
- st = Path(db_path).stat()
345
- return f"{st.st_mtime_ns}:{st.st_size}"
346
- except OSError:
347
- return "missing"
348
-
349
-
350
- def _extract_referenced_tables(sql: str) -> Set[str]:
351
- tokens = re.findall(r"\b(from|join)\s+([a-zA-Z_][\w$]*)", sql, flags=re.I)
352
- return {t[1].lower() for t in tokens if t and len(t) > 1}
353
-
354
-
355
- def validate_sql_schema(sql: str, db_path: str) -> Tuple[bool, Optional[str]]:
356
- """
357
- STRICT schema validation (Task 3 core)
358
- """
359
-
360
- fp = _db_state_fingerprint(db_path)
361
- key = f"{fp}|{sql}"
362
-
363
- cached = _VALIDATION_CACHE.get(key)
364
- if cached is not None:
365
- return cached
366
-
367
- valid_tables, valid_columns = get_db_tables_and_columns(db_path)
368
-
369
- # ---------------------------
370
- # Table validation
371
- # ---------------------------
372
- referenced_tables = _extract_referenced_tables(sql)
373
-
374
- unknown_tables = [t for t in referenced_tables if t not in valid_tables]
375
-
376
- if unknown_tables:
377
- out = (False, f"Unknown table(s): {unknown_tables[:3]}")
378
- _VALIDATION_CACHE[key] = out
379
- return out
380
-
381
- # ---------------------------
382
- # Column validation via SQLite planner
383
- # ---------------------------
384
- try:
385
- import sqlite3
386
-
387
- uri = f"file:{Path(db_path).resolve()}?mode=ro"
388
- conn = sqlite3.connect(uri, uri=True, check_same_thread=False)
389
-
390
- try:
391
- conn.execute("PRAGMA query_only = ON;")
392
- conn.execute("PRAGMA foreign_keys = ON;")
393
-
394
- # 🔥 Key idea: no execution, only planning
395
- conn.execute(f"EXPLAIN QUERY PLAN {sql}")
396
-
397
- finally:
398
- conn.close()
399
-
400
- except Exception as e:
401
- msg = str(e).lower()
402
-
403
- if "no such table" in msg:
404
- out = (False, "Unknown table")
405
- elif "no such column" in msg:
406
- out = (False, "Unknown column")
407
- else:
408
- out = (False, "Invalid SQL")
409
-
410
- _VALIDATION_CACHE[key] = out
411
- return out
412
-
413
- out = (True, None)
414
- _VALIDATION_CACHE[key] = out
415
- return out
 
1
+ import sqlite3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import re
3
  from pathlib import Path
 
 
 
 
4
 
5
  class SQLValidator:
6
 
 
12
  # ---------------------------
13
  def load_schema(self, db_id):
14
  db_path = self.db_root / db_id / f"{db_id}.sqlite"
15
+
16
+ conn = sqlite3.connect(db_path)
17
+ cursor = conn.cursor()
18
+
19
+ tables = cursor.execute(
20
+ "SELECT name FROM sqlite_master WHERE type='table';"
21
+ ).fetchall()
22
+
23
+ schema = {}
24
+
25
+ for (table,) in tables:
26
+ cols = cursor.execute(f"PRAGMA table_info({table});").fetchall()
27
+ schema[table.lower()] = [c[1].lower() for c in cols]
28
+
29
+ conn.close()
30
+ return schema
31
+
32
 
33
  # ---------------------------
34
  # Basic syntax check
 
44
 
45
  return True, None
46
 
47
+
48
  # ---------------------------
49
  # Extract identifiers
50
  # ---------------------------
51
  def extract_identifiers(self, sql):
52
+ tokens = re.findall(r"[A-Za-z_]+", sql.lower())
53
  return set(tokens)
54
 
55
+
56
  # ---------------------------
57
  # Table validation
58
  # ---------------------------
 
67
 
68
  return True, None
69
 
70
+
71
  # ---------------------------
72
  # Column validation
73
  # ---------------------------
 
78
  for cols in schema.values():
79
  valid_columns.update(cols)
80
 
81
+ # ignore SQL keywords
82
  keywords = {
83
  "select","from","where","join","on","group","by",
84
  "order","limit","count","sum","avg","min","max",
85
+ "and","or","in","like","distinct","asc","desc"
 
86
  }
87
 
88
  invalid = []
89
  for w in words:
90
+ if w not in valid_columns and w not in schema and w not in keywords:
91
+ if not w.isdigit():
92
+ invalid.append(w)
93
+
94
+ # allow small hallucinations but block many
95
+ if len(invalid) > 3:
96
+ return False, f"Too many unknown identifiers: {invalid[:5]}"
 
 
 
 
97
 
98
  return True, None
99
 
100
+
101
  # ---------------------------
102
  # Dangerous query protection
103
  # ---------------------------
 
111
 
112
  return True, None
113
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
  # ---------------------------
116
  # Main validation
 
131
  return False, msg
132
 
133
  return True, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/text2sql_engine.py CHANGED
@@ -1,223 +1,3 @@
1
- # import sqlite3
2
- # import torch
3
- # import re
4
- # import time
5
- # from pathlib import Path
6
- # from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
7
- # from peft import PeftModel
8
- # from src.sql_validator import SQLValidator
9
- # from src.schema_encoder import SchemaEncoder
10
-
11
- # PROJECT_ROOT = Path(__file__).resolve().parents[1]
12
-
13
- # # ================================
14
- # # DATABASE PATH AUTO DETECTION
15
- # # ================================
16
- # if (PROJECT_ROOT / "data/database").exists():
17
- # DB_ROOT = PROJECT_ROOT / "data/database"
18
- # else:
19
- # DB_ROOT = PROJECT_ROOT / "final_databases"
20
-
21
-
22
- # def normalize_question(q: str):
23
- # q = q.lower().strip()
24
- # q = re.sub(r"distinct\s+(\d+)", r"\1 distinct", q)
25
- # q = re.sub(r"\s+", " ", q)
26
- # return q
27
-
28
-
29
- # def semantic_fix(question, sql):
30
- # q = question.lower().strip()
31
- # s = sql.lower()
32
-
33
- # num_match = re.search(r'\b(?:show|list|top|limit|get|first|last)\s+(\d+)\b', q)
34
-
35
- # if num_match and "limit" not in s and "count(" not in s:
36
- # limit_val = num_match.group(1)
37
- # sql = sql.rstrip(";")
38
- # sql = f"{sql.strip()} LIMIT {limit_val}"
39
-
40
- # return sql
41
-
42
-
43
- # class Text2SQLEngine:
44
- # def __init__(self,
45
- # adapter_path=None,
46
- # base_model_name="Salesforce/codet5-base",
47
- # use_lora=True):
48
-
49
- # self.device = "mps" if torch.backends.mps.is_available() else (
50
- # "cuda" if torch.cuda.is_available() else "cpu"
51
- # )
52
-
53
- # self.validator = SQLValidator(DB_ROOT)
54
- # self.schema_encoder = SchemaEncoder(DB_ROOT)
55
-
56
- # self.dml_keywords = r'\b(delete|update|insert|drop|alter|truncate)\b'
57
-
58
- # print("Loading base model...")
59
- # base = AutoModelForSeq2SeqLM.from_pretrained(base_model_name)
60
-
61
- # if not use_lora:
62
- # self.tokenizer = AutoTokenizer.from_pretrained(base_model_name)
63
- # self.model = base.to(self.device)
64
- # self.model.eval()
65
- # return
66
-
67
- # if (PROJECT_ROOT / "checkpoints/best_rlhf_model").exists():
68
- # adapter_path = PROJECT_ROOT / "checkpoints/best_rlhf_model"
69
- # else:
70
- # adapter_path = PROJECT_ROOT / "best_rlhf_model"
71
-
72
- # adapter_path = adapter_path.resolve()
73
-
74
- # print("Loading tokenizer and LoRA adapter...")
75
-
76
- # try:
77
- # self.tokenizer = AutoTokenizer.from_pretrained(
78
- # str(adapter_path),
79
- # local_files_only=True
80
- # )
81
- # except Exception:
82
- # self.tokenizer = AutoTokenizer.from_pretrained(base_model_name)
83
-
84
- # self.model = PeftModel.from_pretrained(base, str(adapter_path)).to(self.device)
85
- # self.model.eval()
86
-
87
- # print("✅ RLHF model ready\n")
88
-
89
- # def build_prompt(self, question, schema):
90
- # return f"""You are an expert SQL generator.
91
- # Database schema:
92
- # {schema}
93
- # Generate a valid SQLite query for the question.
94
- # Question:
95
- # {question}
96
- # SQL:
97
- # """
98
-
99
- # def get_schema(self, db_id):
100
- # return self.schema_encoder.structured_schema(db_id)
101
-
102
- # def extract_sql(self, text: str):
103
-
104
- # text = text.strip()
105
-
106
- # if "SQL:" in text:
107
- # text = text.split("SQL:")[-1]
108
-
109
- # match = re.search(r"select[\s\S]*", text, re.IGNORECASE)
110
-
111
- # if match:
112
- # text = match.group(0)
113
-
114
- # return text.split(";")[0].strip()
115
-
116
- # def clean_sql(self, sql: str):
117
-
118
- # sql = sql.replace('"', "'")
119
- # sql = re.sub(r"\s+", " ", sql)
120
-
121
- # return sql.strip()
122
-
123
- # def generate_sql(self, prompt):
124
-
125
- # inputs = self.tokenizer(
126
- # prompt,
127
- # return_tensors="pt",
128
- # truncation=True,
129
- # max_length=512
130
- # ).to(self.device)
131
-
132
- # with torch.no_grad():
133
-
134
- # outputs = self.model.generate(
135
- # **inputs,
136
- # max_new_tokens=128,
137
- # num_beams=5,
138
- # early_stopping=True
139
- # )
140
-
141
- # decoded = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
142
-
143
- # return self.clean_sql(self.extract_sql(decoded))
144
-
145
- # def execute_sql(self, question, sql, db_id):
146
-
147
- # if re.search(self.dml_keywords, sql, re.IGNORECASE):
148
- # return sql, [], [], "❌ Security Alert"
149
-
150
- # # FIXED DATABASE PATH
151
- # db_path = DB_ROOT / f"{db_id}.sqlite"
152
-
153
- # sql = self.clean_sql(sql)
154
- # sql = semantic_fix(question, sql)
155
-
156
- # try:
157
-
158
- # conn = sqlite3.connect(db_path)
159
-
160
- # cursor = conn.cursor()
161
-
162
- # cursor.execute(sql)
163
-
164
- # rows = cursor.fetchall()
165
-
166
- # columns = [d[0] for d in cursor.description] if cursor.description else []
167
-
168
- # conn.close()
169
-
170
- # return sql, columns, rows, None
171
-
172
- # except Exception as e:
173
-
174
- # return sql, [], [], str(e)
175
-
176
- # def ask(self, question, db_id):
177
-
178
- # question = normalize_question(question)
179
-
180
- # if re.search(self.dml_keywords, question, re.IGNORECASE):
181
-
182
- # return {
183
- # "question": question,
184
- # "sql": "-- BLOCKED",
185
- # "columns": [],
186
- # "rows": [],
187
- # "error": "Malicious prompt"
188
- # }
189
-
190
- # schema = self.get_schema(db_id)
191
-
192
- # prompt = self.build_prompt(question, schema)
193
-
194
- # raw_sql = self.generate_sql(prompt)
195
-
196
- # final_sql, cols, rows, error = self.execute_sql(question, raw_sql, db_id)
197
-
198
- # return {
199
- # "question": question,
200
- # "sql": final_sql,
201
- # "columns": cols,
202
- # "rows": rows,
203
- # "error": error
204
- # }
205
-
206
-
207
- # _engine = None
208
-
209
-
210
- # def get_engine():
211
-
212
- # global _engine
213
-
214
- # if _engine is None:
215
- # _engine = Text2SQLEngine()
216
-
217
- # return _engine
218
-
219
-
220
-
221
  import sqlite3
222
  import torch
223
  import re
@@ -226,7 +6,7 @@ from pathlib import Path
226
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
227
  from peft import PeftModel
228
  from src.sql_validator import SQLValidator
229
- from src.schema_encoder import SchemaEncoder, build_schema_graph # Added build_schema_graph
230
 
231
  PROJECT_ROOT = Path(__file__).resolve().parents[1]
232
 
@@ -239,92 +19,6 @@ else:
239
  DB_ROOT = PROJECT_ROOT / "final_databases"
240
 
241
 
242
- # ==========================================
243
- # INPUT VALIDATION & RELEVANCE (From Code 1)
244
- # ==========================================
245
- def is_valid_question(q: str):
246
- """Extremely relaxed valid question checker. As long as there is 1 word, it passes."""
247
- words = re.findall(r"[a-zA-Z0-9]+", q)
248
- return len(words) >= 1
249
-
250
- def is_relevant_to_db(question: str, schema_graph: dict):
251
- """
252
- Lexical heuristic to block completely out-of-domain questions
253
- while allowing valid plurals.
254
- """
255
- q_words = set(re.findall(r'\b[a-z]{3,}\b', question.lower()))
256
- stop_words = {"show", "list", "all", "and", "the", "get", "find", "how", "many", "what", "where", "which", "who", "give", "display", "count", "from", "for", "with", "that", "have", "has", "are", "there"}
257
- q_words = q_words - stop_words
258
-
259
- if not q_words:
260
- return True
261
-
262
- schema_words = set()
263
- for table, cols in schema_graph.items():
264
- schema_words.update(re.findall(r'\b[a-z]{3,}\b', table.lower()))
265
- for col in cols:
266
- schema_words.update(re.findall(r'\b[a-z]{3,}\b', col.lower()))
267
-
268
- synonyms = {
269
- "customer": ["client", "buyer", "shopper", "person", "people", "user"],
270
- "employee": ["staff", "worker", "boss", "manager", "person", "people"],
271
- "track": ["song", "music", "audio", "tune"],
272
- "album": ["record", "cd", "music"],
273
- "artist": ["singer", "band", "musician", "creator"],
274
- "invoice": ["bill", "receipt", "purchase", "sale", "order", "buy", "bought", "cost"],
275
- "city": ["town", "location", "place"],
276
- "country": ["nation", "location", "place"],
277
- "flight": ["plane", "airline", "trip", "fly", "airport"],
278
- "student": ["pupil", "learner", "kid", "child"],
279
- "club": ["group", "organization", "team"],
280
- "course": ["class", "subject"],
281
- "cinema": ["movie", "film", "theater", "screen"]
282
- }
283
-
284
- extended_schema_words = set(schema_words)
285
- for db_word in schema_words:
286
- if db_word in synonyms:
287
- extended_schema_words.update(synonyms[db_word])
288
-
289
- extended_schema_words.update({"id", "name", "total", "sum", "average", "avg", "min", "max", "number", "amount", "record", "data", "info", "information", "detail", "first", "last", "most", "least", "cheapest", "expensive", "best"})
290
-
291
- # Check if the word OR its singular form is in the schema
292
- for qw in q_words:
293
- qw_singular = qw[:-1] if qw.endswith('s') else qw
294
- if qw in extended_schema_words or qw_singular in extended_schema_words:
295
- return True
296
-
297
- return False
298
-
299
- # ==========================================
300
- # SCHEMA CONSTRAINTS (From Code 1)
301
- # ==========================================
302
- def apply_schema_constraints(sql, schema_graph):
303
- sql = sql.lower()
304
-
305
- used_tables = [t[1] for t in re.findall(r'(from|join)\s+(\w+)', sql)]
306
- for t in used_tables:
307
- if t not in schema_graph:
308
- return None
309
-
310
- valid_columns = set()
311
- for cols in schema_graph.values():
312
- valid_columns.update(cols)
313
-
314
- col_blocks = re.findall(r'select\s+(.*?)\s+from', sql)
315
- for block in col_blocks:
316
- for c in block.split(","):
317
- c = c.strip().split()[-1]
318
- if "." in c:
319
- c = c.split(".")[-1]
320
-
321
- if c != "*" and "(" not in c and c != "":
322
- if c not in valid_columns:
323
- return None
324
-
325
- return sql
326
-
327
-
328
  def normalize_question(q: str):
329
  q = q.lower().strip()
330
  q = re.sub(r"distinct\s+(\d+)", r"\1 distinct", q)
@@ -350,8 +44,7 @@ class Text2SQLEngine:
350
  def __init__(self,
351
  adapter_path=None,
352
  base_model_name="Salesforce/codet5-base",
353
- use_lora=True,
354
- use_constrained_decoding=True): # Added constrained decoding flag
355
 
356
  self.device = "mps" if torch.backends.mps.is_available() else (
357
  "cuda" if torch.cuda.is_available() else "cpu"
@@ -359,7 +52,6 @@ class Text2SQLEngine:
359
 
360
  self.validator = SQLValidator(DB_ROOT)
361
  self.schema_encoder = SchemaEncoder(DB_ROOT)
362
- self.use_constrained_decoding = use_constrained_decoding
363
 
364
  self.dml_keywords = r'\b(delete|update|insert|drop|alter|truncate)\b'
365
 
@@ -408,20 +100,28 @@ SQL:
408
  return self.schema_encoder.structured_schema(db_id)
409
 
410
  def extract_sql(self, text: str):
 
411
  text = text.strip()
 
412
  if "SQL:" in text:
413
  text = text.split("SQL:")[-1]
 
414
  match = re.search(r"select[\s\S]*", text, re.IGNORECASE)
 
415
  if match:
416
  text = match.group(0)
 
417
  return text.split(";")[0].strip()
418
 
419
  def clean_sql(self, sql: str):
 
420
  sql = sql.replace('"', "'")
421
  sql = re.sub(r"\s+", " ", sql)
 
422
  return sql.strip()
423
 
424
  def generate_sql(self, prompt):
 
425
  inputs = self.tokenizer(
426
  prompt,
427
  return_tensors="pt",
@@ -430,6 +130,7 @@ SQL:
430
  ).to(self.device)
431
 
432
  with torch.no_grad():
 
433
  outputs = self.model.generate(
434
  **inputs,
435
  max_new_tokens=128,
@@ -438,85 +139,64 @@ SQL:
438
  )
439
 
440
  decoded = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
 
441
  return self.clean_sql(self.extract_sql(decoded))
442
 
443
  def execute_sql(self, question, sql, db_id):
 
444
  if re.search(self.dml_keywords, sql, re.IGNORECASE):
445
  return sql, [], [], "❌ Security Alert"
446
 
447
- # FIXED DATABASE PATH (From Code 2)
448
  db_path = DB_ROOT / f"{db_id}.sqlite"
449
 
450
  sql = self.clean_sql(sql)
451
  sql = semantic_fix(question, sql)
452
 
453
  try:
 
454
  conn = sqlite3.connect(db_path)
 
455
  cursor = conn.cursor()
 
456
  cursor.execute(sql)
 
457
  rows = cursor.fetchall()
 
458
  columns = [d[0] for d in cursor.description] if cursor.description else []
 
459
  conn.close()
 
460
  return sql, columns, rows, None
 
461
  except Exception as e:
 
462
  return sql, [], [], str(e)
463
 
464
  def ask(self, question, db_id):
465
- # 1. Normalize
466
- question_norm = normalize_question(question)
467
- question_context = f"Database question: {question_norm}"
468
 
469
- # 2. Block dangerous inputs
470
- if re.search(self.dml_keywords, question_context, re.IGNORECASE):
 
 
471
  return {
472
- "question": question_norm,
473
  "sql": "-- BLOCKED",
474
  "columns": [],
475
  "rows": [],
476
- "error": "Malicious prompt"
477
  }
478
 
479
- # 3. Check basic validity of question
480
- if not is_valid_question(question_context):
481
- return {"sql": "", "error": "❌ Invalid input. Please type words."}
482
-
483
  schema = self.get_schema(db_id)
484
- schema_graph = build_schema_graph(schema)
485
 
486
- # 4. LEXICAL RELEVANCE GUARDRAIL
487
- if not is_relevant_to_db(question_norm, schema_graph):
488
- return {"sql": "", "error": "❌ Question is completely out of domain for the selected database."}
489
 
490
- # 5. INITIAL GENERATION
491
- prompt = self.build_prompt(question_context, schema)
492
  raw_sql = self.generate_sql(prompt)
493
 
494
- # 6. STRONGER CONSTRAINT LOGIC
495
- if self.use_constrained_decoding:
496
- filtered_sql = apply_schema_constraints(raw_sql, schema_graph)
497
-
498
- if filtered_sql is None:
499
- constraint_prompt = f"""Use ONLY valid schema.
500
- Database schema:
501
- {schema}
502
- Generate a valid SQLite query for the question.
503
- Question:
504
- {question_context}
505
- SQL:
506
- """
507
- sql_retry = self.generate_sql(constraint_prompt)
508
- filtered_sql = apply_schema_constraints(sql_retry, schema_graph)
509
-
510
- if filtered_sql:
511
- raw_sql = filtered_sql
512
- else:
513
- raw_sql = sql_retry
514
-
515
- # 7. EXECUTION
516
- final_sql, cols, rows, error = self.execute_sql(question_norm, raw_sql, db_id)
517
 
518
  return {
519
- "question": question_norm,
520
  "sql": final_sql,
521
  "columns": cols,
522
  "rows": rows,
@@ -526,338 +206,12 @@ SQL:
526
 
527
  _engine = None
528
 
529
- def get_engine(use_constrained=True): # Added parameter to control constraints
 
 
530
  global _engine
531
 
532
  if _engine is None:
533
- _engine = Text2SQLEngine(use_constrained_decoding=use_constrained)
534
 
535
  return _engine
536
-
537
- # import sqlite3
538
- # import torch
539
- # import re
540
- # import os
541
- # from pathlib import Path
542
- # from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
543
- # from peft import PeftModel
544
- # from src.sql_validator import SQLValidator
545
- # from src.schema_encoder import SchemaEncoder # Removed build_schema_graph import
546
-
547
- # PROJECT_ROOT = Path(__file__).resolve().parents[1]
548
-
549
- # # ================================
550
- # # DATABASE PATH AUTO DETECTION
551
- # # ================================
552
- # if (PROJECT_ROOT / "data/database").exists():
553
- # DB_ROOT = PROJECT_ROOT / "data/database"
554
- # else:
555
- # DB_ROOT = PROJECT_ROOT / "final_databases"
556
-
557
-
558
- # # ==========================================
559
- # # SCHEMA PARSING
560
- # # ==========================================
561
- # def build_schema_graph(schema_text):
562
- # """
563
- # Parses a structured schema text string into a dictionary graph.
564
- # Matches formats like: table_name(col1, col2, col3)
565
- # """
566
- # tables = {}
567
- # for match in re.findall(r'(\w+)\s*\((.*?)\)', schema_text):
568
- # table = match[0]
569
- # cols = [c.strip().split()[0] for c in match[1].split(",")]
570
- # tables[table] = cols
571
- # return tables
572
-
573
-
574
- # # ==========================================
575
- # # INPUT VALIDATION & RELEVANCE
576
- # # ==========================================
577
- # def is_valid_question(q: str):
578
- # q = q.strip().lower()
579
-
580
- # if len(q) < 3:
581
- # return False
582
-
583
- # words = re.findall(r"[a-zA-Z]+", q)
584
- # if len(words) < 1:
585
- # return False
586
-
587
- # return True
588
-
589
-
590
- # def is_relevant_to_db(question: str, schema_graph: dict):
591
- # q_words = set(re.findall(r'\b[a-z]{3,}\b', question.lower()))
592
- # stop_words = {"show", "list", "all", "and", "the", "get", "find", "how", "many", "what", "where", "which", "who", "give", "display", "count", "from", "for", "with", "that", "have", "has", "are", "there"}
593
- # q_words = q_words - stop_words
594
-
595
- # if not q_words:
596
- # return True
597
-
598
- # schema_words = set()
599
- # for table, cols in schema_graph.items():
600
- # schema_words.update(re.findall(r'\b[a-z]{3,}\b', table.lower()))
601
- # for col in cols:
602
- # schema_words.update(re.findall(r'\b[a-z]{3,}\b', col.lower()))
603
-
604
- # synonyms = {
605
- # "customer": ["client", "buyer", "shopper", "person", "people", "user"],
606
- # "employee": ["staff", "worker", "boss", "manager", "person", "people"],
607
- # "track": ["song", "music", "audio", "tune"],
608
- # "album": ["record", "cd", "music"],
609
- # "artist": ["singer", "band", "musician", "creator"],
610
- # "invoice": ["bill", "receipt", "purchase", "sale", "order", "buy", "bought", "cost"],
611
- # "city": ["town", "location", "place"],
612
- # "country": ["nation", "location", "place"],
613
- # "flight": ["plane", "airline", "trip", "fly", "airport"],
614
- # "student": ["pupil", "learner", "kid", "child"],
615
- # "club": ["group", "organization", "team"],
616
- # "course": ["class", "subject"],
617
- # "cinema": ["movie", "film", "theater", "screen"]
618
- # }
619
-
620
- # extended_schema_words = set(schema_words)
621
- # for db_word in schema_words:
622
- # if db_word in synonyms:
623
- # extended_schema_words.update(synonyms[db_word])
624
-
625
- # extended_schema_words.update({"id", "name", "total", "sum", "average", "avg", "min", "max", "number", "amount", "record", "data", "info", "information", "detail", "first", "last", "most", "least", "cheapest", "expensive", "best"})
626
-
627
- # for qw in q_words:
628
- # qw_singular = qw[:-1] if qw.endswith('s') else qw
629
- # if qw in extended_schema_words or qw_singular in extended_schema_words:
630
- # return True
631
-
632
- # return False
633
-
634
- # def normalize_question(q: str):
635
- # return re.sub(r"\s+", " ", q.lower().strip())
636
-
637
- # def semantic_fix(question, sql):
638
- # q = question.lower()
639
- # num_match = re.search(r'\b(?:show|list|top|get)\s+(\d+)\b', q)
640
-
641
- # if num_match and "limit" not in sql.lower():
642
- # sql = f"{sql} LIMIT {num_match.group(1)}"
643
-
644
- # return sql
645
-
646
- # # ==========================================
647
- # # SCHEMA CONSTRAINTS (SIMULATED LOGIT BLOCKING)
648
- # # ==========================================
649
- # def apply_schema_constraints(sql, schema_graph):
650
- # sql = sql.lower()
651
-
652
- # used_tables = [t[1] for t in re.findall(r'(from|join)\s+(\w+)', sql)]
653
- # for t in used_tables:
654
- # if t not in schema_graph:
655
- # return None
656
-
657
- # valid_columns = set()
658
- # for cols in schema_graph.values():
659
- # valid_columns.update(cols)
660
-
661
- # col_blocks = re.findall(r'select\s+(.*?)\s+from', sql)
662
- # for block in col_blocks:
663
- # for c in block.split(","):
664
- # c = c.strip().split()[-1]
665
- # if "." in c:
666
- # c = c.split(".")[-1]
667
-
668
- # if c != "*" and "(" not in c and c != "":
669
- # if c not in valid_columns:
670
- # return None
671
-
672
- # return sql
673
-
674
- # # ==========================================
675
- # # ENGINE
676
- # # ==========================================
677
- # class Text2SQLEngine:
678
-
679
- # def __init__(self,
680
- # adapter_path="checkpoints/best_rlhf_model_2",
681
- # base_model_name="Salesforce/codet5-base",
682
- # use_lora=True,
683
- # use_constrained_decoding=False):
684
-
685
- # self.device = "mps" if torch.backends.mps.is_available() else (
686
- # "cuda" if torch.cuda.is_available() else "cpu"
687
- # )
688
-
689
- # self.validator = SQLValidator(DB_ROOT)
690
- # self.schema_encoder = SchemaEncoder(DB_ROOT)
691
-
692
- # self.use_constrained_decoding = use_constrained_decoding
693
- # self.dml_keywords = r'\b(delete|update|insert|drop|alter|truncate|create)\b'
694
-
695
- # print(f"\n📦 Loading model on {self.device}...")
696
-
697
- # base = AutoModelForSeq2SeqLM.from_pretrained(base_model_name)
698
-
699
- # # Override the redundant special tokens to prevent the tokenizer crash
700
- # self.tokenizer = AutoTokenizer.from_pretrained(
701
- # base_model_name,
702
- # use_fast=False,
703
- # additional_special_tokens=[]
704
- # )
705
-
706
- # # 🔥 FIXED LOADA ADAPTER PATH LOGIC
707
- # if use_lora:
708
- # if adapter_path and (PROJECT_ROOT / adapter_path).exists():
709
- # adapter_path = PROJECT_ROOT / adapter_path
710
- # elif (PROJECT_ROOT / "checkpoints/best_rlhf_model_2").exists():
711
- # adapter_path = PROJECT_ROOT / "checkpoints/best_rlhf_model_2"
712
- # else:
713
- # adapter_path = PROJECT_ROOT / "best_rlhf_model_2"
714
-
715
- # adapter_path = adapter_path.resolve()
716
-
717
- # if adapter_path.exists():
718
- # try:
719
- # self.model = PeftModel.from_pretrained(
720
- # base,
721
- # str(adapter_path),
722
- # local_files_only=True
723
- # ).to(self.device)
724
- # print(f"✅ LoRA loaded from {adapter_path}")
725
- # except Exception as e:
726
- # print(f"⚠️ LoRA load failed: {e}")
727
- # self.model = base.to(self.device)
728
- # else:
729
- # print(f"⚠️ Adapter not found at {adapter_path}, using base model")
730
- # self.model = base.to(self.device)
731
- # else:
732
- # self.model = base.to(self.device)
733
-
734
- # self.model.eval()
735
-
736
- # def build_prompt(self, question, schema):
737
- # return f"""
738
- # You are an expert SQL generator.
739
-
740
- # IMPORTANT:
741
- # - Use correct tables and columns
742
- # - Use JOINs when needed
743
-
744
- # Schema:
745
- # {schema}
746
-
747
- # Question:
748
- # {question}
749
-
750
- # SQL:
751
- # """
752
-
753
- # def get_schema(self, db_id):
754
- # return self.schema_encoder.structured_schema(db_id)
755
-
756
- # def extract_sql(self, text):
757
- # match = re.search(r"(select|with)[\s\S]*", text, re.IGNORECASE)
758
- # return match.group(0).split(";")[0].strip() if match else ""
759
-
760
- # def clean_sql(self, sql):
761
- # return re.sub(r"\s+", " ", sql.replace('"', "'")).strip()
762
-
763
- # def generate_sql(self, prompt):
764
- # inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
765
-
766
- # with torch.no_grad():
767
- # outputs = self.model.generate(
768
- # **inputs,
769
- # max_new_tokens=128,
770
- # num_beams=8,
771
- # length_penalty=0.8,
772
- # early_stopping=True
773
- # )
774
-
775
- # decoded = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
776
- # return self.clean_sql(self.extract_sql(decoded))
777
-
778
- # def execute_sql(self, question, sql, db_id):
779
-
780
- # if re.search(self.dml_keywords, sql, re.IGNORECASE):
781
- # return "", [], [], "❌ Blocked malicious SQL (Contains INSERT/UPDATE/DELETE/DROP)"
782
-
783
- # # 🔥 FIXED DATABASE PATH
784
- # db_path = DB_ROOT / f"{db_id}.sqlite"
785
- # sql = semantic_fix(question, sql)
786
-
787
- # try:
788
- # conn = sqlite3.connect(db_path)
789
- # cursor = conn.cursor()
790
- # cursor.execute(sql)
791
-
792
- # rows = cursor.fetchall()
793
- # columns = [d[0] for d in cursor.description] if cursor.description else []
794
-
795
- # conn.close()
796
- # return sql, columns, rows, None
797
-
798
- # except Exception as e:
799
- # return sql, [], [], str(e)
800
-
801
- # def ask(self, question, db_id):
802
-
803
- # question = normalize_question(question)
804
- # question_context = f"Database question: {question}"
805
-
806
- # if re.search(self.dml_keywords, question_context, re.IGNORECASE):
807
- # return {"sql": "", "error": "❌ Blocked dangerous query from input text."}
808
-
809
- # if not is_valid_question(question_context):
810
- # return {"sql": "", "error": "❌ Invalid input. Please type words."}
811
-
812
- # schema = self.get_schema(db_id)
813
- # schema_graph = build_schema_graph(schema)
814
-
815
- # if not is_relevant_to_db(question, schema_graph):
816
- # return {"sql": "", "error": "❌ Question is completely out of domain for the selected database."}
817
-
818
- # sql = self.generate_sql(self.build_prompt(question_context, schema))
819
-
820
- # if self.use_constrained_decoding:
821
- # filtered_sql = apply_schema_constraints(sql, schema_graph)
822
-
823
- # if filtered_sql is None:
824
- # constraint_prompt = f"""
825
- # Use ONLY valid schema.
826
- # Schema:
827
- # {schema}
828
-
829
- # Question:
830
- # {question_context}
831
-
832
- # SQL:
833
- # """
834
- # sql_retry = self.generate_sql(constraint_prompt)
835
- # filtered_sql = apply_schema_constraints(sql_retry, schema_graph)
836
-
837
- # if filtered_sql:
838
- # sql = filtered_sql
839
- # else:
840
- # sql = sql_retry
841
-
842
- # final_sql, cols, rows, error = self.execute_sql(question_context, sql, db_id)
843
-
844
- # return {
845
- # "question": question_context,
846
- # "sql": final_sql,
847
- # "columns": cols,
848
- # "rows": rows,
849
- # "error": error
850
- # }
851
-
852
- # def get_engine(
853
- # adapter_path="checkpoints/best_rlhf_model_2",
854
- # base_model_name="Salesforce/codet5-base",
855
- # use_lora=True,
856
- # use_constrained=True
857
- # ):
858
- # return Text2SQLEngine(
859
- # adapter_path=adapter_path,
860
- # base_model_name=base_model_name,
861
- # use_lora=use_lora,
862
- # use_constrained_decoding=use_constrained
863
- # )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import sqlite3
2
  import torch
3
  import re
 
6
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
7
  from peft import PeftModel
8
  from src.sql_validator import SQLValidator
9
+ from src.schema_encoder import SchemaEncoder
10
 
11
  PROJECT_ROOT = Path(__file__).resolve().parents[1]
12
 
 
19
  DB_ROOT = PROJECT_ROOT / "final_databases"
20
 
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  def normalize_question(q: str):
23
  q = q.lower().strip()
24
  q = re.sub(r"distinct\s+(\d+)", r"\1 distinct", q)
 
44
  def __init__(self,
45
  adapter_path=None,
46
  base_model_name="Salesforce/codet5-base",
47
+ use_lora=True):
 
48
 
49
  self.device = "mps" if torch.backends.mps.is_available() else (
50
  "cuda" if torch.cuda.is_available() else "cpu"
 
52
 
53
  self.validator = SQLValidator(DB_ROOT)
54
  self.schema_encoder = SchemaEncoder(DB_ROOT)
 
55
 
56
  self.dml_keywords = r'\b(delete|update|insert|drop|alter|truncate)\b'
57
 
 
100
  return self.schema_encoder.structured_schema(db_id)
101
 
102
  def extract_sql(self, text: str):
103
+
104
  text = text.strip()
105
+
106
  if "SQL:" in text:
107
  text = text.split("SQL:")[-1]
108
+
109
  match = re.search(r"select[\s\S]*", text, re.IGNORECASE)
110
+
111
  if match:
112
  text = match.group(0)
113
+
114
  return text.split(";")[0].strip()
115
 
116
  def clean_sql(self, sql: str):
117
+
118
  sql = sql.replace('"', "'")
119
  sql = re.sub(r"\s+", " ", sql)
120
+
121
  return sql.strip()
122
 
123
  def generate_sql(self, prompt):
124
+
125
  inputs = self.tokenizer(
126
  prompt,
127
  return_tensors="pt",
 
130
  ).to(self.device)
131
 
132
  with torch.no_grad():
133
+
134
  outputs = self.model.generate(
135
  **inputs,
136
  max_new_tokens=128,
 
139
  )
140
 
141
  decoded = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
142
+
143
  return self.clean_sql(self.extract_sql(decoded))
144
 
145
  def execute_sql(self, question, sql, db_id):
146
+
147
  if re.search(self.dml_keywords, sql, re.IGNORECASE):
148
  return sql, [], [], "❌ Security Alert"
149
 
150
+ # FIXED DATABASE PATH
151
  db_path = DB_ROOT / f"{db_id}.sqlite"
152
 
153
  sql = self.clean_sql(sql)
154
  sql = semantic_fix(question, sql)
155
 
156
  try:
157
+
158
  conn = sqlite3.connect(db_path)
159
+
160
  cursor = conn.cursor()
161
+
162
  cursor.execute(sql)
163
+
164
  rows = cursor.fetchall()
165
+
166
  columns = [d[0] for d in cursor.description] if cursor.description else []
167
+
168
  conn.close()
169
+
170
  return sql, columns, rows, None
171
+
172
  except Exception as e:
173
+
174
  return sql, [], [], str(e)
175
 
176
  def ask(self, question, db_id):
 
 
 
177
 
178
+ question = normalize_question(question)
179
+
180
+ if re.search(self.dml_keywords, question, re.IGNORECASE):
181
+
182
  return {
183
+ "question": question,
184
  "sql": "-- BLOCKED",
185
  "columns": [],
186
  "rows": [],
187
+ "error": "Malicious prompt"
188
  }
189
 
 
 
 
 
190
  schema = self.get_schema(db_id)
 
191
 
192
+ prompt = self.build_prompt(question, schema)
 
 
193
 
 
 
194
  raw_sql = self.generate_sql(prompt)
195
 
196
+ final_sql, cols, rows, error = self.execute_sql(question, raw_sql, db_id)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
 
198
  return {
199
+ "question": question,
200
  "sql": final_sql,
201
  "columns": cols,
202
  "rows": rows,
 
206
 
207
  _engine = None
208
 
209
+
210
+ def get_engine():
211
+
212
  global _engine
213
 
214
  if _engine is None:
215
+ _engine = Text2SQLEngine()
216
 
217
  return _engine