tjhalanigrid committed on
Commit
b5ae35c
·
1 Parent(s): 2c420d1

fix tokenizer folder structure for custom backend

Browse files
README.md CHANGED
@@ -1,12 +1,13 @@
1
  ---
2
  title: Text2sql Demo
3
- emoji: 🐨
4
- colorFrom: yellow
5
- colorTo: pink
6
- sdk: streamlit
 
7
  app_file: app.py
8
  pinned: false
9
  license: mit
10
  python_version: 3.10.13
11
- short_description: 'to show the streamlit interface'
12
  ---
 
1
  ---
2
  title: Text2sql Demo
3
+ emoji: 📊
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: gradio
7
+ sdk_version: 5.8.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
  python_version: 3.10.13
12
+ short_description: 'Text to SQL with RLHF'
13
  ---
app.py CHANGED
@@ -1,4 +1,9 @@
1
- import streamlit as st
 
 
 
 
 
2
  import pandas as pd
3
  import re
4
  import time
@@ -10,11 +15,7 @@ import subprocess
10
  import base64
11
  import io
12
  from pathlib import Path
13
-
14
- # ==========================================
15
- # PAGE CONFIG
16
- # ==========================================
17
- st.set_page_config(page_title="Text-to-SQL RLHF", layout="wide")
18
 
19
  # ==========================================
20
  # RELATIVE PATH RESOLUTION (GLOBAL)
@@ -39,45 +40,52 @@ def get_db_path(db_id: str) -> str:
39
  # ==========================================
40
  if not torch.cuda.is_available():
41
  class MockCUDAEvent:
42
- def __init__(self, enable_timing=False, blocking=False, interprocess=False): self.t = 0.0
43
- def record(self, stream=None): self.t = time.perf_counter()
44
- def elapsed_time(self, end_event): return (end_event.t - self.t) * 1000.0
 
 
 
 
45
  torch.cuda.Event = MockCUDAEvent
46
  if not hasattr(torch.cuda, 'synchronize'):
47
  torch.cuda.synchronize = lambda: None
48
 
49
  # ==========================================
50
- # IMPORTS & GLOBAL STATE
51
  # ==========================================
52
  from src.quantized_text2sql_engine import QuantizedText2SQLEngine
53
  from src.schema_encoder import SchemaEncoder
54
 
55
  DEFAULT_QUANT_ARTIFACT = str(PROJECT_ROOT / "int8_dynamic")
56
 
57
- # Use st.session_state to persist logs across UI reruns safely
58
- if '_QUERY_LOG' not in st.session_state:
59
- st.session_state._QUERY_LOG = []
60
- st.session_state._PERF_LOG = []
61
- st.session_state._SUCCESS_LOG = []
62
- st.session_state._OP_STATS = {
63
- "SELECT": {"ok": 0, "fail": 0}, "WHERE": {"ok": 0, "fail": 0}, "JOIN": {"ok": 0, "fail": 0},
64
- "GROUP_BY": {"ok": 0, "fail": 0}, "ORDER_BY": {"ok": 0, "fail": 0}, "HAVING": {"ok": 0, "fail": 0}, "LIMIT": {"ok": 0, "fail": 0},
65
- }
66
 
67
- # 🚨 LAZY LOADING: Streamlit caches the engine so it only loads ONCE, and ONLY when called.
68
- @st.cache_resource(show_spinner=False)
69
- def load_engine_and_schema():
70
- engine = None
71
- try:
72
- engine = QuantizedText2SQLEngine(DEFAULT_QUANT_ARTIFACT, device="cpu", use_constrained=False, exec_workers=8, use_cache=True)
73
- except Exception as e:
74
- print(f"Failed to load engine: {e}")
75
- encoder = SchemaEncoder(DB_ROOT)
76
- return engine, encoder
 
 
 
 
 
 
 
 
 
 
 
77
 
78
- # ==========================================
79
- # HELPER FUNCTIONS
80
- # ==========================================
81
  SAMPLES = [
82
  ("Show 10 distinct employee first names.", "chinook_1"), ("Which artist has the most albums?", "chinook_1"),
83
  ("List all the tracks that belong to the 'Rock' genre.", "chinook_1"), ("What are the names of all the cities?", "flight_1"),
@@ -87,7 +95,6 @@ SAMPLES = [
87
  ("Show the names of all cinemas.", "cinema"), ("Which cinema has the most screens?", "cinema")
88
  ]
89
  SAMPLE_QUESTIONS = [q[0] for q in SAMPLES]
90
- DBS = sorted(["flight_1", "student_assessment", "store_1", "bike_1", "book_2", "chinook_1", "academic", "aircraft", "car_1", "cinema", "club_1", "csu_1", "college_1", "college_2", "company_1", "company_employee", "customer_complaints", "department_store", "employee_hire_evaluation", "museum_visit", "products_for_hire", "restaurant_1", "school_finance", "shop_membership", "small_bank_1", "student_1", "tvshow", "voter_1", "world_1"])
91
 
92
  def explain_sql(sql):
93
  if not sql: return ""
@@ -138,7 +145,8 @@ def get_hint(error_type):
138
  }
139
  return hints.get(error_type, "Review query.")
140
 
141
- def is_relevant_to_schema(question, db_id, schema_encoder):
 
142
  try: raw_schema = schema_encoder.structured_schema(db_id).lower()
143
  except: return True
144
  schema_words = set(re.findall(r'[a-z0-9_]+', raw_schema))
@@ -151,319 +159,401 @@ def is_relevant_to_schema(question, db_id, schema_encoder):
151
  if word in schema_words or singular_word in schema_words: return True
152
  return False
153
 
154
- def _log(error_type: str, *, question: str, db_id_val: str, sql: str = "", error_msg: str = "") -> None:
155
- st.session_state._QUERY_LOG.append({"t": time.time(), "db_id": str(db_id_val), "question": str(question), "sql": str(sql), "error_type": str(error_type), "error_msg": str(error_msg)})
 
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
- def _perf_log(payload: dict) -> None:
158
- st.session_state._PERF_LOG.append(payload)
159
- if len(st.session_state._PERF_LOG) > 1000: del st.session_state._PERF_LOG[:200]
160
 
161
- # ==========================================
162
- # MAIN UI
163
- # ==========================================
164
- st.markdown("""
165
- <div style="text-align: center; background-color: #e0e7ff; padding: 20px; border-radius: 10px; margin-bottom: 20px; border: 1px solid #c7d2fe;">
166
- <h1 style="color: #3730a3; margin-top: 0; margin-bottom: 10px; font-size: 2.2em;"> Text-to-SQL using RLHF + Execution Reward</h1>
167
- <p style="color: #4f46e5; font-size: 1.1em; margin: 0;">Convert Natural Language to SQL, strictly validated and safely executed on local SQLite databases.</p>
168
- </div>
169
- """, unsafe_allow_html=True)
 
 
 
 
 
 
170
 
171
- tab1, tab2 = st.tabs(["Inference", "Diagnostics"])
 
 
172
 
173
- with tab1:
174
- col1, col2 = st.columns([1, 2])
 
175
 
176
- with col1:
177
- st.markdown("### 1. Configuration & Input")
178
- method = st.radio("How do you want to ask?", ["💡 Pick a Sample", "✍️ Type my own"])
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
- if method == "💡 Pick a Sample":
181
- sample_q = st.selectbox("Select a Sample Question", SAMPLE_QUESTIONS)
182
- db_id = next((db for q, db in SAMPLES if q == sample_q), "chinook_1")
183
- st.text_input("Database", value=db_id, disabled=True)
184
- custom_q = ""
185
- else:
186
- db_id = st.selectbox("Select Database", DBS, index=DBS.index("chinook_1"))
187
- sample_q = ""
188
- custom_q = st.text_area("Ask your Custom Question", placeholder="Type your own question here...", height=100)
189
-
190
- # Schema Viewer
191
- _, schema_encoder = load_engine_and_schema() # Encoder loads instantly
192
- st.markdown("#### 📋 Database Structure")
193
- st.caption("Use these exact names! Table names are **Dark**, Column names are Light.")
194
- with st.container(height=250):
195
- try:
196
- st.code(schema_encoder.structured_schema(db_id), language="sql")
197
- except Exception as e:
198
- st.error(f"Error loading schema: {e}")
199
-
200
- run_btn = st.button("🚀 Generate & Run SQL", type="primary", use_container_width=True)
201
-
202
- with col2:
203
- st.markdown("### 2. Execution Results")
204
- sql_placeholder = st.empty()
205
- df_placeholder = st.empty()
206
- exp_placeholder = st.empty()
207
-
208
- if run_btn:
209
- raw_question = sample_q if method == "💡 Pick a Sample" else custom_q
210
-
211
- if not raw_question or str(raw_question).strip() == "":
212
- exp_placeholder.warning("⚠️ Please enter a question.")
213
- st.stop()
214
-
215
- typo_corrections = [(r'\bshaw\b', 'show'), (r'\bshw\b', 'show'), (r'\bsho\b', 'show'), (r'\blsit\b', 'list'), (r'\blis\b', 'list'), (r'\bfidn\b', 'find'), (r'\bfnd\b', 'find'), (r'\bgte\b', 'get')]
216
- question = str(raw_question)
217
- for bad, good in typo_corrections: question = re.sub(bad, good, question, flags=re.IGNORECASE)
218
- q_lower = question.strip().lower()
219
-
220
- if len(q_lower.split()) < 2:
221
- _log("gibberish", question=question, db_id_val=str(db_id), error_msg="gibberish filtered")
222
- exp_placeholder.warning("⚠️ Please enter a clear, meaningful natural language question (more than one word).")
223
- st.stop()
224
-
225
- if re.search(r'\b(delete|update|insert|drop|alter|truncate)\b', q_lower):
226
- _log("blocked_dml", question=question, db_id_val=str(db_id), error_msg="DML blocked")
227
- exp_placeholder.error("🛑 Security Alert: Modifying or deleting data is strictly prohibited.")
228
- st.stop()
229
-
230
- if not is_relevant_to_schema(question, db_id, schema_encoder):
231
- _log("out_of_domain", question=question, db_id_val=str(db_id), error_msg="out of domain")
232
- exp_placeholder.error(f"🛑 Relevance Alert: I don't see anything related to your question in the '{db_id}' schema.")
233
- st.stop()
234
-
235
- start_time = time.time()
236
- t0 = time.perf_counter()
237
-
238
- # LAZY LOAD TRIGGER: We only spin up the engine here!
239
- with st.spinner("Booting AI Engine & Generating SQL..."):
240
- quant_engine, _ = load_engine_and_schema()
241
- if quant_engine is None:
242
- exp_placeholder.error("❌ CRITICAL BACKEND CRASH: Quantized engine is not available. Ensure 'int8_dynamic' folder is uploaded.")
243
- st.stop()
244
-
245
- try:
246
- result = quant_engine.ask(question, str(db_id), num_beams=4, max_new_tokens=120, timeout_s=2.0)
247
- except TypeError:
248
- result = quant_engine.ask(question, str(db_id))
249
- except Exception as e:
250
- _log("backend_crash", question=question, db_id_val=str(db_id), error_msg=str(e))
251
- exp_placeholder.error(f"❌ CRITICAL BACKEND CRASH:\n{str(e)}")
252
- st.stop()
253
-
254
- final_sql = str(result.get("sql", ""))
255
- model_sql = final_sql
256
-
257
- # Semantic limit cleaner
258
- num_match = re.search(r'\b(?:show|list|top|limit|get|first|last|sample|of)\s+(?:[a-zA-Z_]+\s+)?(\d+)\b', q_lower)
259
- if not num_match and q_lower.startswith(("show", "list", "get")):
260
- num_match = re.search(r'\b(\d+)\b', q_lower)
261
-
262
- if num_match and final_sql:
263
- limit_val = num_match.group(1)
264
- final_sql = re.sub(rf"(?i)\s*(?:where|having|and)?\s*count\s*\(\s*\*\s*\)\s*=\s*{limit_val}", "", final_sql)
265
- final_sql = re.sub(rf"(?i)\s*(?:where|and)\s+[a-zA-Z0-9_.]+\s*=\s*['\"]?{limit_val}['\"]?", "", final_sql)
266
- final_sql = re.sub(r"(?i)\s*where\s*$", "", final_sql)
267
- final_sql = re.sub(r"(?i)\s*where\s+(group by|order by|limit)", r" \1", final_sql)
268
-
269
- agg_kws = ["most", "top", "highest", "lowest", "count", "many", "group", "frequent", "popular"]
270
- if not any(k in q_lower for k in agg_kws):
271
- final_sql = re.sub(r"(?i)\s*group by\s+[a-zA-Z0-9_.]+\s*order by\s+count\(\*\)\s*(?:desc|asc)?", "", final_sql)
272
- final_sql = re.sub(r"(?i)\s*order by\s+count\(\*\)\s*(?:desc|asc)?", "", final_sql)
273
- final_sql = re.sub(r"(?i),\s*count\(\*\)", "", final_sql)
274
- final_sql = re.sub(r"(?i)count\(\*\)\s*,", "", final_sql)
275
-
276
- if "group by" in final_sql.lower() and not re.search(r'(?i)\b(count|sum|avg|max|min)\b\(', final_sql):
277
- final_sql = re.sub(r"(?i)\s*group by\s+[a-zA-Z0-9_.]+", "", final_sql)
278
-
279
- if "limit" not in final_sql.lower():
280
- final_sql = f"{final_sql.strip().rstrip(';')} LIMIT {limit_val}"
281
-
282
- sql_placeholder.code(final_sql, language="sql")
283
-
284
- # Execution
285
- from src.sql_validator import validate_sql_schema
286
- db_path = get_db_path(str(db_id))
287
 
288
- try: strict_valid, _ = validate_sql_schema(final_sql, db_path)
289
- except Exception: strict_valid = False
 
 
 
 
 
 
 
 
 
 
 
290
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
  error_msg = None
292
- rows, cols = [], []
293
- sqlite_success = False
294
-
295
- with st.spinner("Executing query..."):
296
- try:
297
- rows, cols = quant_engine._execute_one(final_sql, db_path, timeout_s=2.0)
298
- sqlite_success = True
299
- except Exception as e:
300
- error_msg = str(e)
301
- sqlite_success = False
302
-
303
- if not sqlite_success and model_sql and model_sql != final_sql:
304
- try:
305
- alt_rows, alt_cols = quant_engine._execute_one(model_sql, db_path, timeout_s=2.0)
306
- final_sql = model_sql
307
- sql_placeholder.code(final_sql, language="sql")
308
- rows, cols = alt_rows, alt_cols
309
- sqlite_success = True
310
- error_msg = None
311
- except Exception: pass
312
-
313
- valid = sqlite_success
314
-
315
- if error_msg or not valid:
316
- et = classify_error(final_sql, str(error_msg or ""), timed_out=("interrupted" in str(error_msg or "").lower()))
317
- _log(et, question=str(question), db_id_val=str(db_id), sql=str(final_sql), error_msg=str(error_msg or "Execution failed"))
318
-
319
- latency = round(time.time() - start_time, 3)
320
- t1 = time.perf_counter()
321
- engine_stats_after = quant_engine.stats() if hasattr(quant_engine, 'stats') else {}
322
-
323
- perf = {
324
- "db_id": str(db_id), "use_constrained_decoding": False, "num_beams": 4,
325
- "latency_total_ms": round((t1 - t0) * 1000.0, 2), "constraint_ok": bool(strict_valid), "has_error": bool(error_msg),
326
- "exec_cache_hit_rate": float(engine_stats_after.get("exec_cache_hit_rate", 0.0) or 0.0),
327
- }
328
- _perf_log(perf)
329
-
330
- window = st.session_state._PERF_LOG[-50:]
331
- avg_ms = sum(float(x.get("latency_total_ms", 0.0) or 0.0) for x in window) / len(window) if window else 0.0
332
- constraint_rate = sum(1 for x in window if x.get("constraint_ok")) / len(window) if window else 0.0
333
-
334
- perf_block = (
335
- f"\n\n---\n**Performance (task impact)**\n"
336
- f"- Total latency (ms): {perf['latency_total_ms']}\n"
337
- f"- Strict Python Validator OK (Task 3): {perf['constraint_ok']}\n"
338
- f"- Exec cache hit-rate (Task 1/5): {round(perf['exec_cache_hit_rate'], 3)}\n"
339
- f"- Rolling avg latency last 50 (ms): {round(avg_ms, 2)}\n"
340
- f"- Rolling constraint rate last 50: {round(constraint_rate, 3)}\n"
341
- )
342
-
343
- ops = sql_ops(final_sql)
344
-
345
- if error_msg or not valid:
346
- for op in ops:
347
- if op in st.session_state._OP_STATS: st.session_state._OP_STATS[op]["fail"] += 1
348
-
349
- error_type = classify_error(final_sql, str(error_msg or ""))
350
- explanation = f"❌ Error Details:\n\n{error_msg}\n\nError Type: {error_type}\nHint: {get_hint(error_type)}{perf_block}"
351
- exp_placeholder.error(explanation)
352
- else:
353
- safe_cols = cols if cols else ["Result"]
354
- df_placeholder.dataframe(pd.DataFrame(rows, columns=safe_cols), use_container_width=True)
355
-
356
- for op in ops:
357
- if op in st.session_state._OP_STATS: st.session_state._OP_STATS[op]["ok"] += 1
358
- st.session_state._SUCCESS_LOG.append({"t": time.time(), "db_id": str(db_id), "question": question, "sql": final_sql, "ops": ops})
359
-
360
- explanation = f"✅ Query executed successfully\n\nRows returned: {len(rows)}\nExecution Time: {latency} sec\n\n{explain_sql(final_sql)}{perf_block}"
361
-
362
- limit_match = re.search(r'LIMIT\s+(\d+)', final_sql, re.IGNORECASE)
363
- if limit_match and len(rows) < int(limit_match.group(1)):
364
- explanation += f"\n\nℹ️ Query allowed up to {int(limit_match.group(1))} rows but only {len(rows)} matched."
365
-
366
- exp_placeholder.info(explanation)
367
-
368
-
369
- with tab2:
370
- st.markdown("## Diagnostics & Telemetry")
371
 
372
- with st.expander("Task 1: Parallel Reward Benchmark"):
373
- st.markdown("*(Simulates the heavy RLHF training workload by running hundreds of complex SQL queries concurrently to test SQLite multi-threading performance.)*")
374
- t1_n = st.number_input("Rollouts (n)", value=20, step=1)
375
- t1_workers = st.number_input("Max workers", value=10, step=1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
376
 
377
- if st.button("Run Task 1 benchmark"):
378
- output_container = st.empty()
379
- env = os.environ.copy()
380
- env["PYTHONPATH"] = str(PROJECT_ROOT) + (os.pathsep + env["PYTHONPATH"] if env.get("PYTHONPATH") else "")
381
- env.setdefault("MPLBACKEND", "Agg")
382
- env.setdefault("MPLCONFIGDIR", "/tmp/mplconfig")
383
- os.makedirs(env["MPLCONFIGDIR"], exist_ok=True)
384
-
385
- cmd = [sys.executable, "-u", "scripts/benchmark_parallel_reward.py", "--n", str(int(t1_n)), "--max-workers", str(int(t1_workers)), "--skip-profile"]
386
-
387
- try:
388
- proc = subprocess.Popen(cmd, cwd=str(PROJECT_ROOT), env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1)
389
- lines = []
390
- last_update = time.time()
391
- for line in proc.stdout:
392
- lines.append(line)
393
- if time.time() - last_update > 0.5:
394
- output_container.text("".join(lines[-50:]))
395
- last_update = time.time()
396
- proc.wait()
397
- output_container.text("".join(lines))
398
-
399
- plot_path = PROJECT_ROOT / "results" / "task1_plot.png"
400
- if plot_path.exists():
401
- st.image(str(plot_path))
402
- else:
403
- st.write("*No plot generated.*")
404
- except Exception as e:
405
- output_container.error(f"Failed to run benchmark: {e}")
406
-
407
- with st.expander("Task 2: Error Dashboard", expanded=True):
408
- st.markdown("*(Live telemetry tracking the most common SQL failures.)*")
409
- if st.button("Refresh Dashboard"):
410
- st.rerun()
411
-
412
- counts = {}
413
- for r in st.session_state._QUERY_LOG[-1000:]:
414
- k = r.get("error_type") or "other"
415
- counts[k] = counts.get(k, 0) + 1
416
-
417
- if not counts:
418
- st.write("No errors logged yet.")
419
- else:
420
- rows = [{"error_type": k, "count": int(v), "hint": get_hint(k)} for k, v in sorted(counts.items(), key=lambda x: (-x[1], x[0]))]
421
- st.dataframe(pd.DataFrame(rows), use_container_width=True)
422
-
423
- recent = []
424
- for r in st.session_state._QUERY_LOG[-100:]:
425
- ts = r.get("t")
426
- ts_s = time.strftime("%H:%M:%S", time.localtime(float(ts))) if ts else ""
427
- recent.append({"time": ts_s, "db_id": r.get("db_id", ""), "error_type": r.get("error_type", ""), "question": r.get("question", ""), "error_msg": r.get("error_msg", "")})
428
- st.dataframe(pd.DataFrame(recent), use_container_width=True)
429
-
430
- choices = [str(x["error_type"]) for x in rows]
431
- if choices:
432
- sel_type = st.selectbox("View Examples for Error Type", choices)
433
- matches = [r for r in reversed(st.session_state._QUERY_LOG) if (r.get("error_type") or "") == str(sel_type)][:3]
434
-
435
- st.write(f"**Hint:** {get_hint(sel_type)}")
436
- for i, r in enumerate(matches, 1):
437
- st.markdown(f"**Example {i}**\n* **DB:** {r.get('db_id','')}\n* **Q:** {r.get('question','')}\n* **SQL:** `{r.get('sql','')}`\n* **Msg:** {r.get('error_msg','')}")
438
-
439
- with st.expander("Task 2: Clause Telemetry"):
440
- st.markdown("*(Analyzes which specific SQL clauses are most prone to errors.)*")
441
- if st.button("Refresh SQL-op stats"):
442
- st.rerun()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
443
 
444
- rows = []
445
- for op, d in st.session_state._OP_STATS.items():
446
- ok, fail = int(d.get("ok", 0)), int(d.get("fail", 0))
447
- total = ok + fail
448
- rows.append({"op": op, "ok": ok, "fail": fail, "total": total, "success_rate": (ok / total) if total else 0.0})
449
-
450
- st.dataframe(pd.DataFrame(rows), use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
451
 
452
- try:
453
- import matplotlib.pyplot as plt
454
- labels = list(st.session_state._OP_STATS.keys())
455
- oks = [int(st.session_state._OP_STATS[k]["ok"]) for k in labels]
456
- fails = [int(st.session_state._OP_STATS[k]["fail"]) for k in labels]
457
-
458
- fig, ax = plt.subplots(figsize=(9, 3.5))
459
- x = list(range(len(labels)))
460
- ax.bar(x, oks, label="ok", color="#16a34a")
461
- ax.bar(x, fails, bottom=oks, label="fail", color="#dc2626")
462
- ax.set_xticks(x)
463
- ax.set_xticklabels(labels, rotation=30, ha="right")
464
- ax.set_title("Success/Failure by SQL operation")
465
- ax.legend()
466
- fig.tight_layout()
467
- st.pyplot(fig)
468
- except Exception as e:
469
- st.error(f"Plot error: {e}")
 
1
+ """
2
+ GRADIO DEMO UI - LAZY LOADING EDITION
3
+ NL → SQL → Result Table
4
+ """
5
+
6
+ import gradio as gr
7
  import pandas as pd
8
  import re
9
  import time
 
15
  import base64
16
  import io
17
  from pathlib import Path
18
+ from typing import Iterator
 
 
 
 
19
 
20
  # ==========================================
21
  # RELATIVE PATH RESOLUTION (GLOBAL)
 
40
  # ==========================================
41
  if not torch.cuda.is_available():
42
  class MockCUDAEvent:
43
+ def __init__(self, enable_timing=False, blocking=False, interprocess=False):
44
+ self.t = 0.0
45
+ def record(self, stream=None):
46
+ self.t = time.perf_counter()
47
+ def elapsed_time(self, end_event):
48
+ return (end_event.t - self.t) * 1000.0
49
+
50
  torch.cuda.Event = MockCUDAEvent
51
  if not hasattr(torch.cuda, 'synchronize'):
52
  torch.cuda.synchronize = lambda: None
53
 
54
  # ==========================================
55
+ # IMPORTS & ENGINE SETUP
56
  # ==========================================
57
  from src.quantized_text2sql_engine import QuantizedText2SQLEngine
58
  from src.schema_encoder import SchemaEncoder
59
 
60
  DEFAULT_QUANT_ARTIFACT = str(PROJECT_ROOT / "int8_dynamic")
61
 
62
+ _ENGINE_CACHE = {}
63
+ _QUERY_LOG = []
64
+ _PERF_LOG = []
65
+ _SUCCESS_LOG = []
 
 
 
 
 
66
 
67
+ _OP_STATS = {
68
+ "SELECT": {"ok": 0, "fail": 0}, "WHERE": {"ok": 0, "fail": 0}, "JOIN": {"ok": 0, "fail": 0},
69
+ "GROUP_BY": {"ok": 0, "fail": 0}, "ORDER_BY": {"ok": 0, "fail": 0}, "HAVING": {"ok": 0, "fail": 0}, "LIMIT": {"ok": 0, "fail": 0},
70
+ }
71
+
72
def get_quant_engine(artifact_dir: str, use_constrained: bool = False, exec_workers: int = 8, use_cache: bool = True):
    """Return a memoized ``QuantizedText2SQLEngine`` for the given settings.

    Engines are cached in the module-level ``_ENGINE_CACHE`` keyed by the
    normalized constructor arguments, so repeated calls with the same
    configuration reuse a single (expensive-to-build) instance.
    """
    cache_key = (artifact_dir, bool(use_constrained), int(exec_workers), bool(use_cache))
    if cache_key not in _ENGINE_CACHE:
        try:
            _ENGINE_CACHE[cache_key] = QuantizedText2SQLEngine(
                artifact_dir,
                device="cpu",
                use_constrained=bool(use_constrained),
                exec_workers=int(exec_workers),
                use_cache=bool(use_cache),
            )
        except TypeError:
            # Older engine builds accept only the artifact directory.
            _ENGINE_CACHE[cache_key] = QuantizedText2SQLEngine(artifact_dir)
    return _ENGINE_CACHE[cache_key]
80
+
81
+ # 🚨 LAZY LOADING: We DO NOT load the model here! We only load the fast Schema Encoder.
82
+ quant_engine = None
83
+ try:
84
+ schema_encoder = SchemaEncoder(DB_ROOT)
85
+ except Exception as e:
86
+ print(f"Warning: SchemaEncoder failed to load: {e}")
87
+ schema_encoder = None
88
 
 
 
 
89
  SAMPLES = [
90
  ("Show 10 distinct employee first names.", "chinook_1"), ("Which artist has the most albums?", "chinook_1"),
91
  ("List all the tracks that belong to the 'Rock' genre.", "chinook_1"), ("What are the names of all the cities?", "flight_1"),
 
95
  ("Show the names of all cinemas.", "cinema"), ("Which cinema has the most screens?", "cinema")
96
  ]
97
  SAMPLE_QUESTIONS = [q[0] for q in SAMPLES]
 
98
 
99
  def explain_sql(sql):
100
  if not sql: return ""
 
145
  }
146
  return hints.get(error_type, "Review query.")
147
 
148
+ def is_relevant_to_schema(question, db_id):
149
+ if schema_encoder is None: return True
150
  try: raw_schema = schema_encoder.structured_schema(db_id).lower()
151
  except: return True
152
  schema_words = set(re.findall(r'[a-z0-9_]+', raw_schema))
 
159
  if word in schema_words or singular_word in schema_words: return True
160
  return False
161
 
162
+ def run_query(method, sample_q, custom_q, db_id):
163
+ global quant_engine
164
+
165
+ # 🚨 LAZY LOADING: We load the heavy AI model ONLY when the button is clicked.
166
+ if quant_engine is None:
167
+ print(f"First request detected! Loading AI model from {DEFAULT_QUANT_ARTIFACT}...", flush=True)
168
+ try:
169
+ quant_engine = get_quant_engine(DEFAULT_QUANT_ARTIFACT, use_constrained=False, exec_workers=8, use_cache=True)
170
+ if quant_engine is None:
171
+ return "-- ❌ ENGINE CRASH", pd.DataFrame(columns=["Error"]), "Failed to load model. Did you move the tokenizer files and add config.json to int8_dynamic/?"
172
+ except Exception as e:
173
+ return f"-- ❌ ENGINE CRASH\n-- {str(e)}", pd.DataFrame(columns=["Error Status"]), f"Critical failure loading model: {e}"
174
+
175
+ def _log(error_type: str, *, question: str, db_id_val: str, sql: str = "", error_msg: str = "") -> None:
176
+ _QUERY_LOG.append({"t": time.time(), "db_id": str(db_id_val), "question": str(question), "sql": str(sql), "error_type": str(error_type), "error_msg": str(error_msg)})
177
 
178
+ def _perf_log(payload: dict) -> None:
179
+ _PERF_LOG.append(payload)
180
+ if len(_PERF_LOG) > 1000: del _PERF_LOG[:200]
181
 
182
+ raw_question = sample_q if method == "💡 Pick a Sample" else custom_q
183
+
184
+ if not raw_question or str(raw_question).strip() == "":
185
+ return "-- No input provided", pd.DataFrame(columns=["Warning"]), "⚠️ Please enter a question."
186
+ if not db_id or str(db_id).strip() == "":
187
+ return "-- No database selected", pd.DataFrame(columns=["Warning"]), "⚠️ Please select a database."
188
+
189
+ typo_corrections = [(r'\bshaw\b', 'show'), (r'\bshw\b', 'show'), (r'\bsho\b', 'show'), (r'\blsit\b', 'list'), (r'\blis\b', 'list'), (r'\bfidn\b', 'find'), (r'\bfnd\b', 'find'), (r'\bgte\b', 'get')]
190
+ question = str(raw_question)
191
+ for bad, good in typo_corrections: question = re.sub(bad, good, question, flags=re.IGNORECASE)
192
+ q_lower = question.strip().lower()
193
+
194
+ if len(q_lower.split()) < 2:
195
+ _log("gibberish", question=question, db_id_val=str(db_id), error_msg="gibberish filtered")
196
+ return "-- Input Blocked", pd.DataFrame(columns=["Warning"]), "⚠️ Please enter a clear, meaningful natural language question (more than one word)."
197
 
198
+ if re.search(r'\b(delete|update|insert|drop|alter|truncate)\b', q_lower):
199
+ _log("blocked_dml", question=question, db_id_val=str(db_id), error_msg="DML blocked")
200
+ return "-- ❌ BLOCKED: Data Modification", pd.DataFrame(columns=["Security Alert"]), "🛑 Security Alert: Modifying or deleting data is strictly prohibited."
201
 
202
+ if not is_relevant_to_schema(question, db_id):
203
+ _log("out_of_domain", question=question, db_id_val=str(db_id), error_msg="out of domain")
204
+ return "-- ❌ BLOCKED: Out of Domain", pd.DataFrame(columns=["Domain Alert"]), f"🛑 Relevance Alert: I don't see anything related to your question in the '{db_id}' schema."
205
 
206
+ start_time = time.time()
207
+ t0 = time.perf_counter()
208
+ ui_warnings = ""
209
+
210
+ try:
211
+ try:
212
+ result = quant_engine.ask(question, str(db_id), num_beams=4, max_new_tokens=120, timeout_s=2.0)
213
+ except TypeError:
214
+ result = quant_engine.ask(question, str(db_id))
215
+ except Exception as e:
216
+ _log("backend_crash", question=question, db_id_val=str(db_id), error_msg=str(e))
217
+ return f"-- ❌ BACKEND CRASH\n-- {str(e)}", pd.DataFrame(columns=["Error Status"]), f"❌ CRITICAL BACKEND CRASH:\n{str(e)}"
218
+
219
+ final_sql = str(result.get("sql", ""))
220
+ model_sql = final_sql
221
 
222
+ num_match = re.search(r'\b(?:show|list|top|limit|get|first|last|sample|of)\s+(?:[a-zA-Z_]+\s+)?(\d+)\b', q_lower)
223
+ if not num_match and q_lower.startswith(("show", "list", "get")):
224
+ num_match = re.search(r'\b(\d+)\b', q_lower)
225
+
226
+ if num_match and final_sql:
227
+ limit_val = num_match.group(1)
228
+ final_sql = re.sub(rf"(?i)\s*(?:where|having|and)?\s*count\s*\(\s*\*\s*\)\s*=\s*{limit_val}", "", final_sql)
229
+ final_sql = re.sub(rf"(?i)\s*(?:where|and)\s+[a-zA-Z0-9_.]+\s*=\s*['\"]?{limit_val}['\"]?", "", final_sql)
230
+ final_sql = re.sub(r"(?i)\s*where\s*$", "", final_sql)
231
+ final_sql = re.sub(r"(?i)\s*where\s+(group by|order by|limit)", r" \1", final_sql)
232
+
233
+ agg_kws = ["most", "top", "highest", "lowest", "count", "many", "group", "frequent", "popular"]
234
+ if not any(k in q_lower for k in agg_kws):
235
+ final_sql = re.sub(r"(?i)\s*group by\s+[a-zA-Z0-9_.]+\s*order by\s+count\(\*\)\s*(?:desc|asc)?", "", final_sql)
236
+ final_sql = re.sub(r"(?i)\s*order by\s+count\(\*\)\s*(?:desc|asc)?", "", final_sql)
237
+ final_sql = re.sub(r"(?i),\s*count\(\*\)", "", final_sql)
238
+ final_sql = re.sub(r"(?i)count\(\*\)\s*,", "", final_sql)
239
+
240
+ if "group by" in final_sql.lower() and not re.search(r'(?i)\b(count|sum|avg|max|min)\b\(', final_sql):
241
+ final_sql = re.sub(r"(?i)\s*group by\s+[a-zA-Z0-9_.]+", "", final_sql)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
 
243
+ if "limit" not in final_sql.lower():
244
+ final_sql = f"{final_sql.strip().rstrip(';')} LIMIT {limit_val}"
245
+
246
+ # Execution
247
+ from src.sql_validator import validate_sql_schema
248
+ db_path = get_db_path(str(db_id))
249
+
250
+ try: strict_valid, _ = validate_sql_schema(final_sql, db_path)
251
+ except Exception: strict_valid = False
252
+
253
+ error_msg = None
254
+ rows, cols = [], []
255
+ sqlite_success = False
256
 
257
+ try:
258
+ rows, cols = quant_engine._execute_one(final_sql, db_path, timeout_s=2.0)
259
+ sqlite_success = True
260
+ except Exception as e:
261
+ error_msg = str(e)
262
+ sqlite_success = False
263
+
264
+ if not sqlite_success and model_sql and model_sql != final_sql:
265
+ try:
266
+ alt_rows, alt_cols = quant_engine._execute_one(model_sql, db_path, timeout_s=2.0)
267
+ final_sql = model_sql
268
+ rows, cols = alt_rows, alt_cols
269
+ sqlite_success = True
270
  error_msg = None
271
+ except Exception: pass
272
+
273
+ valid = sqlite_success
274
+
275
+ if error_msg or not valid:
276
+ et = classify_error(final_sql, str(error_msg or ""), timed_out=("interrupted" in str(error_msg or "").lower()))
277
+ _log(et, question=str(question), db_id_val=str(db_id), sql=str(final_sql), error_msg=str(error_msg or "Execution failed"))
278
+
279
+ latency = round(time.time() - start_time, 3)
280
+ t1 = time.perf_counter()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
 
282
+ engine_stats_after = quant_engine.stats() if hasattr(quant_engine, 'stats') else {}
283
+
284
+ perf = {
285
+ "db_id": str(db_id), "use_constrained_decoding": False, "num_beams": 4,
286
+ "latency_total_ms": round((t1 - t0) * 1000.0, 2), "constraint_ok": bool(strict_valid), "has_error": bool(error_msg),
287
+ "exec_cache_hit_rate": float(engine_stats_after.get("exec_cache_hit_rate", 0.0) or 0.0),
288
+ }
289
+ _perf_log(perf)
290
+
291
+ window = _PERF_LOG[-50:]
292
+ avg_ms = sum(float(x.get("latency_total_ms", 0.0) or 0.0) for x in window) / len(window) if window else 0.0
293
+ constraint_rate = sum(1 for x in window if x.get("constraint_ok")) / len(window) if window else 0.0
294
+
295
+ perf_block = (
296
+ "\n\n---\nPerformance (task impact)\n"
297
+ f"- Total latency (ms): {perf['latency_total_ms']}\n"
298
+ f"- Strict Python Validator OK (Task 3): {perf['constraint_ok']}\n"
299
+ f"- Exec cache hit-rate (Task 1/5): {round(perf['exec_cache_hit_rate'], 3)}\n"
300
+ f"- Rolling avg latency last 50 (ms): {round(avg_ms, 2)}\n"
301
+ f"- Rolling constraint rate last 50: {round(constraint_rate, 3)}\n"
302
+ )
303
+
304
+ if error_msg or not valid:
305
+ display_sql = final_sql if final_sql.strip() else "-- ❌ INVALID SQL"
306
+ explanation = f"{ui_warnings}❌ Error Details:\n\n"
307
+ if error_msg: explanation += f"{error_msg}\n\n"
308
 
309
+ error_type = classify_error(final_sql, str(error_msg or ""))
310
+ explanation += f"Error Type: {error_type}\nHint: {get_hint(error_type)}"
311
+ explanation += perf_block
312
+ ops = sql_ops(final_sql)
313
+ for op in ops:
314
+ if op in _OP_STATS: _OP_STATS[op]["fail"] += 1
315
+ return display_sql, pd.DataFrame(columns=["Execution Notice"]), explanation
316
+
317
+ safe_cols = cols if cols else ["Result"]
318
+ explanation = f"{ui_warnings}✅ Query executed successfully\n\nRows returned: {len(rows)}\nExecution Time: {latency} sec\n\n{explain_sql(final_sql)}{perf_block}"
319
+
320
+ ops = sql_ops(final_sql)
321
+ for op in ops:
322
+ if op in _OP_STATS: _OP_STATS[op]["ok"] += 1
323
+ _SUCCESS_LOG.append({"t": time.time(), "db_id": str(db_id), "question": question, "sql": final_sql, "ops": ops})
324
+
325
+ limit_match = re.search(r'LIMIT\s+(\d+)', final_sql, re.IGNORECASE)
326
+ if limit_match and len(rows) < int(limit_match.group(1)):
327
+ explanation += f"\n\nℹ️ Query allowed up to {int(limit_match.group(1))} rows but only {len(rows)} matched."
328
+
329
+ return final_sql, pd.DataFrame(rows, columns=safe_cols), explanation
330
+
331
def task1_benchmark(n_rollouts: int, max_workers: int) -> Iterator[tuple[str, str]]:
    """Stream the Task 1 parallel-reward benchmark as (log_text, plot_html) pairs.

    Launches scripts/benchmark_parallel_reward.py in a subprocess, relays its
    combined stdout/stderr to the UI roughly twice a second, and finishes by
    emitting the generated plot inlined as a base64 <img> when one exists.
    """
    root = str(PROJECT_ROOT)

    # Child environment: make the project importable, and force a headless
    # matplotlib backend with a writable config dir (needed on HF Spaces).
    child_env = os.environ.copy()
    existing_pp = child_env.get("PYTHONPATH")
    child_env["PYTHONPATH"] = root + (os.pathsep + existing_pp if existing_pp else "")
    child_env.setdefault("MPLBACKEND", "Agg")
    child_env.setdefault("MPLCONFIGDIR", "/tmp/mplconfig")
    try:
        os.makedirs(child_env["MPLCONFIGDIR"], exist_ok=True)
    except Exception:
        pass  # best effort — the benchmark may still run without it

    cmd = [
        sys.executable, "-u", "scripts/benchmark_parallel_reward.py",
        "--n", str(int(n_rollouts)),
        "--max-workers", str(int(max_workers)),
        "--skip-profile",
    ]
    proc = subprocess.Popen(
        cmd, cwd=root, env=child_env,
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
        text=True, bufsize=1,
    )

    captured: list[str] = []
    last_emit = time.perf_counter()
    yield "Running Task 1 benchmark...\n", "<i>Running...</i>"

    assert proc.stdout is not None
    for raw_line in proc.stdout:
        captured.append(raw_line)
        # Throttle UI refreshes to ~2/sec, showing only the last 200 lines.
        now = time.perf_counter()
        if now - last_emit >= 0.5:
            last_emit = now
            yield "".join(captured[-200:]).strip(), "<i>Running...</i>"

    proc.wait()
    out = "".join(captured).strip()

    plot_path = str(PROJECT_ROOT / "results" / "task1_plot.png")
    if os.path.exists(plot_path):
        try:
            b64 = base64.b64encode(Path(plot_path).read_bytes()).decode("ascii")
            yield out, f"<img src='data:image/png;base64,{b64}' style='max-width: 100%; border: 1px solid #e2e8f0; border-radius: 8px;' />"
        except Exception:
            # Could not inline the image — fall back to showing its path.
            yield out, f"<pre>{plot_path}</pre>"
        return

    yield out, "<i>No plot generated</i>"
def task2_dashboard_structured():
    """Build the Task 2 error dashboard.

    Returns (error-count DataFrame, recent-errors DataFrame, dropdown update).
    Counts cover the last 1000 logged queries; the recent table shows the
    last 100 raw entries.
    """
    if not _QUERY_LOG:
        # Nothing logged yet — empty frames and a cleared dropdown.
        return (
            pd.DataFrame(columns=["error_type", "count", "hint"]),
            pd.DataFrame(columns=["time", "db_id", "error_type", "question", "error_msg"]),
            gr.update(choices=[], value=None),
        )

    # Tally error types over the most recent 1000 records.
    tally: dict = {}
    for rec in _QUERY_LOG[-1000:]:
        label = rec.get("error_type") or "other"
        tally[label] = tally.get(label, 0) + 1

    # Most frequent first; ties broken alphabetically.
    ordered = sorted(tally.items(), key=lambda kv: (-kv[1], kv[0]))
    rows = [{"error_type": label, "count": int(n), "hint": get_hint(label)} for label, n in ordered]

    def _fmt_ts(raw):
        # Render the epoch timestamp as HH:MM:SS; blank when absent/invalid.
        try:
            return time.strftime("%H:%M:%S", time.localtime(float(raw))) if raw else ""
        except Exception:
            return ""

    recent_rows = [
        {
            "time": _fmt_ts(rec.get("t")),
            "db_id": rec.get("db_id", ""),
            "error_type": rec.get("error_type", ""),
            "question": rec.get("question", ""),
            "error_msg": rec.get("error_msg", ""),
        }
        for rec in _QUERY_LOG[-100:]
    ]

    choices = [str(r["error_type"]) for r in rows]
    return (
        pd.DataFrame(rows),
        pd.DataFrame(recent_rows),
        gr.update(choices=choices, value=choices[0] if choices else None),
    )
def task2_error_examples(error_type: str) -> str:
    """Return up to three recent logged examples of *error_type*, plus its hint."""
    if not error_type:
        return ""

    hint = get_hint(error_type)
    wanted = str(error_type)

    # Walk the log newest-first and keep the first three matching records.
    matches = []
    for rec in reversed(_QUERY_LOG):
        if (rec.get("error_type") or "") == wanted:
            matches.append(rec)
            if len(matches) == 3:
                break

    if not matches:
        return f"Error type: {error_type}\nHint: {hint}\n\nNo examples yet."

    parts = [f"Error type: {error_type}", f"Hint: {hint}", ""]
    for idx, rec in enumerate(matches, 1):
        parts.append(f"Example {idx}")
        parts.append(f"DB: {rec.get('db_id','')}")
        parts.append(f"Q: {rec.get('question','')}")
        parts.append(f"SQL: {rec.get('sql','')}")
        parts.append(f"Msg: {rec.get('error_msg','')}")
        parts.append("")
    return "\n".join(parts).strip()
+ def _plot_op_stats_html() -> str:
405
+ try:
406
+ import matplotlib.pyplot as plt
407
+ labels = list(_OP_STATS.keys())
408
+ oks = [int(_OP_STATS[k]["ok"]) for k in labels]
409
+ fails = [int(_OP_STATS[k]["fail"]) for k in labels]
410
+
411
+ fig, ax = plt.subplots(figsize=(9, 3.5))
412
+ x = list(range(len(labels)))
413
+ ax.bar(x, oks, label="ok", color="#16a34a")
414
+ ax.bar(x, fails, bottom=oks, label="fail", color="#dc2626")
415
+ ax.set_xticks(x)
416
+ ax.set_xticklabels(labels, rotation=30, ha="right")
417
+ ax.set_title("Success/Failure by SQL operation")
418
+ ax.legend()
419
+ fig.tight_layout()
420
+
421
+ buf = io.BytesIO()
422
+ fig.savefig(buf, format="png", dpi=160)
423
+ plt.close(fig)
424
+ b64 = base64.b64encode(buf.getvalue()).decode("ascii")
425
+ return f"<img src='data:image/png;base64,{b64}' style='max-width: 100%; border: 1px solid #e2e8f0; border-radius: 8px;' />"
426
+ except Exception as e: return f"<pre>Plot error: {e}</pre>"
427
+
428
def task2_ops_table():
    """Summarize per-operation success/failure counters.

    Returns (DataFrame with ok/fail/total/success_rate per op, plot HTML).
    """
    def _row(op, counters):
        ok = int(counters.get("ok", 0))
        fail = int(counters.get("fail", 0))
        total = ok + fail
        return {
            "op": op,
            "ok": ok,
            "fail": fail,
            "total": total,
            # Guard against division by zero for ops never attempted yet.
            "success_rate": ok / total if total else 0.0,
        }

    summary = [_row(op, counters) for op, counters in _OP_STATS.items()]
    return pd.DataFrame(summary), _plot_op_stats_html()
def toggle_input_method(method, current_sample):
    """Switch the input widgets between sample-picker and free-text modes.

    Returns updates for (sample_dropdown, type_own_warning, custom_question, db_id).
    """
    if method == "💡 Pick a Sample":
        # Sample mode: lock the DB dropdown to the sample's own database.
        matched_db = "chinook_1"
        for question, db in SAMPLES:
            if question == current_sample:
                matched_db = db
                break
        return (
            gr.update(visible=True),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(value=matched_db, interactive=False),
        )
    # Free-text mode: show the question box and let the user pick any DB.
    return (
        gr.update(visible=False),
        gr.update(visible=True),
        gr.update(visible=True),
        gr.update(interactive=True),
    )
def load_sample(selected_question):
    """Resolve the database that belongs to the chosen sample question."""
    if not selected_question:
        return gr.update()  # no selection — leave the dropdown untouched
    target_db = "chinook_1"  # fallback when the question isn't in SAMPLES
    for question, db in SAMPLES:
        if question == selected_question:
            target_db = db
            break
    return gr.update(value=target_db)
def clear_inputs():
    """Reset the inference tab back to its initial sample-picker state."""
    return (
        gr.update(value="💡 Pick a Sample"),                  # input-method radio
        gr.update(value=SAMPLE_QUESTIONS[0], visible=True),   # sample dropdown
        gr.update(visible=False),                             # "type my own" warning
        gr.update(value="", visible=False),                   # custom question box
        gr.update(value="chinook_1", interactive=False),      # database dropdown
        "",                                                   # generated SQL
        pd.DataFrame(),                                       # result table
        "",                                                   # explanation
    )
def update_schema(db_id):
    """Render the database schema as a scrollable HTML panel.

    Lines of the form "table(col, ...)" are emphasized (table name bold/upper,
    columns muted/lower); anything else is shown as plain secondary text.
    Returns "" when no DB is selected or no schema encoder is available.
    """
    if not db_id or schema_encoder is None:
        return ""
    try:
        raw_schema = schema_encoder.structured_schema(db_id)
        table_pat = re.compile(r'^([a-zA-Z0-9_]+)\s*\((.*)\)')
        parts = ["<div style='max-height: 250px; overflow-y: auto; background: #f8fafc; padding: 12px; border-radius: 8px; border: 1px solid #e2e8f0; font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace; font-size: 0.9em; line-height: 1.6;'>"]
        for raw_line in raw_schema.strip().split('\n'):
            entry = raw_line.strip()
            if not entry:
                continue
            m = table_pat.search(entry)
            if m:
                parts.append(f"<div style='margin-bottom: 8px;'><strong style='color: #0f172a; font-size: 1.05em; font-weight: 800;'>{m.group(1).upper()}</strong> <span style='color: #64748b;'>( {m.group(2).lower()} )</span></div>")
            else:
                parts.append(f"<div style='color: #475569;'>{entry}</div>")
        parts.append("</div>")
        return "".join(parts)
    except Exception as e:
        return f"<div style='color: red;'>Error loading schema: {str(e)}</div>"
# =========================
# UI LAYOUT
# =========================
with gr.Blocks(title="Text-to-SQL RLHF") as demo:
    # Page banner.
    gr.HTML("""
    <div style="text-align: center; background-color: #e0e7ff; padding: 20px; border-radius: 10px; margin-bottom: 20px; border: 1px solid #c7d2fe;">
    <h1 style="color: #3730a3; margin-top: 0; margin-bottom: 10px; font-size: 2.2em;"> Text-to-SQL using RLHF + Execution Reward</h1>
    <p style="color: #4f46e5; font-size: 1.1em; margin: 0;">Convert Natural Language to SQL, strictly validated and safely executed on local SQLite databases.</p>
    </div>
    """)

    # Databases available for querying (Spider-style SQLite DBs).
    DBS = sorted([
        "flight_1", "student_assessment", "store_1", "bike_1", "book_2",
        "chinook_1", "academic", "aircraft", "car_1", "cinema", "club_1",
        "csu_1", "college_1", "college_2", "company_1", "company_employee",
        "customer_complaints", "department_store", "employee_hire_evaluation",
        "museum_visit", "products_for_hire", "restaurant_1", "school_finance",
        "shop_membership", "small_bank_1", "student_1", "tvshow", "voter_1",
        "world_1",
    ])

    with gr.Tabs():
        # ---- Tab 1: interactive inference ----
        with gr.Tab("Inference"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### 1. Configuration & Input")
                    input_method = gr.Radio(
                        choices=["💡 Pick a Sample", "✍️ Type my own"],
                        value="💡 Pick a Sample",
                        label="How do you want to ask?",
                    )
                    sample_dropdown = gr.Dropdown(
                        choices=SAMPLE_QUESTIONS,
                        value=SAMPLE_QUESTIONS[0],
                        label="Select a Sample Question",
                        info="The database will be selected automatically.",
                        visible=True,
                    )
                    type_own_warning = gr.Markdown(
                        "**⚠️ Please select a Database first, then type your custom question below:**",
                        visible=False,
                    )
                    gr.Markdown("---")
                    db_id = gr.Dropdown(choices=DBS, value="chinook_1", label="Select Database", interactive=False)
                    custom_question = gr.Textbox(
                        label="Ask your Custom Question",
                        placeholder="Type your own question here...",
                        lines=3,
                        visible=False,
                    )

                    gr.Markdown("#### 📋 Database Structure")
                    gr.HTML("<p style='font-size: 0.85em; color: #64748b; margin-top: -10px; margin-bottom: 5px;'>Use these exact names! Table names are <strong>Dark</strong>, Column names are <span style='color: #94a3b8;'>Light</span>.</p>")
                    schema_display = gr.HTML(value=update_schema("chinook_1"))

                    with gr.Row():
                        clear_btn = gr.Button("🗑️ Clear", variant="secondary")
                        run_btn = gr.Button(" Generate & Run SQL", variant="primary")

                with gr.Column(scale=2):
                    gr.Markdown("### 2. Execution Results")
                    final_sql = gr.Code(language="sql", label="Final Executed SQL")
                    result_table = gr.Dataframe(label="Query Result Table", interactive=False, wrap=True)
                    explanation = gr.Textbox(label="AI Explanation + Execution Details", lines=8)

        # ---- Tab 2: diagnostics / telemetry ----
        with gr.Tab("Diagnostics"):
            gr.Markdown("## Diagnostics & Telemetry")

            with gr.Accordion("Task 1: Parallel Reward Benchmark", open=False):
                gr.Markdown("*(Simulates the heavy RLHF training workload by running hundreds of complex SQL queries concurrently to test SQLite multi-threading performance.)*")
                t1_n = gr.Number(value=20, precision=0, label="Rollouts (n)")
                t1_workers = gr.Number(value=10, precision=0, label="Max workers")
                t1_run = gr.Button("Run Task 1 benchmark")
                t1_out = gr.Textbox(label="Output", lines=12)
                t1_plot = gr.HTML(label="Plot (if generated)")
                t1_run.click(fn=task1_benchmark, inputs=[t1_n, t1_workers], outputs=[t1_out, t1_plot])

            with gr.Accordion("Task 2: Error Dashboard", open=True):
                gr.Markdown("*(Live telemetry tracking the most common SQL failures. Populates automatically when queries fail in the Inference tab.)*")
                t2_refresh = gr.Button("Refresh dashboard")
                t2_counts = gr.Dataframe(label="Error counts", interactive=False, wrap=True)
                t2_recent = gr.Dataframe(label="Recent errors", interactive=False, wrap=True)
                t2_type = gr.Dropdown(choices=[], value=None, label="Select error type")
                t2_examples = gr.Textbox(label="Examples + hint", lines=10)

                t2_refresh.click(fn=task2_dashboard_structured, inputs=[], outputs=[t2_counts, t2_recent, t2_type])
                t2_type.change(fn=task2_error_examples, inputs=[t2_type], outputs=[t2_examples])

            with gr.Accordion("Task 2: Clause Telemetry", open=False):
                gr.Markdown("*(Analyzes which specific SQL clauses—SELECT, WHERE, JOIN, etc.—are most prone to errors during natural language generation.)*")
                t2_ops_refresh = gr.Button("Refresh SQL-op stats")
                t2_ops_tbl = gr.Dataframe(label="Success/failure by op", interactive=False, wrap=True)
                t2_ops_plot = gr.HTML(label="Op plot")
                t2_ops_refresh.click(fn=task2_ops_table, inputs=[], outputs=[t2_ops_tbl, t2_ops_plot])

    # EVENT BINDING: The .then() forces the diagnostic tab to update live in the background!
    input_method.change(fn=toggle_input_method, inputs=[input_method, sample_dropdown], outputs=[sample_dropdown, type_own_warning, custom_question, db_id])
    sample_dropdown.change(fn=load_sample, inputs=[sample_dropdown], outputs=[db_id])
    db_id.change(fn=update_schema, inputs=[db_id], outputs=[schema_display])

    run_btn.click(
        fn=run_query,
        inputs=[input_method, sample_dropdown, custom_question, db_id],
        outputs=[final_sql, result_table, explanation]
    ).then(
        fn=task2_dashboard_structured, inputs=[], outputs=[t2_counts, t2_recent, t2_type]
    ).then(
        fn=task2_ops_table, inputs=[], outputs=[t2_ops_tbl, t2_ops_plot]
    )

    clear_btn.click(fn=clear_inputs, inputs=[], outputs=[input_method, sample_dropdown, type_own_warning, custom_question, db_id, final_sql, result_table, explanation])
 
551
if __name__ == "__main__":
    # Bind address/port are overridable via Gradio's standard environment
    # variables; defaults match Hugging Face Spaces (0.0.0.0:7860).
    server_name = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0")
    try:
        server_port = int(os.environ.get("GRADIO_SERVER_PORT", "7860"))
    except ValueError:
        server_port = 7860  # ignore a malformed override rather than crash

    print(f"Starting Gradio UI on {server_name}:{server_port}...", flush=True)
    try:
        # ssr_mode=False — presumably to avoid SSR startup issues on Spaces;
        # TODO(review): confirm this is still needed for the pinned Gradio.
        demo.launch(server_name=server_name, server_port=server_port, ssr_mode=False)
    except TypeError:
        # Older Gradio versions don't accept ssr_mode — retry without it.
        demo.launch(server_name=server_name, server_port=server_port)
 
 
 
 
 
 
 
 
 
int8_dynamic/{merges.txt → tokenizer/merges.txt} RENAMED
File without changes
int8_dynamic/{special_tokens_map.json → tokenizer/special_tokens_map.json} RENAMED
File without changes
int8_dynamic/{tokenizer.json → tokenizer/tokenizer.json} RENAMED
File without changes
int8_dynamic/{tokenizer_config.json → tokenizer/tokenizer_config.json} RENAMED
File without changes
int8_dynamic/{vocab.json → tokenizer/vocab.json} RENAMED
File without changes
requirements.txt CHANGED
@@ -1,5 +1,4 @@
1
  gradio==5.8.0
2
- streamlit
3
  pandas
4
  sqlparse
5
  transformers
@@ -8,5 +7,4 @@ peft
8
  trl
9
  sentencepiece
10
  matplotlib
11
- huggingface_hub
12
-
 
1
  gradio==5.8.0
 
2
  pandas
3
  sqlparse
4
  transformers
 
7
  trl
8
  sentencepiece
9
  matplotlib
10
+ huggingface_hub