Spaces:

nilotpaldhar2004
/

nilotpal-sql-bot

Sleeping

App Files Files Community

nilotpaldhar2004 committed 19 days ago

Commit

e1f4b42

unverified ·

1 Parent(s): 5170b6a

Update app.py

Browse files

Files changed (1) hide show

app.py +142 -375

app.py CHANGED Viewed

@@ -1,9 +1,7 @@
 """
-Nilotpal SQL Bot — Telegram Bot + Web App
-FastAPI backend serving:
-  - Telegram Bot (standard messages + inline buttons)
-  - Telegram Web App (full HTML/CSS/JS UI via /webapp)
-Model: cssupport/t5-small-awesome-text-to-sql (CPU-friendly)
 """
 import os
@@ -12,411 +10,180 @@ import io
 import json
 import sqlite3
 import tempfile
-import hashlib
 import pandas as pd
-from fastapi import FastAPI, File, UploadFile, HTTPException, Request
 from fastapi.staticfiles import StaticFiles
-from fastapi.responses import FileResponse, JSONResponse, HTMLResponse
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-import torch
-import httpx
-# ── Config ────────────────────────────────────────────────────────────────────
-MODEL_NAME    = "cssupport/t5-small-awesome-text-to-sql"
-MAX_NEW_TOKENS = 256
-DEVICE         = "cuda" if torch.cuda.is_available() else "cpu"
-BOT_TOKEN      = os.getenv("BOT_TOKEN", "")          # set in HF Space secrets
-WEBHOOK_SECRET = os.getenv("WEBHOOK_SECRET", "nilotpalsqlbot")
-SPACE_URL      = os.getenv("SPACE_URL", "")           # e.g. https://nilotpaldhar2004-nilotpal-sql-bot.hf.space
-TELEGRAM_API   = f"https://api.telegram.org/bot{BOT_TOKEN}"
-# ── Load model ────────────────────────────────────────────────────────────────
-print(f"[INFO] Loading {MODEL_NAME} on {DEVICE}...")
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model     = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME).to(DEVICE)
-model.eval()
-print("[INFO] Model ready.")
-# ── In-memory stores ──────────────────────────────────────────────────────────
-_db_store:      dict[str, bytes] = {}   # session_id → sqlite bytes
-_schema_store:  dict[str, str]   = {}   # session_id → schema string
-_col_store:     dict[str, list]  = {}   # session_id → column list
-_table_store:   dict[str, str]   = {}   # session_id → table name
-_user_session:  dict[int, str]   = {}   # telegram user_id → session_id
-app = FastAPI(title="Nilotpal SQL Bot", version="1.0.0")
 app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
-app.mount("/static", StaticFiles(directory="static"), name="static")
-# ── Helpers ───────────────────────────────────────────────────────────────────
-def csv_to_sqlite(df: pd.DataFrame, table_name: str) -> bytes:
-    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
-        tmp_path = tmp.name
-    conn = sqlite3.connect(tmp_path)
-    df.to_sql(table_name, conn, if_exists="replace", index=False)
-    conn.close()
-    with open(tmp_path, "rb") as f:
-        db_bytes = f.read()
-    os.unlink(tmp_path)
-    return db_bytes
-def get_schema(db_bytes: bytes) -> str:
-    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
-        tmp.write(db_bytes)
-        tmp_path = tmp.name
-    conn = sqlite3.connect(tmp_path)
-    cur  = conn.cursor()
-    cur.execute("SELECT sql FROM sqlite_master WHERE type='table'")
-    rows = cur.fetchall()
-    conn.close()
-    os.unlink(tmp_path)
-    return "\n".join(r[0] for r in rows if r[0])
-def generate_sql(question: str, schema: str, table_name: str) -> str:
-    quoted = f'"{table_name}"'
     q = question.lower().strip()
-    # ── Rule-based shortcuts (fast + accurate) ────────────────────────────
-    if re.search(r'show.*(first|top).*\d+|first.*\d+.*row|top.*\d+', q):
-        n = re.search(r'\d+', q)
-        return f'SELECT * FROM {quoted} LIMIT {n.group() if n else 10}'
-    if re.search(r'(show|display|get|give).*(first|all).*row|first.*row|show.*row', q):
-        return f'SELECT * FROM {quoted} LIMIT 10'
-    if re.search(r'count.*(total|all|record|row)|total.*(record|row|count)|how many', q):
-        return f'SELECT COUNT(*) FROM {quoted}'
-    if re.search(r'show.*(all|every).*row|all.*row|select all', q):
-        return f'SELECT * FROM {quoted} LIMIT 50'
-    if re.search(r'average|avg', q):
-        col_match = re.findall(r'"(\w+)"', schema)
-        # find numeric-looking column
-        num_col = next((c for c in col_match if re.search(r'num|price|val|amt|count|qty|sal|rev|cost|pm|aqi|no|co|so|o3', c, re.I)), col_match[1] if len(col_match) > 1 else col_match[0])
-        return f'SELECT AVG("{num_col}") FROM {quoted}'
-    if re.search(r'unique|distinct', q):
-        col_match = re.findall(r'"(\w+)"', schema)
-        return f'SELECT COUNT(DISTINCT "{col_match[0]}") FROM {quoted}'
-    if re.search(r'group by', q):
-        col_match = re.findall(r'"(\w+)"', schema)
-        return f'SELECT "{col_match[0]}", COUNT(*) FROM {quoted} GROUP BY "{col_match[0]}"'
-    if re.search(r'max|maximum|highest', q):
-        col_match = re.findall(r'"(\w+)"', schema)
-        num_col = col_match[1] if len(col_match) > 1 else col_match[0]
-        return f'SELECT MAX("{num_col}") FROM {quoted}'
-    if re.search(r'min|minimum|lowest', q):
-        col_match = re.findall(r'"(\w+)"', schema)
-        num_col = col_match[1] if len(col_match) > 1 else col_match[0]
-        return f'SELECT MIN("{num_col}") FROM {quoted}'
-    # ── T5 model fallback ─────────────────────────────────────────────────
-    col_match = re.findall(r'"(\w+)"', schema)
-    col_hint  = ", ".join(col_match)
-    prompt = f"tables:\n{schema}\ncolumns: {col_hint}\nquery for: {question}"
-    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(DEVICE)
-    with torch.no_grad():
-        outputs = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS, num_beams=4, early_stopping=True)
-    sql = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
-    sql = re.sub(r'\bFROM\s+("?\w+"?)', f'FROM {quoted}', sql, flags=re.IGNORECASE)
-    sql = re.sub(r'\bJOIN\s+("?\w+"?)', f'JOIN {quoted}', sql, flags=re.IGNORECASE)
-    sql = re.sub(
-        r'(FROM\s+"?\w+"?)\s+(?!WHERE|LIMIT|ORDER|GROUP|HAVING|JOIN|LEFT|RIGHT|INNER|ON|AND|OR|\d)(\w+)',
-        r'\1', sql, flags=re.IGNORECASE
     )
-    if not re.search(r'\bSELECT\b', sql, re.IGNORECASE):
-        sql = f'SELECT * FROM {quoted} LIMIT 10'
     return sql
-def execute_sql(sql: str, db_bytes: bytes) -> list[dict]:
-    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
-        tmp.write(db_bytes)
-        tmp_path = tmp.name
-    conn = sqlite3.connect(tmp_path)
     conn.row_factory = sqlite3.Row
     try:
-        cur  = conn.execute(sql)
-        rows = [dict(r) for r in cur.fetchall()]
     except Exception as e:
-        conn.close(); os.unlink(tmp_path)
-        raise HTTPException(status_code=400, detail=f"SQL error: {e}")
-    conn.close(); os.unlink(tmp_path)
-    return rows
-def format_table(rows: list[dict]) -> str:
-    """Format query results as plain text for Telegram."""
-    if not rows:
-        return "No rows returned."
-    cols = list(rows[0].keys())
-    # Simple text table
-    lines = [" | ".join(cols)]
-    lines.append("-" * len(lines[0]))
-    for r in rows[:20]:
-        lines.append(" | ".join(str(r[c]) if r[c] is not None else "null" for c in cols))
-    if len(rows) > 20:
-        lines.append(f"... ({len(rows)} rows total, showing 20)")
-    return "\n".join(lines)
-# ── Telegram API helpers ───────────────────────────────────────────────────────
-async def tg(method: str, **kwargs):
-    try:
-        async with httpx.AsyncClient(timeout=30) as client:
-            r = await client.post(f"{TELEGRAM_API}/{method}", json=kwargs)
-        return r.json()
-    except Exception as e:
-        print(f"[ERROR] Telegram API call failed ({method}): {e}")
-        return {"ok": False, "error": str(e)}
-async def send_msg(chat_id: int, text: str, reply_markup=None, parse_mode="Markdown"):
-    payload = dict(chat_id=chat_id, text=text, parse_mode=parse_mode)
-    if reply_markup:
-        payload["reply_markup"] = reply_markup
-    return await tg("sendMessage", **payload)
-async def send_doc_request(chat_id: int):
-    """Ask user to send a CSV file."""
-    await send_msg(
-        chat_id,
-        "📂 *Send me a CSV file* to get started!\n\nI'll convert your questions to SQL and query it instantly.",
-        reply_markup={
-            "inline_keyboard": [[
-                {"text": "🌐 Open Web App", "web_app": {"url": f"{SPACE_URL}/webapp"}}
-            ]]
-        }
-    )
-# ── REST: CSV Upload (used by both bot and webapp) ────────────────────────────
 @app.post("/upload")
-async def upload_csv(file: UploadFile = File(...), user_id: int = 0):
-    if not file.filename.endswith(".csv"):
-        raise HTTPException(status_code=400, detail="Only CSV files accepted.")
     contents = await file.read()
-    try:
-        df = pd.read_csv(io.BytesIO(contents))
-    except Exception as e:
-        raise HTTPException(status_code=400, detail=f"CSV parse error: {e}")
-    session_id = hashlib.md5(contents[:1024]).hexdigest()[:12]
-    table_name = re.sub(r"[^a-zA-Z0-9_]", "_", os.path.splitext(file.filename)[0])[:32] or "data"
-    if table_name[0].isdigit():
-        table_name = "t_" + table_name
-    db_bytes = csv_to_sqlite(df, table_name)
-    schema   = get_schema(db_bytes)
-    columns  = list(df.columns)
-    _db_store[session_id]     = db_bytes
     _schema_store[session_id] = schema
-    _col_store[session_id]    = columns
-    _table_store[session_id]  = table_name
-    if user_id:
-        _user_session[user_id] = session_id
-    return JSONResponse({
         "session_id": session_id,
-        "table_name": table_name,
-        "columns":    columns,
-        "row_count":  len(df),
-        "schema":     schema,
-        "preview":    df.head(5).to_dict(orient="records"),
-    })
-# ── REST: Query (used by both bot and webapp) ─────────────────────────────────
-class QueryRequest(BaseModel):
-    session_id: str
-    question:   str
 @app.post("/query")
 async def query(req: QueryRequest):
     if req.session_id not in _db_store:
-        raise HTTPException(status_code=404, detail="Session not found. Upload CSV first.")
-    schema     = _schema_store[req.session_id]
-    table_name = _table_store[req.session_id]
-    sql        = generate_sql(req.question, schema, table_name)
-    results    = execute_sql(sql, _db_store[req.session_id])
-    return JSONResponse({"sql": sql, "results": results})
-# ── Web App route ──────────────────────────────────────────────────────────────
-@app.get("/webapp", response_class=HTMLResponse)
-async def webapp():
-    return FileResponse("static/webapp.html")
 @app.get("/")
-async def root():
-    return FileResponse("static/webapp.html")
-# ── Health ────────────────────────────────────────────────────────────────────
 @app.get("/health")
 def health():
-    return {"status": "ok", "model": MODEL_NAME, "device": DEVICE, "bot": bool(BOT_TOKEN)}
-# ── Telegram Webhook ──────────────────────────────────────────────────────────
-@app.post(f"/webhook/{WEBHOOK_SECRET}")
-async def webhook(request: Request):
-    update = await request.json()
-    # Handle document (CSV upload via bot)
-    msg = update.get("message", {})
-    if not msg:
-        msg = update.get("edited_message", {})
-    chat_id = msg.get("chat", {}).get("id")
-    user_id = msg.get("from", {}).get("id", 0)
-    text    = msg.get("text", "").strip()
-    # ── /start ──
-    if text in ["/start", "/help"]:
-        await send_msg(
-            chat_id,
-            "👋 *Nilotpal SQL Bot*\n\n"
-            "I convert plain English questions into SQL and query your CSV data.\n\n"
-            "📌 *How to use:*\n"
-            "1️⃣ Send a CSV file\n"
-            "2️⃣ Ask me anything about your data\n\n"
-            "Or use the Web App for a richer experience ↓",
-            reply_markup={
-                "inline_keyboard": [[
-                    {"text": "🌐 Open Web App", "web_app": {"url": f"{SPACE_URL}/webapp"}}
-                ]]
-            }
-        )
-        return {"ok": True}
-    # ── CSV Document ──
-    doc = msg.get("document")
-    if doc and doc.get("file_name", "").endswith(".csv"):
-        await send_msg(chat_id, "⏳ Processing your CSV...")
-        # Download file from Telegram
-        file_info = await tg("getFile", file_id=doc["file_id"])
-        file_path = file_info["result"]["file_path"]
-        async with httpx.AsyncClient() as client:
-            file_resp = await client.get(f"https://api.telegram.org/file/bot{BOT_TOKEN}/{file_path}")
-        contents  = file_resp.content
-        try:
-            df = pd.read_csv(io.BytesIO(contents))
-        except Exception as e:
-            await send_msg(chat_id, f"❌ Could not parse CSV: {e}")
-            return {"ok": True}
-        fname      = doc["file_name"]
-        session_id = hashlib.md5(contents[:1024]).hexdigest()[:12]
-        table_name = re.sub(r"[^a-zA-Z0-9_]", "_", os.path.splitext(fname)[0])[:32] or "data"
-        if table_name[0].isdigit():
-            table_name = "t_" + table_name
-        db_bytes = csv_to_sqlite(df, table_name)
-        schema   = get_schema(db_bytes)
-        columns  = list(df.columns)
-        _db_store[session_id]     = db_bytes
-        _schema_store[session_id] = schema
-        _col_store[session_id]    = columns
-        _table_store[session_id]  = table_name
-        _user_session[user_id]    = session_id
-        col_preview = ", ".join(columns[:8]) + ("..." if len(columns) > 8 else "")
-        await send_msg(
-            chat_id,
-            f"✅ *Loaded:* `{fname}`\n"
-            f"📊 *{len(df):,} rows · {len(columns)} columns*\n"
-            f"📋 *Columns:* `{col_preview}`\n\n"
-            f"Now ask me anything about your data!\n"
-            f'Example: _"Show first 10 rows"_',
-            reply_markup={
-                "inline_keyboard": [
-                    [{"text": "📊 Show first 10 rows", "callback_data": f"q:{session_id}:Show the first 10 rows"}],
-                    [{"text": "🔢 Count total records",  "callback_data": f"q:{session_id}:Count total number of records"}],
-                    [{"text": "🌐 Open Web App",         "web_app": {"url": f"{SPACE_URL}/webapp"}}],
-                ]
-            }
-        )
-        return {"ok": True}
-    # ── Text question ──
-    if text and not text.startswith("/"):
-        sid = _user_session.get(user_id)
-        if not sid or sid not in _db_store:
-            await send_msg(
-                chat_id,
-                "📂 Please send a CSV file first so I can query it for you.",
-                reply_markup={
-                    "inline_keyboard": [[
-                        {"text": "🌐 Open Web App", "web_app": {"url": f"{SPACE_URL}/webapp"}}
-                    ]]
-                }
-            )
-            return {"ok": True}
-        await tg("sendChatAction", chat_id=chat_id, action="typing")
-        try:
-            schema     = _schema_store[sid]
-            table_name = _table_store[sid]
-            sql        = generate_sql(text, schema, table_name)
-            results    = execute_sql(sql, _db_store[sid])
-            table_txt  = format_table(results)
-            reply = f"🔍 *Query*\n```sql\n{sql}\n```\n\n📋 *Results*\n```\n{table_txt}\n```"
-        except Exception as e:
-            reply = f"⚠️ Error: {e}"
-        await send_msg(chat_id, reply, parse_mode="Markdown")
-        return {"ok": True}
-    # ── Callback query (button press) ──
-    cb = update.get("callback_query", {})
-    if cb:
-        cb_id   = cb["id"]
-        cb_data = cb.get("data", "")
-        cb_chat = cb["message"]["chat"]["id"]
-        cb_user = cb["from"]["id"]
-        if cb_data.startswith("q:"):
-            _, sid, question = cb_data.split(":", 2)
-            if sid not in _db_store:
-                await tg("answerCallbackQuery", callback_query_id=cb_id, text="Session expired. Re-upload CSV.")
-                return {"ok": True}
-            await tg("answerCallbackQuery", callback_query_id=cb_id, text="Running query...")
-            await tg("sendChatAction", chat_id=cb_chat, action="typing")
-            try:
-                schema     = _schema_store[sid]
-                table_name = _table_store[sid]
-                sql        = generate_sql(question, schema, table_name)
-                results    = execute_sql(sql, _db_store[sid])
-                table_txt  = format_table(results)
-                reply = f"🔍 *Query*\n```sql\n{sql}\n```\n\n📋 *Results*\n```\n{table_txt}\n```"
-            except Exception as e:
-                reply = f"⚠️ Error: {e}"
-            await send_msg(cb_chat, reply, parse_mode="Markdown")
-    return {"ok": True}
-# ── Startup: register webhook ─────────────────────────────────────────────────
-@app.on_event("startup")
-async def set_webhook():
-    if not BOT_TOKEN or not SPACE_URL:
-        print("[WARN] BOT_TOKEN or SPACE_URL not set — webhook skipped.")
-        return
-    url = f"{SPACE_URL}/webhook/{WEBHOOK_SECRET}"
-    for attempt in range(1, 4):
-        try:
-            async with httpx.AsyncClient(timeout=15) as client:
-                r = await client.post(f"{TELEGRAM_API}/setWebhook", json={"url": url})
-            print(f"[INFO] Webhook set: {r.json()}")
-            return
-        except Exception as e:
-            print(f"[WARN] Webhook attempt {attempt}/3 failed: {e}")
-            if attempt < 3:
-                import asyncio; await asyncio.sleep(3)
-    print("[WARN] Webhook registration failed — bot still runs, set webhook manually.")

 """
+QueryMind — CSV-to-SQL Engine (v3.0.0 - Gemini Powered)
+Engine: Gemini 1.5 Flash + Heuristic Rules
+Hardware: HuggingFace Free Tier (Ultra-Light)
 """
 import os
 import json
 import sqlite3
 import tempfile
 import pandas as pd
+import urllib.request
+from fastapi import FastAPI, File, UploadFile, HTTPException
 from fastapi.staticfiles import StaticFiles
+from fastapi.responses import FileResponse
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
+# ── Configuration ──────────────────────────────────────────────────────────────
# Gemini credential, read from the environment once at import time. An empty
# value makes _call_gemini refuse to run.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")

# In-process session state, both keyed by the session id handed out by /upload.
_db_store = dict()      # session id -> {"bytes": ..., "table": ..., "cols": ...}
_schema_store = dict()  # session id -> CREATE TABLE statement of the upload

app = FastAPI(title="QueryMind Gemini", version="3.0.0")
# CORS is left fully open: any origin, method and header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
class QueryRequest(BaseModel):
    # JSON body accepted by POST /query.

    # Identifier returned by /upload; used to look up the stored database.
    session_id: str
    # Free-text question to be translated into SQL.
    question: str
+# ── Heuristic Logic (Fast Layer) ──────────────────────────────────────────────
def _find_col(question: str, columns: list) -> "str | None":
    """Return the column whose name occurs in *question*, or ``None``.

    Matching is a case-insensitive substring test. Longer names are tried
    first so that ``AQI_Bucket`` wins over ``AQI`` when both would match.

    Args:
        question: Free-text question from the user.
        columns: Column labels of the uploaded table. Labels need not be
            strings; they are compared through ``str(label)``.

    Returns:
        The matching entry of *columns*, unchanged, or ``None`` when no
        label occurs in the question.
    """
    haystack = question.lower()
    # Stable sort: equally long labels keep their original relative order.
    for col in sorted(columns, key=lambda label: len(str(label)), reverse=True):
        needle = str(col).lower()
        # An empty (or blank) needle is a substring of every question and
        # would hijack the match, so it is never a candidate.
        if needle.strip() and needle in haystack:
            return col
    return None
def _heuristic_sql(question: str, table: str, columns: list) -> "str | None":
    """Translate common question shapes into SQL without calling the model.

    Rules are tried in a fixed order (group by, distinct, count, average,
    preview) and the first one whose keywords occur in the question wins.

    Args:
        question: Free-text question from the user.
        table: Name of the table to query.
        columns: Column labels of that table.

    Returns:
        A SQLite SELECT statement, or ``None`` when no rule applies or a
        rule needs a column and the table has none. ``None`` tells the
        caller to fall back to the model.
    """
    def ident(name) -> str:
        # SQLite identifier quoting: wrap in double quotes and double any
        # embedded quote. Column names come straight from an uploaded CSV
        # header, so they must not be able to break out of the quotes.
        return '"' + str(name).replace('"', '""') + '"'

    def target_column():
        # Column named in the question, else the first column, else None.
        if not columns:
            return None
        found = _find_col(q, columns)
        return ident(columns[0] if found in (None, "") else found)

    q = question.lower().strip()
    t = ident(table)

    if re.search(r'\bgroup\s+by\b', q):
        col = target_column()
        if col is None:
            return None
        return f'SELECT {col}, COUNT(*) AS count FROM {t} GROUP BY {col} ORDER BY count DESC'

    if re.search(r'\bunique\b|\bdistinct\b', q):
        col = target_column()
        if col is None:
            return None
        if re.search(r'\bhow many\b|\bcount\b', q):
            return f'SELECT COUNT(DISTINCT {col}) AS unique_count FROM {t}'
        return f'SELECT DISTINCT {col} FROM {t} LIMIT 50'

    if re.search(r'\bhow many\b|\bcount\b|\btotal\s+(records|rows)\b', q):
        return f'SELECT COUNT(*) AS total_rows FROM {t}'

    if re.search(r'\baverage\b|\bavg\b', q):
        col = target_column()
        if col is None:
            return None
        # CAST so that numeric text columns still average as numbers.
        return f'SELECT AVG(CAST({col} AS REAL)) AS average FROM {t}'

    if re.search(r'\bfirst\b|\bpreview\b|\bshow\b|\bhead\b', q):
        # First standalone number in the question is the row limit.
        m = re.search(r'\b(\d+)\b', q)
        return f'SELECT * FROM {t} LIMIT {int(m.group(1)) if m else 10}'

    return None
+# ── Gemini API Call (Neural Layer) ───────────────────────────────────────────
def _clean_generated_sql(raw: str, table: str) -> str:
    """Reduce raw model output to a single SELECT that targets *table*."""
    # Models tend to wrap answers in markdown fences despite the prompt.
    sql = raw.replace("```sql", "").replace("```", "").strip()
    # Keep only the first statement.
    sql = sql.split(";")[0].strip()
    # The result is executed verbatim, so refuse anything that is not a
    # query (ATTACH, PRAGMA, DROP, free text, ...).
    if not re.match(r'(?:select|with)\b', sql, flags=re.IGNORECASE):
        raise RuntimeError("Gemini did not return a SELECT statement")
    quoted = '"' + table.replace('"', '""') + '"'
    # Force the correct table name into the generated SQL. A callable
    # replacement keeps any backslash in the name literal.
    return re.sub(
        r'\bFROM\s+["\'\w\.]+',
        lambda _match: f'FROM {quoted}',
        sql,
        flags=re.IGNORECASE,
    )


def _call_gemini(question: str, schema: str, columns: list, table: str) -> str:
    """Ask Gemini to translate *question* into a SQLite SELECT statement.

    Args:
        question: Free-text question from the user.
        schema: CREATE TABLE statement of the uploaded table.
        columns: Column labels; only the first 30 are sent in the prompt.
        table: Table name, forced into every FROM clause of the answer.

    Returns:
        The cleaned SQL text.

    Raises:
        RuntimeError: If no API key is configured, the response carries no
            text, or the text is not a SELECT statement.
        urllib.error.URLError: If the HTTP request fails or times out.
    """
    if not GEMINI_API_KEY:
        raise RuntimeError("Gemini API Key missing")

    col_list = ", ".join(str(c) for c in columns[:30])
    prompt = (
        f"You are a SQLite expert. Output ONLY a single valid SQLite SELECT statement. "
        f"No explanation, no backticks, no markdown.\n\n"
        f"Table: {table}\n"
        f"Columns: {col_list}\n"
        f"Schema: {schema}\n\n"
        f"Question: {question}\n\nSQL:"
    )
    payload = json.dumps({
        "contents": [{"parts": [{"text": prompt}]}],
        "generationConfig": {"temperature": 0, "maxOutputTokens": 200}
    }).encode("utf-8")

    # NOTE(review): the model id is hard-coded; confirm it is still served.
    url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent"
    # The key travels in a header rather than the query string so that it
    # cannot end up in URLs that get logged or echoed in error messages.
    req = urllib.request.Request(
        url,
        data=payload,
        headers={"Content-Type": "application/json", "x-goog-api-key": GEMINI_API_KEY},
    )
    with urllib.request.urlopen(req, timeout=10) as resp:
        data = json.loads(resp.read())

    try:
        raw = data["candidates"][0]["content"]["parts"][0]["text"]
    except (KeyError, IndexError, TypeError) as exc:
        # Seen when the response has no candidates or no text part.
        raise RuntimeError("Gemini response contained no SQL text") from exc

    return _clean_generated_sql(raw, table)
+# ── Logic Helpers ──────────────────────────────────────────────────────────────
def csv_to_sqlite(df, table_name):
    """Store *df* as a single-table SQLite database and return it as bytes.

    ``sqlite3.connect`` only accepts a filesystem path (or ``":memory:"``),
    not a file-like object, so the database is built in a scratch file and
    read back.

    Args:
        df: pandas DataFrame to persist.
        table_name: Name of the table to create.

    Returns:
        A ``(db_bytes, schema)`` tuple: the raw database file content and
        the CREATE TABLE statement SQLite recorded for the table (empty
        string if none was recorded).
    """
    with tempfile.TemporaryDirectory() as scratch:
        db_path = os.path.join(scratch, "upload.db")
        conn = sqlite3.connect(db_path)
        try:
            df.to_sql(table_name, conn, if_exists="replace", index=False)
            conn.commit()
            # Extract schema string
            row = conn.execute(
                "SELECT sql FROM sqlite_master WHERE type='table'"
            ).fetchone()
        finally:
            # Close before reading so everything is flushed to the file.
            conn.close()
        with open(db_path, "rb") as fh:
            db_bytes = fh.read()
    schema = row[0] if row and row[0] else ""
    return db_bytes, schema
def execute_sql(sql, db_bytes):
    """Run *sql* against the database held in *db_bytes*.

    ``sqlite3.connect`` cannot open a ``BytesIO``, so the bytes are written
    to a scratch file that is opened, queried and deleted again. The stored
    bytes are never modified: every call works on a throwaway copy.

    Args:
        sql: SQL statement to execute.
        db_bytes: Raw SQLite database file content.

    Returns:
        One dict per result row, keyed by column name.

    Raises:
        HTTPException: Status 400 carrying the SQLite error text when the
            statement cannot be executed.
    """
    with tempfile.TemporaryDirectory() as scratch:
        db_path = os.path.join(scratch, "session.db")
        with open(db_path, "wb") as fh:
            fh.write(db_bytes)
        conn = sqlite3.connect(db_path)
        conn.row_factory = sqlite3.Row
        try:
            return [dict(row) for row in conn.execute(sql).fetchall()]
        except (sqlite3.Error, sqlite3.Warning, ValueError) as e:
            # sqlite3.Warning: multi-statement input on older Pythons;
            # ValueError: embedded NUL characters in the statement.
            raise HTTPException(status_code=400, detail=str(e)) from e
        finally:
            # Must be closed before the scratch directory is removed.
            conn.close()
+# ── API Endpoints ─────────────────────────────────────────────────────────────
 @app.post("/upload")
 async def upload_csv(file: UploadFile = File(...)):
     """Load an uploaded CSV into a new session and return a preview.

     Args:
         file: The uploaded CSV file.

     Returns:
         A dict with ``session_id``, ``columns``, ``preview`` (first five
         rows) and ``table_name``.

     Raises:
         HTTPException: Status 400 when the upload cannot be parsed as CSV.
     """
     contents = await file.read()
     try:
         df = pd.read_csv(io.BytesIO(contents)).dropna(how='all')
     except ValueError as e:
         # pandas' ParserError and EmptyDataError, and UnicodeDecodeError,
         # are all ValueError subclasses.
         raise HTTPException(status_code=400, detail=f"CSV parse error: {e}") from e
     # Header cells can parse as numbers; later code treats labels as text.
     df.columns = [str(c) for c in df.columns]

     session_id = os.urandom(8).hex()

     # Derive a safe table name from the file name; fall back to "data"
     # when the client sent no usable name.
     stem = os.path.splitext(file.filename or "")[0]
     clean_name = re.sub(r'[^a-zA-Z0-9_]', '_', stem) or "data"
     if clean_name[0].isdigit():
         clean_name = "t_" + clean_name
     table_name = clean_name[:32]

     db_bytes, schema = csv_to_sqlite(df, table_name)
     columns = list(df.columns)
     _db_store[session_id] = {"bytes": db_bytes, "table": table_name, "cols": columns}
     _schema_store[session_id] = schema

     # NaN is not valid JSON, so missing cells are sent as null instead.
     head = df.head(5)
     preview = head.astype(object).where(head.notna(), None).to_dict(orient="records")

     return {
         "session_id": session_id,
         "columns": columns,
         "preview": preview,
         "table_name": table_name
     }
 @app.post("/query")
 async def query(req: QueryRequest):
     """Answer a question about a previously uploaded CSV.

     The heuristic rules are tried first; Gemini is only called when none
     of them produces SQL.

     Args:
         req: Session id and question.

     Returns:
         A dict with the executed ``sql`` and its ``results`` rows.

     Raises:
         HTTPException: 404 for an unknown session, 500 when the Gemini
             call fails, 400 (from ``execute_sql``) when the SQL is invalid.
     """
     import asyncio  # local import: only this handler needs it

     if req.session_id not in _db_store:
         raise HTTPException(status_code=404, detail="Session expired.")
     data = _db_store[req.session_id]
     schema = _schema_store[req.session_id]

     # 1. Try Fast Heuristics
     sql = _heuristic_sql(req.question, data["table"], data["cols"])

     # 2. Try Gemini
     if not sql:
         loop = asyncio.get_running_loop()
         try:
             # _call_gemini does blocking network I/O (up to its 10 s
             # timeout); running it on the default executor keeps the
             # event loop free for other requests.
             sql = await loop.run_in_executor(
                 None, _call_gemini, req.question, schema, data["cols"], data["table"]
             )
         except Exception as e:
             print(f"[API ERROR] {e}")
             raise HTTPException(status_code=500, detail="Gemini API failed.") from e

     results = execute_sql(sql, data["bytes"])
     return {"sql": sql, "results": results}
+# ── Static & Main ──
# Serve the contents of the local "static" directory under the /static prefix.
app.mount(path="/static", app=StaticFiles(directory="static"), name="static")
 @app.get("/")
 def root():
     # Site root: hand back the front-end entry page from the static directory.
     index_page = FileResponse("static/index.html")
     return index_page
 @app.get("/health")
 def health():
     # Liveness probe: a fixed payload that touches neither Gemini nor the
     # session stores.
     return dict(status="ok", mode="gemini-api")