Melika Kheirieh committed on
Commit
b568b83
·
1 Parent(s): a0aff5b

feat(demo): add initial Gradio UI with optional SQLite upload

Browse files
Files changed (4) hide show
  1. app/main.py +2 -1
  2. app/routers/nl2sql.py +141 -34
  3. demo/app.py +108 -0
  4. requirements.txt +3 -1
app/main.py CHANGED
@@ -1,8 +1,9 @@
1
  from dotenv import load_dotenv
 
 
2
  from fastapi import FastAPI
3
  from app.routers import nl2sql
4
 
5
- load_dotenv()
6
 
7
 
8
  app = FastAPI(
 
1
  from dotenv import load_dotenv
2
+ load_dotenv()
3
+
4
  from fastapi import FastAPI
5
  from app.routers import nl2sql
6
 
 
7
 
8
 
9
  app = FastAPI(
app/routers/nl2sql.py CHANGED
@@ -1,5 +1,5 @@
1
  from dataclasses import asdict, is_dataclass
2
- from fastapi import APIRouter, HTTPException
3
  from app.schemas import NL2SQLRequest, NL2SQLResponse, ClarifyResponse
4
  from nl2sql.pipeline import Pipeline, FinalResult
5
  from nl2sql.ambiguity_detector import AmbiguityDetector
@@ -12,75 +12,182 @@ from nl2sql.verifier import Verifier
12
  from nl2sql.repair import Repair
13
  from adapters.db.sqlite_adapter import SQLiteAdapter
14
  from adapters.db.postgres_adapter import PostgresAdapter
15
- import os
16
- from typing import Union
17
 
 
 
 
 
18
 
19
  router = APIRouter(prefix="/nl2sql")
20
 
21
-
22
- _db: Union[PostgresAdapter, SQLiteAdapter]
23
- if os.getenv("DB_MODE", "sqlite") == "postgres":
24
- _db = PostgresAdapter(os.environ["POSTGRES_DSN"])
25
- else:
26
- _db = SQLiteAdapter("data/chinook.db")
27
-
28
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def get_llm():
30
  return OpenAIProvider()
31
 
32
-
33
- # _db = SQLiteAdapter("data/chinook.db")
34
- _executor = Executor(_db)
 
35
  _verifier = Verifier()
36
  _repair = Repair(get_llm())
37
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
- _pipeline = Pipeline(
40
- detector=AmbiguityDetector(),
41
- planner=Planner(get_llm()),
42
- generator=Generator(get_llm()),
43
- safety=Safety(),
44
- executor=_executor,
45
- verifier=_verifier,
46
- repair=_repair,
47
- )
48
-
49
-
50
  def _to_dict(obj):
51
- """Helper: safely convert dataclass → dict."""
52
  return asdict(obj) if is_dataclass(obj) else obj
53
 
54
-
55
  def _round_trace(t: dict) -> dict:
 
56
  if t.get("cost_usd") is not None:
57
  t["cost_usd"] = round(t["cost_usd"], 6)
58
  if t.get("duration_ms") is not None:
59
  t["duration_ms"] = round(t["duration_ms"], 2)
60
  return t
61
 
62
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  @router.post("", name="nl2sql_handler")
64
  def nl2sql_handler(request: NL2SQLRequest):
65
- result = _pipeline.run(
 
 
 
 
 
 
 
 
 
 
 
66
  user_query=request.query,
67
- schema_preview=request.schema_preview,
68
  )
69
 
70
- # --- Ensure result type ---
71
  if not isinstance(result, FinalResult):
72
  raise HTTPException(status_code=500, detail="Pipeline returned unexpected type")
73
 
74
- # --- Handle ambiguity ---
75
  if result.ambiguous and result.questions:
76
  return ClarifyResponse(ambiguous=True, questions=result.questions)
77
 
78
- # --- Handle error ---
79
  if not result.ok or result.error:
80
  detail = "; ".join(result.details or ["Unknown error"])
81
  raise HTTPException(status_code=400, detail=detail)
82
 
83
- # --- Success case ---
84
  traces = [_round_trace(t) for t in (result.traces or [])]
85
  return NL2SQLResponse(
86
  ambiguous=False,
 
1
  from dataclasses import asdict, is_dataclass
2
+ from fastapi import APIRouter, HTTPException, UploadFile, File
3
  from app.schemas import NL2SQLRequest, NL2SQLResponse, ClarifyResponse
4
  from nl2sql.pipeline import Pipeline, FinalResult
5
  from nl2sql.ambiguity_detector import AmbiguityDetector
 
12
  from nl2sql.repair import Repair
13
  from adapters.db.sqlite_adapter import SQLiteAdapter
14
  from adapters.db.postgres_adapter import PostgresAdapter
 
 
15
 
16
+ import os
17
+ import time
18
+ import uuid
19
+ from typing import Union, Optional, Dict
20
 
21
  router = APIRouter(prefix="/nl2sql")
22
 
23
# -------------------------------
# Runtime DB registry (for uploaded SQLite files)
# Files are stored under /tmp, mapped by a short-lived db_id
# -------------------------------
_DB_UPLOAD_DIR = os.getenv("DB_UPLOAD_DIR", "/tmp/nl2sql_dbs")
_DB_TTL_SECONDS = int(os.getenv("DB_TTL_SECONDS", "7200"))  # default 2 hours
os.makedirs(_DB_UPLOAD_DIR, exist_ok=True)

# In-memory map: db_id -> {"path": str, "ts": float}
# NOTE(review): this map is process-local — a db_id issued by one worker will
# not resolve in another; confirm single-process deployment (e.g. one uvicorn
# worker) or move the registry to shared storage.
_DB_MAP: Dict[str, Dict[str, object]] = {}

# -------------------------------
# Default DB resolution
# -------------------------------
DB_MODE = os.getenv("DB_MODE", "sqlite").lower()  # "sqlite" or "postgres"
POSTGRES_DSN = os.getenv("POSTGRES_DSN")
DEFAULT_SQLITE_DB = os.getenv("DEFAULT_SQLITE_DB", "data/chinook.db")  # keep your current default
40
+
41
def _cleanup_db_map() -> None:
    """Best-effort sweep of the upload registry.

    Drops every entry whose timestamp is older than the TTL and tries to
    delete its backing file; deletion failures are ignored so a stale or
    locked file can never break request handling.
    """
    cutoff = time.time() - _DB_TTL_SECONDS
    stale_ids = [key for key, meta in _DB_MAP.items() if float(meta.get("ts", 0)) < cutoff]
    for key in stale_ids:
        file_path = _DB_MAP[key].get("path")
        try:
            if isinstance(file_path, str) and os.path.exists(file_path):
                os.remove(file_path)
        except Exception:
            # best-effort: never propagate cleanup errors to the caller
            pass
        _DB_MAP.pop(key, None)
53
+
54
def _resolve_sqlite_path(db_id: Optional[str]) -> str:
    """Return the SQLite file path for *db_id*, or the default DB path.

    Expired uploads are swept first, so a stale db_id falls back to the
    default database instead of pointing at a deleted file.
    """
    _cleanup_db_map()
    entry = _DB_MAP.get(db_id) if db_id else None
    if entry is not None:
        return str(entry["path"])
    return DEFAULT_SQLITE_DB
60
+
61
def _select_adapter(db_id: Optional[str]) -> Union[PostgresAdapter, SQLiteAdapter]:
    """Build the DB adapter for one request.

    - postgres mode: always PostgresAdapter(POSTGRES_DSN); 500 if the DSN
      is not configured.
    - sqlite mode: the uploaded SQLite file for *db_id* when known,
      otherwise DEFAULT_SQLITE_DB.
    """
    if DB_MODE != "postgres":
        # sqlite mode.
        # NOTE: SQLiteAdapter should open DB in read-only mode internally if supported.
        # If not, ensure your adapter enforces PRAGMA query_only=ON and prevents DDL/DML.
        return SQLiteAdapter(_resolve_sqlite_path(db_id))

    if not POSTGRES_DSN:
        raise HTTPException(status_code=500, detail="POSTGRES_DSN is not configured")
    return PostgresAdapter(POSTGRES_DSN)
77
+
78
+ # -------------------------------
79
+ # LLM providers & shared components (stateless)
80
+ # -------------------------------
81
def get_llm():
    """Return a new OpenAIProvider instance (constructed on every call)."""
    return OpenAIProvider()
83
 
84
+ _detector = AmbiguityDetector()
85
+ _planner = Planner(get_llm())
86
+ _generator = Generator(get_llm())
87
+ _safety = Safety()
88
  _verifier = Verifier()
89
  _repair = Repair(get_llm())
90
 
91
def _build_pipeline(adapter: Union[PostgresAdapter, SQLiteAdapter]) -> Pipeline:
    """Assemble a Pipeline around a per-request Executor.

    All other stages are stateless module-level singletons; only the
    Executor is rebuilt so it binds to the adapter chosen for this call.
    """
    return Pipeline(
        detector=_detector,
        planner=_planner,
        generator=_generator,
        safety=_safety,
        executor=Executor(adapter),
        verifier=_verifier,
        repair=_repair,
    )
103
 
104
+ # -------------------------------
105
+ # Helpers
106
+ # -------------------------------
 
 
 
 
 
 
 
 
107
  def _to_dict(obj):
108
+ """Safely convert dataclass → dict."""
109
  return asdict(obj) if is_dataclass(obj) else obj
110
 
 
111
  def _round_trace(t: dict) -> dict:
112
+ """Round float fields to keep responses tidy and stable."""
113
  if t.get("cost_usd") is not None:
114
  t["cost_usd"] = round(t["cost_usd"], 6)
115
  if t.get("duration_ms") is not None:
116
  t["duration_ms"] = round(t["duration_ms"], 2)
117
  return t
118
 
119
+ # -------------------------------
120
+ # Upload endpoint (SQLite only)
121
+ # Path will be /api/nl2sql/upload_db if your root APIRouter is mounted at /api
122
+ # -------------------------------
123
@router.post("/upload_db")
async def upload_db(file: UploadFile = File(...)):
    """
    Upload a SQLite database (.db/.sqlite). Returns a short-lived db_id.
    Notes:
    - Only SQLite files are allowed here (not for Postgres mode).
    - Max size ~20MB recommended for demo environments like HF Spaces.
    - Files are stored under /tmp and cleaned by TTL.
    """
    if DB_MODE != "sqlite":
        raise HTTPException(status_code=400, detail="DB upload is only supported in sqlite mode")

    filename = file.filename or "db.sqlite"
    if not filename.endswith((".db", ".sqlite")):
        raise HTTPException(status_code=400, detail="Only .db or .sqlite files are allowed")

    payload = await file.read()
    max_bytes = int(os.getenv("UPLOAD_MAX_BYTES", str(20 * 1024 * 1024)))  # 20 MB
    if len(payload) > max_bytes:
        raise HTTPException(status_code=400, detail=f"File too large (> {max_bytes} bytes)")

    # Persist under a fresh UUID so uploads can never collide or overwrite.
    db_id = str(uuid.uuid4())
    dest_path = os.path.join(_DB_UPLOAD_DIR, f"{db_id}.sqlite")
    try:
        with open(dest_path, "wb") as fh:
            fh.write(payload)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to store DB: {e}")

    _DB_MAP[db_id] = {"path": dest_path, "ts": time.time()}
    return {"db_id": db_id}
154
+
155
+ # -------------------------------
156
+ # Main NL2SQL endpoint
157
+ # Path will be /api/nl2sql if your root APIRouter is mounted at /api
158
+ # -------------------------------
159
  @router.post("", name="nl2sql_handler")
160
  def nl2sql_handler(request: NL2SQLRequest):
161
+ """
162
+ Handle NL → SQL pipeline execution.
163
+ Optional: if the incoming request model supports `db_id`, we switch DB for this call.
164
+ Otherwise we will silently ignore and use default DB (or Postgres, based on mode).
165
+ """
166
+ # Try to extract db_id if present in request (without breaking strict models)
167
+ db_id = getattr(request, "db_id", None) # Optional[str]
168
+ # Build per-request pipeline bound to the selected adapter
169
+ adapter = _select_adapter(db_id)
170
+ pipeline = _build_pipeline(adapter)
171
+
172
+ result = pipeline.run(
173
  user_query=request.query,
174
+ schema_preview=getattr(request, "schema_preview", None),
175
  )
176
 
177
+ # Ensure result type
178
  if not isinstance(result, FinalResult):
179
  raise HTTPException(status_code=500, detail="Pipeline returned unexpected type")
180
 
181
+ # Ambiguity: return clarify payload
182
  if result.ambiguous and result.questions:
183
  return ClarifyResponse(ambiguous=True, questions=result.questions)
184
 
185
+ # Error: bubble up details
186
  if not result.ok or result.error:
187
  detail = "; ".join(result.details or ["Unknown error"])
188
  raise HTTPException(status_code=400, detail=detail)
189
 
190
+ # Success
191
  traces = [_round_trace(t) for t in (result.traces or [])]
192
  return NL2SQLResponse(
193
  ambiguous=False,
demo/app.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import requests
3
+ import gradio as gr
4
+
5
# Local-dev endpoints for the FastAPI backend.
# NOTE(review): server-side comments describe the routes mounting at /api
# (not /api/v1) — confirm the actual root router prefix, otherwise these
# requests will 404.
API_UPLOAD = "http://localhost:8000/api/v1/nl2sql/upload_db"
API_QUERY = "http://localhost:8000/api/v1/nl2sql"
7
+
8
+
9
def upload_db(file_obj):
    """POST a local SQLite file to the API; return (db_id, status message).

    Returns (None, message) without calling the API when no file was given,
    the extension is wrong, or the file exceeds the demo size limit.
    """
    if file_obj is None:
        return None, "No DB uploaded. Default DB will be used."

    name = getattr(file_obj, "name", "db.sqlite")
    if not name.endswith((".db", ".sqlite")):
        return None, "Only .db or .sqlite files are allowed."

    size = getattr(file_obj, "size", None)
    if size and size > 20 * 1024 * 1024:
        return None, "File too large (>20MB). Use a smaller demo DB."

    # Read bytes from the temp path gradio hands us.
    with open(file_obj.name, "rb") as fh:
        data = fh.read()

    response = requests.post(
        API_UPLOAD,
        files={"file": (name, io.BytesIO(data), "application/octet-stream")},
        timeout=60,
    )
    response.raise_for_status()
    db_id = response.json().get("db_id")
    return db_id, f"Uploaded OK. db_id={db_id}"
31
+
32
+
33
def query_to_sql(user_query, db_id, debug):
    """Call the NL2SQL API and unpack the JSON response into the tuple of
    values the Gradio widgets expect (badges, sql, explanation, result,
    trace, repair candidates, repair diff, timings table)."""
    payload = {"query": user_query, "debug": bool(debug)}
    if db_id:
        payload["db_id"] = db_id
    resp = requests.post(API_QUERY, json=payload, timeout=120)
    resp.raise_for_status()
    d = resp.json()

    sql = d.get("sql_final") or d.get("sql") or ""
    explanation = d.get("explanation", "")
    result = d.get("result", [])

    # Flags summary
    ambiguous = "Yes" if d.get("ambiguous") else "No"
    safety_info = d.get("safety", {})
    if safety_info.get("allowed", True):
        safety = "Allowed"
    else:
        safety = f"Blocked: {safety_info.get('blocked_reason')}"
    verification = "Passed" if d.get("verification", {}).get("passed") else "Failed"
    repair = d.get("repair", {})
    repair_text = f"Applied: {repair.get('applied', False)}, Attempts: {repair.get('attempts', 0)}"

    # Per-stage timings as sorted [stage, ms] rows for the Dataframe widget.
    timings = d.get("timings_ms", {})
    timings_table = [[stage, timings[stage]] for stage in sorted(timings)]

    return (
        f"Ambiguous: {ambiguous} | Safety: {safety} | Verification: {verification} | Repair: {repair_text}",
        sql,
        explanation,
        result,
        d.get("trace", []),
        repair.get("candidates", []),
        repair.get("diff", ""),
        timings_table,
    )
65
+
66
+
67
# --- Gradio UI wiring (module-level script) ---
with gr.Blocks(title="NL2SQL Copilot") as demo:
    gr.Markdown("# NL2SQL Copilot\nUpload a SQLite DB (optional) or use default.")

    # Holds the db_id returned by the upload endpoint; None means default DB.
    db_state = gr.State(value=None)

    with gr.Row():
        db_file = gr.File(label="Upload SQLite (.db/.sqlite)", file_types=[".db", ".sqlite"])
        upload_btn = gr.Button("Upload DB")
        db_msg = gr.Markdown()
        upload_btn.click(upload_db, inputs=[db_file], outputs=[db_state, db_msg])

    with gr.Row():
        q = gr.Textbox(label="Question", scale=4)
        debug = gr.Checkbox(label="Debug", value=True, scale=1)
        run = gr.Button("Run")

    # Status line plus the primary SQL/explanation outputs.
    badges = gr.Markdown()
    sql_out = gr.Code(label="Final SQL", language="sql")
    exp_out = gr.Textbox(label="Explanation", lines=3)

    with gr.Tab("Result"):
        res_out = gr.JSON()

    with gr.Tab("Trace"):
        trace = gr.JSON(label="Stage trace")

    with gr.Tab("Repair"):
        repair_candidates = gr.JSON(label="Candidates")
        repair_diff = gr.Code(label="SQL Diff", language="sql")

    with gr.Tab("Timings"):
        timings = gr.Dataframe(headers=["stage", "ms"], datatype=["str", "number"])

    # The outputs list must stay in the same order as query_to_sql's return tuple.
    run.click(
        query_to_sql,
        inputs=[q, db_state, debug],
        outputs=[badges, sql_out, exp_out, res_out, trace, repair_candidates, repair_diff, timings],
    )

if __name__ == "__main__":
    # Let Gradio pick a free port by default to avoid collisions
    demo.launch()
requirements.txt CHANGED
@@ -9,4 +9,6 @@ pytest==8.3.3
9
  python-dotenv==1.1.1
10
  openai==2.6.1
11
  psycopg[binary]~=3.2
12
- ruff
 
 
 
9
  python-dotenv==1.1.1
10
  openai==2.6.1
11
  psycopg[binary]~=3.2
12
+ ruff
13
+ gradio
14
+ sqlalchemy