Spaces:

stvnnnnnn
/

nl2sql-backend-t5

Sleeping

App Files Files Community

stvnnnnnn committed on Dec 1, 2025

Commit

869f1a1

verified ·

1 Parent(s): 32750dd

Update app.py

Browse files

Files changed (1) hide show

app.py +281 -156

app.py CHANGED Viewed

@@ -4,7 +4,6 @@ import zipfile
 import re
 import difflib
 import tempfile
-import sqlite3
 import uuid
 from typing import List, Optional, Dict, Any
@@ -18,21 +17,27 @@ from langdetect import detect
 from transformers import MarianMTModel, MarianTokenizer
 from openai import OpenAI
 # ---- Supabase ----
 from supabase import create_client, Client
 SUPABASE_URL = "https://bnvmqgjawtaslczewqyd.supabase.co"
-SUPABASE_ANON_KEY = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImJudm1xZ2phd3Rhc2xjemV3cXlkIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NjQ0NjM5NDAsImV4cCI6MjA4MDAzOTk0MH0.9zkyqrsm-QOSwMTUPZEWqyFeNpbbuar01rB7pmObkUI"
 supabase: Client = create_client(SUPABASE_URL, SUPABASE_ANON_KEY)
 # ======================================================
-# 0) Configuración general de paths
 # ======================================================
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-UPLOAD_DIR = os.path.join(BASE_DIR, "uploaded_dbs")
-os.makedirs(UPLOAD_DIR, exist_ok=True)
 MODEL_DIR = os.getenv("MODEL_DIR", "stvnnnnnn/t5-large-nl2sql-spider")
 DEVICE = torch.device("cpu")
@@ -40,24 +45,32 @@ DEVICE = torch.device("cpu")
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 openai_client = OpenAI(api_key=OPENAI_API_KEY) if OPENAI_API_KEY else None
 # ======================================================
-# 1) SQLManager (versión actual: SQLite local)
 # ======================================================
-class SQLManager:
     """
-    Gestor de "conexiones" a bases dinámicas.
-    Versión actual: cada conexión es un archivo SQLite en UPLOAD_DIR.
-    API pensada para poder cambiar después a Postgres/MySQL (Railway).
     """
-    def __init__(self):
-        # connections[connection_id] = {
-        #   "label": str,
-        #   "engine": "sqlite",
-        #   "db_name": str,
-        #   "db_path": str
-        # }
         self.connections: Dict[str, Dict[str, Any]] = {}
     # ---------- utilidades internas ----------
@@ -70,51 +83,73 @@ class SQLManager:
             raise KeyError(f"connection_id '{connection_id}' no registrado")
         return self.connections[connection_id]
     # ---------- creación de BD desde dump ----------
     def create_database_from_dump(self, label: str, sql_text: str) -> str:
         """
-        Crea un archivo SQLite, ejecuta el dump SQL y
-        registra la conexión. Por ahora el dump debe ser
-        razonablemente compatible con SQLite.
         """
         connection_id = self._new_connection_id()
-        db_name = connection_id  # nombre lógico
-        db_path = os.path.join(UPLOAD_DIR, f"{db_name}.sqlite")
-        # Ejecutar todo el script. Si falla, borramos el archivo.
-        conn = sqlite3.connect(db_path)
         try:
-            conn.executescript(sql_text)
-            conn.commit()
         except Exception as e:
             conn.close()
-            if os.path.exists(db_path):
-                os.remove(db_path)
-            raise RuntimeError(f"Error ejecutando dump SQL en SQLite: {e}")
         finally:
             conn.close()
         self.connections[connection_id] = {
             "label": label,
-            "engine": "sqlite",
-            "db_name": db_name,
-            "db_path": db_path,
         }
         return connection_id
     # ---------- ejecución segura de SQL ----------
-    def execute_sql(self, connection_id: str, sql: str) -> Dict[str, Any]:
         """
-        Ejecuta un SELECT sobre la BD asociada al connection_id.
         Bloquea operaciones destructivas por seguridad.
         """
         info = self._get_info(connection_id)
-        db_path = info["db_path"]
         forbidden = ["drop ", "delete ", "update ", "insert ", "alter ", "replace "]
-        sql_low = sql.lower()
         if any(tok in sql_low for tok in forbidden):
             return {
                 "ok": False,
@@ -123,89 +158,152 @@ class SQLManager:
                 "columns": [],
             }
         try:
-            conn = sqlite3.connect(db_path)
-            cur = conn.cursor()
-            cur.execute(sql)
-            rows = cur.fetchall()
-            cols = [d[0] for d in cur.description] if cur.description else []
-            conn.close()
-            return {"ok": True, "error": None, "rows": [list(r) for r in rows], "columns": cols}
         except Exception as e:
             return {"ok": False, "error": str(e), "rows": None, "columns": []}
     # ---------- introspección de esquema ----------
     def get_schema(self, connection_id: str) -> Dict[str, Any]:
         info = self._get_info(connection_id)
-        db_path = info["db_path"]
-        if not os.path.exists(db_path):
-            raise RuntimeError(f"SQLite no encontrado: {db_path}")
-        conn = sqlite3.connect(db_path)
-        cur = conn.cursor()
-        cur.execute("SELECT name FROM sqlite_master WHERE type='table';")
-        tables = [row[0] for row in cur.fetchall()]
-        tables_info: Dict[str, Dict[str, Any]] = {}
-        foreign_keys: List[Dict[str, Any]] = []
-        for t in tables:
-            cur.execute(f"PRAGMA table_info('{t}');")
-            rows = cur.fetchall()
-            cols = [r[1] for r in rows]
-            tables_info[t] = {"columns": cols}
-            cur.execute(f"PRAGMA foreign_key_list('{t}');")
-            fks = cur.fetchall()
-            for (id_, seq, ref_table, from_col, to_col, on_update, on_delete, match) in fks:
-                foreign_keys.append({
-                    "from_table": t,
-                    "from_column": from_col,
-                    "to_table": ref_table,
-                    "to_column": to_col,
-                })
-        conn.close()
-        return {
-            "tables": tables_info,
-            "foreign_keys": foreign_keys,
-        }
     # ---------- preview de tabla ----------
-    def get_preview(self, connection_id: str, table: str, limit: int = 20) -> Dict[str, Any]:
         info = self._get_info(connection_id)
-        db_path = info["db_path"]
-        conn = sqlite3.connect(db_path)
-        cur = conn.cursor()
         try:
-            cur.execute(f'SELECT * FROM "{table}" LIMIT {int(limit)};')
-            rows = cur.fetchall()
-            cols = [d[0] for d in cur.description] if cur.description else []
         finally:
             conn.close()
-        return {
-            "columns": cols,
-            "rows": [list(r) for r in rows],
-        }
-# Instancia global de SQLManager
-sql_manager = SQLManager()
 # ======================================================
 # 2) Inicialización de FastAPI
 # ======================================================
 app = FastAPI(
-    title="NL2SQL Backend (with Supabase Auth + History)",
-    version="2.1.0",
 )
 app.add_middleware(
@@ -232,7 +330,9 @@ def load_nl2sql_model():
         return
     print(f"🔁 Cargando modelo NL→SQL desde: {MODEL_DIR}")
     t5_tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR, use_fast=True)
-    t5_model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_DIR, torch_dtype=torch.float32)
     t5_model.to(DEVICE)
     t5_model.eval()
     print("✅ Modelo NL→SQL listo en memoria.")
@@ -281,14 +381,16 @@ def translate_es_to_en(text: str) -> str:
 def _normalize_name_for_match(name: str) -> str:
     s = name.lower()
-    s = s.replace('"', '').replace("`", "")
     s = s.replace("_", "")
     if s.endswith("s") and len(s) > 3:
         s = s[:-1]
     return s
-def _build_schema_indexes(tables_info: Dict[str, Dict[str, List[str]]]) -> Dict[str, Dict[str, List[str]]]:
     table_index: Dict[str, List[str]] = {}
     column_index: Dict[str, List[str]] = {}
@@ -349,6 +451,8 @@ DOMAIN_SYNONYMS_COLUMN = {
 def try_repair_sql(sql: str, error: str, schema_meta: Dict[str, Any]) -> Optional[str]:
     """
     Intenta reparar nombres de tablas/columnas basándose en el esquema real.
     """
     tables_info = schema_meta["tables"]
     idx = _build_schema_indexes(tables_info)
@@ -361,13 +465,13 @@ def try_repair_sql(sql: str, error: str, schema_meta: Dict[str, Any]) -> Optiona
     missing_table = None
     missing_column = None
-    m_t = re.search(r"relation \"([\w\.]+)\" does not exist", error, re.IGNORECASE)
     if not m_t:
         m_t = re.search(r"no such table: ([\w\.]+)", error)
     if m_t:
         missing_table = m_t.group(1)
-    m_c = re.search(r"column \"([\w\.]+)\" does not exist", error, re.IGNORECASE)
     if not m_c:
         m_c = re.search(r"no such column: ([\w\.]+)", error)
     if m_c:
@@ -411,7 +515,7 @@ def try_repair_sql(sql: str, error: str, schema_meta: Dict[str, Any]) -> Optiona
 # ======================================================
-# 5) Construcción de prompt y NL→SQL + re-ranking
 # ======================================================
 def build_prompt(question_en: str, db_id: str, schema_str: str) -> str:
@@ -421,9 +525,9 @@ def build_prompt(question_en: str, db_id: str, schema_str: str) -> str:
         f"note: use JOIN when foreign keys link tables"
     )
 def normalize_score(raw: float) -> float:
     """Normaliza el score logit del modelo a un porcentaje 0-100."""
-    # Rango típico de logits beam-search: -20 a +5
     norm = (raw + 20) / 25
     norm = max(0, min(1, norm))
     return round(norm * 100, 2)
@@ -431,9 +535,10 @@ def normalize_score(raw: float) -> float:
 def nl2sql_with_rerank(question: str, conn_id: str) -> Dict[str, Any]:
     if conn_id not in sql_manager.connections:
-        raise HTTPException(status_code=404, detail=f"connection_id '{conn_id}' no registrado")
-    # Obtener esquema real desde SQLite (futuro: Postgres/MySQL)
     meta = sql_manager.get_schema(conn_id)
     tables_info = meta["tables"]
@@ -451,7 +556,9 @@ def nl2sql_with_rerank(question: str, conn_id: str) -> Dict[str, Any]:
     if t5_model is None:
         load_nl2sql_model()
-    inputs = t5_tokenizer([prompt], return_tensors="pt", truncation=True, max_length=768).to(DEVICE)
     num_beams = 6
     num_return = 6
@@ -478,7 +585,9 @@ def nl2sql_with_rerank(question: str, conn_id: str) -> Dict[str, Any]:
     best_score = -1e9
     for i in range(sequences.size(0)):
-        raw_sql = t5_tokenizer.decode(sequences[i], skip_special_tokens=True).strip()
         cand: Dict[str, Any] = {
             "sql": raw_sql,
             "score": float(scores[i]),
@@ -489,7 +598,6 @@ def nl2sql_with_rerank(question: str, conn_id: str) -> Dict[str, Any]:
         exec_info = sql_manager.execute_sql(conn_id, raw_sql)
-        # Intentar reparación solo si es error por tabla/columna
         err_lower = (exec_info["error"] or "").lower()
         if (not exec_info["ok"]) and (
             "no such table" in err_lower
@@ -503,7 +611,9 @@ def nl2sql_with_rerank(question: str, conn_id: str) -> Dict[str, Any]:
                 if not repaired_sql or repaired_sql == current_sql:
                     break
                 exec_info2 = sql_manager.execute_sql(conn_id, repaired_sql)
-                cand["repaired_from"] = current_sql if cand["repaired_from"] is None else cand["repaired_from"]
                 cand["repair_note"] = f"auto-repair (table/column name, step {step})"
                 cand["sql"] = repaired_sql
                 exec_info = exec_info2
@@ -556,7 +666,7 @@ def nl2sql_with_rerank(question: str, conn_id: str) -> Dict[str, Any]:
 class UploadResponse(BaseModel):
     connection_id: str
     label: str
-    db_path: str  # pseudo-path (engine://db_name o similar)
     note: Optional[str] = None
@@ -564,7 +674,7 @@ class ConnectionInfo(BaseModel):
     connection_id: str
     label: str
     engine: Optional[str] = None
-    db_name: Optional[str] = None
 class SchemaResponse(BaseModel):
@@ -649,7 +759,7 @@ def _combine_sql_files_from_zip(zip_bytes: bytes) -> str:
 @app.on_event("startup")
 async def startup_event():
     load_nl2sql_model()
-    print("✅ Backend NL2SQL inicializado (engine SQLite por ahora).")
     print(f"MODEL_DIR={MODEL_DIR}, DEVICE={DEVICE}")
     print(f"Conexiones activas al inicio: {len(sql_manager.connections)}")
@@ -657,7 +767,7 @@ async def startup_event():
 @app.post("/upload", response_model=UploadResponse)
 async def upload_database(
     db_file: UploadFile = File(...),
-    authorization: Optional[str] = Header(None)
 ):
     if authorization is None:
         raise HTTPException(401, "Missing Authorization header")
@@ -667,7 +777,7 @@ async def upload_database(
     if not user or not user.user:
         raise HTTPException(401, "Invalid Supabase token")
-    filename = db_file.filename
     fname_lower = filename.lower()
     contents = await db_file.read()
@@ -682,7 +792,7 @@ async def upload_database(
     else:
         raise HTTPException(400, "Formato no soportado. Usa .sql o .zip.")
-    # --- crear BD dinámica (SQLite temporal) ---
     try:
         conn_id = sql_manager.create_database_from_dump(label=filename, sql_text=sql_text)
     except Exception as e:
@@ -690,19 +800,21 @@ async def upload_database(
     meta = sql_manager.connections[conn_id]
-    # --- guardar en Supabase ---
-    supabase.table("databases").insert({
-        "user_id": user.user.id,
-        "filename": filename,
-        "engine": meta["engine"],
-        "connection_id": conn_id
-    }).execute()
     return UploadResponse(
         connection_id=conn_id,
         label=filename,
-        db_path=f"{meta['engine']}://{meta['db_name']}",
-        note="Database stored and indexed in Supabase."
     )
@@ -713,7 +825,7 @@ async def list_connections():
             connection_id=cid,
             label=meta.get("label", ""),
             engine=meta.get("engine"),
-            db_name=meta.get("db_name"),
         )
         for cid, meta in sql_manager.connections.items()
     ]
@@ -748,7 +860,9 @@ async def preview_table(connection_id: str, table: str, limit: int = 20):
     try:
         preview = sql_manager.get_preview(connection_id, table, limit)
     except Exception as e:
-        raise HTTPException(status_code=400, detail=f"Error al leer tabla '{table}': {e}")
     return PreviewResponse(
         connection_id=connection_id,
@@ -761,7 +875,7 @@ async def preview_table(connection_id: str, table: str, limit: int = 20):
 @app.post("/infer", response_model=InferResponse)
 async def infer_sql(
     req: InferRequest,
-    authorization: Optional[str] = Header(None)
 ):
     if authorization is None:
         raise HTTPException(401, "Missing Authorization header")
@@ -774,27 +888,28 @@ async def infer_sql(
     result = nl2sql_with_rerank(req.question, req.connection_id)
     score = normalize_score(result["candidates"][0]["score"])
-    # buscar db_id en supabase
-    db_row = supabase.table("databases") \
-        .select("id") \
-        .eq("connection_id", req.connection_id) \
-        .eq("user_id", user.user.id) \
         .execute()
     db_id = db_row.data[0]["id"] if db_row.data else None
-    # guardar historial
-    supabase.table("queries").insert({
-        "user_id": user.user.id,
-        "db_id": db_id,
-        "nl": result["question_original"],
-        "sql_generated": result["best_sql"],
-        "sql_repaired": result["candidates"][0]["sql"],
-        "execution_ok": result["best_exec_ok"],
-        "error": result["best_exec_error"],
-        "rows_preview": result["best_rows_preview"],
-        "score": score
-    }).execute()
     result["score_percent"] = score
     return InferResponse(**result)
@@ -803,12 +918,12 @@ async def infer_sql(
 @app.post("/speech-infer", response_model=SpeechInferResponse)
 async def speech_infer(
     connection_id: str = Form(...),
-    audio: UploadFile = File(...)
 ):
     if openai_client is None:
         raise HTTPException(
             status_code=500,
-            detail="OPENAI_API_KEY no está configurado en el backend."
         )
     if audio.content_type is None:
@@ -819,7 +934,9 @@ async def speech_infer(
             tmp.write(await audio.read())
             tmp_path = tmp.name
     except Exception:
-        raise HTTPException(status_code=500, detail="No se pudo procesar el audio recibido.")
     try:
         with open(tmp_path, "rb") as f:
@@ -847,8 +964,10 @@ async def health():
         "model_loaded": t5_model is not None,
         "connections": len(sql_manager.connections),
         "device": str(DEVICE),
     }
 @app.get("/history")
 def get_history(authorization: Optional[str] = Header(None)):
     if authorization is None:
@@ -857,11 +976,13 @@ def get_history(authorization: Optional[str] = Header(None)):
     jwt = authorization.replace("Bearer ", "")
     user = supabase.auth.get_user(jwt)
-    rows = supabase.table("queries") \
-        .select("*") \
-        .eq("user_id", user.user.id) \
-        .order("created_at", desc=True) \
         .execute()
     return rows.data
@@ -874,10 +995,12 @@ def get_my_databases(authorization: Optional[str] = Header(None)):
     jwt = authorization.replace("Bearer ", "")
     user = supabase.auth.get_user(jwt)
-    rows = supabase.table("databases") \
-        .select("*") \
-        .eq("user_id", user.user.id) \
         .execute()
     return rows.data
@@ -885,14 +1008,16 @@ def get_my_databases(authorization: Optional[str] = Header(None)):
 @app.get("/")
 async def root():
     return {
-        "message": "NL2SQL T5-large backend is running (engine SQLite, ready to upgrade to Postgres/MySQL).",
         "endpoints": [
-            "POST /upload            (subir .sql o .zip con .sql → crear BD dinámica)",
-            "GET  /connections       (listar BDs subidas)",
             "GET  /schema/{id}       (esquema resumido)",
             "GET  /preview/{id}/{t}  (preview de tabla)",
             "POST /infer             (NL→SQL + ejecución en BD)",
-            "POST /speech-infer      (NL por voz → SQL + ejecución)",
             "GET  /health            (estado del backend)",
             "GET  /docs              (OpenAPI UI)",
         ],

 import re
 import difflib
 import tempfile
 import uuid
 from typing import List, Optional, Dict, Any
 from transformers import MarianMTModel, MarianTokenizer
 from openai import OpenAI
+# ---- Postgres (Neon) ----
+import psycopg2
+from psycopg2 import sql as pgsql
 # ---- Supabase ----
 from supabase import create_client, Client
 SUPABASE_URL = "https://bnvmqgjawtaslczewqyd.supabase.co"
+SUPABASE_ANON_KEY = (
+    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImJudm1x"
+    "Z2phd3Rhc2xjemV3cXlkIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NjQ0NjM5NDAsImV4cCI6MjA4"
+    "MDAzOTk0MH0.9zkyqrsm-QOSwMTUPZEWqyFeNpbbuar01rB7pmObkUI"
+)
 supabase: Client = create_client(SUPABASE_URL, SUPABASE_ANON_KEY)
 # ======================================================
+# 0) Configuración general de paths / modelo / OpenAI
 # ======================================================
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 MODEL_DIR = os.getenv("MODEL_DIR", "stvnnnnnn/t5-large-nl2sql-spider")
 DEVICE = torch.device("cpu")
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 openai_client = OpenAI(api_key=OPENAI_API_KEY) if OPENAI_API_KEY else None
+# DSN de Neon (Postgres) – EJEMPLO:
+# postgres://user:pass@host/neondb?sslmode=require
+POSTGRES_DSN = os.getenv("POSTGRES_DSN")
+if not POSTGRES_DSN:
+    raise RuntimeError(
+        "⚠️ POSTGRES_DSN no está definido. "
+        "Configúralo en los secrets del Space con la cadena de conexión de Neon."
+    )
 # ======================================================
+# 1) Gestor de conexiones dinámicas: Postgres (Neon)
 # ======================================================
+class PostgresManager:
     """
+    Cada upload crea un *schema* aislado en Neon.
+    connections[connection_id] = {
+        "label": str,        # nombre de archivo original
+        "engine": "postgres",
+        "schema": str        # nombre del schema en Neon
+    }
     """
+    def __init__(self, dsn: str):
+        self.dsn = dsn
         self.connections: Dict[str, Dict[str, Any]] = {}
     # ---------- utilidades internas ----------
             raise KeyError(f"connection_id '{connection_id}' no registrado")
         return self.connections[connection_id]
+    def _get_conn(self):
+        conn = psycopg2.connect(self.dsn)
+        conn.autocommit = True
+        return conn
     # ---------- creación de BD desde dump ----------
     def create_database_from_dump(self, label: str, sql_text: str) -> str:
         """
+        Crea un schema en Neon, fija search_path a ese schema
+        y ejecuta el dump SQL dentro de él.
         """
         connection_id = self._new_connection_id()
+        schema_name = f"sess_{uuid.uuid4().hex[:8]}"
+        conn = self._get_conn()
         try:
+            with conn.cursor() as cur:
+                # Crear schema aislado
+                cur.execute(
+                    pgsql.SQL("CREATE SCHEMA {}").format(
+                        pgsql.Identifier(schema_name)
+                    )
+                )
+                # Usar ese schema por defecto
+                cur.execute(
+                    pgsql.SQL("SET search_path TO {}").format(
+                        pgsql.Identifier(schema_name)
+                    )
+                )
+                # Ejecutar dump completo (puede tener múltiples sentencias)
+                cur.execute(sql_text)
         except Exception as e:
+            # Si falla, intentar limpiar el schema
+            try:
+                with conn.cursor() as cur:
+                    cur.execute(
+                        pgsql.SQL("DROP SCHEMA IF EXISTS {} CASCADE").format(
+                            pgsql.Identifier(schema_name)
+                        )
+                    )
+            except Exception:
+                pass
             conn.close()
+            raise RuntimeError(f"Error ejecutando dump SQL en Postgres: {e}")
         finally:
             conn.close()
         self.connections[connection_id] = {
             "label": label,
+            "engine": "postgres",
+            "schema": schema_name,
         }
         return connection_id
     # ---------- ejecución segura de SQL ----------
+    def execute_sql(self, connection_id: str, sql_text: str) -> Dict[str, Any]:
         """
+        Ejecuta un SELECT dentro del schema asociado al connection_id.
         Bloquea operaciones destructivas por seguridad.
         """
         info = self._get_info(connection_id)
+        schema = info["schema"]
         forbidden = ["drop ", "delete ", "update ", "insert ", "alter ", "replace "]
+        sql_low = sql_text.lower()
         if any(tok in sql_low for tok in forbidden):
             return {
                 "ok": False,
                 "columns": [],
             }
+        conn = self._get_conn()
         try:
+            with conn.cursor() as cur:
+                # usar el schema de la sesión
+                cur.execute(
+                    pgsql.SQL("SET search_path TO {}").format(
+                        pgsql.Identifier(schema)
+                    )
+                )
+                cur.execute(sql_text)
+                if cur.description:
+                    rows = cur.fetchall()
+                    cols = [d[0] for d in cur.description]
+                else:
+                    rows, cols = [], []
+            return {
+                "ok": True,
+                "error": None,
+                "rows": [list(r) for r in rows],
+                "columns": cols,
+            }
         except Exception as e:
             return {"ok": False, "error": str(e), "rows": None, "columns": []}
+        finally:
+            conn.close()
     # ---------- introspección de esquema ----------
     def get_schema(self, connection_id: str) -> Dict[str, Any]:
         info = self._get_info(connection_id)
+        schema = info["schema"]
+        conn = self._get_conn()
+        try:
+            tables_info: Dict[str, Dict[str, Any]] = {}
+            foreign_keys: List[Dict[str, Any]] = []
+            with conn.cursor() as cur:
+                # Tablas básicas
+                cur.execute(
+                    """
+                    SELECT table_name
+                    FROM information_schema.tables
+                    WHERE table_schema = %s
+                      AND table_type = 'BASE TABLE'
+                    ORDER BY table_name;
+                    """,
+                    (schema,),
+                )
+                tables = [r[0] for r in cur.fetchall()]
+                # Columnas por tabla
+                for t in tables:
+                    cur.execute(
+                        """
+                        SELECT column_name
+                        FROM information_schema.columns
+                        WHERE table_schema = %s
+                          AND table_name = %s
+                        ORDER BY ordinal_position;
+                        """,
+                        (schema, t),
+                    )
+                    cols = [r[0] for r in cur.fetchall()]
+                    tables_info[t] = {"columns": cols}
+                # Foreign keys
+                cur.execute(
+                    """
+                    SELECT
+                        tc.table_name      AS from_table,
+                        kcu.column_name    AS from_column,
+                        ccu.table_name     AS to_table,
+                        ccu.column_name    AS to_column
+                    FROM information_schema.table_constraints AS tc
+                    JOIN information_schema.key_column_usage AS kcu
+                      ON tc.constraint_name = kcu.constraint_name
+                     AND tc.table_schema   = kcu.table_schema
+                    JOIN information_schema.constraint_column_usage AS ccu
+                      ON ccu.constraint_name = tc.constraint_name
+                     AND ccu.table_schema   = tc.table_schema
+                    WHERE tc.constraint_type = 'FOREIGN KEY'
+                      AND tc.table_schema    = %s;
+                    """,
+                    (schema,),
+                )
+                for ft, fc, tt, tc2 in cur.fetchall():
+                    foreign_keys.append(
+                        {
+                            "from_table": ft,
+                            "from_column": fc,
+                            "to_table": tt,
+                            "to_column": tc2,
+                        }
+                    )
+            return {
+                "tables": tables_info,
+                "foreign_keys": foreign_keys,
+            }
+        finally:
+            conn.close()
     # ---------- preview de tabla ----------
+    def get_preview(
+        self, connection_id: str, table: str, limit: int = 20
+    ) -> Dict[str, Any]:
         info = self._get_info(connection_id)
+        schema = info["schema"]
+        conn = self._get_conn()
         try:
+            with conn.cursor() as cur:
+                cur.execute(
+                    pgsql.SQL("SET search_path TO {}").format(
+                        pgsql.Identifier(schema)
+                    )
+                )
+                query = pgsql.SQL("SELECT * FROM {} LIMIT %s").format(
+                    pgsql.Identifier(table)
+                )
+                cur.execute(query, (int(limit),))
+                rows = cur.fetchall()
+                cols = [d[0] for d in cur.description] if cur.description else []
+            return {
+                "columns": cols,
+                "rows": [list(r) for r in rows],
+            }
         finally:
             conn.close()
+# Instancia global de PostgresManager
+sql_manager = PostgresManager(POSTGRES_DSN)
 # ======================================================
 # 2) Inicialización de FastAPI
 # ======================================================
 app = FastAPI(
+    title="NL2SQL Backend (Supabase + Postgres/Neon)",
+    version="3.0.0",
 )
 app.add_middleware(
         return
     print(f"🔁 Cargando modelo NL→SQL desde: {MODEL_DIR}")
     t5_tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR, use_fast=True)
+    t5_model = AutoModelForSeq2SeqLM.from_pretrained(
+        MODEL_DIR, torch_dtype=torch.float32
+    )
     t5_model.to(DEVICE)
     t5_model.eval()
     print("✅ Modelo NL→SQL listo en memoria.")
 def _normalize_name_for_match(name: str) -> str:
     s = name.lower()
+    s = s.replace('"', "").replace("`", "")
     s = s.replace("_", "")
     if s.endswith("s") and len(s) > 3:
         s = s[:-1]
     return s
+def _build_schema_indexes(
+    tables_info: Dict[str, Dict[str, List[str]]]
+) -> Dict[str, Dict[str, List[str]]]:
     table_index: Dict[str, List[str]] = {}
     column_index: Dict[str, List[str]] = {}
 def try_repair_sql(sql: str, error: str, schema_meta: Dict[str, Any]) -> Optional[str]:
     """
     Intenta reparar nombres de tablas/columnas basándose en el esquema real.
+    Compatible con mensajes de Postgres y también con los de SQLite
+    (por si algún día reusamos la lógica).
     """
     tables_info = schema_meta["tables"]
     idx = _build_schema_indexes(tables_info)
     missing_table = None
     missing_column = None
+    m_t = re.search(r'relation "([\w\.]+)" does not exist', error, re.IGNORECASE)
     if not m_t:
         m_t = re.search(r"no such table: ([\w\.]+)", error)
     if m_t:
         missing_table = m_t.group(1)
+    m_c = re.search(r'column "([\w\.]+)" does not exist', error, re.IGNORECASE)
     if not m_c:
         m_c = re.search(r"no such column: ([\w\.]+)", error)
     if m_c:
 # ======================================================
+# 5) Prompt NL→SQL + re-ranking
 # ======================================================
 def build_prompt(question_en: str, db_id: str, schema_str: str) -> str:
         f"note: use JOIN when foreign keys link tables"
     )
 def normalize_score(raw: float) -> float:
     """Normaliza el score logit del modelo a un porcentaje 0-100."""
     norm = (raw + 20) / 25
     norm = max(0, min(1, norm))
     return round(norm * 100, 2)
 def nl2sql_with_rerank(question: str, conn_id: str) -> Dict[str, Any]:
     if conn_id not in sql_manager.connections:
+        raise HTTPException(
+            status_code=404, detail=f"connection_id '{conn_id}' no registrado"
+        )
     meta = sql_manager.get_schema(conn_id)
     tables_info = meta["tables"]
     if t5_model is None:
         load_nl2sql_model()
+    inputs = t5_tokenizer(
+        [prompt], return_tensors="pt", truncation=True, max_length=768
+    ).to(DEVICE)
     num_beams = 6
     num_return = 6
     best_score = -1e9
     for i in range(sequences.size(0)):
+        raw_sql = t5_tokenizer.decode(
+            sequences[i], skip_special_tokens=True
+        ).strip()
         cand: Dict[str, Any] = {
             "sql": raw_sql,
             "score": float(scores[i]),
         exec_info = sql_manager.execute_sql(conn_id, raw_sql)
         err_lower = (exec_info["error"] or "").lower()
         if (not exec_info["ok"]) and (
             "no such table" in err_lower
                 if not repaired_sql or repaired_sql == current_sql:
                     break
                 exec_info2 = sql_manager.execute_sql(conn_id, repaired_sql)
+                cand["repaired_from"] = (
+                    current_sql if cand["repaired_from"] is None else cand["repaired_from"]
+                )
                 cand["repair_note"] = f"auto-repair (table/column name, step {step})"
                 cand["sql"] = repaired_sql
                 exec_info = exec_info2
 class UploadResponse(BaseModel):
     connection_id: str
     label: str
+    db_path: str
     note: Optional[str] = None
     connection_id: str
     label: str
     engine: Optional[str] = None
+    db_name: Optional[str] = None  # ya no usamos archivo, pero mantenemos campo
 class SchemaResponse(BaseModel):
 # Application startup hook: loads the NL2SQL model up front and prints the
 # model location, the device and the number of registered connections.
 @app.on_event("startup")
 async def startup_event():
     # Load the model at startup rather than on the first request.
     load_nl2sql_model()
+    print("✅ Backend NL2SQL inicializado (engine Postgres/Neon).")
     print(f"MODEL_DIR={MODEL_DIR}, DEVICE={DEVICE}")
     print(f"Conexiones activas al inicio: {len(sql_manager.connections)}")
 # POST /upload: receives an uploaded database dump, creates a database for it
 # through sql_manager and records its metadata in the Supabase "databases"
 # table.
 # NOTE(review): this view omits the token lookup, the per-extension parsing
 # branches and the body of the except clause.
 @app.post("/upload", response_model=UploadResponse)
 async def upload_database(
     db_file: UploadFile = File(...),
+    authorization: Optional[str] = Header(None),
 ):
     # Requests without an Authorization header are rejected with 401.
     if authorization is None:
         raise HTTPException(401, "Missing Authorization header")
     # `user` is resolved in lines not visible here — presumably via
     # supabase.auth.get_user; confirm.
     if not user or not user.user:
         raise HTTPException(401, "Invalid Supabase token")
     # Fall back to an empty name when the client sent no filename.
+    filename = db_file.filename or ""
     fname_lower = filename.lower()
     # Read the whole upload into memory.
     contents = await db_file.read()
     else:
         raise HTTPException(400, "Formato no soportado. Usa .sql o .zip.")
+    # --- create the dynamic schema in Postgres ---
     try:
         conn_id = sql_manager.create_database_from_dump(label=filename, sql_text=sql_text)
     except Exception as e:
     # Metadata that sql_manager keeps for the newly created connection.
     meta = sql_manager.connections[conn_id]
+    # --- store the metadata in Supabase ---
+    supabase.table("databases").insert(
+        {
+            "user_id": user.user.id,
+            "filename": filename,
+            "engine": meta["engine"],
+            "connection_id": conn_id,
+        }
+    ).execute()
     # db_path is built from the engine and schema names; it is not a file path.
     return UploadResponse(
         connection_id=conn_id,
         label=filename,
+        db_path=f"{meta['engine']}://schema/{meta['schema']}",
+        note="Database schema created in Neon and indexed in Supabase.",
     )
             connection_id=cid,
             label=meta.get("label", ""),
             engine=meta.get("engine"),
+            db_name=meta.get("schema"),  # usamos schema como "nombre"
         )
         for cid, meta in sql_manager.connections.items()
     ]
     try:
         preview = sql_manager.get_preview(connection_id, table, limit)
     except Exception as e:
+        raise HTTPException(
+            status_code=400, detail=f"Error al leer tabla '{table}': {e}"
+        )
     return PreviewResponse(
         connection_id=connection_id,
 # POST /infer: runs NL->SQL for the requested connection, stores the outcome
 # in the Supabase "queries" table and returns the result with a percentage
 # score attached.
 # NOTE(review): the lines that resolve `user` from the Authorization header
 # are not visible in this view.
 @app.post("/infer", response_model=InferResponse)
 async def infer_sql(
     req: InferRequest,
+    authorization: Optional[str] = Header(None),
 ):
     # Requests without an Authorization header are rejected with 401.
     if authorization is None:
         raise HTTPException(401, "Missing Authorization header")
     result = nl2sql_with_rerank(req.question, req.connection_id)
     # NOTE(review): the score is read from candidates[0] while the stored SQL
     # comes from result["best_sql"] — confirm candidates[0] is the best one.
     score = normalize_score(result["candidates"][0]["score"])
     # Look up the "databases" row for this connection and user.
+    db_row = (
+        supabase.table("databases")
+        .select("id")
+        .eq("connection_id", req.connection_id)
+        .eq("user_id", user.user.id)
         .execute()
+    )
     # db_id stays None when no matching row exists; the query is logged anyway.
     db_id = db_row.data[0]["id"] if db_row.data else None
     # Persist the question, generated SQL, execution outcome and score.
+    supabase.table("queries").insert(
+        {
+            "user_id": user.user.id,
+            "db_id": db_id,
+            "nl": result["question_original"],
+            "sql_generated": result["best_sql"],
+            "sql_repaired": result["candidates"][0]["sql"],
+            "execution_ok": result["best_exec_ok"],
+            "error": result["best_exec_error"],
+            "rows_preview": result["best_rows_preview"],
+            "score": score,
+        }
+    ).execute()
     # Attach the normalized score before building the response model.
     result["score_percent"] = score
     return InferResponse(**result)
 @app.post("/speech-infer", response_model=SpeechInferResponse)
 async def speech_infer(
     connection_id: str = Form(...),
+    audio: UploadFile = File(...),
 ):
     if openai_client is None:
         raise HTTPException(
             status_code=500,
+            detail="OPENAI_API_KEY no está configurado en el backend.",
         )
     if audio.content_type is None:
             tmp.write(await audio.read())
             tmp_path = tmp.name
     except Exception:
+        raise HTTPException(
+            status_code=500, detail="No se pudo procesar el audio recibido."
+        )
     try:
         with open(tmp_path, "rb") as f:
         "model_loaded": t5_model is not None,
         "connections": len(sql_manager.connections),
         "device": str(DEVICE),
+        "engine": "postgres",
     }
 # GET /history: returns the rows of the Supabase "queries" table that belong
 # to the user identified by the Authorization header, newest first.
 # NOTE(review): the body of the `authorization is None` guard is not visible
 # in this view.
 @app.get("/history")
 def get_history(authorization: Optional[str] = Header(None)):
     if authorization is None:
     # Drop the "Bearer " marker to obtain the bare token.
     # NOTE(review): str.replace removes every occurrence, not only a prefix.
     jwt = authorization.replace("Bearer ", "")
     user = supabase.auth.get_user(jwt)
     # NOTE(review): user.user is dereferenced without the
     # `not user or not user.user` check used by /upload — confirm that an
     # invalid token cannot reach this point.
+    rows = (
+        supabase.table("queries")
+        .select("*")
+        .eq("user_id", user.user.id)
+        .order("created_at", desc=True)
         .execute()
+    )
     return rows.data
     jwt = authorization.replace("Bearer ", "")
     user = supabase.auth.get_user(jwt)
+    rows = (
+        supabase.table("databases")
+        .select("*")
+        .eq("user_id", user.user.id)
         .execute()
+    )
     return rows.data
 @app.get("/")
 async def root():
     return {
+        "message": "NL2SQL T5-large backend running on Postgres/Neon (no SQLite).",
         "endpoints": [
+            "POST /upload            (subir .sql o .zip con .sql → crea schema en Neon)",
+            "GET  /connections       (listar BDs subidas en esta instancia)",
             "GET  /schema/{id}       (esquema resumido)",
             "GET  /preview/{id}/{t}  (preview de tabla)",
             "POST /infer             (NL→SQL + ejecución en BD)",
+            "POST /speech-infer      (voz → NL→SQL + ejecución)",
+            "GET  /history           (historial de consultas en Supabase)",
+            "GET  /my-databases      (BDs del usuario en Supabase)",
             "GET  /health            (estado del backend)",
             "GET  /docs              (OpenAPI UI)",
         ],