stvnnnnnn commited on
Commit
468cfed
·
verified ·
1 Parent(s): 13233ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +350 -460
app.py CHANGED
@@ -1,13 +1,10 @@
1
  import os
2
  import uuid
3
- import sqlite3
4
  import io
5
- import csv
6
  import zipfile
7
  import re
8
  import difflib
9
  import tempfile
10
- import shutil
11
  from typing import List, Optional, Dict, Any
12
 
13
  from fastapi import FastAPI, UploadFile, File, HTTPException, Form
@@ -20,6 +17,9 @@ from langdetect import detect
20
  from transformers import MarianMTModel, MarianTokenizer
21
  from openai import OpenAI
22
 
 
 
 
23
  # ======================================================
24
  # 0) Configuración general
25
  # ======================================================
@@ -28,12 +28,24 @@ from openai import OpenAI
28
  MODEL_DIR = os.getenv("MODEL_DIR", "stvnnnnnn/t5-large-nl2sql-spider")
29
  DEVICE = torch.device("cpu") # inferencia en CPU
30
 
31
- # Directorio donde se guardan las BDs convertidas a SQLite
32
- UPLOAD_DIR = os.getenv("UPLOAD_DIR", "uploaded_dbs")
33
- os.makedirs(UPLOAD_DIR, exist_ok=True)
34
-
35
- # Registro en memoria de conexiones (todas terminan siendo SQLite)
36
- # { conn_id: { "db_path": str, "label": str } }
 
 
 
 
 
 
 
 
 
 
 
 
37
  DB_REGISTRY: Dict[str, Dict[str, Any]] = {}
38
 
39
  # Cliente OpenAI para transcripción de audio (Whisper / gpt-4o-transcribe)
@@ -47,13 +59,13 @@ openai_client = OpenAI(api_key=OPENAI_API_KEY) if OPENAI_API_KEY else None
47
  # ======================================================
48
 
49
  app = FastAPI(
50
- title="NL2SQL T5-large Backend Universal (single-file)",
51
  description=(
52
  "Intérprete NL→SQL (T5-large Spider) para usuarios no expertos. "
53
- "El usuario solo sube su BD (SQLite / dump .sql / CSV / ZIP de datos) "
54
- "y todo se convierte internamente a SQLite."
55
  ),
56
- version="1.0.0",
57
  )
58
 
59
  app.add_middleware(
@@ -125,342 +137,271 @@ def translate_es_to_en(text: str) -> str:
125
 
126
 
127
  # ======================================================
128
- # 3) Utilidades de BDs: creación/ingesta a SQLite
129
  # ======================================================
130
 
131
- def _sanitize_identifier(name: str) -> str:
132
- """Hace un nombre de tabla/columna seguro para SQLite."""
133
- base = name.strip().replace(" ", "_")
134
- base = re.sub(r"[^0-9a-zA-Z_]", "_", base)
135
- if not base:
136
- base = "table"
137
- if base[0].isdigit():
138
- base = "_" + base
139
- return base
140
 
141
 
142
- def create_empty_sqlite_db(label: str) -> str:
143
- """Crea un archivo .sqlite vacío y lo devuelve."""
144
- conn_id = f"db_{uuid.uuid4().hex[:8]}"
145
- db_filename = f"{conn_id}.sqlite"
146
- db_path = os.path.join(UPLOAD_DIR, db_filename)
147
- conn = sqlite3.connect(db_path)
148
- conn.close()
149
- DB_REGISTRY[conn_id] = {"db_path": db_path, "label": label}
150
- return conn_id
 
151
 
152
 
153
- def import_sql_dump_to_sqlite(db_path: str, sql_text: str) -> None:
154
  """
155
- Convertidor avanzado MySQL → SQLite.
156
- Limpia, reordena y ejecuta el schema de forma segura en SQLite.
157
- Si al final no se crea ninguna tabla, lanza ValueError.
158
  """
 
 
 
 
 
 
159
 
160
- # ======================================================
161
- # 1) Limpieza inicial del dump
162
- # ======================================================
163
-
164
- # Remover comentarios estilo MySQL/PostgreSQL
165
- sql_text = re.sub(r"/\*![\s\S]*?\*/;", "", sql_text)
166
- sql_text = re.sub(r"/\*[\s\S]*?\*/", "", sql_text)
167
- sql_text = re.sub(r"--.*?\n", "", sql_text)
168
-
169
- # Remover DELIMITER y statements SET típicos
170
- sql_text = re.sub(r"DELIMITER\s+.+", "", sql_text)
171
- sql_text = re.sub(r"SET\s+[^;]+;", "", sql_text)
172
-
173
- # Quitar ENGINE, ROW_FORMAT, AUTO_INCREMENT
174
- sql_text = re.sub(r"ENGINE=\w+", "", sql_text)
175
- sql_text = re.sub(r"ROW_FORMAT=\w+", "", sql_text)
176
- sql_text = re.sub(r"AUTO_INCREMENT=\d+", "", sql_text)
177
-
178
- # Quitar COLLATE y CHARSET
179
- sql_text = re.sub(r"DEFAULT CHARSET=\w+", "", sql_text)
180
- sql_text = re.sub(r"CHARACTER SET \w+", "", sql_text)
181
- sql_text = re.sub(r"COLLATE \w+", "", sql_text)
182
-
183
- # Quitar dominios y tipos personalizados (PostgreSQL)
184
- sql_text = re.sub(r"CREATE\s+DOMAIN[\s\S]+?;", "", sql_text, flags=re.IGNORECASE)
185
- sql_text = re.sub(r"CREATE\s+TYPE[\s\S]+?;", "", sql_text, flags=re.IGNORECASE)
186
-
187
- # Reemplazar backticks por nada (MySQL → SQLite)
188
- sql_text = sql_text.replace("`", "")
189
 
190
- # ======================================================
191
- # 2) Dividir en statements individuales
192
- # ======================================================
193
- raw_statements = sql_text.split(";")
194
-
195
- # Tablas para ejecutar CREATE TABLE sin foreign keys primero
196
- create_tables = []
197
- foreign_keys = []
198
- inserts = []
199
- others = []
200
-
201
- for st in raw_statements:
202
- stmt = st.strip()
203
- if not stmt:
204
- continue
205
-
206
- upper = stmt.upper()
207
-
208
- if upper.startswith("CREATE TABLE"):
209
- # separar claves foráneas
210
- if "FOREIGN KEY" in upper:
211
- fixed = []
212
- fk_lines = []
213
-
214
- for line in stmt.split("\n"):
215
- if "FOREIGN KEY" in line.upper():
216
- fk_lines.append(line.strip().rstrip(","))
217
- else:
218
- fixed.append(line)
219
-
220
- table_sql = "\n".join(fixed)
221
- create_tables.append(table_sql)
222
- foreign_keys.append((extract_table_name(stmt), fk_lines))
223
-
224
- else:
225
- create_tables.append(stmt)
226
-
227
- elif upper.startswith("INSERT INTO"):
228
- inserts.append(stmt)
229
 
230
- else:
231
- others.append(stmt)
232
-
233
- # ======================================================
234
- # 3) Convertir tipos MySQL → SQLite
235
- # ======================================================
236
-
237
- def convert_types(sql: str) -> str:
238
- sql = re.sub(r"\bTINYINT\(1\)\b", "INTEGER", sql)
239
- sql = re.sub(r"\bINT\b|\bINTEGER\b", "INTEGER", sql)
240
- sql = re.sub(r"\bBIGINT\b", "INTEGER", sql)
241
- sql = re.sub(r"\bDECIMAL\([0-9,]+\)", "REAL", sql)
242
- sql = re.sub(r"\bDOUBLE\b|\bFLOAT\b", "REAL", sql)
243
- sql = re.sub(r"\bDATETIME\b|\bTIMESTAMP\b", "TEXT", sql)
244
- sql = re.sub(r"\bVARCHAR\([0-9]+\)", "TEXT", sql)
245
- sql = re.sub(r"\bCHAR\([0-9]+\)", "TEXT", sql)
246
- sql = re.sub(r"\bTEXT\b", "TEXT", sql)
247
- sql = re.sub(r"\bUNSIGNED\b", "", sql)
248
- return sql
249
-
250
- create_tables = [convert_types(c) for c in create_tables]
251
- inserts = [convert_types(i) for i in inserts]
252
-
253
- # ======================================================
254
- # 4) Ejecutar en orden
255
- # ======================================================
256
- conn = sqlite3.connect(db_path)
257
- cur = conn.cursor()
258
-
259
- cur.execute("PRAGMA foreign_keys = OFF;")
260
-
261
- for ct in create_tables:
262
  try:
263
- cur.executescript(ct + ";")
264
- except Exception as e:
265
- print("Error CREATE TABLE:", e)
266
- print("SQL:", ct)
267
-
268
- for ins in inserts:
 
 
 
 
 
269
  try:
270
- cur.executescript(ins + ";")
271
- except Exception as e:
272
- print("Error INSERT:", e)
273
- print("SQL:", ins)
274
-
275
- # ======================================================
276
- # 5) Reconstruir claves foráneas manualmente
277
- # ======================================================
278
-
279
- for table, fks in foreign_keys:
280
- for fk in fks:
281
- try:
282
- add_foreign_key_sqlite(conn, table, fk)
283
- except Exception as e:
284
- print("Error agregando FK:", e, " → ", fk)
285
-
286
- cur.execute("PRAGMA foreign_keys = ON;")
287
- conn.commit()
288
-
289
- # ======================================================
290
- # 6) Validar que realmente se crearon tablas
291
- # ======================================================
292
- cur.execute("SELECT COUNT(*) FROM sqlite_master WHERE type='table';")
293
- n_tables = cur.fetchone()[0]
294
- conn.close()
295
-
296
- if n_tables == 0:
297
- raise ValueError(
298
- "No se creó ninguna tabla desde el dump SQL. "
299
- "Probablemente es un dump de otro motor (por ejemplo PostgreSQL) "
300
- "o usa sintaxis no compatible con SQLite."
301
- )
302
-
303
 
304
- def extract_table_name(create_stmt: str) -> str:
305
- m = re.search(r"CREATE TABLE\s+(\w+)", create_stmt, re.IGNORECASE)
306
- return m.group(1) if m else "unknown"
307
 
 
308
 
309
- def add_foreign_key_sqlite(conn, table: str, fk_line: str):
310
- """
311
- Reconstrucción automática:
312
- - Lee schema actual
313
- - Añade FK en nueva versión
314
- - Copia datos
315
- """
316
- cur = conn.cursor()
317
 
318
- cur.execute(f"SELECT sql FROM sqlite_master WHERE type='table' AND name='{table}';")
319
- result = cur.fetchone()
320
- if not result:
321
- return
322
 
323
- original_sql = result[0]
324
- new_sql = original_sql.rstrip(")") + f", {fk_line} )"
325
 
326
- cur.execute(f"ALTER TABLE {table} RENAME TO _old_{table};")
327
- cur.execute(new_sql)
328
- cur.execute(f"INSERT INTO {table} SELECT * FROM _old_{table};")
329
- cur.execute(f"DROP TABLE _old_{table};")
330
 
331
- conn.commit()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
332
 
333
 
334
- def import_csv_to_sqlite(db_path: str, csv_bytes: bytes, table_name: str) -> None:
335
- """
336
- Crea una tabla en SQLite con columnas TEXT y carga datos desde un CSV.
337
- """
338
- table = _sanitize_identifier(table_name or "data")
339
- conn = sqlite3.connect(db_path)
340
  try:
341
- f = io.StringIO(csv_bytes.decode("utf-8", errors="ignore"))
342
- reader = csv.reader(f)
343
- rows = list(reader)
344
-
345
- if not rows:
346
- return
347
-
348
- header = rows[0]
349
- cols = [_sanitize_identifier(c or f"col_{i}") for i, c in enumerate(header)]
350
-
351
- col_defs = ", ".join(f'"{c}" TEXT' for c in cols)
352
- conn.execute(f'CREATE TABLE IF NOT EXISTS "{table}" ({col_defs});')
353
-
354
- placeholders = ", ".join(["?"] * len(cols))
355
- for row in rows[1:]:
356
- row = list(row) + [""] * (len(cols) - len(row))
357
- row = row[:len(cols)]
358
- conn.execute(
359
- f'INSERT INTO "{table}" ({", ".join(cols)}) VALUES ({placeholders})',
360
- row,
361
- )
362
-
363
  conn.commit()
 
364
  finally:
365
  conn.close()
366
 
367
 
368
- def import_zip_of_csvs_to_sqlite(db_path: str, zip_bytes: bytes) -> None:
369
  """
370
- Para un ZIP con múltiples CSV: cada CSV se vuelve una tabla.
371
- (Se mantiene por compatibilidad, aunque ahora manejamos ZIPs
372
- más generales en /upload.)
373
  """
374
- conn = sqlite3.connect(db_path)
375
- conn.close()
376
 
377
- with zipfile.ZipFile(io.BytesIO(zip_bytes)) as zf:
378
- for name in zf.namelist():
379
- if not name.lower().endswith(".csv"):
380
- continue
381
- with zf.open(name) as f:
382
- csv_bytes = f.read()
383
- base_name = os.path.basename(name)
384
- table_name = os.path.splitext(base_name)[0]
385
- import_csv_to_sqlite(db_path, csv_bytes, table_name)
386
 
 
 
387
 
388
- # ======================================================
389
- # 4) Introspección de esquema y ejecución (sobre SQLite)
390
- # ======================================================
 
391
 
392
- def introspect_sqlite_schema(db_path: str) -> Dict[str, Any]:
393
- if not os.path.exists(db_path):
394
- raise FileNotFoundError(f"SQLite no encontrado: {db_path}")
395
 
396
- conn = sqlite3.connect(db_path)
397
- cur = conn.cursor()
398
 
399
- cur.execute("SELECT name FROM sqlite_master WHERE type='table';")
400
- tables = [row[0] for row in cur.fetchall()]
 
401
 
402
- tables_info = {}
403
- foreign_keys = []
404
- parts = []
 
405
 
406
- for t in tables:
407
- cur.execute(f"PRAGMA table_info('{t}');")
408
- rows = cur.fetchall()
409
- cols = [r[1] for r in rows]
410
- tables_info[t] = {"columns": cols}
411
 
412
- cur.execute(f"PRAGMA foreign_key_list('{t}');")
413
- fks = cur.fetchall()
414
- for (id, seq, table, from_col, to_col, on_update, on_delete, match) in fks:
415
- foreign_keys.append({
416
- "from_table": t,
417
- "from_column": from_col,
418
- "to_table": table,
419
- "to_column": to_col
420
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
421
 
422
- parts.append(f"{t}(" + ", ".join(cols) + ")")
 
423
 
424
- conn.close()
425
  schema_str = " ; ".join(parts) if parts else "(empty_schema)"
426
 
427
  return {
428
  "tables": tables_info,
429
- "foreign_keys": foreign_keys,
430
- "schema_str": schema_str
431
  }
432
 
433
 
434
- def execute_sqlite(db_path: str, sql: str) -> Dict[str, Any]:
435
- forbidden = ["drop ", "delete ", "update ", "insert ", "alter ", "replace "]
 
 
 
 
 
 
 
 
436
  sql_low = sql.lower()
437
  if any(f in sql_low for f in forbidden):
438
  return {
439
  "ok": False,
440
- "error": "Query bloqueada por seguridad (operación destructiva).",
441
  "rows": None,
442
- "columns": []
443
  }
444
 
445
  try:
446
- conn = sqlite3.connect(db_path)
447
- cur = conn.cursor()
448
- cur.execute(sql)
449
- rows = cur.fetchall()
450
- col_names = [desc[0] for desc in cur.description] if cur.description else []
451
- conn.close()
452
- return {"ok": True, "error": None, "rows": rows, "columns": col_names}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
453
  except Exception as e:
454
  return {"ok": False, "error": str(e), "rows": None, "columns": []}
455
 
456
 
457
  # ======================================================
458
- # 4.1) SQL REPAIR LAYER (avanzado)
459
  # ======================================================
460
 
461
  def _normalize_name_for_match(name: str) -> str:
462
  s = name.lower()
463
- s = s.replace('"', '').replace("`", "")
464
  s = s.replace("_", "")
465
  if s.endswith("s") and len(s) > 3:
466
  s = s[:-1]
@@ -489,11 +430,9 @@ def _build_schema_indexes(tables_info: Dict[str, Dict[str, List[str]]]) -> Dict[
489
  def _best_match_name(missing: str, index: Dict[str, List[str]]) -> Optional[str]:
490
  if not index:
491
  return None
492
-
493
  key = _normalize_name_for_match(missing)
494
  if key in index and index[key]:
495
  return index[key][0]
496
-
497
  candidates = difflib.get_close_matches(key, list(index.keys()), n=1, cutoff=0.7)
498
  if not candidates:
499
  return None
@@ -537,11 +476,11 @@ def try_repair_sql(sql: str, error: str, schema_meta: Dict[str, Any]) -> Optiona
537
  missing_table = None
538
  missing_column = None
539
 
540
- m_t = re.search(r"no such table: ([\w\.]+)", error)
541
  if m_t:
542
  missing_table = m_t.group(1)
543
 
544
- m_c = re.search(r"no such column: ([\w\.]+)", error)
545
  if m_c:
546
  missing_column = m_c.group(1)
547
 
@@ -583,7 +522,7 @@ def try_repair_sql(sql: str, error: str, schema_meta: Dict[str, Any]) -> Optiona
583
 
584
 
585
  # ======================================================
586
- # 5) Construcción de prompt y NL→SQL + re-ranking
587
  # ======================================================
588
 
589
  def build_prompt(question_en: str, db_id: str, schema_str: str) -> str:
@@ -595,11 +534,8 @@ def build_prompt(question_en: str, db_id: str, schema_str: str) -> str:
595
 
596
 
597
  def nl2sql_with_rerank(question: str, conn_id: str) -> Dict[str, Any]:
598
- if conn_id not in DB_REGISTRY:
599
- raise HTTPException(status_code=404, detail=f"connection_id '{conn_id}' no registrado")
600
-
601
- db_path = DB_REGISTRY[conn_id]["db_path"]
602
- meta = introspect_sqlite_schema(db_path)
603
  schema_str = meta["schema_str"]
604
 
605
  detected = detect_language(question)
@@ -646,19 +582,19 @@ def nl2sql_with_rerank(question: str, conn_id: str) -> Dict[str, Any]:
646
  "raw_sql_model": raw_sql,
647
  }
648
 
649
- exec_info = execute_sqlite(db_path, raw_sql)
650
 
651
  if (not exec_info["ok"]) and (
652
- "no such table" in (exec_info["error"] or "")
653
- or "no such column" in (exec_info["error"] or "")
654
  ):
655
  current_sql = raw_sql
656
  last_error = exec_info["error"]
657
  for step in range(1, 4):
658
- repaired_sql = try_repair_sql(current_sql, last_error, meta)
659
  if not repaired_sql or repaired_sql == current_sql:
660
  break
661
- exec_info2 = execute_sqlite(db_path, repaired_sql)
662
  cand["repaired_from"] = current_sql if cand["repaired_from"] is None else cand["repaired_from"]
663
  cand["repair_note"] = f"auto-repair (table/column name, step {step})"
664
  cand["sql"] = repaired_sql
@@ -705,29 +641,33 @@ def nl2sql_with_rerank(question: str, conn_id: str) -> Dict[str, Any]:
705
 
706
 
707
  # ======================================================
708
- # 6) Schemas Pydantic
709
  # ======================================================
710
 
711
  class UploadResponse(BaseModel):
712
  connection_id: str
713
  label: str
714
- db_path: str
 
715
  note: Optional[str] = None
716
 
717
 
718
  class ConnectionInfo(BaseModel):
719
  connection_id: str
720
  label: str
 
721
 
722
 
723
  class SchemaResponse(BaseModel):
724
  connection_id: str
 
725
  schema_summary: str
726
  tables: Dict[str, Dict[str, List[str]]]
727
 
728
 
729
  class PreviewResponse(BaseModel):
730
  connection_id: str
 
731
  table: str
732
  columns: List[str]
733
  rows: List[List[Any]]
@@ -758,24 +698,27 @@ class SpeechInferResponse(BaseModel):
758
 
759
 
760
  # ======================================================
761
- # 7) Endpoints FastAPI
762
  # ======================================================
763
 
764
  @app.on_event("startup")
765
  async def startup_event():
766
  load_nl2sql_model()
767
- print(f"✅ Backend NL2SQL inicializado. MODEL_DIR={MODEL_DIR}, UPLOAD_DIR={UPLOAD_DIR}")
768
 
769
 
770
- @app.post("/upload", response_model=UploadResponse)
771
- async def upload_database(db_file: UploadFile = File(...)):
 
 
 
 
772
  """
773
- Subida universal de BD.
774
- El usuario puede subir:
775
- - .sqlite / .db se usa tal cual
776
- - .sql dump MySQL/SQLite (best effort convertido a SQLite)
777
- - .csv se crea una BD SQLite y una tabla
778
- - .zip → puede contener .sqlite/.db, .sql o .csv (se detecta automáticamente)
779
  """
780
  filename = db_file.filename
781
  if not filename:
@@ -784,168 +727,98 @@ async def upload_database(db_file: UploadFile = File(...)):
784
  fname_lower = filename.lower()
785
  contents = await db_file.read()
786
 
787
- note: Optional[str] = None
788
- conn_id: Optional[str] = None
789
-
790
- # Caso 1: SQLite nativa
791
- if fname_lower.endswith(".sqlite") or fname_lower.endswith(".db"):
792
- conn_id = f"db_{uuid.uuid4().hex[:8]}"
793
- dst_path = os.path.join(UPLOAD_DIR, f"{conn_id}.sqlite")
794
- with open(dst_path, "wb") as f:
795
- f.write(contents)
796
- DB_REGISTRY[conn_id] = {"db_path": dst_path, "label": filename}
797
- note = "SQLite file stored as-is."
798
-
799
- # Caso 2: dump .sql
800
- elif fname_lower.endswith(".sql"):
801
- conn_id = create_empty_sqlite_db(label=filename)
802
- db_path = DB_REGISTRY[conn_id]["db_path"]
803
- sql_text = contents.decode("utf-8", errors="ignore")
804
 
805
- try:
806
- import_sql_dump_to_sqlite(db_path, sql_text)
807
- note = "SQL dump imported into SQLite (best effort)."
808
- except Exception as e:
809
- # Limpiar registro porque la BD quedó inútil
810
- try:
811
- if os.path.exists(db_path):
812
- os.remove(db_path)
813
- except Exception:
814
- pass
815
- DB_REGISTRY.pop(conn_id, None)
816
 
 
 
 
 
 
 
817
  raise HTTPException(
818
  status_code=400,
819
  detail=(
820
- "No se pudo convertir este archivo .sql a una base de datos SQLite utilizable. "
821
- "Es probable que sea un dump de otro motor (por ejemplo PostgreSQL) "
822
- "o que contenga sintaxis avanzada no compatible con SQLite.\n"
823
- f"Detalle técnico: {e}"
824
  ),
825
  )
826
 
827
- # Caso 3: CSV simple
828
- elif fname_lower.endswith(".csv"):
829
- conn_id = create_empty_sqlite_db(label=filename)
830
- db_path = DB_REGISTRY[conn_id]["db_path"]
831
- table_name = os.path.splitext(os.path.basename(filename))[0]
832
- import_csv_to_sqlite(db_path, contents, table_name)
833
- note = "CSV imported into a single SQLite table."
834
-
835
- # Caso 4: ZIP universal
836
- elif fname_lower.endswith(".zip"):
837
- try:
838
- with zipfile.ZipFile(io.BytesIO(contents)) as zf:
839
- names = [info.filename for info in zf.infolist() if not info.is_dir()]
840
-
841
- sqlite_names = [n for n in names if n.lower().endswith((".sqlite", ".db"))]
842
- sql_names = [n for n in names if n.lower().endswith(".sql")]
843
- csv_names = [n for n in names if n.lower().endswith(".csv")]
844
-
845
- # 4.1: si el ZIP trae una BD SQLite nativa
846
- if sqlite_names:
847
- inner = sqlite_names[0]
848
- conn_id = f"db_{uuid.uuid4().hex[:8]}"
849
- dst_path = os.path.join(UPLOAD_DIR, f"{conn_id}.sqlite")
850
- with zf.open(inner) as src, open(dst_path, "wb") as dst:
851
- shutil.copyfileobj(src, dst)
852
- DB_REGISTRY[conn_id] = {
853
- "db_path": dst_path,
854
- "label": f"{filename}::{os.path.basename(inner)}",
855
- }
856
- note = "SQLite database extracted from ZIP."
857
-
858
- # 4.2: dumps SQL (uno o varios)
859
- elif sql_names:
860
- conn_id = create_empty_sqlite_db(label=filename)
861
- db_path = DB_REGISTRY[conn_id]["db_path"]
862
-
863
- if len(sql_names) == 1:
864
- with zf.open(sql_names[0]) as f:
865
- sql_text = f.read().decode("utf-8", errors="ignore")
866
- else:
867
- parts = []
868
- for n in sorted(sql_names):
869
- with zf.open(n) as f:
870
- parts.append(f"-- FILE: {n}\n")
871
- parts.append(f.read().decode("utf-8", errors="ignore"))
872
- sql_text = "\n\n".join(parts)
873
-
874
- try:
875
- import_sql_dump_to_sqlite(db_path, sql_text)
876
- note = "SQL dump(s) from ZIP imported into SQLite."
877
- except Exception as e:
878
- try:
879
- if os.path.exists(db_path):
880
- os.remove(db_path)
881
- except Exception:
882
- pass
883
- DB_REGISTRY.pop(conn_id, None)
884
-
885
- raise HTTPException(
886
- status_code=400,
887
- detail=(
888
- "No se pudo convertir los archivos .sql dentro del ZIP "
889
- "a una base de datos SQLite utilizable. "
890
- "Es probable que sean dumps de otro motor (por ejemplo PostgreSQL) "
891
- "o que contengan sintaxis avanzada no compatible con SQLite.\n"
892
- f"Detalle técnico: {e}"
893
- ),
894
- )
895
-
896
- # 4.3: solo CSVs
897
- elif csv_names:
898
- conn_id = create_empty_sqlite_db(label=filename)
899
- db_path = DB_REGISTRY[conn_id]["db_path"]
900
-
901
- for name in csv_names:
902
- with zf.open(name) as f:
903
- csv_bytes = f.read()
904
- table_name = os.path.splitext(os.path.basename(name))[0]
905
- import_csv_to_sqlite(db_path, csv_bytes, table_name)
906
-
907
- note = "CSV files from ZIP imported into SQLite (one table per CSV)."
908
-
909
- else:
910
- raise HTTPException(
911
- status_code=400,
912
- detail="El ZIP no contiene archivos .sqlite/.db/.sql/.csv utilizables.",
913
- )
914
-
915
- except zipfile.BadZipFile:
916
- raise HTTPException(status_code=400, detail="Archivo ZIP inválido o corrupto.")
917
 
918
- else:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
919
  raise HTTPException(
920
  status_code=400,
921
- detail="Formato no soportado. Usa: .sqlite, .db, .sql, .csv o .zip",
 
 
 
922
  )
923
 
 
 
 
924
  return UploadResponse(
925
  connection_id=conn_id,
926
- label=DB_REGISTRY[conn_id]["label"],
927
- db_path=DB_REGISTRY[conn_id]["db_path"],
 
928
  note=note,
929
  )
930
 
931
 
932
  @app.get("/connections", response_model=List[ConnectionInfo])
933
  async def list_connections():
934
- out = []
935
  for cid, info in DB_REGISTRY.items():
936
- out.append(ConnectionInfo(connection_id=cid, label=info["label"]))
937
  return out
938
 
939
 
940
  @app.get("/schema/{connection_id}", response_model=SchemaResponse)
941
  async def get_schema(connection_id: str):
942
- if connection_id not in DB_REGISTRY:
943
- raise HTTPException(status_code=404, detail="connection_id no encontrado")
944
-
945
- db_path = DB_REGISTRY[connection_id]["db_path"]
946
- meta = introspect_sqlite_schema(db_path)
947
  return SchemaResponse(
948
  connection_id=connection_id,
 
949
  schema_summary=meta["schema_str"],
950
  tables=meta["tables"],
951
  )
@@ -953,22 +826,38 @@ async def get_schema(connection_id: str):
953
 
954
  @app.get("/preview/{connection_id}/{table}", response_model=PreviewResponse)
955
  async def preview_table(connection_id: str, table: str, limit: int = 20):
956
- if connection_id not in DB_REGISTRY:
957
- raise HTTPException(status_code=404, detail="connection_id no encontrado")
 
958
 
959
- db_path = DB_REGISTRY[connection_id]["db_path"]
960
  try:
961
- conn = sqlite3.connect(db_path)
962
- cur = conn.cursor()
963
- cur.execute(f'SELECT * FROM "{table}" LIMIT {int(limit)};')
964
- rows = cur.fetchall()
965
- cols = [d[0] for d in cur.description] if cur.description else []
966
- conn.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
967
  except Exception as e:
968
  raise HTTPException(status_code=400, detail=f"Error al leer tabla '{table}': {e}")
969
 
970
  return PreviewResponse(
971
  connection_id=connection_id,
 
972
  table=table,
973
  columns=cols,
974
  rows=[list(r) for r in rows],
@@ -984,12 +873,12 @@ async def infer_sql(req: InferRequest):
984
  @app.post("/speech-infer", response_model=SpeechInferResponse)
985
  async def speech_infer(
986
  connection_id: str = Form(...),
987
- audio: UploadFile = File(...)
988
  ):
989
  if openai_client is None:
990
  raise HTTPException(
991
  status_code=500,
992
- detail="OPENAI_API_KEY no está configurado en el backend."
993
  )
994
 
995
  if audio.content_type is None:
@@ -1027,6 +916,7 @@ async def health():
1027
  "status": "ok",
1028
  "model_loaded": t5_model is not None,
1029
  "connections": len(DB_REGISTRY),
 
1030
  "device": str(DEVICE),
1031
  }
1032
 
@@ -1034,13 +924,13 @@ async def health():
1034
  @app.get("/")
1035
  async def root():
1036
  return {
1037
- "message": "NL2SQL T5-large universal backend is running (single-file SQLite engine).",
1038
  "endpoints": [
1039
- "POST /upload (subir .sqlite / .db / .sql / .csv / .zip)",
1040
- "GET /connections (listar BDs subidas)",
1041
- "GET /schema/{id} (esquema resumido)",
1042
  "GET /preview/{id}/{t} (preview de tabla)",
1043
- "POST /infer (NL→SQL + ejecución)",
1044
  "POST /speech-infer (NL por voz → SQL + ejecución)",
1045
  "GET /health (estado del backend)",
1046
  "GET /docs (OpenAPI UI)",
 
1
  import os
2
  import uuid
 
3
  import io
 
4
  import zipfile
5
  import re
6
  import difflib
7
  import tempfile
 
8
  from typing import List, Optional, Dict, Any
9
 
10
  from fastapi import FastAPI, UploadFile, File, HTTPException, Form
 
17
  from transformers import MarianMTModel, MarianTokenizer
18
  from openai import OpenAI
19
 
20
+ import psycopg2
21
+ import mysql.connector
22
+
23
  # ======================================================
24
  # 0) Configuración general
25
  # ======================================================
 
28
  MODEL_DIR = os.getenv("MODEL_DIR", "stvnnnnnn/t5-large-nl2sql-spider")
29
  DEVICE = torch.device("cpu") # inferencia en CPU
30
 
31
+ # === Motores reales: variables de entorno ===
32
+ # PostgreSQL: usaremos UN solo DB (POSTGRES_DB) y un schema por conexión lógica
33
+ POSTGRES_HOST = os.getenv("POSTGRES_HOST", "localhost")
34
+ POSTGRES_PORT = int(os.getenv("POSTGRES_PORT", "5432"))
35
+ POSTGRES_USER = os.getenv("POSTGRES_USER", "postgres")
36
+ POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD", "postgres")
37
+ POSTGRES_DB = os.getenv("POSTGRES_DB", "postgres")
38
+
39
+ # MySQL: crearemos una base de datos por conexión lógica
40
+ MYSQL_HOST = os.getenv("MYSQL_HOST", "localhost")
41
+ MYSQL_PORT = int(os.getenv("MYSQL_PORT", "3306"))
42
+ MYSQL_USER = os.getenv("MYSQL_USER", "root")
43
+ MYSQL_PASSWORD = os.getenv("MYSQL_PASSWORD", "root")
44
+
45
+ # Registro en memoria de conexiones:
46
+ # { conn_id: { "engine": "postgresql"|"mysql", "namespace": str, "label": str } }
47
+ # - engine = motor real
48
+ # - namespace = schema (Postgres) o database (MySQL) donde vive esa BD
49
  DB_REGISTRY: Dict[str, Dict[str, Any]] = {}
50
 
51
  # Cliente OpenAI para transcripción de audio (Whisper / gpt-4o-transcribe)
 
59
  # ======================================================
60
 
61
  app = FastAPI(
62
+ title="NL2SQL T5-large Backend (MySQL/PostgreSQL)",
63
  description=(
64
  "Intérprete NL→SQL (T5-large Spider) para usuarios no expertos. "
65
+ "El usuario sube dumps .sql / .zip y se cargan en motores reales "
66
+ "(MySQL/PostgreSQL)."
67
  ),
68
+ version="2.0.0",
69
  )
70
 
71
  app.add_middleware(
 
137
 
138
 
139
  # ======================================================
140
+ # 3) Conexiones a motores reales y helpers
141
  # ======================================================
142
 
143
+ def get_pg_conn():
144
+ return psycopg2.connect(
145
+ host=POSTGRES_HOST,
146
+ port=POSTGRES_PORT,
147
+ user=POSTGRES_USER,
148
+ password=POSTGRES_PASSWORD,
149
+ dbname=POSTGRES_DB,
150
+ )
 
151
 
152
 
153
+ def get_mysql_conn(db_name: Optional[str] = None):
154
+ params = dict(
155
+ host=MYSQL_HOST,
156
+ port=MYSQL_PORT,
157
+ user=MYSQL_USER,
158
+ password=MYSQL_PASSWORD,
159
+ )
160
+ if db_name:
161
+ params["database"] = db_name
162
+ return mysql.connector.connect(**params)
163
 
164
 
165
+ def detect_sql_dialect(sql_text: str) -> str:
166
  """
167
+ Heurística simple:
168
+ - Si ve ENGINE, AUTO_INCREMENT, backticks, etc. MySQL
169
+ - Si ve SERIAL, search_path, ::, PL/pgSQL, etc. → PostgreSQL
170
  """
171
+ text = sql_text.lower()
172
+ if any(kw in text for kw in ["engine=", "auto_increment", "unsigned", " collate ", " character set ", "`"]):
173
+ return "mysql"
174
+ if any(kw in text for kw in ["serial", " set search_path", "copy ", "::", "language plpgsql"]):
175
+ return "postgresql"
176
+ return "unknown"
177
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
+ def create_logical_db(engine: str, label: str) -> str:
180
+ """
181
+ Crea una "BD lógica":
182
+ - En PostgreSQL: un SCHEMA nuevo dentro de POSTGRES_DB
183
+ - En MySQL: una DATABASE nueva
184
+ """
185
+ conn_id = f"db_{uuid.uuid4().hex[:8]}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
 
187
+ if engine == "postgresql":
188
+ schema_name = conn_id
189
+ conn = get_pg_conn()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  try:
191
+ conn.autocommit = True
192
+ cur = conn.cursor()
193
+ cur.execute(f'CREATE SCHEMA IF NOT EXISTS "{schema_name}";')
194
+ cur.close()
195
+ finally:
196
+ conn.close()
197
+ DB_REGISTRY[conn_id] = {"engine": "postgresql", "namespace": schema_name, "label": label}
198
+
199
+ elif engine == "mysql":
200
+ db_name = conn_id
201
+ conn = get_mysql_conn()
202
  try:
203
+ cur = conn.cursor()
204
+ cur.execute(f"CREATE DATABASE IF NOT EXISTS `{db_name}`;")
205
+ conn.commit()
206
+ cur.close()
207
+ finally:
208
+ conn.close()
209
+ DB_REGISTRY[conn_id] = {"engine": "mysql", "namespace": db_name, "label": label}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
 
211
+ else:
212
+ raise ValueError(f"Engine no soportado: {engine}")
 
213
 
214
+ return conn_id
215
 
 
 
 
 
 
 
 
 
216
 
217
+ def ensure_connection(conn_id: str) -> Dict[str, Any]:
218
+ if conn_id not in DB_REGISTRY:
219
+ raise HTTPException(status_code=404, detail=f"connection_id '{conn_id}' no registrado")
220
+ return DB_REGISTRY[conn_id]
221
 
 
 
222
 
223
+ # ======================================================
224
+ # 4) Carga de scripts SQL (schema.sql / data.sql / ZIP)
225
+ # ======================================================
 
226
 
227
+ def _execute_sql_script_postgres(schema: str, sql_text: str) -> None:
228
+ conn = get_pg_conn()
229
+ try:
230
+ conn.autocommit = False
231
+ cur = conn.cursor()
232
+ # Trabajamos siempre dentro del schema lógico
233
+ cur.execute(f'SET search_path TO "{schema}";')
234
+ # Ejecución simple por ';' (suficiente para Sakila/Pagila)
235
+ parts = sql_text.split(";")
236
+ for stmt in parts:
237
+ s = stmt.strip()
238
+ if not s:
239
+ continue
240
+ cur.execute(s + ";")
241
+ conn.commit()
242
+ cur.close()
243
+ finally:
244
+ conn.close()
245
 
246
 
247
+ def _execute_sql_script_mysql(db_name: str, sql_text: str) -> None:
248
+ conn = get_mysql_conn()
 
 
 
 
249
  try:
250
+ cur = conn.cursor()
251
+ cur.execute(f"USE `{db_name}`;")
252
+ # mysql-connector permite multi=True
253
+ for _ in cur.execute(sql_text, multi=True):
254
+ pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  conn.commit()
256
+ cur.close()
257
  finally:
258
  conn.close()
259
 
260
 
261
+ def load_sql_into_connection(sql_text: str, engine: str, conn_id: Optional[str], label: str) -> str:
262
  """
263
+ - Si conn_id es None crea nueva BD lógica y carga el script.
264
+ - Si conn_id existe ejecuta el script encima (ej: primero schema.sql, luego data.sql).
 
265
  """
266
+ if not engine or engine not in ("mysql", "postgresql"):
267
+ raise HTTPException(status_code=400, detail="Engine inválido o no soportado (usa 'mysql' o 'postgresql').")
268
 
269
+ if conn_id is None:
270
+ conn_id = create_logical_db(engine, label)
271
+ else:
272
+ ensure_connection(conn_id)
 
 
 
 
 
273
 
274
+ info = DB_REGISTRY[conn_id]
275
+ namespace = info["namespace"]
276
 
277
+ if engine == "postgresql":
278
+ _execute_sql_script_postgres(namespace, sql_text)
279
+ else:
280
+ _execute_sql_script_mysql(namespace, sql_text)
281
 
282
+ return conn_id
 
 
283
 
 
 
284
 
285
+ # ======================================================
286
+ # 5) Introspección de esquema y ejecución (sobre motores reales)
287
+ # ======================================================
288
 
289
+ def introspect_schema(conn_id: str) -> Dict[str, Any]:
290
+ info = ensure_connection(conn_id)
291
+ engine = info["engine"]
292
+ ns = info["namespace"]
293
 
294
+ tables_info: Dict[str, Dict[str, List[str]]] = {}
295
+ parts: List[str] = []
 
 
 
296
 
297
+ if engine == "postgresql":
298
+ conn = get_pg_conn()
299
+ try:
300
+ cur = conn.cursor()
301
+ cur.execute(
302
+ """
303
+ SELECT table_name, column_name
304
+ FROM information_schema.columns
305
+ WHERE table_schema = %s
306
+ ORDER BY table_name, ordinal_position;
307
+ """,
308
+ (ns,),
309
+ )
310
+ rows = cur.fetchall()
311
+ cur.close()
312
+ finally:
313
+ conn.close()
314
+ for table, col in rows:
315
+ tables_info.setdefault(table, {"columns": []})
316
+ tables_info[table]["columns"].append(col)
317
+
318
+ else: # MySQL
319
+ conn = get_mysql_conn(ns)
320
+ try:
321
+ cur = conn.cursor()
322
+ cur.execute(
323
+ """
324
+ SELECT table_name, column_name
325
+ FROM information_schema.columns
326
+ WHERE table_schema = %s
327
+ ORDER BY table_name, ordinal_position;
328
+ """,
329
+ (ns,),
330
+ )
331
+ rows = cur.fetchall()
332
+ cur.close()
333
+ finally:
334
+ conn.close()
335
+ for table, col in rows:
336
+ tables_info.setdefault(table, {"columns": []})
337
+ tables_info[table]["columns"].append(col)
338
 
339
+ for t, info_t in tables_info.items():
340
+ parts.append(f"{t}(" + ", ".join(info_t["columns"]) + ")")
341
 
 
342
  schema_str = " ; ".join(parts) if parts else "(empty_schema)"
343
 
344
  return {
345
  "tables": tables_info,
346
+ "foreign_keys": [], # podrías enriquecer esto luego
347
+ "schema_str": schema_str,
348
  }
349
 
350
 
351
+ def execute_sql(conn_id: str, sql: str) -> Dict[str, Any]:
352
+ """
353
+ Ejecuta SOLO consultas de lectura (SELECT).
354
+ Bloquea operaciones destructivas por seguridad en el demo.
355
+ """
356
+ info = ensure_connection(conn_id)
357
+ engine = info["engine"]
358
+ ns = info["namespace"]
359
+
360
+ forbidden = ["drop ", "delete ", "update ", "insert ", "alter ", "replace ", "truncate ", "create "]
361
  sql_low = sql.lower()
362
  if any(f in sql_low for f in forbidden):
363
  return {
364
  "ok": False,
365
+ "error": "Query bloqueada por seguridad (operación potencialmente destructiva).",
366
  "rows": None,
367
+ "columns": [],
368
  }
369
 
370
  try:
371
+ if engine == "postgresql":
372
+ conn = get_pg_conn()
373
+ try:
374
+ cur = conn.cursor()
375
+ cur.execute(f'SET search_path TO "{ns}";')
376
+ cur.execute(sql)
377
+ rows = cur.fetchall()
378
+ cols = [desc[0] for desc in cur.description] if cur.description else []
379
+ cur.close()
380
+ finally:
381
+ conn.close()
382
+ else:
383
+ conn = get_mysql_conn(ns)
384
+ try:
385
+ cur = conn.cursor()
386
+ cur.execute(sql)
387
+ rows = cur.fetchall()
388
+ cols = [desc[0] for desc in cur.description] if cur.description else []
389
+ cur.close()
390
+ finally:
391
+ conn.close()
392
+
393
+ return {"ok": True, "error": None, "rows": rows, "columns": cols}
394
  except Exception as e:
395
  return {"ok": False, "error": str(e), "rows": None, "columns": []}
396
 
397
 
398
  # ======================================================
399
+ # 6) SQL REPAIR LAYER (igual que antes, pero agnóstico de motor)
400
  # ======================================================
401
 
402
  def _normalize_name_for_match(name: str) -> str:
403
  s = name.lower()
404
+ s = s.replace('"', "").replace("`", "")
405
  s = s.replace("_", "")
406
  if s.endswith("s") and len(s) > 3:
407
  s = s[:-1]
 
430
  def _best_match_name(missing: str, index: Dict[str, List[str]]) -> Optional[str]:
431
  if not index:
432
  return None
 
433
  key = _normalize_name_for_match(missing)
434
  if key in index and index[key]:
435
  return index[key][0]
 
436
  candidates = difflib.get_close_matches(key, list(index.keys()), n=1, cutoff=0.7)
437
  if not candidates:
438
  return None
 
476
  missing_table = None
477
  missing_column = None
478
 
479
+ m_t = re.search(r"no such table: ([\w\.]+)", error or "", re.IGNORECASE)
480
  if m_t:
481
  missing_table = m_t.group(1)
482
 
483
+ m_c = re.search(r"no such column: ([\w\.]+)", error or "", re.IGNORECASE)
484
  if m_c:
485
  missing_column = m_c.group(1)
486
 
 
522
 
523
 
524
  # ======================================================
525
+ # 7) Construcción de prompt y NL→SQL + re-ranking
526
  # ======================================================
527
 
528
  def build_prompt(question_en: str, db_id: str, schema_str: str) -> str:
 
534
 
535
 
536
  def nl2sql_with_rerank(question: str, conn_id: str) -> Dict[str, Any]:
537
+ ensure_connection(conn_id)
538
+ meta = introspect_schema(conn_id)
 
 
 
539
  schema_str = meta["schema_str"]
540
 
541
  detected = detect_language(question)
 
582
  "raw_sql_model": raw_sql,
583
  }
584
 
585
+ exec_info = execute_sql(conn_id, raw_sql)
586
 
587
  if (not exec_info["ok"]) and (
588
+ "no such table" in (exec_info["error"] or "").lower()
589
+ or "no such column" in (exec_info["error"] or "").lower()
590
  ):
591
  current_sql = raw_sql
592
  last_error = exec_info["error"]
593
  for step in range(1, 4):
594
+ repaired_sql = try_repair_sql(current_sql, last_error or "", meta)
595
  if not repaired_sql or repaired_sql == current_sql:
596
  break
597
+ exec_info2 = execute_sql(conn_id, repaired_sql)
598
  cand["repaired_from"] = current_sql if cand["repaired_from"] is None else cand["repaired_from"]
599
  cand["repair_note"] = f"auto-repair (table/column name, step {step})"
600
  cand["sql"] = repaired_sql
 
641
 
642
 
643
  # ======================================================
644
+ # 8) Schemas Pydantic
645
  # ======================================================
646
 
647
  class UploadResponse(BaseModel):
648
  connection_id: str
649
  label: str
650
+ engine: str
651
+ namespace: str # schema (PG) o database (MySQL)
652
  note: Optional[str] = None
653
 
654
 
655
  class ConnectionInfo(BaseModel):
656
  connection_id: str
657
  label: str
658
+ engine: str
659
 
660
 
661
  class SchemaResponse(BaseModel):
662
  connection_id: str
663
+ engine: str
664
  schema_summary: str
665
  tables: Dict[str, Dict[str, List[str]]]
666
 
667
 
668
  class PreviewResponse(BaseModel):
669
  connection_id: str
670
+ engine: str
671
  table: str
672
  columns: List[str]
673
  rows: List[List[Any]]
 
698
 
699
 
700
  # ======================================================
701
+ # 9) Endpoints FastAPI
702
  # ======================================================
703
 
704
  @app.on_event("startup")
705
  async def startup_event():
706
  load_nl2sql_model()
707
+ print("✅ Backend NL2SQL inicializado (motores MySQL/PostgreSQL).")
708
 
709
 
710
+ @app.post("/upload-sql", response_model=UploadResponse)
711
+ async def upload_sql(
712
+ db_file: UploadFile = File(...),
713
+ engine: Optional[str] = Form(None), # "mysql" | "postgresql" (opcional)
714
+ connection_id: Optional[str] = Form(None), # para el caso schema.sql + data.sql
715
+ ):
716
  """
717
+ Subida de dumps SQL.
718
+ - Acepta .sql directo o .zip con varios .sql
719
+ - Detecta motor automáticamente (MySQL/PostgreSQL) si engine no se especifica
720
+ - Si connection_id es None, crea una nueva BD lógica
721
+ - Si connection_id existe, ejecuta el SQL encima (ej: schema.sql luego data.sql)
 
722
  """
723
  filename = db_file.filename
724
  if not filename:
 
727
  fname_lower = filename.lower()
728
  contents = await db_file.read()
729
 
730
+ if not (fname_lower.endswith(".sql") or fname_lower.endswith(".zip")):
731
+ raise HTTPException(
732
+ status_code=400,
733
+ detail="Formato no soportado. Usa: .sql o .zip (con archivos .sql).",
734
+ )
 
 
 
 
 
 
 
 
 
 
 
 
735
 
736
+ sql_text = ""
737
+ note = None
 
 
 
 
 
 
 
 
 
738
 
739
+ # Caso: archivo .sql único
740
+ if fname_lower.endswith(".sql"):
741
+ sql_text = contents.decode("utf-8", errors="ignore")
742
+ detected = detect_sql_dialect(sql_text)
743
+ final_engine = engine or detected
744
+ if final_engine == "unknown":
745
  raise HTTPException(
746
  status_code=400,
747
  detail=(
748
+ "No se pudo detectar el motor SQL (MySQL/PostgreSQL). "
749
+ "Vuelve a subir el archivo indicando engine='mysql' o 'postgresql'."
 
 
750
  ),
751
  )
752
 
753
+ conn_id = load_sql_into_connection(sql_text, final_engine, connection_id, filename)
754
+ info = DB_REGISTRY[conn_id]
755
+ note = f"SQL ejecutado sobre motor {final_engine}."
756
+ return UploadResponse(
757
+ connection_id=conn_id,
758
+ label=info["label"],
759
+ engine=info["engine"],
760
+ namespace=info["namespace"],
761
+ note=note,
762
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
763
 
764
+ # Caso: ZIP con varios .sql (ej: schema.sql + data.sql, o muchos scripts)
765
+ try:
766
+ with zipfile.ZipFile(io.BytesIO(contents)) as zf:
767
+ sql_names = [n for n in zf.namelist() if n.lower().endswith(".sql")]
768
+ if not sql_names:
769
+ raise HTTPException(
770
+ status_code=400,
771
+ detail="El ZIP no contiene archivos .sql utilizables.",
772
+ )
773
+
774
+ combined_sql_parts = []
775
+ for name in sorted(sql_names):
776
+ with zf.open(name) as f:
777
+ combined_sql_parts.append(f"-- FILE: {name}\n")
778
+ combined_sql_parts.append(f.read().decode("utf-8", errors="ignore"))
779
+ sql_text = "\n\n".join(combined_sql_parts)
780
+
781
+ except zipfile.BadZipFile:
782
+ raise HTTPException(status_code=400, detail="Archivo ZIP inválido o corrupto.")
783
+
784
+ detected = detect_sql_dialect(sql_text)
785
+ final_engine = engine or detected
786
+ if final_engine == "unknown":
787
  raise HTTPException(
788
  status_code=400,
789
+ detail=(
790
+ "No se pudo detectar el motor SQL (MySQL/PostgreSQL) en el ZIP. "
791
+ "Vuelve a subir indicando engine='mysql' o 'postgresql'."
792
+ ),
793
  )
794
 
795
+ conn_id = load_sql_into_connection(sql_text, final_engine, connection_id, filename)
796
+ info = DB_REGISTRY[conn_id]
797
+ note = f"ZIP con scripts SQL ejecutado sobre motor {final_engine}."
798
  return UploadResponse(
799
  connection_id=conn_id,
800
+ label=info["label"],
801
+ engine=info["engine"],
802
+ namespace=info["namespace"],
803
  note=note,
804
  )
805
 
806
 
807
  @app.get("/connections", response_model=List[ConnectionInfo])
808
  async def list_connections():
809
+ out: List[ConnectionInfo] = []
810
  for cid, info in DB_REGISTRY.items():
811
+ out.append(ConnectionInfo(connection_id=cid, label=info["label"], engine=info["engine"]))
812
  return out
813
 
814
 
815
  @app.get("/schema/{connection_id}", response_model=SchemaResponse)
816
  async def get_schema(connection_id: str):
817
+ info = ensure_connection(connection_id)
818
+ meta = introspect_schema(connection_id)
 
 
 
819
  return SchemaResponse(
820
  connection_id=connection_id,
821
+ engine=info["engine"],
822
  schema_summary=meta["schema_str"],
823
  tables=meta["tables"],
824
  )
 
826
 
827
  @app.get("/preview/{connection_id}/{table}", response_model=PreviewResponse)
828
  async def preview_table(connection_id: str, table: str, limit: int = 20):
829
+ info = ensure_connection(connection_id)
830
+ engine = info["engine"]
831
+ ns = info["namespace"]
832
 
 
833
  try:
834
+ if engine == "postgresql":
835
+ conn = get_pg_conn()
836
+ try:
837
+ cur = conn.cursor()
838
+ cur.execute(f'SET search_path TO "{ns}";')
839
+ cur.execute(f'SELECT * FROM "{table}" LIMIT %s;', (int(limit),))
840
+ rows = cur.fetchall()
841
+ cols = [d[0] for d in cur.description] if cur.description else []
842
+ cur.close()
843
+ finally:
844
+ conn.close()
845
+ else:
846
+ conn = get_mysql_conn(ns)
847
+ try:
848
+ cur = conn.cursor()
849
+ cur.execute(f"SELECT * FROM `{table}` LIMIT %s;", (int(limit),))
850
+ rows = cur.fetchall()
851
+ cols = [d[0] for d in cur.description] if cur.description else []
852
+ cur.close()
853
+ finally:
854
+ conn.close()
855
  except Exception as e:
856
  raise HTTPException(status_code=400, detail=f"Error al leer tabla '{table}': {e}")
857
 
858
  return PreviewResponse(
859
  connection_id=connection_id,
860
+ engine=engine,
861
  table=table,
862
  columns=cols,
863
  rows=[list(r) for r in rows],
 
873
  @app.post("/speech-infer", response_model=SpeechInferResponse)
874
  async def speech_infer(
875
  connection_id: str = Form(...),
876
+ audio: UploadFile = File(...),
877
  ):
878
  if openai_client is None:
879
  raise HTTPException(
880
  status_code=500,
881
+ detail="OPENAI_API_KEY no está configurado en el backend.",
882
  )
883
 
884
  if audio.content_type is None:
 
916
  "status": "ok",
917
  "model_loaded": t5_model is not None,
918
  "connections": len(DB_REGISTRY),
919
+ "engines_in_use": list({info["engine"] for info in DB_REGISTRY.values()}),
920
  "device": str(DEVICE),
921
  }
922
 
 
924
  @app.get("/")
925
  async def root():
926
  return {
927
+ "message": "NL2SQL T5-large backend is running (MySQL/PostgreSQL engines).",
928
  "endpoints": [
929
+ "POST /upload-sql (subir .sql o .zip con .sql, engine auto o manual)",
930
+ "GET /connections (listar BDs lógicas)",
931
+ "GET /schema/{id} (esquema resumido desde motor real)",
932
  "GET /preview/{id}/{t} (preview de tabla)",
933
+ "POST /infer (NL→SQL + ejecución segura en el motor)",
934
  "POST /speech-infer (NL por voz → SQL + ejecución)",
935
  "GET /health (estado del backend)",
936
  "GET /docs (OpenAPI UI)",