Spaces:

AshenH
/

ALM_LLM

Sleeping

App Files Files Community

AshenH committed on Oct 14

Commit

3fbd26b

verified ·

1 Parent(s): f8cd124

Update tools/sql_tool.py

Browse files

Files changed (1) hide show

tools/sql_tool.py +113 -18

tools/sql_tool.py CHANGED Viewed

@@ -1,41 +1,133 @@
 # tools/sql_tool.py
 import os
 import re
-from typing import Optional, Tuple
 import duckdb
-# DuckDB connection file
 DUCKDB_PATH = os.getenv("DUCKDB_PATH", "alm.duckdb")
-# Fully qualified schema path confirmed from your server
-# my_db.main.masterdataset_v
-DEFAULT_DB      = os.getenv("SQL_DEFAULT_DB", "my_db")
-DEFAULT_SCHEMA  = os.getenv("SQL_DEFAULT_SCHEMA", "main")
-DEFAULT_TABLE   = os.getenv("SQL_DEFAULT_TABLE", "masterdataset_v")
-def _full_table(db: Optional[str] = None,
-                schema: Optional[str] = None,
-                table: Optional[str] = None) -> str:
-    """Return fully qualified <db>.<schema>.<table>"""
-    db = db or DEFAULT_DB
-    schema = schema or DEFAULT_SCHEMA
-    table = table or DEFAULT_TABLE
-    return f"{db}.{schema}.{table}"
 class SQLTool:
-    """Natural-language → SQL helper for DuckDB"""
     def __init__(self, db_path: Optional[str] = None):
         self.db_path = db_path or DUCKDB_PATH
         self.con = duckdb.connect(self.db_path)
-        self.full_table = _full_table()
     # ------------------------------------------------------------
     # Run SQL directly
     # ------------------------------------------------------------
-    def run_sql(self, sql: str):
         return self.con.execute(sql).df()
     # ------------------------------------------------------------
@@ -140,3 +232,6 @@ class SQLTool:
         sql, why = self._nl_to_sql(message)
         df = self.run_sql(sql)
         return df, sql, why

 # tools/sql_tool.py
 import os
 import re
+from typing import Optional, Tuple, List
 import duckdb
+import pandas as pd
+# ------------------------------------------------------------
+# Connection config
+# ------------------------------------------------------------
 DUCKDB_PATH = os.getenv("DUCKDB_PATH", "alm.duckdb")
+# If you need to attach a catalog (e.g., MotherDuck), put the full ATTACH here.
+# Example:
+#   DUCKDB_ATTACH_SQL=ATTACH 'md:my_db' AS my_db;
+DUCKDB_ATTACH_SQL = os.getenv("DUCKDB_ATTACH_SQL", "").strip()
+# Preferred identifiers (we will fall back automatically if they don't exist)
+PREF_CATALOG = os.getenv("SQL_DEFAULT_DB", "my_db")      # catalog (optional)
+PREF_SCHEMA  = os.getenv("SQL_DEFAULT_SCHEMA", "main")   # schema
+PREF_TABLE   = os.getenv("SQL_DEFAULT_TABLE", "masterdataset_v")  # table
 class SQLTool:
+    """
+    NL→SQL helper for DuckDB with:
+      - optional pre-attach SQL (DUCKDB_ATTACH_SQL)
+      - robust table path resolution (tries 3-part → 2-part → 1-part → information_schema scan)
+    """
     def __init__(self, db_path: Optional[str] = None):
         self.db_path = db_path or DUCKDB_PATH
         self.con = duckdb.connect(self.db_path)
+        # Optional: run user-supplied ATTACH (safe no-op if empty)
+        if DUCKDB_ATTACH_SQL:
+            try:
+                self.con.execute(DUCKDB_ATTACH_SQL)
+            except Exception as e:
+                # Don't crash the app on attach issues; we still try local tables
+                print(f"[WARN] DUCKDB_ATTACH_SQL failed: {e}")
+        self.full_table = self._resolve_full_table(PREF_CATALOG, PREF_SCHEMA, PREF_TABLE)
+    # ------------------------------------------------------------
+    # Resolution helpers
+    # ------------------------------------------------------------
+    def _try_probe(self, path: str) -> bool:
+        """Return True if SELECT * FROM <path> LIMIT 1 succeeds."""
+        try:
+            self.con.execute(f"SELECT * FROM {path} LIMIT 1")
+            return True
+        except Exception:
+            return False
+    def _scan_information_schema(self, table_name: str) -> Optional[str]:
+        """
+        Look for <schema>.<table> (and <catalog>.<schema>.<table> if available)
+        in information_schema. Return a best guess path string or None.
+        """
+        q = """
+        SELECT table_catalog, table_schema, table_name
+        FROM information_schema.tables
+        WHERE lower(table_name) = ?
+        ORDER BY table_catalog, table_schema
+        """
+        rows = self.con.execute(q, [table_name.lower()]).fetchall()
+        if not rows:
+            return None
+        # Prefer matches in preferred schema/catalog when possible
+        # 1) exact catalog+schema
+        for cat, sch, t in rows:
+            if (cat or "").lower() == (PREF_CATALOG or "").lower() and sch.lower() == PREF_SCHEMA.lower():
+                candidate = f"{cat}.{sch}.{t}" if cat else f"{sch}.{t}"
+                if self._try_probe(candidate):
+                    return candidate
+        # 2) exact schema (2-part)
+        for cat, sch, t in rows:
+            if sch.lower() == PREF_SCHEMA.lower():
+                candidate = f"{sch}.{t}"
+                if self._try_probe(candidate):
+                    return candidate
+        # 3) first working row (prefer 3-part if catalog present)
+        for cat, sch, t in rows:
+            candidate = f"{cat}.{sch}.{t}" if cat else f"{sch}.{t}"
+            if self._try_probe(candidate):
+                return candidate
+        return None
+    def _resolve_full_table(self, catalog: Optional[str], schema: Optional[str], table: str) -> str:
+        """
+        Return a working fully qualified path for the table by trying:
+          - <catalog>.<schema>.<table> (3-part)
+          - <schema>.<table>        (2-part)
+          - <table>                  (1-part)
+          - information_schema scan (best effort)
+        """
+        candidates: List[str] = []
+        if catalog:
+            candidates.append(f"{catalog}.{schema}.{table}")
+        if schema:
+            candidates.append(f"{schema}.{table}")
+        candidates.append(table)
+        for path in candidates:
+            if self._try_probe(path):
+                print(f"[INFO] Using table path: {path}")
+                return path
+        # Fallback: scan information_schema
+        scanned = self._scan_information_schema(table)
+        if scanned:
+            print(f"[INFO] Using table path (scanned): {scanned}")
+            return scanned
+        # Last resort: keep preferred 3-part (will raise on first query)
+        fallback = f"{catalog}.{schema}.{table}" if catalog else f"{schema}.{table}"
+        print(f"[WARN] Could not resolve table path; falling back to: {fallback}")
+        return fallback
     # ------------------------------------------------------------
     # Run SQL directly
     # ------------------------------------------------------------
+    def run_sql(self, sql: str) -> pd.DataFrame:
         return self.con.execute(sql).df()
     # ------------------------------------------------------------
         sql, why = self._nl_to_sql(message)
         df = self.run_sql(sql)
         return df, sql, why
+    def get_full_table_path(self) -> str:
+        return self.full_table