Spaces:

AshenH
/

ALM_LLM

Sleeping

App Files Files Community

AshenH committed on Oct 14

Commit

f69d955

verified ·

1 Parent(s): 0d9239a

Update tools/sql_tool.py

Browse files

Files changed (1) hide show

tools/sql_tool.py +29 -42

tools/sql_tool.py CHANGED Viewed

@@ -5,46 +5,44 @@ from typing import Optional, Tuple
 import duckdb
-# DuckDB file path (can be overridden in Space settings)
 DUCKDB_PATH = os.getenv("DUCKDB_PATH", "alm.duckdb")
-# Default schema/table -> your path my_db.masterdataset_v
-DEFAULT_SCHEMA = os.getenv("SQL_DEFAULT_SCHEMA", "my_db")
-DEFAULT_TABLE  = os.getenv("SQL_DEFAULT_TABLE",  "masterdataset_v")
-def _full_table(schema: Optional[str] = None, table: Optional[str] = None) -> str:
     schema = schema or DEFAULT_SCHEMA
     table = table or DEFAULT_TABLE
-    return f"{schema}.{table}"
 class SQLTool:
-    """
-    Minimal NL→SQL helper wired to my_db.masterdataset_v with a DuckDB runner.
-    """
     def __init__(self, db_path: Optional[str] = None):
         self.db_path = db_path or DUCKDB_PATH
         self.con = duckdb.connect(self.db_path)
-    # -------------------------
-    # SQL Runner
-    # -------------------------
     def run_sql(self, sql: str):
         return self.con.execute(sql).df()
-    # -------------------------
     # NL → SQL
-    # -------------------------
-    def _nl_to_sql(
-        self, message: str, schema: Optional[str] = None, table: Optional[str] = None
-    ) -> Tuple[str, str]:
-        """
-        Returns (sql, rationale). Small template library covering common queries.
-        Falls back to a filtered SELECT or a sample.
-        """
-        full_table = _full_table(schema, table)
         m = (message or "").strip().lower()
         def has_any(txt, words):
@@ -56,7 +54,7 @@ class SQLTool:
         if m_top:
             limit = int(m_top.group(1))
-        # 1) Top N FDs by Portfolio_value
         if has_any(m, ["fd", "fixed deposit", "deposits"]) and has_any(
             m, ["top", "largest", "biggest"]
         ) and has_any(m, ["portfolio value", "portfolio_value"]):
@@ -71,7 +69,7 @@ class SQLTool:
             why = f"Top {n} fixed deposits by Portfolio_value from {full_table}"
             return sql, why
-        # 2) Top N Assets by Portfolio_value
         if has_any(m, ["asset", "loan", "advances"]) and has_any(
             m, ["top", "largest", "biggest"]
         ) and has_any(m, ["portfolio value", "portfolio_value"]):
@@ -86,7 +84,7 @@ class SQLTool:
             why = f"Top {n} assets by Portfolio_value from {full_table}"
             return sql, why
-        # 3) Aggregate (SUM/AVG) by segment or currency
         if has_any(m, ["sum", "total", "avg", "average"]) and has_any(
             m, ["segment", "currency"]
         ):
@@ -101,7 +99,7 @@ class SQLTool:
             why = f"{agg} Portfolio_value grouped by {dim} from {full_table}"
             return sql, why
-        # 4) Generic filters
         product = None
         if "fd" in m or "deposit" in m:
             product = "fd"
@@ -115,14 +113,12 @@ class SQLTool:
             parts.append(f"AND lower(product) = '{product}'")
             why_parts.append(f"product = {product}")
-        # currency filter like: "in lkr", "currency usd"
         cur_match = re.search(r"\b(currency|in)\s+([a-z]{3})\b", m)
         if cur_match:
             cur = cur_match.group(2).upper()
             parts.append(f"AND upper(currency) = '{cur}'")
             why_parts.append(f"currency = {cur}")
-        # segment filter like: "segment retail" or "for corporate"
         seg_match = re.search(r"(segment|for)\s+([a-z0-9_\- ]+)", m)
         if seg_match:
             seg = seg_match.group(2).strip()
@@ -137,19 +133,10 @@ class SQLTool:
         fallback_why = "; ".join(why_parts)
         return fallback_sql, fallback_why
-    # Public helpers
     def query_from_nl(self, message: str):
         sql, why = self._nl_to_sql(message)
         df = self.run_sql(sql)
         return df, sql, why
-    def table_exists(self, schema: Optional[str] = None, table: Optional[str] = None) -> bool:
-        schema = schema or DEFAULT_SCHEMA
-        table = table or DEFAULT_TABLE
-        q = f"""
-        SELECT COUNT(*) AS n
-        FROM information_schema.tables
-        WHERE table_schema = '{schema}' AND table_name = '{table}';
-        """
-        n = self.con.execute(q).fetchone()[0]
-        return n > 0

 import duckdb
+# Path to the DuckDB database file (override with the DUCKDB_PATH environment variable)
 DUCKDB_PATH = os.getenv("DUCKDB_PATH", "alm.duckdb")
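+# Fully qualified table path (<db>.<schema>.<table>), as confirmed on your server:
+# my_db.main.masterdataset_v (each part can be overridden via the SQL_DEFAULT_* environment variables)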
+DEFAULT_DB      = os.getenv("SQL_DEFAULT_DB", "my_db")
+DEFAULT_SCHEMA  = os.getenv("SQL_DEFAULT_SCHEMA", "main")
+DEFAULT_TABLE   = os.getenv("SQL_DEFAULT_TABLE", "masterdataset_v")
+def _full_table(db: Optional[str] = None,
+                schema: Optional[str] = None,
+                table: Optional[str] = None) -> str:
+    """Return fully qualified <db>.<schema>.<table>"""
+    db = db or DEFAULT_DB
     schema = schema or DEFAULT_SCHEMA
     table = table or DEFAULT_TABLE
+    return f"{db}.{schema}.{table}"
 class SQLTool:
+    """Natural-language → SQL helper for DuckDB"""
     def __init__(self, db_path: Optional[str] = None):
         self.db_path = db_path or DUCKDB_PATH
         self.con = duckdb.connect(self.db_path)
+        self.full_table = _full_table()
+    # ------------------------------------------------------------
+    # Run SQL directly
+    # ------------------------------------------------------------
     def run_sql(self, sql: str):
         return self.con.execute(sql).df()
+    # ------------------------------------------------------------
     # NL → SQL
+    # ------------------------------------------------------------
+    def _nl_to_sql(self, message: str) -> Tuple[str, str]:
+        full_table = self.full_table
         m = (message or "").strip().lower()
         def has_any(txt, words):
         if m_top:
             limit = int(m_top.group(1))
+        # 1. Top N FDs
         if has_any(m, ["fd", "fixed deposit", "deposits"]) and has_any(
             m, ["top", "largest", "biggest"]
         ) and has_any(m, ["portfolio value", "portfolio_value"]):
             why = f"Top {n} fixed deposits by Portfolio_value from {full_table}"
             return sql, why
+        # 2. Top N Assets
         if has_any(m, ["asset", "loan", "advances"]) and has_any(
             m, ["top", "largest", "biggest"]
         ) and has_any(m, ["portfolio value", "portfolio_value"]):
             why = f"Top {n} assets by Portfolio_value from {full_table}"
             return sql, why
+        # 3. Aggregate by segment/currency
         if has_any(m, ["sum", "total", "avg", "average"]) and has_any(
             m, ["segment", "currency"]
         ):
             why = f"{agg} Portfolio_value grouped by {dim} from {full_table}"
             return sql, why
+        # 4. Generic filters
         product = None
         if "fd" in m or "deposit" in m:
             product = "fd"
             parts.append(f"AND lower(product) = '{product}'")
             why_parts.append(f"product = {product}")
         cur_match = re.search(r"\b(currency|in)\s+([a-z]{3})\b", m)
         if cur_match:
             cur = cur_match.group(2).upper()
             parts.append(f"AND upper(currency) = '{cur}'")
             why_parts.append(f"currency = {cur}")
         seg_match = re.search(r"(segment|for)\s+([a-z0-9_\- ]+)", m)
         if seg_match:
             seg = seg_match.group(2).strip()
         fallback_why = "; ".join(why_parts)
         return fallback_sql, fallback_why
+    # ------------------------------------------------------------
+    # Public wrappers
+    # ------------------------------------------------------------
     def query_from_nl(self, message: str):
         sql, why = self._nl_to_sql(message)
         df = self.run_sql(sql)
         return df, sql, why