Spaces:

AshenH
/

ALM_LLM

Sleeping

App Files Files Community

AshenH committed on Oct 14

Commit

6e66f3a

verified ·

1 Parent(s): 0ffc27e

Update tools/sql_tool.py

Browse files

Files changed (1) hide show

tools/sql_tool.py +112 -138

tools/sql_tool.py CHANGED Viewed

@@ -1,143 +1,117 @@
-# tools/sql_tool.py
 import os
-import re
-import duckdb
-from typing import Optional, Tuple
-DUCKDB_PATH = os.getenv("DUCKDB_PATH", "alm.duckdb")  # DuckDB database file; the DUCKDB_PATH env var overrides the default
-# Default schema/table for generated queries; each can be overridden via environment variables (e.g. Space secrets)
-DEFAULT_SCHEMA = os.getenv("SQL_DEFAULT_SCHEMA", "main")
 DEFAULT_TABLE  = os.getenv("SQL_DEFAULT_TABLE",  "masterdataset_v")
-def _full_table(schema: Optional[str] = None, table: Optional[str] = None) -> str:
-    schema = schema or DEFAULT_SCHEMA
-    table  = table  or DEFAULT_TABLE
-    return f"{schema}.{table}"
-class SQLTool:
-    """
-    Minimal NL→SQL helper wired to main.masterdataset_v with a DuckDB runner.
-    """
-    def __init__(self, db_path: Optional[str] = None):
-        self.db_path = db_path or DUCKDB_PATH
-        self.con = duckdb.connect(self.db_path)
-    def run_sql(self, sql: str):
-        return self.con.execute(sql).df()
-    # -------------------------
-    # NL → SQL
-    # -------------------------
-    def _nl_to_sql(self, message: str, schema: Optional[str] = None, table: Optional[str] = None) -> Tuple[str, str]:
-        """
-        Returns (sql, rationale). Very small template library covering your common queries.
-        Falls back to SHOW TABLES if no match.
-        """
-        full_table = _full_table(schema, table)
-        m = message.strip().lower()
-        # Common synonyms
-        def has_any(txt, words):
-            return any(w in txt for w in words)
-        # Extract a "top N"
-        limit = None
-        m_top = re.search(r"\btop\s+(\d+)", m)
-        if m_top:
-            limit = int(m_top.group(1))
-        # 1) Top N FDs by Portfolio_value
-        if has_any(m, ["fd", "fixed deposit", "deposits"]) and has_any(m, ["top", "largest", "biggest"]) and has_any(m, ["portfolio value", "portfolio_value"]):
-            n = limit or 10
-            sql = f"""
-            SELECT contract_number, Portfolio_value, Interest_rate, currency, segments
-            FROM {full_table}
-            WHERE lower(product) = 'fd'
-            ORDER BY Portfolio_value DESC
-            LIMIT {n};
-            """
-            why = f"Top {n} fixed deposits by Portfolio_value from {full_table}"
-            return sql, why
-        # 2) Top N Assets by Portfolio_value
-        if has_any(m, ["asset", "loan", "advances"]) and has_any(m, ["top", "largest", "biggest"]) and has_any(m, ["portfolio value", "portfolio_value"]):
-            n = limit or 10
-            sql = f"""
-            SELECT contract_number, Portfolio_value, Interest_rate, currency, segments
-            FROM {full_table}
-            WHERE lower(product) = 'assets'
-            ORDER BY Portfolio_value DESC
-            LIMIT {n};
-            """
-            why = f"Top {n} assets by Portfolio_value from {full_table}"
-            return sql, why
-        # 3) Aggregate (SUM/AVG) by segment or currency
-        if has_any(m, ["sum", "total", "avg", "average"]) and has_any(m, ["segment", "currency"]):
-            agg = "SUM" if has_any(m, ["sum", "total"]) else "AVG"
-            dim = "segments" if "segment" in m else "currency"
-            sql = f"""
-            SELECT {dim}, {agg}(Portfolio_value) AS {agg.lower()}_Portfolio_value
-            FROM {full_table}
-            GROUP BY 1
-            ORDER BY 2 DESC;
-            """
-            why = f"{agg} Portfolio_value grouped by {dim} from {full_table}"
-            return sql, why
-        # 4) Filter by product, currency, or segment
-        product = None
-        if "fd" in m or "deposit" in m:
-            product = "fd"
-        elif "asset" in m or "loan" in m or "advance" in m:
-            product = "assets"
-        parts = [f"SELECT * FROM {full_table} WHERE 1=1"]
-        why_parts = [f"Filtered rows from {full_table}"]
-        if product:
-            parts.append(f"AND lower(product) = '{product}'")
-            why_parts.append(f"product = {product}")
-        # currency filter like: "in lkr", "currency usd"
-        cur = None
-        cur_match = re.search(r"\b(currency|in)\s+([a-z]{3})\b", m)
-        if cur_match:
-            cur = cur_match.group(2).upper()
-        if cur:
-            parts.append(f"AND upper(currency) = '{cur}'")
-            why_parts.append(f"currency = {cur}")
-        # segment filter like: "segment retail" or "for corporate"
-        seg_match = re.search(r"(segment|for)\s+([a-z0-9_\- ]+)", m)
-        if seg_match:
-            seg = seg_match.group(2).strip()
-            if seg:
-                parts.append(f"AND lower(segments) LIKE '%{seg.lower()}%'")
-                why_parts.append(f"segments like '{seg}'")
-        # maybe a limit
-        if limit:
-            parts.append(f"LIMIT {limit}")
-        fallback_sql = " ".join(parts) + ";"
-        fallback_why = "; ".join(why_parts)
-        if fallback_sql:
-            return fallback_sql, fallback_why
-        # 5) Super fallback: show sample rows
-        return f"SELECT * FROM {full_table} LIMIT 20;", f"Default sample from {full_table}"
-    # Public helpers
-    def query_from_nl(self, message: str):
-        sql, why = self._nl_to_sql(message)
-        df = self.run_sql(sql)
-        return df, sql, why
-    def table_exists(self, schema: Optional[str] = None, table: Optional[str] = None) -> bool:
-        schema = schema or DEFAULT_SCHEMA
-        table  = table or DEFAULT_TABLE
-        q = f"SELECT COUNT(*) AS n FROM information_schema.tables WHERE table_schema = '{schema}' AND table_name = '{table}';"
-        n = self.con.execute(q).fetchone()[0]
-        return n > 0

+# app.py
 import os
+import pandas as pd
+import gradio as gr
+from tools.sql_tool import SQLTool
+from tools.ts_preprocess import build_timeseries
+# ==========================================================
+#   CONFIG
+# ==========================================================
+DUCKDB_PATH = os.getenv("DUCKDB_PATH", "alm.duckdb")  # database file handed to SQLTool; overridable via env
+DEFAULT_SCHEMA = os.getenv("SQL_DEFAULT_SCHEMA", "my_db")  # NOTE(review): only used for the banner text and the sample raw-SQL query; SQLTool is built from the path alone - confirm both resolve the same table
 DEFAULT_TABLE  = os.getenv("SQL_DEFAULT_TABLE",  "masterdataset_v")
+sql_tool = SQLTool(DUCKDB_PATH)  # single instance shared by run_nl and run_sql; created at import time
+INTRO = f"""
+### ALM LLM — Demo
+Connected to **DuckDB** at `{DUCKDB_PATH}` using table **{DEFAULT_SCHEMA}.{DEFAULT_TABLE}**.
+**Try:**
+- *"show me the top 10 fds by portfolio value"*
+- *"top 10 assets by portfolio value"*
+- *"sum portfolio value by currency"*
+"""  # Markdown banner shown at the top of the UI
+# ==========================================================
+#   BACKEND HANDLERS
+# ==========================================================
+def run_nl(nl_query: str):
+    """Handle natural-language queries."""
+    if not nl_query or not nl_query.strip():
+        return pd.DataFrame(), "", "Please enter a query.", pd.DataFrame(), pd.DataFrame()
+    try:
+        df, sql, why = sql_tool.query_from_nl(nl_query)
+    except Exception as e:
+        return pd.DataFrame(), "", f"Error: {e}", pd.DataFrame(), pd.DataFrame()
+    try:
+        cf, gap = build_timeseries(df)
+    except Exception:
+        cf, gap = pd.DataFrame(), pd.DataFrame()
+    return df, sql.strip(), why, cf, gap
+def run_sql(sql_text: str):
+    """Handle raw SQL execution."""
+    if not sql_text or not sql_text.strip():
+        return pd.DataFrame(), "Please paste a SQL statement.", pd.DataFrame(), pd.DataFrame()
+    try:
+        df = sql_tool.run_sql(sql_text)
+    except Exception as e:
+        return pd.DataFrame(), f"Error: {e}", pd.DataFrame(), pd.DataFrame()
+    try:
+        cf, gap = build_timeseries(df)
+    except Exception:
+        cf, gap = pd.DataFrame(), pd.DataFrame()
+    return df, "OK", cf, gap
+# ==========================================================
+#   GRADIO UI
+# ==========================================================
+with gr.Blocks(title="ALM LLM") as demo:  # `demo` is launched by the __main__ guard at the bottom of the file
+    gr.Markdown(INTRO)
+    # ---- Tab 1: Natural language ----
+    with gr.Tab("Ask in Natural Language"):
+        nl = gr.Textbox(
+            label="Ask a question",
+            placeholder="e.g., show me the top 10 fds by portfolio value",
+            lines=2,
+        )
+        btn = gr.Button("Run")
+        sql_out = gr.Textbox(label="Generated SQL", interactive=False)
+        why_out = gr.Textbox(label="Reasoning", interactive=False)
+        df_out  = gr.Dataframe(label="Query Result", interactive=True)
+        cf_out  = gr.Dataframe(label="Projected Cash-Flows (if applicable)", interactive=True)
+        gap_out = gr.Dataframe(label="Liquidity Gap (monthly)", interactive=True)
+        btn.click(
+            fn=run_nl,
+            inputs=[nl],
+            outputs=[df_out, sql_out, why_out, cf_out, gap_out],  # order must match run_nl's 5-tuple: (df, sql, why, cf, gap)
+        )
+    # ---- Tab 2: Raw SQL ----
+    with gr.Tab("Run Raw SQL"):
+        sql_in = gr.Code(
+            label="SQL",
+            language="sql",
+            value=f"SELECT * FROM {DEFAULT_SCHEMA}.{DEFAULT_TABLE} LIMIT 20;",  # pre-filled sample query
+        )
+        btn2 = gr.Button("Execute")
+        df2   = gr.Dataframe(label="Result", interactive=True)
+        status = gr.Textbox(label="Status", interactive=False)
+        cf2    = gr.Dataframe(label="Projected Cash-Flows (if applicable)", interactive=True)
+        gap2   = gr.Dataframe(label="Liquidity Gap (monthly)", interactive=True)
+        btn2.click(
+            fn=run_sql,
+            inputs=[sql_in],
+            outputs=[df2, status, cf2, gap2],  # order must match run_sql's 4-tuple: (df, status, cf, gap)
+        )
+# ==========================================================
+#   LAUNCH
+# ==========================================================
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))