Spaces:

AshenH
/

ALM_LLM

Sleeping

App Files Files Community

AshenH committed on Oct 13, 2025

Commit

d861fbf

verified ·

1 Parent(s): 081c73d

Update tools/sql_tool.py

Browse files

Files changed (1) hide show

tools/sql_tool.py +63 -10

tools/sql_tool.py CHANGED Viewed

@@ -247,22 +247,64 @@ class SQLTool:
     def _nl_to_sql(self, message: str) -> str:
         """
         Convert natural language to SQL query.
-        This is a simple heuristic - replace with proper NL2SQL model for production.
         """
         m = message.lower()
         # If it's already SQL, return as-is (after validation)
         if re.match(r'^\s*select\s', m, re.IGNORECASE):
             return message.strip()
         # Template-based generation (customize for your schema)
         if "avg" in m or "average" in m:
             if "by month" in m or "monthly" in m:
-                return """
 SELECT
     DATE_TRUNC('month', date_col) AS month,
     AVG(metric_col) AS avg_metric
-FROM analytics.fact_table
 GROUP BY 1
 ORDER BY 1 DESC
 LIMIT 100;
@@ -274,27 +316,38 @@ LIMIT 100;
             limit = match.group(1) if match else "10"
             return f"""
 SELECT *
-FROM analytics.fact_table
 ORDER BY metric_col DESC
 LIMIT {limit};
 """
         if "count" in m:
-            return """
 SELECT
     category_col,
     COUNT(*) AS count
-FROM analytics.fact_table
 GROUP BY 1
 ORDER BY 2 DESC
 LIMIT 100;
 """
-        # Default fallback
         return """
-SELECT *
-FROM analytics.fact_table
-LIMIT 100;
 """
     def run(self, message: str) -> pd.DataFrame:

     def _nl_to_sql(self, message: str) -> str:
         """
         Convert natural language to SQL query.
+        IMPORTANT: This is a simple heuristic template system.
+        For production, either:
+        1. Replace table/column names with your actual schema, OR
+        2. Integrate a proper NL2SQL model (e.g., T5, CodeGen, GPT), OR
+        3. Have users write SQL directly
+        To customize: Set these environment variables or edit the code:
+        - SQL_DEFAULT_SCHEMA (default: "analytics")
+        - SQL_DEFAULT_TABLE (default: "fact_table")
         """
         m = message.lower()
+        # Get configurable defaults
+        default_schema = os.getenv("SQL_DEFAULT_SCHEMA", "analytics")
+        default_table = os.getenv("SQL_DEFAULT_TABLE", "fact_table")
+        full_table = f"{default_schema}.{default_table}"
         # If it's already SQL, return as-is (after validation)
         if re.match(r'^\s*select\s', m, re.IGNORECASE):
             return message.strip()
+        # Special keyword: show tables/schemas
+        if any(keyword in m for keyword in ["show tables", "list tables", "available tables", "what tables"]):
+            return """
+SELECT table_schema, table_name, table_type
+FROM information_schema.tables
+WHERE table_schema NOT IN ('information_schema', 'pg_catalog')
+ORDER BY table_schema, table_name
+LIMIT 100;
+"""
+        if any(keyword in m for keyword in ["show schemas", "list schemas", "available schemas"]):
+            return """
+SELECT DISTINCT table_schema
+FROM information_schema.tables
+WHERE table_schema NOT IN ('information_schema', 'pg_catalog')
+ORDER BY table_schema;
+"""
+        if "show columns" in m or "describe table" in m or "table structure" in m:
+            # Try to extract table name from message
+            return f"""
+SELECT column_name, data_type, is_nullable
+FROM information_schema.columns
+WHERE table_schema = '{default_schema}'
+ORDER BY ordinal_position
+LIMIT 100;
+"""
         # Template-based generation (customize for your schema)
         if "avg" in m or "average" in m:
             if "by month" in m or "monthly" in m:
+                return f"""
 SELECT
     DATE_TRUNC('month', date_col) AS month,
     AVG(metric_col) AS avg_metric
+FROM {full_table}
 GROUP BY 1
 ORDER BY 1 DESC
 LIMIT 100;
             limit = match.group(1) if match else "10"
             return f"""
 SELECT *
+FROM {full_table}
 ORDER BY metric_col DESC
 LIMIT {limit};
 """
         if "count" in m:
+            return f"""
 SELECT
     category_col,
     COUNT(*) AS count
+FROM {full_table}
 GROUP BY 1
 ORDER BY 2 DESC
 LIMIT 100;
 """
+        # Default fallback - show available tables instead of failing
+        logger.warning(
+            f"Could not generate specific SQL for query: '{message}'. "
+            f"Returning list of available tables. "
+            f"Configure SQL_DEFAULT_SCHEMA and SQL_DEFAULT_TABLE or write SQL directly."
+        )
         return """
+SELECT
+    table_schema,
+    table_name,
+    table_type,
+    'Run: SELECT * FROM ' || table_schema || '.' || table_name || ' LIMIT 5' as example_query
+FROM information_schema.tables
+WHERE table_schema NOT IN ('information_schema', 'pg_catalog')
+ORDER BY table_schema, table_name
+LIMIT 50;
 """
     def run(self, message: str) -> pd.DataFrame: