Spaces:

LalitChaudhari3
/

PlainSQL-Agent

Sleeping

App Files Files Community

LalitChaudhari3 committed 27 days ago

Commit

8fbaa76

verified ·

1 Parent(s): 91ed273

Update src/sql_generator.py

Browse files

Files changed (1) hide show

src/sql_generator.py +13 -17

src/sql_generator.py CHANGED Viewed

@@ -8,31 +8,31 @@ class SQLGenerator:
     def __init__(self):
         load_dotenv()
-        # 1. Robustly fetch the API Key
-        # Try HF_API_KEY first (our standard), then fall back to others if user renamed it
-        self.api_key = os.getenv("HF_API_KEY") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
-        # 2. Use the powerful Qwen 2.5 Coder model
         self.repo_id = "Qwen/Qwen2.5-Coder-32B-Instruct"
         self.api_url = f"https://api-inference.huggingface.co/models/{self.repo_id}"
     def generate_followup_questions(self, question, sql_query):
-        # Basic heuristics to suggest next steps
         return ["Visualize this result", "Export as CSV", "Compare with last year"]
     def generate_sql(self, question, context, history=None):
         if history is None: history = []
-        # 🚨 ERROR CHECK: Stop early if key is missing
         if not self.api_key:
             return "SELECT 'Error: HF_API_KEY Missing' as status", "Configuration Error", "Please add HF_API_KEY to your Space Secrets."
-        # 🛡️ Safety Layer: Block dangerous keywords
         forbidden = ["DROP", "DELETE", "UPDATE", "INSERT", "ALTER", "TRUNCATE", "GRANT"]
         if any(word in question.upper() for word in forbidden):
              return "SELECT 'Error: Blocked by Safety Layer' as status", "Safety Alert", "I cannot execute commands that modify data."
-        # Format History for the AI
         history_text = ""
         if history:
             history_text = "PREVIOUS CONVERSATION:\n" + "\n".join([f"User: {h.get('user')}\nSQL: {h.get('sql')}" for h in history[-2:]])
@@ -50,12 +50,12 @@ class SQLGenerator:
         3. Do not include markdown formatting like ```json.
         """
-        # Prepare the payload (Direct HTTP Request)
         payload = {
             "inputs": f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant\n",
             "parameters": {
                 "max_new_tokens": 1024,
-                "temperature": 0.1,  # Low temp = precise SQL
                 "return_full_text": False
             }
         }
@@ -68,7 +68,7 @@ class SQLGenerator:
         try:
             print(f"   ⚡ Generating SQL via Direct API...")
-            # 🚀 DIRECT REQUEST (Bypasses library auth issues)
             response = requests.post(self.api_url, headers=headers, json=payload, timeout=25)
             if response.status_code != 200:
@@ -78,7 +78,6 @@ class SQLGenerator:
             # Parse Response
             result = response.json()
-            # Handle different response formats (sometimes list, sometimes dict)
             if isinstance(result, list) and len(result) > 0:
                 raw_text = result[0].get('generated_text', '')
             elif isinstance(result, dict):
@@ -86,28 +85,25 @@ class SQLGenerator:
             else:
                 raw_text = str(result)
-            # JSON Parsing & Cleanup
             sql_query = ""
             message = "Here is the data."
             explanation = "Query generated successfully."
             try:
-                # Remove markdown code blocks if AI added them
                 clean_json = re.sub(r"```json|```", "", raw_text).strip()
                 data = json.loads(clean_json)
                 sql_query = data.get("sql", "")
                 message = data.get("message", message)
                 explanation = data.get("explanation", explanation)
             except:
-                # Fallback: Regex to find SQL if JSON parsing fails
                 match = re.search(r"(SELECT[\s\S]+?;)", raw_text, re.IGNORECASE)
                 if match: sql_query = match.group(1)
-            # Final SQL Cleanup
             sql_query = sql_query.strip().replace("\n", " ")
             if sql_query and not sql_query.endswith(";"): sql_query += ";"
-            # 🛡️ Final Validation (Allow SELECT or WITH)
             clean_check = re.sub(r"/\*.*?\*/|--.*?\n", "", sql_query, flags=re.DOTALL).strip().upper()
             if not clean_check.startswith("SELECT") and not clean_check.startswith("WITH"):
                 print(f"   ⚠️ Invalid SQL Blocked: {sql_query}")

     def __init__(self):
         load_dotenv()
+        # 1. ROBUSTLY FETCH & CLEAN THE KEY
+        # We use .strip() to remove the accidental '\n' (newline) causing your error
+        raw_key = os.getenv("HF_API_KEY") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
+        self.api_key = raw_key.strip() if raw_key else None
+        # 2. Use Qwen 2.5 Coder
         self.repo_id = "Qwen/Qwen2.5-Coder-32B-Instruct"
         self.api_url = f"https://api-inference.huggingface.co/models/{self.repo_id}"
     def generate_followup_questions(self, question, sql_query):
         return ["Visualize this result", "Export as CSV", "Compare with last year"]
     def generate_sql(self, question, context, history=None):
         if history is None: history = []
+        # 🚨 ERROR CHECK
         if not self.api_key:
             return "SELECT 'Error: HF_API_KEY Missing' as status", "Configuration Error", "Please add HF_API_KEY to your Space Secrets."
+        # 🛡️ Safety Layer
         forbidden = ["DROP", "DELETE", "UPDATE", "INSERT", "ALTER", "TRUNCATE", "GRANT"]
         if any(word in question.upper() for word in forbidden):
              return "SELECT 'Error: Blocked by Safety Layer' as status", "Safety Alert", "I cannot execute commands that modify data."
+        # Format History
         history_text = ""
         if history:
             history_text = "PREVIOUS CONVERSATION:\n" + "\n".join([f"User: {h.get('user')}\nSQL: {h.get('sql')}" for h in history[-2:]])
         3. Do not include markdown formatting like ```json.
         """
+        # Payload
         payload = {
             "inputs": f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant\n",
             "parameters": {
                 "max_new_tokens": 1024,
+                "temperature": 0.1,
                 "return_full_text": False
             }
         }
         try:
             print(f"   ⚡ Generating SQL via Direct API...")
+            # 🚀 DIRECT REQUEST
             response = requests.post(self.api_url, headers=headers, json=payload, timeout=25)
             if response.status_code != 200:
             # Parse Response
             result = response.json()
             if isinstance(result, list) and len(result) > 0:
                 raw_text = result[0].get('generated_text', '')
             elif isinstance(result, dict):
             else:
                 raw_text = str(result)
+            # Clean JSON
             sql_query = ""
             message = "Here is the data."
             explanation = "Query generated successfully."
             try:
                 clean_json = re.sub(r"```json|```", "", raw_text).strip()
                 data = json.loads(clean_json)
                 sql_query = data.get("sql", "")
                 message = data.get("message", message)
                 explanation = data.get("explanation", explanation)
             except:
                 match = re.search(r"(SELECT[\s\S]+?;)", raw_text, re.IGNORECASE)
                 if match: sql_query = match.group(1)
+            # Final Cleanup
             sql_query = sql_query.strip().replace("\n", " ")
             if sql_query and not sql_query.endswith(";"): sql_query += ";"
             clean_check = re.sub(r"/\*.*?\*/|--.*?\n", "", sql_query, flags=re.DOTALL).strip().upper()
             if not clean_check.startswith("SELECT") and not clean_check.startswith("WITH"):
                 print(f"   ⚠️ Invalid SQL Blocked: {sql_query}")