Spaces:

Paul720810
/

Softline-SQL-Assistant

Sleeping

App Files Files Community

Paul720810 committed on Sep 5, 2025

Commit

f080e2a

verified ·

1 Parent(s): 99cea8f

Update app.py

Browse files

Files changed (1) hide show

app.py +152 -120

app.py CHANGED Viewed

@@ -227,35 +227,31 @@ class TextToSQLSystem:
             return self._generate_fallback_sql(prompt)
         try:
-            # GGUF 模型呼叫
             output = self.llm(
                 prompt,
-                max_tokens=150,       # 給予足夠的生成長度
                 temperature=0.1,
                 top_p=0.9,
                 echo=False,
-                # 暫時移除 stop 參數，觀察最原始的輸出
-                # stop=["```", ";", "\n\n", "</s>"],
             )
-            # --- 關鍵除錯步驟 ---
-            # 印出 llama-cpp-python 返回的完整、原始的 dictionary
             self._log(f"🧠 模型原始輸出 (Raw Output): {output}", "DEBUG")
             if output and "choices" in output and len(output["choices"]) > 0:
-                # 從原始輸出中提取文本
                 generated_text = output["choices"][0]["text"]
                 self._log(f"📝 提取出的生成文本: {generated_text.strip()}", "DEBUG")
                 return generated_text.strip()
             else:
                 self._log("❌ 模型的原始輸出格式不正確或為空。", "ERROR")
-                return "" # 返回空字串，讓後續流程處理
         except Exception as e:
             self._log(f"❌ 模型生成過程中發生嚴重錯誤: {e}", "CRITICAL")
             import traceback
-            self._log(traceback.format_exc(), "DEBUG") # 印出詳細的錯誤堆疊
-            return "" # 返回空字串
     def _load_gguf_model_fallback(self, model_path):
         """備用載入方式"""
@@ -418,129 +414,165 @@ class TextToSQLSystem:
         return relevant_tables[:3]  # 最多返回3個相關表格
     def _validate_and_fix_sql(self, sql: str, question: str) -> str:
-        """
-        根據 Schema 和常見錯誤，全面驗證並動態修正 SQL。
-        這個函數會依序執行以下步驟：
-        1. 語法正規化：清理多餘的空格和分號。
-        2. SQL 方言修正：將非 SQLite 的語法 (如 YEAR()) 轉換為 SQLite 語法。
-        3. Schema 修正：將模型幻覺出的表名和欄位名，映射回真實的 Schema 名稱。
-        4. 邏輯意圖修正：根據用戶問題的關鍵詞，檢查並補全缺失的 WHERE 條件或修正錯誤的聚合函數。
-        """
         if not sql or not self.schema:
-            self._log("SQL 修正被跳過，因為輸入為空或 schema 未載入。", "WARNING")
             return sql
         original_sql = sql
-        # 前後加空格方便正則匹配，並移除前後多餘的空白
         fixed_sql = " " + sql.strip() + " "
         fixes_applied = []
         q_lower = question.lower()
-        # ==================== 步驟 1: SQL 方言修正 (Dialect Correction) ====================
-        # (包含您在 column_corrections 中定義的 YEAR() 規則)
-        dialect_corrections = {
-            # 模式 (Pattern) -> 替換 (Replacement)
-            r'YEAR\s*\(([^)]+)\)': r"strftime('%Y', \1)",
-            r"(strftime\('%Y',\s*[^)]+\))\s*=\s*(\d{4})": r"\1 = '\2'"
-        }
-        for pattern, replacement in dialect_corrections.items():
-            if re.search(pattern, fixed_sql, re.IGNORECASE):
-                fixed_sql = re.sub(pattern, replacement, fixed_sql, flags=re.IGNORECASE)
-                fixes_applied.append(f"修正 SQL 方言: {pattern}")
-        # ==================== 步驟 2: Schema 名稱修正 (Schema Correction) ====================
-        # (合併了您所有的 table_corrections 和 column_corrections)
-        schema_corrections = {
-            # === 表格映射 (來自您的 table_corrections) ===
-            'Customer': 'TSR53SampleDescription',
-            'InvoiceNote': 'TSR53Invoice',
-            'InvoiceNoteInvoiceNo': 'TSR53Invoice',
-            'JobNoLog': 'JobTimeline',
-            'SampleDescription': 'TSR53SampleDescription',
-            'Invoice': 'TSR53Invoice',
-            'Job': 'JobTimeline',
-            'Events': 'JobEventsLog',
-            'Progress': 'JobsInProgress',
-            'Items': 'JobItemsInProgress',
-            'Calendar': 'calendar_days',
-            'job_timeline': 'JobTimeline',
-            'sample_description': 'TSR53SampleDescription',
-            'invoice': 'TSR53Invoice',
-            'events_log': 'JobEventsLog',
-            'calendar_days': 'calendar_days',
-            # === 欄位映射 (來自您的 column_corrections) ===
-            # 客戶相關
-            'CustomerName': 'InvoiceToName',
-            'CustomerNo': 'InvoiceToID',
-            '客戶': 'InvoiceToName',
-            '買家': 'BuyerName',
-            '申請方': 'ApplicantName',
-            # 工作單相關
-            'JobNumber': 'JobNo',
-            'JobId': 'JobNo',
-            '工作單': 'JobNo',
-            # 時間相關
-            'LTRNo': 'JobCreation',
-            'CreationDate': 'JobCreation',
-            'IssueDate': 'JobIssuedDate',
-            'EventTime': 'EventTimestamp',
-            'CompletedDate': 'ReportAuthorization', # 完成日期應為報告授權
-            # 發票相關
-            'InvoiceNoteNo': 'InvoiceCreditNoteNo',
-            'InvoiceNo': 'InvoiceCreditNoteNo',
-            'InvoiceDate': 'InvoiceCreditNoteDate',
-        }
-        for wrong, correct in schema_corrections.items():
-            pattern = r'\b' + re.escape(wrong) + r'\b' # \b 確保是完整單詞匹配
-            if re.search(pattern, fixed_sql, re.IGNORECASE):
-                fixed_sql = re.sub(pattern, correct, fixed_sql, flags=re.IGNORECASE)
-                fixes_applied.append(f"映射 Schema: '{wrong}' -> '{correct}'")
-        # ==================== 步驟 3: 邏輯意圖修正 (Logical Intent Correction) ====================
-        # (包含您在 column_corrections 中定義的邏輯規則)
-        logical_corrections = {
-            r'\bMaxJobNo\b': 'COUNT(*)',
-            r'MAX\s*\(([^)]*JobNo[^)]*)\)': r'COUNT(\1)', # 處理 MAX(j.JobNo) 或 MAX(JobNo)
-        }
-        # 只有在問題意圖是 "數量" 時才觸發
-        if any(keyword in q_lower for keyword in ['how many', 'count', '數量', '多少']):
-            for pattern, replacement in logical_corrections.items():
                 if re.search(pattern, fixed_sql, re.IGNORECASE):
                     fixed_sql = re.sub(pattern, replacement, fixed_sql, flags=re.IGNORECASE)
-                    fixes_applied.append(f"修正邏輯意圖: {pattern}")
-        # 檢查並補全 WHERE 條件
-        intent_conditions = {
-            'fail': "OverallRating = 'Fail'",
-            'pass': "OverallRating = 'Pass'",
-        }
-        for keyword, condition in intent_conditions.items():
-            if keyword in q_lower and condition.lower() not in fixed_sql.lower():
-                fixes_applied.append(f"補全 WHERE 條件: {condition}")
-                if ' where ' in fixed_sql.lower():
-                    parts = re.split(r'\b(GROUP BY|ORDER BY|LIMIT)\b', fixed_sql, maxsplit=1, flags=re.IGNORECASE)
-                    main_query = parts[0]
-                    main_query = re.sub(r'( where\s+)', f' WHERE {condition} AND ', main_query, count=1, flags=re.IGNORECASE)
-                    fixed_sql = main_query + ''.join(parts[1:])
-                else:
-                    match = re.search(r'\s(GROUP BY|ORDER BY|LIMIT)\s', fixed_sql, re.IGNORECASE)
-                    if match:
-                        insert_point = match.start()
-                        fixed_sql = fixed_sql[:insert_point] + f" WHERE {condition} " + fixed_sql[insert_point:]
-                    else:
-                        fixed_sql = fixed_sql.strip().rstrip(';') + f" WHERE {condition};"
-        # ==================== 步驟 4: 清理與完成 ====================
         fixed_sql = fixed_sql.strip()
         if not fixed_sql.endswith(';'):
             fixed_sql += ';'
         fixed_sql = re.sub(r'\s+', ' ', fixed_sql).strip()
         if fixes_applied:
@@ -550,7 +582,7 @@ class TextToSQLSystem:
                 self._log(f"  - 應用規則: {fix}", "DEBUG")
             self._log(f"  - 修正後 SQL: {fixed_sql}", "INFO")
         else:
-            self._log("✅ SQL 驗證通過，無需修正。", "INFO")
         return fixed_sql

             return self._generate_fallback_sql(prompt)
         try:
             output = self.llm(
                 prompt,
+                max_tokens=150,
                 temperature=0.1,
                 top_p=0.9,
                 echo=False,
+                # --- 將 stop 參數加回來 ---
+                stop=["```", ";", "\n\n", "</s>"],
             )
             self._log(f"🧠 模型原始輸出 (Raw Output): {output}", "DEBUG")
             if output and "choices" in output and len(output["choices"]) > 0:
                 generated_text = output["choices"][0]["text"]
                 self._log(f"📝 提取出的生成文本: {generated_text.strip()}", "DEBUG")
                 return generated_text.strip()
             else:
                 self._log("❌ 模型的原始輸出格式不正確或為空。", "ERROR")
+                return ""
         except Exception as e:
             self._log(f"❌ 模型生成過程中發生嚴重錯誤: {e}", "CRITICAL")
             import traceback
+            self._log(traceback.format_exc(), "DEBUG")
+            return ""
     def _load_gguf_model_fallback(self, model_path):
         """備用載入方式"""
         return relevant_tables[:3]  # 最多返回3個相關表格
+    # in class TextToSQLSystem:
     def _validate_and_fix_sql(self, sql: str, question: str) -> str:
         if not sql or not self.schema:
+            self._log("SQL 修正被跳過，因輸入為空或 schema 未載入。", "WARNING")
             return sql
         original_sql = sql
         fixed_sql = " " + sql.strip() + " "
         fixes_applied = []
         q_lower = question.lower()
+        # ==============================================================================
+        #  第一層：高價值意圖識別與模板覆寫 (Intent Recognition & Templating)
+        # ==============================================================================
+        # --- 意圖 1: 查詢 Top N 實體的營收貢獻 ---
+        # 匹配 "top 5 買家 營收", "貢獻最高的10個客戶", "業績最好的申請方" 等
+        top_n_pattern = r"(?:top|前|最高|最大|最好)\s*(\d+)?\s*(?:個|名)?\s*([^ ]+?)\s*(?:的)?(?:營收|業績|貢獻|金額|sales|revenue)"
+        top_n_match = re.search(top_n_pattern, question, re.IGNORECASE)
+        if top_n_match:
+            limit = top_n_match.group(1) or '10' # 如果沒寫 N，預設為 10
+            entity_keyword = top_n_match.group(2).lower()
+            # 建立實體關鍵詞到欄位的映射
+            ENTITY_MAP = {
+                '買家': 'T1.BuyerName', 'buyer': 'T1.BuyerName', '客戶': 'T1.BuyerName',
+                '申請廠商': 'T1.ApplicantName', '申請方': 'T1.ApplicantName', 'applicant': 'T1.ApplicantName',
+                '付款廠商': 'T1.InvoiceToName', '付款方': 'T1.InvoiceToName', 'invoiceto': 'T1.InvoiceToName',
+            }
+            column_name = next((v for k, v in ENTITY_MAP.items() if k in entity_keyword), None)
+            if column_name:
+                self._log(f"🔄 檢測到【Top {limit} {entity_keyword} 營收】查詢意圖，啟用模板。", "INFO")
+                fixed_sql = f"""
+SELECT {column_name} AS entity, SUM(T2.LocalAmount) AS total_revenue
+FROM TSR53SampleDescription AS T1
+JOIN TSR53Invoice AS T2 ON T1.JobNo = T2.JobNo
+WHERE T2.LocalAmount > 0
+GROUP BY entity
+ORDER BY total_revenue DESC
+LIMIT {limit};
+"""
+                fixes_applied.append(f"模板覆寫: Top {limit} {entity_keyword} 營收查詢")
+        # --- 意圖 2: 查詢特定實體的報告數量 (包含 Pass/Fail 等狀態) ---
+        # 匹配 "買家 ABC 有幾份 Fail 的報告", "申請方 XYZ 的 Pass 報告數量"
+        elif '報告' in q_lower and ('數量' in q_lower or '幾份' in q_lower or 'count' in q_lower):
+            ENTITY_TO_COLUMN_MAP = {
+                '買家': 'T1.BuyerName', 'buyer': 'T1.BuyerName', '客戶': 'T1.BuyerName',
+                '申請廠商': 'T1.ApplicantName', '申請方': 'T1.ApplicantName', 'applicant': 'T1.ApplicantName',
+                '付款廠商': 'T1.InvoiceToName', 'invoiceto': 'T1.InvoiceToName',
+                '代理商': 'T1.AgentName', 'agent': 'T1.AgentName',
+            }
+            entity_keywords_pattern = '|'.join(ENTITY_TO_COLUMN_MAP.keys())
+            dynamic_pattern = fr"({entity_keywords_pattern})\s*'\"?([a-zA-Z0-9\s&.-]+)'\"?"
+            entity_match = re.search(dynamic_pattern, question, re.IGNORECASE)
+            # 必須匹配到實體，且模型生成了錯誤 SQL (作為觸發器)
+            if entity_match and ('tsr53reportauthorization' in fixed_sql.lower() or 'testresult' in fixed_sql.lower()):
+                entity_type = entity_match.group(1).lower()
+                entity_name = entity_match.group(2).strip()
+                column_name = ENTITY_TO_COLUMN_MAP.get(entity_type)
+                # 確定報告狀態 (Fail/Pass)
+                status = "'Fail'"
+                if 'pass' in q_lower or '通過' in q_lower:
+                    status = "'Pass'"
+                self._log(f"🔄 檢測到查詢【{entity_type} '{entity_name}' 的 {status} 報告數】意圖，啟用模板。", "INFO")
+                fixed_sql = f"""
+SELECT COUNT(T1.JobNo) AS report_count
+FROM TSR53SampleDescription AS T1
+JOIN JobTimeline AS T2 ON T1.JobNo = T2.JobNo
+WHERE {column_name} = '{entity_name}'
+  AND T1.OverallRating = {status}
+  AND strftime('%Y', T2.ReportAuthorization) = '2024';
+"""
+                fixes_applied.append(f"模板覆寫: 查詢 {entity_type}='{entity_name}' 的 {status} 報告數")
+        # --- 意圖 3: 計算平均處理時長 (Turnaround Time, TAT) ---
+        # 匹配 "平均處理時間", "LabIn 到 LabOut 平均多久", "TAT"
+        elif any(k in q_lower for k in ['平均', 'average']) and any(k in q_lower for k in ['時間', '時長', '多久', '天', 'tat', 'turnaround']):
+            # 預設計算從 LabIn 到 ReportAuthorization 的總時長
+            start_col, end_col = 'T2.LabIn', 'T2.ReportAuthorization'
+            log_msg = "總流程平均時長 (天)"
+            if 'labin' in q_lower and 'labout' in q_lower:
+                start_col, end_col = 'T2.LabIn', 'T2.LabOut'
+                log_msg = "實驗室平均處理時長 (天)"
+            self._log(f"🔄 檢測到【{log_msg}】查詢意圖，啟用模板。", "INFO")
+            # SQLite 中，JULIANDAY 用於精確計算天數差
+            fixed_sql = f"""
+SELECT AVG(JULIANDAY({end_col}) - JULIANDAY({start_col})) AS average_tat_days
+FROM JobTimeline AS T2
+WHERE {start_col} IS NOT NULL AND {end_col} IS NOT NULL AND {end_col} > {start_col};
+"""
+            fixes_applied.append(f"模板覆寫: {log_msg} 查詢")
+        # 如果沒有任何模板被觸發，則進入常規修正流程
+        if not fixes_applied:
+            # ==============================================================================
+            #  第二層：常規修正流程 (Fallback Corrections)
+            # ==============================================================================
+            self._log("未觸發任何模板，執行常規修正流程...", "DEBUG")
+            # 步驟 2.1: SQL 方言修正
+            dialect_corrections = {
+                r'YEAR\s*\(([^)]+)\)': r"strftime('%Y', \1)",
+                r"(strftime\('%Y',\s*[^)]+\))\s*=\s*(\d{4})": r"\1 = '\2'"
+            }
+            for pattern, replacement in dialect_corrections.items():
                 if re.search(pattern, fixed_sql, re.IGNORECASE):
                     fixed_sql = re.sub(pattern, replacement, fixed_sql, flags=re.IGNORECASE)
+                    fixes_applied.append(f"修正方言: {pattern}")
+            # 步驟 2.2: Schema 名稱修正 (一個全面的字典)
+            schema_corrections = {
+                # --- 常見幻覺表 ---
+                'TSR53ReportAuthorization': 'TSR53SampleDescription',
+                'TSR53TestResult': 'TSR53SampleDescription',
+                'Customer': 'TSR53SampleDescription', 'Customers': 'TSR53SampleDescription',
+                'Invoice': 'TSR53Invoice', 'Invoices': 'TSR53Invoice',
+                'Job': 'JobTimeline', 'Jobs': 'JobsInProgress',
+                # --- 常見幻覺或錯誤欄位 ---
+                'ReportAuthorizationDate': 'ReportAuthorization',
+                'TestResult': 'OverallRating', 'Rating': 'OverallRating',
+                'CustomerName': 'BuyerName', # 優先使用 BuyerName 作為通用客戶名
+                'InvoiceTo': 'InvoiceToName',
+                'Applicant': 'ApplicantName',
+                'Agent': 'AgentName',
+                'JobNumber': 'JobNo',
+                'CreationDate': 'JobCreation', 'CreateDate': 'JobCreation',
+                'CompletedDate': 'ReportAuthorization',
+                'Amount': 'LocalAmount', # 優先使用 LocalAmount 作為金額
+                'Price': 'LocalAmount',
+                'Lab': 'LabGroup'
+            }
+            for wrong, correct in schema_corrections.items():
+                pattern = r'\b' + re.escape(wrong) + r'\b'
+                if re.search(pattern, fixed_sql, re.IGNORECASE):
+                    fixed_sql = re.sub(pattern, correct, fixed_sql, flags=re.IGNORECASE)
+                    fixes_applied.append(f"映射 Schema: '{wrong}' -> '{correct}'")
+            # 步驟 2.3: 基礎邏輯意圖修正
+            if any(kw in q_lower for kw in ['幾份', '多少', 'how many', 'count', '數量']) and 'select ' in fixed_sql.lower() and 'count' not in fixed_sql.lower():
+                # 僅在不是分組查詢時替換，避免破壞 GROUP BY
+                if 'group by' not in fixed_sql.lower():
+                    fixed_sql = re.sub(r'SELECT\s+.*?FROM', 'SELECT COUNT(*) FROM', fixed_sql, count=1, flags=re.IGNORECASE)
+                    fixes_applied.append("修正邏輯: 補全 COUNT(*)")
+        # ==============================================================================
+        #  第三層：清理與完成 (Finalization)
+        # ==============================================================================
         fixed_sql = fixed_sql.strip()
         if not fixed_sql.endswith(';'):
             fixed_sql += ';'
         fixed_sql = re.sub(r'\s+', ' ', fixed_sql).strip()
         if fixes_applied:
                 self._log(f"  - 應用規則: {fix}", "DEBUG")
             self._log(f"  - 修正後 SQL: {fixed_sql}", "INFO")
         else:
+            self._log("✅ SQL 驗證通過，無需常規修正。", "INFO")
         return fixed_sql