Spaces:

Paul720810
/

Softline-SQL-Assistant

Sleeping

App Files Files Community

Paul720810 committed on Sep 5, 2025

Commit

31a9f3d

verified ·

1 Parent(s): 845eb47

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -41

app.py CHANGED Viewed

@@ -418,12 +418,12 @@ class TextToSQLSystem:
     # in class TextToSQLSystem:
     def _validate_and_fix_sql(self, sql: str, question: str) -> str:
         """
-        (V8 / 最終可靠版)
-        一個全面、多層次的 SQL 驗證與生成引擎。
-        本函數的觸發邏輯經過強化，不再依賴模型生成的特定幻覺內容，
-        而是更主動地基於使用者問題的意圖來啟用模板。
         """
         if not sql or not self.schema:
             self._log("SQL 修正被跳過，因輸入為空或 schema 未載入。", "WARNING")
@@ -438,23 +438,38 @@ class TextToSQLSystem:
         #  第一層：高價值意圖識別與模板覆寫 (Intent Recognition & Templating)
         # ==============================================================================
-        # --- 意圖 1: 查詢 Top N 實體的營收貢獻 ---
-        # 匹配 "top 5 買家 營收", "貢獻最高的10個客戶", "業績最好的申請方" 等
         top_n_pattern = r"(?:top|前|最高|最大|最好)\s*(\d+)?\s*(?:個|名)?\s*([^ ]+?)\s*(?:的)?(?:營收|業績|貢獻|金額|sales|revenue)"
         top_n_match = re.search(top_n_pattern, question, re.IGNORECASE)
-        # --- 意圖 2: 查詢特定實體的報告數量 (包含 Pass/Fail 等狀態) ---
         ENTITY_TO_COLUMN_MAP = {
-            '買家': 'T1.BuyerName', 'buyer': 'T1.BuyerName', '客戶': 'T1.BuyerName',
             '申請廠商': 'T1.ApplicantName', '申請方': 'T1.ApplicantName', 'applicant': 'T1.ApplicantName',
-            '付款廠商': 'T1.InvoiceToName', 'invoiceto': 'T1.InvoiceToName',
             '代理商': 'T1.AgentName', 'agent': 'T1.AgentName',
         }
-        entity_keywords_pattern = '|'.join(ENTITY_TO_COLUMN_MAP.keys())
-        dynamic_pattern = fr"({entity_keywords_pattern})\s*'\"?([a-zA-Z0-9\s&.-]+)'\"?"
-        entity_match = re.search(dynamic_pattern, question, re.IGNORECASE)
-        # --- 意圖 3: 計算平均處理時長 (TAT) ---
         is_tat_query = any(k in q_lower for k in ['平均', 'average']) and any(k in q_lower for k in ['時間', '時長', '多久', '天', 'tat', 'turnaround'])
         # --- 判斷邏輯: 依優先級進入對應的模板 ---
@@ -462,7 +477,6 @@ class TextToSQLSystem:
             limit = top_n_match.group(1) or '10'
             entity_keyword = top_n_match.group(2).lower()
-            # 從實體映射中找到對應的欄位
             column_name = next((v for k, v in ENTITY_TO_COLUMN_MAP.items() if k in entity_keyword), None)
             if column_name:
@@ -478,10 +492,10 @@ LIMIT {limit};
 """
                 fixes_applied.append(f"模板覆寫: Top {limit} {entity_keyword} 營收查詢")
-        elif entity_match and any(kw in q_lower for kw in ['份數', '數量', 'count', '幾份']):
-            entity_type = entity_match.group(1).lower()
-            entity_name = entity_match.group(2).strip()
-            column_name = ENTITY_TO_COLUMN_MAP.get(entity_type)
             year_match = re.search(r'(\d{4})\s*年?', question)
             year = year_match.group(1) if year_match else '2024'
@@ -526,7 +540,6 @@ WHERE {start_col} IS NOT NULL AND {end_col} IS NOT NULL AND {end_col} > {start_c
             #  第二層：常規修正流程 (Fallback Corrections)
             # ==============================================================================
-            # 步驟 2.1: SQL 方言修正
             dialect_corrections = {
                 r'YEAR\s*\(([^)]+)\)': r"strftime('%Y', \1)",
                 r"(strftime\('%Y',\s*[^)]+\))\s*=\s*(\d{4})": r"\1 = '\2'"
@@ -536,33 +549,21 @@ WHERE {start_col} IS NOT NULL AND {end_col} IS NOT NULL AND {end_col} > {start_c
                     fixed_sql = re.sub(pattern, replacement, fixed_sql, flags=re.IGNORECASE)
                     fixes_applied.append(f"修正方言: {pattern}")
-            # 步驟 2.2: Schema 名稱修正 (一個全面的字典)
             schema_corrections = {
-                # --- 常見幻覺表 ---
-                'TSR53ReportAuthorization': 'TSR53SampleDescription',
-                'TSR53TestResult': 'TSR53SampleDescription',
-                'JobInvoice': 'TSR53Invoice',
-                'JobInvoiceAuthorization': 'TSR53Invoice',
                 'Customer': 'TSR53SampleDescription', 'Customers': 'TSR53SampleDescription',
                 'Invoice': 'TSR53Invoice', 'Invoices': 'TSR53Invoice',
                 'Job': 'JobTimeline', 'Jobs': 'JobsInProgress',
-                # --- 常見幻覺或錯誤欄位 ---
-                'AuthorizationDate': 'ReportAuthorization',
-                'ReportAuthorizationDate': 'ReportAuthorization',
-                'LegalAuthorization': 'OverallRating',
                 'TestResult': 'OverallRating', 'Rating': 'OverallRating',
-                'CustomerName': 'BuyerName',
-                'InvoiceTo': 'InvoiceToName',
-                'Applicant': 'ApplicantName',
-                'Agent': 'AgentName',
-                'JobNumber': 'JobNo',
-                'ReportNo': 'JobNo',
                 'CreationDate': 'JobCreation', 'CreateDate': 'JobCreation',
                 'CompletedDate': 'ReportAuthorization',
-                'Amount': 'LocalAmount',
-                'Price': 'LocalAmount',
-                'Lab': 'LabGroup'
             }
             for wrong, correct in schema_corrections.items():
                 pattern = r'\b' + re.escape(wrong) + r'\b'
@@ -570,7 +571,6 @@ WHERE {start_col} IS NOT NULL AND {end_col} IS NOT NULL AND {end_col} > {start_c
                     fixed_sql = re.sub(pattern, correct, fixed_sql, flags=re.IGNORECASE)
                     fixes_applied.append(f"映射 Schema: '{wrong}' -> '{correct}'")
-            # 步驟 2.3: 基礎邏輯意圖修正
             if any(kw in q_lower for kw in ['幾份', '多少', 'how many', 'count', '數量']) and 'select ' in fixed_sql.lower() and 'count' not in fixed_sql.lower():
                 if 'group by' not in fixed_sql.lower():
                     fixed_sql = re.sub(r'SELECT\s+.*?FROM', 'SELECT COUNT(*) FROM', fixed_sql, count=1, flags=re.IGNORECASE)

     # in class TextToSQLSystem:
+    # in class TextToSQLSystem:
     def _validate_and_fix_sql(self, sql: str, question: str) -> str:
         """
+        (V9 / 最終模式匹配版)
+        採用更穩健的分步正則匹配邏輯，確保意圖模板能被可靠觸發。
         """
         if not sql or not self.schema:
             self._log("SQL 修正被跳過，因輸入為空或 schema 未載入。", "WARNING")
         #  第一層：高價值意圖識別與模板覆寫 (Intent Recognition & Templating)
         # ==============================================================================
+        # --- 意圖 1: Top N 查詢 ---
         top_n_pattern = r"(?:top|前|最高|最大|最好)\s*(\d+)?\s*(?:個|名)?\s*([^ ]+?)\s*(?:的)?(?:營收|業績|貢獻|金額|sales|revenue)"
         top_n_match = re.search(top_n_pattern, question, re.IGNORECASE)
+        # --- 意圖 2: 特定實體報告數查詢 (採用新的、更穩健的匹配邏輯) ---
+        entity_match_data = None
         ENTITY_TO_COLUMN_MAP = {
+            # 關鍵詞 (小寫，按可能的優先順序排列) -> 資料庫欄位
             '申請廠商': 'T1.ApplicantName', '申請方': 'T1.ApplicantName', 'applicant': 'T1.ApplicantName',
+            '付款廠商': 'T1.InvoiceToName', '付款方': 'T1.InvoiceToName', 'invoiceto': 'T1.InvoiceToName',
             '代理商': 'T1.AgentName', 'agent': 'T1.AgentName',
+            '買家': 'T1.BuyerName', 'buyer': 'T1.BuyerName', '客戶': 'T1.BuyerName', # 將通用詞放在後面
         }
+        # **新的分步匹配邏輯**
+        for keyword, column in ENTITY_TO_COLUMN_MAP.items():
+            # 步驟 1: 在問題中尋找關鍵詞
+            keyword_pos = q_lower.find(keyword)
+            if keyword_pos != -1:
+                # 步驟 2: 從關鍵詞之後的文本中提取實體名稱
+                # 正則: 捕獲關鍵詞後面跟著的、由字母/數字/&/./-組成的第一個詞組
+                pattern = fr"{re.escape(keyword)}[\s:;\'\"-]*([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$)"
+                match = re.search(pattern, question, re.IGNORECASE)
+                if match:
+                    entity_match_data = {
+                        "type": keyword,
+                        "name": match.group(1).strip(),
+                        "column": column
+                    }
+                    break # 找到第一個匹配的關鍵詞就停止，避免 "客戶" 覆蓋 "買家"
+        # --- 意圖 3: TAT 查詢 ---
         is_tat_query = any(k in q_lower for k in ['平均', 'average']) and any(k in q_lower for k in ['時間', '時長', '多久', '天', 'tat', 'turnaround'])
         # --- 判斷邏輯: 依優先級進入對應的模板 ---
             limit = top_n_match.group(1) or '10'
             entity_keyword = top_n_match.group(2).lower()
             column_name = next((v for k, v in ENTITY_TO_COLUMN_MAP.items() if k in entity_keyword), None)
             if column_name:
 """
                 fixes_applied.append(f"模板覆寫: Top {limit} {entity_keyword} 營收查詢")
+        elif entity_match_data and any(kw in q_lower for kw in ['份數', '數量', 'count', '幾份']):
+            entity_type = entity_match_data["type"]
+            entity_name = entity_match_data["name"]
+            column_name = entity_match_data["column"]
             year_match = re.search(r'(\d{4})\s*年?', question)
             year = year_match.group(1) if year_match else '2024'
             #  第二層：常規修正流程 (Fallback Corrections)
             # ==============================================================================
             dialect_corrections = {
                 r'YEAR\s*\(([^)]+)\)': r"strftime('%Y', \1)",
                 r"(strftime\('%Y',\s*[^)]+\))\s*=\s*(\d{4})": r"\1 = '\2'"
                     fixed_sql = re.sub(pattern, replacement, fixed_sql, flags=re.IGNORECASE)
                     fixes_applied.append(f"修正方言: {pattern}")
             schema_corrections = {
+                'TSR53ReportAuthorization': 'TSR53SampleDescription', 'TSR53TestResult': 'TSR53SampleDescription',
+                'JobInvoice': 'TSR53Invoice', 'JobInvoiceAuthorization': 'TSR53Invoice',
                 'Customer': 'TSR53SampleDescription', 'Customers': 'TSR53SampleDescription',
                 'Invoice': 'TSR53Invoice', 'Invoices': 'TSR53Invoice',
                 'Job': 'JobTimeline', 'Jobs': 'JobsInProgress',
+                'AuthorizationDate': 'ReportAuthorization', 'ReportAuthorizationDate': 'ReportAuthorization',
+                'LegalAuthorization': 'OverallRating', 'LegalAuthorizationDate': 'ReportAuthorization',
                 'TestResult': 'OverallRating', 'Rating': 'OverallRating',
+                'CustomerName': 'BuyerName', 'InvoiceTo': 'InvoiceToName',
+                'Applicant': 'ApplicantName', 'Agent': 'AgentName',
+                'JobNumber': 'JobNo', 'ReportNo': 'JobNo',
                 'CreationDate': 'JobCreation', 'CreateDate': 'JobCreation',
                 'CompletedDate': 'ReportAuthorization',
+                'Amount': 'LocalAmount', 'Price': 'LocalAmount', 'Lab': 'LabGroup'
             }
             for wrong, correct in schema_corrections.items():
                 pattern = r'\b' + re.escape(wrong) + r'\b'
                     fixed_sql = re.sub(pattern, correct, fixed_sql, flags=re.IGNORECASE)
                     fixes_applied.append(f"映射 Schema: '{wrong}' -> '{correct}'")
             if any(kw in q_lower for kw in ['幾份', '多少', 'how many', 'count', '數量']) and 'select ' in fixed_sql.lower() and 'count' not in fixed_sql.lower():
                 if 'group by' not in fixed_sql.lower():
                     fixed_sql = re.sub(r'SELECT\s+.*?FROM', 'SELECT COUNT(*) FROM', fixed_sql, count=1, flags=re.IGNORECASE)