Paul720810 committed on
Commit
31a9f3d
·
verified ·
1 Parent(s): 845eb47

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -41
app.py CHANGED
@@ -418,12 +418,12 @@ class TextToSQLSystem:
418
 
419
  # in class TextToSQLSystem:
420
 
 
 
421
  def _validate_and_fix_sql(self, sql: str, question: str) -> str:
422
  """
423
- (V8 / 最終可靠版)
424
- 一個全面、多層次的 SQL 驗證與生成引擎。
425
- 本函數的觸發邏輯經過強化,不再依賴模型生成的特定幻覺內容,
426
- 而是更主動地基於使用者問題的意圖來啟用模板。
427
  """
428
  if not sql or not self.schema:
429
  self._log("SQL 修正被跳過,因輸入為空或 schema 未載入。", "WARNING")
@@ -438,23 +438,38 @@ class TextToSQLSystem:
438
  # 第一層:高價值意圖識別與模板覆寫 (Intent Recognition & Templating)
439
  # ==============================================================================
440
 
441
- # --- 意圖 1: 查詢 Top N 實體的營收貢獻 ---
442
- # 匹配 "top 5 買家 營收", "貢獻最高的10個客戶", "業績最好的申請方" 等
443
  top_n_pattern = r"(?:top|前|最高|最大|最好)\s*(\d+)?\s*(?:個|名)?\s*([^ ]+?)\s*(?:的)?(?:營收|業績|貢獻|金額|sales|revenue)"
444
  top_n_match = re.search(top_n_pattern, question, re.IGNORECASE)
445
 
446
- # --- 意圖 2: 查詢特定實體的報告數量 (包含 Pass/Fail 等狀態) ---
 
447
  ENTITY_TO_COLUMN_MAP = {
448
- '買家': 'T1.BuyerName', 'buyer': 'T1.BuyerName', '客戶': 'T1.BuyerName',
449
  '申請廠商': 'T1.ApplicantName', '申請方': 'T1.ApplicantName', 'applicant': 'T1.ApplicantName',
450
- '付款廠商': 'T1.InvoiceToName', 'invoiceto': 'T1.InvoiceToName',
451
  '代理商': 'T1.AgentName', 'agent': 'T1.AgentName',
 
452
  }
453
- entity_keywords_pattern = '|'.join(ENTITY_TO_COLUMN_MAP.keys())
454
- dynamic_pattern = fr"({entity_keywords_pattern})\s*'\"?([a-zA-Z0-9\s&.-]+)'\"?"
455
- entity_match = re.search(dynamic_pattern, question, re.IGNORECASE)
456
-
457
- # --- 意圖 3: 計算平均處理時長 (TAT) ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
458
  is_tat_query = any(k in q_lower for k in ['平均', 'average']) and any(k in q_lower for k in ['時間', '時長', '多久', '天', 'tat', 'turnaround'])
459
 
460
  # --- 判斷邏輯: 依優先級進入對應的模板 ---
@@ -462,7 +477,6 @@ class TextToSQLSystem:
462
  limit = top_n_match.group(1) or '10'
463
  entity_keyword = top_n_match.group(2).lower()
464
 
465
- # 從實體映射中找到對應的欄位
466
  column_name = next((v for k, v in ENTITY_TO_COLUMN_MAP.items() if k in entity_keyword), None)
467
 
468
  if column_name:
@@ -478,10 +492,10 @@ LIMIT {limit};
478
  """
479
  fixes_applied.append(f"模板覆寫: Top {limit} {entity_keyword} 營收查詢")
480
 
481
- elif entity_match and any(kw in q_lower for kw in ['份數', '數量', 'count', '幾份']):
482
- entity_type = entity_match.group(1).lower()
483
- entity_name = entity_match.group(2).strip()
484
- column_name = ENTITY_TO_COLUMN_MAP.get(entity_type)
485
 
486
  year_match = re.search(r'(\d{4})\s*年?', question)
487
  year = year_match.group(1) if year_match else '2024'
@@ -526,7 +540,6 @@ WHERE {start_col} IS NOT NULL AND {end_col} IS NOT NULL AND {end_col} > {start_c
526
  # 第二層:常規修正流程 (Fallback Corrections)
527
  # ==============================================================================
528
 
529
- # 步驟 2.1: SQL 方言修正
530
  dialect_corrections = {
531
  r'YEAR\s*\(([^)]+)\)': r"strftime('%Y', \1)",
532
  r"(strftime\('%Y',\s*[^)]+\))\s*=\s*(\d{4})": r"\1 = '\2'"
@@ -536,33 +549,21 @@ WHERE {start_col} IS NOT NULL AND {end_col} IS NOT NULL AND {end_col} > {start_c
536
  fixed_sql = re.sub(pattern, replacement, fixed_sql, flags=re.IGNORECASE)
537
  fixes_applied.append(f"修正方言: {pattern}")
538
 
539
- # 步驟 2.2: Schema 名稱修正 (一個全面的字典)
540
  schema_corrections = {
541
- # --- 常見幻覺表 ---
542
- 'TSR53ReportAuthorization': 'TSR53SampleDescription',
543
- 'TSR53TestResult': 'TSR53SampleDescription',
544
- 'JobInvoice': 'TSR53Invoice',
545
- 'JobInvoiceAuthorization': 'TSR53Invoice',
546
  'Customer': 'TSR53SampleDescription', 'Customers': 'TSR53SampleDescription',
547
  'Invoice': 'TSR53Invoice', 'Invoices': 'TSR53Invoice',
548
  'Job': 'JobTimeline', 'Jobs': 'JobsInProgress',
549
-
550
- # --- 常見幻覺或錯誤欄位 ---
551
- 'AuthorizationDate': 'ReportAuthorization',
552
- 'ReportAuthorizationDate': 'ReportAuthorization',
553
- 'LegalAuthorization': 'OverallRating',
554
  'TestResult': 'OverallRating', 'Rating': 'OverallRating',
555
- 'CustomerName': 'BuyerName',
556
- 'InvoiceTo': 'InvoiceToName',
557
- 'Applicant': 'ApplicantName',
558
- 'Agent': 'AgentName',
559
- 'JobNumber': 'JobNo',
560
- 'ReportNo': 'JobNo',
561
  'CreationDate': 'JobCreation', 'CreateDate': 'JobCreation',
562
  'CompletedDate': 'ReportAuthorization',
563
- 'Amount': 'LocalAmount',
564
- 'Price': 'LocalAmount',
565
- 'Lab': 'LabGroup'
566
  }
567
  for wrong, correct in schema_corrections.items():
568
  pattern = r'\b' + re.escape(wrong) + r'\b'
@@ -570,7 +571,6 @@ WHERE {start_col} IS NOT NULL AND {end_col} IS NOT NULL AND {end_col} > {start_c
570
  fixed_sql = re.sub(pattern, correct, fixed_sql, flags=re.IGNORECASE)
571
  fixes_applied.append(f"映射 Schema: '{wrong}' -> '{correct}'")
572
 
573
- # 步驟 2.3: 基礎邏輯意圖修正
574
  if any(kw in q_lower for kw in ['幾份', '多少', 'how many', 'count', '數量']) and 'select ' in fixed_sql.lower() and 'count' not in fixed_sql.lower():
575
  if 'group by' not in fixed_sql.lower():
576
  fixed_sql = re.sub(r'SELECT\s+.*?FROM', 'SELECT COUNT(*) FROM', fixed_sql, count=1, flags=re.IGNORECASE)
 
418
 
419
  # in class TextToSQLSystem:
420
 
421
+ # in class TextToSQLSystem:
422
+
423
  def _validate_and_fix_sql(self, sql: str, question: str) -> str:
424
  """
425
+ (V9 / 最終模式匹配版)
426
+ 採用更穩健的分步正則匹配邏輯,確保意圖模板能被可靠觸發。
 
 
427
  """
428
  if not sql or not self.schema:
429
  self._log("SQL 修正被跳過,因輸入為空或 schema 未載入。", "WARNING")
 
438
  # 第一層:高價值意圖識別與模板覆寫 (Intent Recognition & Templating)
439
  # ==============================================================================
440
 
441
+ # --- 意圖 1: Top N 查詢 ---
 
442
  top_n_pattern = r"(?:top|前|最高|最大|最好)\s*(\d+)?\s*(?:個|名)?\s*([^ ]+?)\s*(?:的)?(?:營收|業績|貢獻|金額|sales|revenue)"
443
  top_n_match = re.search(top_n_pattern, question, re.IGNORECASE)
444
 
445
+ # --- 意圖 2: 特定實體報告數查詢 (採用新的、更穩健的匹配邏輯) ---
446
+ entity_match_data = None
447
  ENTITY_TO_COLUMN_MAP = {
448
+ # 關鍵詞 (小寫,按可能的優先順序排列) -> 資料庫欄位
449
  '申請廠商': 'T1.ApplicantName', '申請方': 'T1.ApplicantName', 'applicant': 'T1.ApplicantName',
450
+ '付款廠商': 'T1.InvoiceToName', '付款方': 'T1.InvoiceToName', 'invoiceto': 'T1.InvoiceToName',
451
  '代理商': 'T1.AgentName', 'agent': 'T1.AgentName',
452
+ '買家': 'T1.BuyerName', 'buyer': 'T1.BuyerName', '客戶': 'T1.BuyerName', # 將通用詞放在後面
453
  }
454
+
455
+ # **新的分步匹配邏輯**
456
+ for keyword, column in ENTITY_TO_COLUMN_MAP.items():
457
+ # 步驟 1: 在問題中尋找關鍵詞
458
+ keyword_pos = q_lower.find(keyword)
459
+ if keyword_pos != -1:
460
+ # 步驟 2: 從關鍵詞之後的文本中提取實體名稱
461
+ # 正則: 捕獲關鍵詞後面跟著的、由字母/數字/&/./-組成的第一個詞組
462
+ pattern = fr"{re.escape(keyword)}[\s:;\'\"-]*([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$)"
463
+ match = re.search(pattern, question, re.IGNORECASE)
464
+ if match:
465
+ entity_match_data = {
466
+ "type": keyword,
467
+ "name": match.group(1).strip(),
468
+ "column": column
469
+ }
470
+ break # 找到第一個匹配的關鍵詞就停止,避免 "客戶" 覆蓋 "買家"
471
+
472
+ # --- 意圖 3: TAT 查詢 ---
473
  is_tat_query = any(k in q_lower for k in ['平均', 'average']) and any(k in q_lower for k in ['時間', '時長', '多久', '天', 'tat', 'turnaround'])
474
 
475
  # --- 判斷邏輯: 依優先級進入對應的模板 ---
 
477
  limit = top_n_match.group(1) or '10'
478
  entity_keyword = top_n_match.group(2).lower()
479
 
 
480
  column_name = next((v for k, v in ENTITY_TO_COLUMN_MAP.items() if k in entity_keyword), None)
481
 
482
  if column_name:
 
492
  """
493
  fixes_applied.append(f"模板覆寫: Top {limit} {entity_keyword} 營收查詢")
494
 
495
+ elif entity_match_data and any(kw in q_lower for kw in ['份數', '數量', 'count', '幾份']):
496
+ entity_type = entity_match_data["type"]
497
+ entity_name = entity_match_data["name"]
498
+ column_name = entity_match_data["column"]
499
 
500
  year_match = re.search(r'(\d{4})\s*年?', question)
501
  year = year_match.group(1) if year_match else '2024'
 
540
  # 第二層:常規修正流程 (Fallback Corrections)
541
  # ==============================================================================
542
 
 
543
  dialect_corrections = {
544
  r'YEAR\s*\(([^)]+)\)': r"strftime('%Y', \1)",
545
  r"(strftime\('%Y',\s*[^)]+\))\s*=\s*(\d{4})": r"\1 = '\2'"
 
549
  fixed_sql = re.sub(pattern, replacement, fixed_sql, flags=re.IGNORECASE)
550
  fixes_applied.append(f"修正方言: {pattern}")
551
 
 
552
  schema_corrections = {
553
+ 'TSR53ReportAuthorization': 'TSR53SampleDescription', 'TSR53TestResult': 'TSR53SampleDescription',
554
+ 'JobInvoice': 'TSR53Invoice', 'JobInvoiceAuthorization': 'TSR53Invoice',
 
 
 
555
  'Customer': 'TSR53SampleDescription', 'Customers': 'TSR53SampleDescription',
556
  'Invoice': 'TSR53Invoice', 'Invoices': 'TSR53Invoice',
557
  'Job': 'JobTimeline', 'Jobs': 'JobsInProgress',
558
+ 'AuthorizationDate': 'ReportAuthorization', 'ReportAuthorizationDate': 'ReportAuthorization',
559
+ 'LegalAuthorization': 'OverallRating', 'LegalAuthorizationDate': 'ReportAuthorization',
 
 
 
560
  'TestResult': 'OverallRating', 'Rating': 'OverallRating',
561
+ 'CustomerName': 'BuyerName', 'InvoiceTo': 'InvoiceToName',
562
+ 'Applicant': 'ApplicantName', 'Agent': 'AgentName',
563
+ 'JobNumber': 'JobNo', 'ReportNo': 'JobNo',
 
 
 
564
  'CreationDate': 'JobCreation', 'CreateDate': 'JobCreation',
565
  'CompletedDate': 'ReportAuthorization',
566
+ 'Amount': 'LocalAmount', 'Price': 'LocalAmount', 'Lab': 'LabGroup'
 
 
567
  }
568
  for wrong, correct in schema_corrections.items():
569
  pattern = r'\b' + re.escape(wrong) + r'\b'
 
571
  fixed_sql = re.sub(pattern, correct, fixed_sql, flags=re.IGNORECASE)
572
  fixes_applied.append(f"映射 Schema: '{wrong}' -> '{correct}'")
573
 
 
574
  if any(kw in q_lower for kw in ['幾份', '多少', 'how many', 'count', '數量']) and 'select ' in fixed_sql.lower() and 'count' not in fixed_sql.lower():
575
  if 'group by' not in fixed_sql.lower():
576
  fixed_sql = re.sub(r'SELECT\s+.*?FROM', 'SELECT COUNT(*) FROM', fixed_sql, count=1, flags=re.IGNORECASE)