Paul720810 committed on
Commit
845eb47
·
verified ·
1 Parent(s): f080e2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -48
app.py CHANGED
@@ -416,8 +416,15 @@ class TextToSQLSystem:
416
 
417
  # in class TextToSQLSystem:
418
 
419
- def _validate_and_fix_sql(self, sql: str, question: str) -> str:
420
 
 
 
 
 
 
 
 
421
  if not sql or not self.schema:
422
  self._log("SQL 修正被跳過,因輸入為空或 schema 未載入。", "WARNING")
423
  return sql
@@ -435,17 +442,28 @@ class TextToSQLSystem:
435
  # 匹配 "top 5 買家 營收", "貢獻最高的10個客戶", "業績最好的申請方" 等
436
  top_n_pattern = r"(?:top|前|最高|最大|最好)\s*(\d+)?\s*(?:個|名)?\s*([^ ]+?)\s*(?:的)?(?:營收|業績|貢獻|金額|sales|revenue)"
437
  top_n_match = re.search(top_n_pattern, question, re.IGNORECASE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
438
  if top_n_match:
439
- limit = top_n_match.group(1) or '10' # 如果沒寫 N,預設為 10
440
  entity_keyword = top_n_match.group(2).lower()
441
 
442
- # 建立實體關鍵詞到欄位的映射
443
- ENTITY_MAP = {
444
- '買家': 'T1.BuyerName', 'buyer': 'T1.BuyerName', '客戶': 'T1.BuyerName',
445
- '申請廠商': 'T1.ApplicantName', '申請方': 'T1.ApplicantName', 'applicant': 'T1.ApplicantName',
446
- '付款廠商': 'T1.InvoiceToName', '付款方': 'T1.InvoiceToName', 'invoiceto': 'T1.InvoiceToName',
447
- }
448
- column_name = next((v for k, v in ENTITY_MAP.items() if k in entity_keyword), None)
449
 
450
  if column_name:
451
  self._log(f"🔄 檢測到【Top {limit} {entity_keyword} 營收】查詢意圖,啟用模板。", "INFO")
@@ -459,46 +477,33 @@ ORDER BY total_revenue DESC
459
  LIMIT {limit};
460
  """
461
  fixes_applied.append(f"模板覆寫: Top {limit} {entity_keyword} 營收查詢")
462
-
463
- # --- 意圖 2: 查詢特定實體的報告數量 (包含 Pass/Fail 等狀態) ---
464
- # 匹配 "買家 ABC 有幾份 Fail 的報告", "申請方 XYZ 的 Pass 報告數量"
465
- elif '報告' in q_lower and ('數量' in q_lower or '幾份' in q_lower or 'count' in q_lower):
466
- ENTITY_TO_COLUMN_MAP = {
467
- '買家': 'T1.BuyerName', 'buyer': 'T1.BuyerName', '客戶': 'T1.BuyerName',
468
- '申請廠商': 'T1.ApplicantName', '申請方': 'T1.ApplicantName', 'applicant': 'T1.ApplicantName',
469
- '付款廠商': 'T1.InvoiceToName', 'invoiceto': 'T1.InvoiceToName',
470
- '代理商': 'T1.AgentName', 'agent': 'T1.AgentName',
471
- }
472
- entity_keywords_pattern = '|'.join(ENTITY_TO_COLUMN_MAP.keys())
473
- dynamic_pattern = fr"({entity_keywords_pattern})\s*'\"?([a-zA-Z0-9\s&.-]+)'\"?"
474
- entity_match = re.search(dynamic_pattern, question, re.IGNORECASE)
475
-
476
- # 必須匹配到實體,且模型生成了錯誤 SQL (作為觸發器)
477
- if entity_match and ('tsr53reportauthorization' in fixed_sql.lower() or 'testresult' in fixed_sql.lower()):
478
- entity_type = entity_match.group(1).lower()
479
- entity_name = entity_match.group(2).strip()
480
- column_name = ENTITY_TO_COLUMN_MAP.get(entity_type)
481
-
482
- # 確定報告狀態 (Fail/Pass)
483
- status = "'Fail'"
484
- if 'pass' in q_lower or '通過' in q_lower:
485
- status = "'Pass'"
486
-
487
- self._log(f"🔄 檢測到查詢【{entity_type} '{entity_name}' 的 {status} 報告數】意圖,啟用模板。", "INFO")
488
- fixed_sql = f"""
489
  SELECT COUNT(T1.JobNo) AS report_count
490
  FROM TSR53SampleDescription AS T1
491
  JOIN JobTimeline AS T2 ON T1.JobNo = T2.JobNo
492
  WHERE {column_name} = '{entity_name}'
493
- AND T1.OverallRating = {status}
494
- AND strftime('%Y', T2.ReportAuthorization) = '2024';
495
  """
496
- fixes_applied.append(f"模板覆寫: 查詢 {entity_type}='{entity_name}' {status} 報告數")
497
 
498
- # --- 意圖 3: 計算平均處理時長 (Turnaround Time, TAT) ---
499
- # 匹配 "平均處理時間", "LabIn 到 LabOut 平均多久", "TAT"
500
- elif any(k in q_lower for k in ['平均', 'average']) and any(k in q_lower for k in ['時間', '時長', '多久', '天', 'tat', 'turnaround']):
501
- # 預設計算從 LabIn 到 ReportAuthorization 的總時長
502
  start_col, end_col = 'T2.LabIn', 'T2.ReportAuthorization'
503
  log_msg = "總流程平均時長 (天)"
504
  if 'labin' in q_lower and 'labout' in q_lower:
@@ -506,7 +511,6 @@ WHERE {column_name} = '{entity_name}'
506
  log_msg = "實驗室平均處理時長 (天)"
507
 
508
  self._log(f"🔄 檢測到【{log_msg}】查詢意圖,啟用模板。", "INFO")
509
- # SQLite 中,JULIANDAY 用於精確計算天數差
510
  fixed_sql = f"""
511
  SELECT AVG(JULIANDAY({end_col}) - JULIANDAY({start_col})) AS average_tat_days
512
  FROM JobTimeline AS T2
@@ -516,10 +520,11 @@ WHERE {start_col} IS NOT NULL AND {end_col} IS NOT NULL AND {end_col} > {start_c
516
 
517
  # 如果沒有任何模板被觸發,則進入常規修正流程
518
  if not fixes_applied:
 
 
519
  # ==============================================================================
520
  # 第二層:常規修正流程 (Fallback Corrections)
521
  # ==============================================================================
522
- self._log("未觸發任何模板,執行常規修正流程...", "DEBUG")
523
 
524
  # 步驟 2.1: SQL 方言修正
525
  dialect_corrections = {
@@ -536,21 +541,26 @@ WHERE {start_col} IS NOT NULL AND {end_col} IS NOT NULL AND {end_col} > {start_c
536
  # --- 常見幻覺表 ---
537
  'TSR53ReportAuthorization': 'TSR53SampleDescription',
538
  'TSR53TestResult': 'TSR53SampleDescription',
 
 
539
  'Customer': 'TSR53SampleDescription', 'Customers': 'TSR53SampleDescription',
540
  'Invoice': 'TSR53Invoice', 'Invoices': 'TSR53Invoice',
541
  'Job': 'JobTimeline', 'Jobs': 'JobsInProgress',
542
 
543
  # --- 常見幻覺或錯誤欄位 ---
 
544
  'ReportAuthorizationDate': 'ReportAuthorization',
 
545
  'TestResult': 'OverallRating', 'Rating': 'OverallRating',
546
- 'CustomerName': 'BuyerName', # 優先使用 BuyerName 作為通用客戶名
547
  'InvoiceTo': 'InvoiceToName',
548
  'Applicant': 'ApplicantName',
549
  'Agent': 'AgentName',
550
  'JobNumber': 'JobNo',
 
551
  'CreationDate': 'JobCreation', 'CreateDate': 'JobCreation',
552
  'CompletedDate': 'ReportAuthorization',
553
- 'Amount': 'LocalAmount', # 優先使用 LocalAmount 作為金額
554
  'Price': 'LocalAmount',
555
  'Lab': 'LabGroup'
556
  }
@@ -562,7 +572,6 @@ WHERE {start_col} IS NOT NULL AND {end_col} IS NOT NULL AND {end_col} > {start_c
562
 
563
  # 步驟 2.3: 基礎邏輯意圖修正
564
  if any(kw in q_lower for kw in ['幾份', '多少', 'how many', 'count', '數量']) and 'select ' in fixed_sql.lower() and 'count' not in fixed_sql.lower():
565
- # 僅在不是分組查詢時替換,避免破壞 GROUP BY
566
  if 'group by' not in fixed_sql.lower():
567
  fixed_sql = re.sub(r'SELECT\s+.*?FROM', 'SELECT COUNT(*) FROM', fixed_sql, count=1, flags=re.IGNORECASE)
568
  fixes_applied.append("修正邏輯: 補全 COUNT(*)")
 
416
 
417
  # in class TextToSQLSystem:
418
 
419
+ # in class TextToSQLSystem:
420
 
421
+ def _validate_and_fix_sql(self, sql: str, question: str) -> str:
422
+ """
423
+ (V8 / 最終可靠版)
424
+ 一個全面、多層次的 SQL 驗證與生成引擎。
425
+ 本函數的觸發邏輯經過強化,不再依賴模型生成的特定幻覺內容,
426
+ 而是更主動地基於使用者問題的意圖來啟用模板。
427
+ """
428
  if not sql or not self.schema:
429
  self._log("SQL 修正被跳過,因輸入為空或 schema 未載入。", "WARNING")
430
  return sql
 
442
  # 匹配 "top 5 買家 營收", "貢獻最高的10個客戶", "業績最好的申請方" 等
443
  top_n_pattern = r"(?:top|前|最高|最大|最好)\s*(\d+)?\s*(?:個|名)?\s*([^ ]+?)\s*(?:的)?(?:營收|業績|貢獻|金額|sales|revenue)"
444
  top_n_match = re.search(top_n_pattern, question, re.IGNORECASE)
445
+
446
+ # --- 意圖 2: 查詢特定實體的報告數量 (包含 Pass/Fail 等狀態) ---
447
+ ENTITY_TO_COLUMN_MAP = {
448
+ '買家': 'T1.BuyerName', 'buyer': 'T1.BuyerName', '客戶': 'T1.BuyerName',
449
+ '申請廠商': 'T1.ApplicantName', '申請方': 'T1.ApplicantName', 'applicant': 'T1.ApplicantName',
450
+ '付款廠商': 'T1.InvoiceToName', 'invoiceto': 'T1.InvoiceToName',
451
+ '代理商': 'T1.AgentName', 'agent': 'T1.AgentName',
452
+ }
453
+ entity_keywords_pattern = '|'.join(ENTITY_TO_COLUMN_MAP.keys())
454
+ dynamic_pattern = fr"({entity_keywords_pattern})\s*'\"?([a-zA-Z0-9\s&.-]+)'\"?"
455
+ entity_match = re.search(dynamic_pattern, question, re.IGNORECASE)
456
+
457
+ # --- 意圖 3: 計算平均處理時長 (TAT) ---
458
+ is_tat_query = any(k in q_lower for k in ['平均', 'average']) and any(k in q_lower for k in ['時間', '時長', '多久', '天', 'tat', 'turnaround'])
459
+
460
+ # --- 判斷邏輯: 依優先級進入對應的模板 ---
461
  if top_n_match:
462
+ limit = top_n_match.group(1) or '10'
463
  entity_keyword = top_n_match.group(2).lower()
464
 
465
+ # 從實體映射中找到對應的欄位
466
+ column_name = next((v for k, v in ENTITY_TO_COLUMN_MAP.items() if k in entity_keyword), None)
 
 
 
 
 
467
 
468
  if column_name:
469
  self._log(f"🔄 檢測到【Top {limit} {entity_keyword} 營收】查詢意圖,啟用模板。", "INFO")
 
477
  LIMIT {limit};
478
  """
479
  fixes_applied.append(f"模板覆寫: Top {limit} {entity_keyword} 營收查詢")
480
+
481
+ elif entity_match and any(kw in q_lower for kw in ['份數', '數量', 'count', '幾份']):
482
+ entity_type = entity_match.group(1).lower()
483
+ entity_name = entity_match.group(2).strip()
484
+ column_name = ENTITY_TO_COLUMN_MAP.get(entity_type)
485
+
486
+ year_match = re.search(r'(\d{4})\s*年?', question)
487
+ year = year_match.group(1) if year_match else '2024'
488
+
489
+ status_condition = ""
490
+ if 'fail' in q_lower or '失敗' in q_lower:
491
+ status_condition = "AND T1.OverallRating = 'Fail'"
492
+ elif 'pass' in q_lower or '通過' in q_lower:
493
+ status_condition = "AND T1.OverallRating = 'Pass'"
494
+
495
+ self._log(f"🔄 檢測到查詢【{entity_type} '{entity_name}' {year} 年的報告數】意圖,啟用模板。", "INFO")
496
+ fixed_sql = f"""
 
 
 
 
 
 
 
 
 
 
497
  SELECT COUNT(T1.JobNo) AS report_count
498
  FROM TSR53SampleDescription AS T1
499
  JOIN JobTimeline AS T2 ON T1.JobNo = T2.JobNo
500
  WHERE {column_name} = '{entity_name}'
501
+ AND strftime('%Y', T2.ReportAuthorization) = '{year}'
502
+ {status_condition};
503
  """
504
+ fixes_applied.append(f"模板覆寫: 查詢 {entity_type}='{entity_name}' ({year}年) 的報告數")
505
 
506
+ elif is_tat_query:
 
 
 
507
  start_col, end_col = 'T2.LabIn', 'T2.ReportAuthorization'
508
  log_msg = "總流程平均時長 (天)"
509
  if 'labin' in q_lower and 'labout' in q_lower:
 
511
  log_msg = "實驗室平均處理時長 (天)"
512
 
513
  self._log(f"🔄 檢測到【{log_msg}】查詢意圖,啟用模板。", "INFO")
 
514
  fixed_sql = f"""
515
  SELECT AVG(JULIANDAY({end_col}) - JULIANDAY({start_col})) AS average_tat_days
516
  FROM JobTimeline AS T2
 
520
 
521
  # 如果沒有任何模板被觸發,則進入常規修正流程
522
  if not fixes_applied:
523
+ self._log("未觸發任何模板,執行常規修正流程...", "DEBUG")
524
+
525
  # ==============================================================================
526
  # 第二層:常規修正流程 (Fallback Corrections)
527
  # ==============================================================================
 
528
 
529
  # 步驟 2.1: SQL 方言修正
530
  dialect_corrections = {
 
541
  # --- 常見幻覺表 ---
542
  'TSR53ReportAuthorization': 'TSR53SampleDescription',
543
  'TSR53TestResult': 'TSR53SampleDescription',
544
+ 'JobInvoice': 'TSR53Invoice',
545
+ 'JobInvoiceAuthorization': 'TSR53Invoice',
546
  'Customer': 'TSR53SampleDescription', 'Customers': 'TSR53SampleDescription',
547
  'Invoice': 'TSR53Invoice', 'Invoices': 'TSR53Invoice',
548
  'Job': 'JobTimeline', 'Jobs': 'JobsInProgress',
549
 
550
  # --- 常見幻覺或錯誤欄位 ---
551
+ 'AuthorizationDate': 'ReportAuthorization',
552
  'ReportAuthorizationDate': 'ReportAuthorization',
553
+ 'LegalAuthorization': 'OverallRating',
554
  'TestResult': 'OverallRating', 'Rating': 'OverallRating',
555
+ 'CustomerName': 'BuyerName',
556
  'InvoiceTo': 'InvoiceToName',
557
  'Applicant': 'ApplicantName',
558
  'Agent': 'AgentName',
559
  'JobNumber': 'JobNo',
560
+ 'ReportNo': 'JobNo',
561
  'CreationDate': 'JobCreation', 'CreateDate': 'JobCreation',
562
  'CompletedDate': 'ReportAuthorization',
563
+ 'Amount': 'LocalAmount',
564
  'Price': 'LocalAmount',
565
  'Lab': 'LabGroup'
566
  }
 
572
 
573
  # 步驟 2.3: 基礎邏輯意圖修正
574
  if any(kw in q_lower for kw in ['幾份', '多少', 'how many', 'count', '數量']) and 'select ' in fixed_sql.lower() and 'count' not in fixed_sql.lower():
 
575
  if 'group by' not in fixed_sql.lower():
576
  fixed_sql = re.sub(r'SELECT\s+.*?FROM', 'SELECT COUNT(*) FROM', fixed_sql, count=1, flags=re.IGNORECASE)
577
  fixes_applied.append("修正邏輯: 補全 COUNT(*)")