Paul720810 committed on
Commit
a90716d
·
verified ·
1 Parent(s): 88cfd09

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -63
app.py CHANGED
@@ -494,25 +494,19 @@ class TextToSQLSystem:
494
 
495
  return formatted.strip()
496
 
497
- # in class TextToSQLSystem:
498
 
499
  def _validate_and_fix_sql(self, question: str, raw_response: str) -> Tuple[Optional[str], str]:
500
  """
501
- (V23 / 实体识别版)
502
- 一個全面、多層次 SQL 驗證與生成引擎。
503
- 引入了全新的、统一的实体识别引擎,能够准确解析 "买家 Gap", "c0761n",
504
- "买家ID c0761n" 等多种复杂的实体提问模式。
505
  """
506
  q_lower = question.lower()
507
-
508
  # ==============================================================================
509
- # 第零層:高價值意圖識別與模板覆寫 (Intent Recognition & Templating)
510
  # ==============================================================================
511
-
512
- # --- **全新的统一实体识别引擎** ---
513
  entity_match_data = None
514
-
515
- # 定义多种识别模式,【优先级从高到低】
516
  entity_patterns = [
517
  # 模式1: 匹配 "类型 + ID" (e.g., "买家ID C0761N") - 最高优先级
518
  {'pattern': r"(買家|buyer)\s*(?:id|代號|代碼)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.BuyerID', 'type': '買家ID'},
@@ -533,67 +527,103 @@ class TextToSQLSystem:
533
  for p in entity_patterns:
534
  match = re.search(p['pattern'], question, re.IGNORECASE)
535
  if match:
 
536
  entity_value = match.group(2) if len(match.groups()) > 1 else match.group(1)
537
  entity_match_data = {
538
  "type": p['type'],
539
- "name": entity_value.strip().upper(),
540
  "column": p['column']
541
  }
 
542
  break
543
-
544
- # --- 预先检测其他意图 ---
545
- job_no_match = re.search(r"(?:工單|jobno)\s*'\"?([A-Z]{2,3}\d+)'\"?", question, re.IGNORECASE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
546
 
547
- # --- 判断逻辑: 依优先级进入对应的模板 ---
548
- if any(kw in q_lower for kw in ['報告號碼', '報告清單', '列出報告', 'report number', 'list of reports']):
549
- year_match = re.search(r'(\d{4})\s*年?', question)
550
- month_match = re.search(r'(\d{1,2})\s*月', question)
551
- from_clause = "FROM JobTimeline AS jt"
552
- select_clause = "SELECT jt.JobNo, jt.ReportAuthorization"
553
- where_conditions = ["jt.ReportAuthorization IS NOT NULL"]
554
- log_parts = []
555
-
556
- if year_match: year = year_match.group(1); where_conditions.append(f"strftime('%Y', jt.ReportAuthorization) = '{year}'"); log_parts.append(f"{year}年")
557
- if month_match: month = month_match.group(1).zfill(2); where_conditions.append(f"strftime('%m', jt.ReportAuthorization) = '{month}'"); log_parts.append(f"{month}月")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
558
 
559
- if 'fail' in q_lower or '失敗' in q_lower:
560
- if "JOIN TSR53SampleDescription" not in from_clause: from_clause = "FROM JobTimeline AS jt JOIN TSR53SampleDescription AS sd ON jt.JobNo = sd.JobNo"
561
- where_conditions.append("sd.OverallRating = 'Fail'"); log_parts.append("Fail")
562
- elif 'pass' in q_lower or '通過' in q_lower:
563
- if "JOIN TSR53SampleDescription" not in from_clause: from_clause = "FROM JobTimeline AS jt JOIN TSR53SampleDescription AS sd ON jt.JobNo = sd.JobNo"
564
- where_conditions.append("sd.OverallRating = 'Pass'"); log_parts.append("Pass")
565
 
566
- if entity_match_data:
567
- entity_name, column_name = entity_match_data["name"], entity_match_data["column"]
568
- if "JOIN TSR53SampleDescription" not in from_clause: from_clause = "FROM JobTimeline AS jt JOIN TSR53SampleDescription AS sd ON jt.JobNo = sd.JobNo"
569
- match_operator = "=" if column_name.endswith("ID") else "LIKE"
570
- entity_value = f"'{entity_name}'" if match_operator == "=" else f"'%{entity_name}%'"
571
- where_conditions.append(f"{column_name} {match_operator} {entity_value}")
572
- log_parts.append(entity_name)
573
- select_clause = "SELECT jt.JobNo, sd.BuyerName, jt.ReportAuthorization"
574
-
575
- final_where_clause = "WHERE " + " AND ".join(where_conditions)
576
- time_log = " ".join(log_parts) if log_parts else "全部"
577
- self._log(f"🔄 檢測到查詢【{time_log} 報告列表】意圖,啟用智能模板。", "INFO")
578
- template_sql = f"{select_clause} {from_clause} {final_where_clause} ORDER BY jt.ReportAuthorization DESC;"
579
- return self._finalize_sql(template_sql, f"模板覆寫: {time_log} 報告列表查詢")
580
-
581
- # ... (此处可以继续添加 V17 版本中的其他所有 if/elif 模板)
582
- elif '報告' in q_lower and any(kw in q_lower for kw in ['幾份', '多少', '數量', '總數']) and not entity_match_data:
583
- year_match = re.search(r'(\d{4})\s*年?', question)
584
- time_condition, time_log = "", "總"
585
- if year_match:
586
- year = year_match.group(1)
587
- time_condition = f"WHERE ReportAuthorization IS NOT NULL AND strftime('%Y', ReportAuthorization) = '{year}'"
588
- time_log = f"{year}年"
589
- else:
590
- time_condition = "WHERE ReportAuthorization IS NOT NULL"
591
- self._log(f"🔄 檢測到查詢【{time_log}全局報告總數】意圖,啟用模板。", "INFO")
592
- template_sql = f"SELECT COUNT(DISTINCT JobNo) AS report_count FROM JobTimeline {time_condition};"
593
- return self._finalize_sql(template_sql, f"模板覆寫: {time_log}全局報告總數查詢")
594
-
 
 
595
  # ==============================================================================
596
- # 第二层:常规修正流程 (Fallback Corrections)
597
  # ==============================================================================
598
  self._log("未觸發任何模板,嘗試解析並修正 AI 輸出...", "INFO")
599
 
@@ -601,7 +631,7 @@ class TextToSQLSystem:
601
  if not parsed_sql:
602
  self._log(f"❌ 未能從模型回應中解析出任何 SQL。原始回應: {raw_response}", "ERROR")
603
  return None, f"無法解析SQL。原始回應:\n{raw_response}"
604
-
605
  self._log(f"📊 解析出的原始 SQL: {parsed_sql}", "DEBUG")
606
 
607
  fixed_sql = " " + parsed_sql.strip() + " "
 
494
 
495
  return formatted.strip()
496
 
497
+ # class TextToSQLSystem
498
 
499
  def _validate_and_fix_sql(self, question: str, raw_response: str) -> Tuple[Optional[str], str]:
500
  """
501
+ (V26 / 實體識別 + 模組化意圖引擎版)
502
+ 在模組化意圖偵測前,先運行一個帶優先級的統一實體識別引擎。
 
 
503
  """
504
  q_lower = question.lower()
505
+
506
  # ==============================================================================
507
+ # 第零層:統一實體識別引擎 (Unified Entity Recognition Engine)
508
  # ==============================================================================
 
 
509
  entity_match_data = None
 
 
510
  entity_patterns = [
511
  # 模式1: 匹配 "类型 + ID" (e.g., "买家ID C0761N") - 最高优先级
512
  {'pattern': r"(買家|buyer)\s*(?:id|代號|代碼)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.BuyerID', 'type': '買家ID'},
 
527
  for p in entity_patterns:
528
  match = re.search(p['pattern'], question, re.IGNORECASE)
529
  if match:
530
+ # 根據正則表達式捕獲組的數量來決定取哪個組
531
  entity_value = match.group(2) if len(match.groups()) > 1 else match.group(1)
532
  entity_match_data = {
533
  "type": p['type'],
534
+ "name": entity_value.strip().upper(), # 標準化:移除前後空格並轉大寫
535
  "column": p['column']
536
  }
537
+ # 找到第一個匹配項(最高優先級)後就立刻停止
538
  break
539
+
540
+ # ==============================================================================
541
+ # 第一層:模組化意圖偵測 (Modular Intent Detection)
542
+ # ==============================================================================
543
+
544
+ # --- 1. 初始化 SQL 組件 ---
545
+ intents = {}
546
+ sql_components = {
547
+ 'select': [],
548
+ 'from': "FROM JobTimeline AS jt",
549
+ 'joins': [],
550
+ 'where': ["jt.ReportAuthorization IS NOT NULL"],
551
+ 'group_by': [],
552
+ 'order_by': [],
553
+ 'log_parts': []
554
+ }
555
+
556
+ # --- 2. 運行一系列獨立的意圖偵測器 ---
557
 
558
+ # 偵測器 2.1: 核心動作意圖 (計數 vs. 列表)
559
+ if any(kw in q_lower for kw in ['幾份', '多少', '數量', '總數', 'how many', 'count']):
560
+ intents['action'] = 'count'
561
+ sql_components['select'].append("COUNT(DISTINCT jt.JobNo) AS report_count")
562
+ sql_components['log_parts'].append("報告總數")
563
+ elif any(kw in q_lower for kw in ['報告號碼', '報告清單', '列出報告', 'report number', 'list of reports']):
564
+ intents['action'] = 'list'
565
+ sql_components['select'].append("jt.JobNo, jt.ReportAuthorization")
566
+ sql_components['order_by'].append("jt.ReportAuthorization DESC")
567
+ sql_components['log_parts'].append("報告列表")
568
+
569
+ # 偵測器 2.2: 時間意圖
570
+ year_match = re.search(r'(\d{4})\s*年?', question)
571
+ month_match = re.search(r'(\d{1,2})\s*月', question)
572
+ if year_match:
573
+ year = year_match.group(1)
574
+ sql_components['where'].append(f"strftime('%Y', jt.ReportAuthorization) = '{year}'")
575
+ sql_components['log_parts'].append(f"{year}年")
576
+ if month_match:
577
+ month = month_match.group(1).zfill(2)
578
+ sql_components['where'].append(f"strftime('%m', jt.ReportAuthorization) = '{month}'")
579
+ sql_components['log_parts'].append(f"{month}月")
580
+
581
+ # 偵測器 2.3: 實體意圖 (現在使用預處理的結果)
582
+ if entity_match_data:
583
+ # 確保 JOIN 只添加一次
584
+ if "TSR53SampleDescription" not in " ".join(sql_components['joins']):
585
+ sql_components['joins'].append("JOIN TSR53SampleDescription AS sd ON jt.JobNo = sd.JobNo")
586
 
587
+ entity_name, column_name = entity_match_data["name"], entity_match_data["column"]
588
+ match_operator = "=" if column_name.endswith("ID") else "LIKE"
589
+ # 對於 LIKE 查詢,在值兩邊加上 %
590
+ entity_value = f"'%{entity_name}%'" if match_operator == "LIKE" else f"'{entity_name}'"
591
+ sql_components['where'].append(f"{column_name} {match_operator} {entity_value}")
592
+ sql_components['log_parts'].append(entity_match_data["type"] + ":" + entity_name)
593
 
594
+ # 如果是列表查詢,我們可以顯示更多實體相關的資訊
595
+ if intents.get('action') == 'list':
596
+ sql_components['select'].append("sd.BuyerName")
597
+
598
+ # 偵測器 2.4: 評級意圖 (Pass/Fail)
599
+ if 'fail' in q_lower or '失敗' in q_lower:
600
+ if "TSR53SampleDescription" not in " ".join(sql_components['joins']):
601
+ sql_components['joins'].append("JOIN TSR53SampleDescription AS sd ON jt.JobNo = sd.JobNo")
602
+ sql_components['where'].append("sd.OverallRating = 'Fail'")
603
+ sql_components['log_parts'].append("Fail")
604
+ elif 'pass' in q_lower or '通過' in q_lower:
605
+ if "TSR53SampleDescription" not in " ".join(sql_components['joins']):
606
+ sql_components['joins'].append("JOIN TSR53SampleDescription AS sd ON jt.JobNo = sd.JobNo")
607
+ sql_components['where'].append("sd.OverallRating = 'Pass'")
608
+ sql_components['log_parts'].append("Pass")
609
+
610
+ # --- 3. 判斷是否觸發了模板,並組合 SQL ---
611
+
612
+ if 'action' in intents:
613
+ final_select = "SELECT " + ", ".join(sorted(list(set(sql_components['select']))))
614
+ from_clause = sql_components['from']
615
+ joins_clause = " ".join(sql_components['joins'])
616
+ where_clause = "WHERE " + " AND ".join(sql_components['where'])
617
+ orderby_clause = "ORDER BY " + ", ".join(sql_components['order_by']) if sql_components['order_by'] else ""
618
+
619
+ template_sql = f"{final_select} {from_clause} {joins_clause} {where_clause} {orderby_clause};"
620
+
621
+ query_log = " ".join(sql_components['log_parts'])
622
+ self._log(f"🔄 偵測到組合意圖【{query_log}】,啟用動態模板。", "INFO")
623
+ return self._finalize_sql(template_sql, f"模板覆寫: {query_log} 查詢")
624
+
625
  # ==============================================================================
626
+ # 第二层:AI 生成修正流程 (Fallback)
627
  # ==============================================================================
628
  self._log("未觸發任何模板,嘗試解析並修正 AI 輸出...", "INFO")
629
 
 
631
  if not parsed_sql:
632
  self._log(f"❌ 未能從模型回應中解析出任何 SQL。原始回應: {raw_response}", "ERROR")
633
  return None, f"無法解析SQL。原始回應:\n{raw_response}"
634
+
635
  self._log(f"📊 解析出的原始 SQL: {parsed_sql}", "DEBUG")
636
 
637
  fixed_sql = " " + parsed_sql.strip() + " "