Paul720810 committed on
Commit
335982c
·
verified ·
1 Parent(s): b06cd9a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -37
app.py CHANGED
@@ -486,11 +486,10 @@ class TextToSQLSystem:
486
 
487
  def _validate_and_fix_sql(self, question: str, raw_response: str) -> Tuple[Optional[str], str]:
488
  """
489
- (V22 / ID 識別版)
490
  一個全面、多層次的 SQL 驗證與生成引擎。
491
- 極大地增強了實體識別能力。新增了一個獨立的 ID 識別模塊,
492
- 能夠主動從問題中捕捉並分類各種格式化的 ID (如 'C0761N', 'M1044N'),
493
- 並將其用於後續的模板生成中。
494
  """
495
  q_lower = question.lower()
496
 
@@ -498,41 +497,49 @@ class TextToSQLSystem:
498
  # 第一層:高價值意圖識別與模板覆寫 (Intent Recognition & Templating)
499
  # ==============================================================================
500
 
501
- # --- 預先檢測所有可能的意圖和實體 ---
502
- job_no_match = re.search(r"(?:工單|jobno)\s*'\"?([A-Z]{2,3}\d+)'\"?", question, re.IGNORECASE)
503
-
504
- # **新增的、更強大的實體識別模塊**
505
  entity_match_data = None
506
 
507
- # 步驟 1: 優先識別格式化的 ID (例如 M1234N 或 C5678N)
508
- id_match = re.search(r'\b([A-Z]\d{4}[A-Z])\b', question, re.IGNORECASE)
509
- if id_match:
510
- entity_id = id_match.group(1).upper()
511
- column_name = 'sd.ApplicantID' # 默認值
512
- entity_type_log = 'ID'
513
- # 根據用戶問題中的上下文關鍵詞來判斷 ID 類型
514
- if 'buyer' in q_lower or '買家' in q_lower:
515
- column_name, entity_type_log = 'sd.BuyerID', '買家ID'
516
- elif 'applicant' in q_lower or '申請' in q_lower:
517
- column_name, entity_type_log = 'sd.ApplicantID', '申請方ID'
518
- elif 'invoice' in q_lower or '付款' in q_lower:
519
- column_name, entity_type_log = 'sd.InvoiceToID', '付款方ID'
520
- elif 'agent' in q_lower or '代理' in q_lower:
521
- column_name, entity_type_log = 'sd.AgentID', '代理商ID'
522
 
523
- entity_match_data = {"type": entity_type_log, "name": entity_id, "column": column_name}
524
-
525
- # 步驟 2: 如果沒有找到 ID,再識別文本名稱 (舊邏輯)
526
- if not entity_match_data:
527
- ENTITY_TO_COLUMN_MAP = {'申請廠商':'sd.ApplicantName','申請方':'sd.ApplicantName','applicant':'sd.ApplicantName','付款廠商':'sd.InvoiceToName','付款方':'sd.InvoiceToName','invoiceto':'sd.InvoiceToName','代理商':'sd.AgentName','agent':'sd.AgentName','買家':'sd.BuyerName','buyer':'sd.BuyerName','客戶':'sd.BuyerName','品牌':'tsr.BuyerName'}
528
- for keyword, column in ENTITY_TO_COLUMN_MAP.items():
529
- if keyword in q_lower:
530
- match = re.search(fr"{re.escape(keyword)}[\s:;\'\"-]*([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|為|$)", question, re.IGNORECASE)
531
- if match: entity_match_data = {"type": keyword, "name": match.group(1).strip(), "column": column}; break
532
 
533
- # --- 判斷邏輯: 依優先級進入對應的模板 ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
534
 
535
- # 意图 1: 报告列表查询 (高优先级)
536
  if any(kw in q_lower for kw in ['報告號碼', '報告清單', '列出報告', 'report number', 'list of reports']):
537
  year_match = re.search(r'(\d{4})\s*年?', question)
538
  month_match = re.search(r'(\d{1,2})\s*月', question)
@@ -554,8 +561,7 @@ class TextToSQLSystem:
554
  if entity_match_data:
555
  entity_name, column_name = entity_match_data["name"], entity_match_data["column"]
556
  if "JOIN TSR53SampleDescription" not in from_clause: from_clause = "FROM JobTimeline AS jt JOIN TSR53SampleDescription AS sd ON jt.JobNo = sd.JobNo"
557
- # ID 查詢使用精確匹配,名稱查詢使用模糊匹配
558
- match_operator = "=" if entity_match_data["type"].endswith("ID") else "LIKE"
559
  entity_value = f"'{entity_name}'" if match_operator == "=" else f"'%{entity_name}%'"
560
  where_conditions.append(f"{column_name} {match_operator} {entity_value}")
561
  log_parts.append(entity_name)
@@ -584,7 +590,18 @@ class TextToSQLSystem:
584
  fixed_sql = " " + parsed_sql.strip() + " "
585
  fixes_applied_fallback = []
586
 
587
- # ... (后备修正字典和循环)
 
 
 
 
 
 
 
 
 
 
 
588
 
589
  log_msg = "AI 生成並成功修正" if fixes_applied_fallback else "AI 生成且無需修正"
590
  return self._finalize_sql(fixed_sql, log_msg)
 
486
 
487
  def _validate_and_fix_sql(self, question: str, raw_response: str) -> Tuple[Optional[str], str]:
488
  """
489
+ (V23 / 统一实体识别版)
490
  一個全面、多層次的 SQL 驗證與生成引擎。
491
+ 引入了全新的、统一的实体识别引擎,能够准确解析 "买家 Gap", "c0761n",
492
+ "买家ID c0761n" 等多种复杂的实体提问模式。
 
493
  """
494
  q_lower = question.lower()
495
 
 
497
  # 第一層:高價值意圖識別與模板覆寫 (Intent Recognition & Templating)
498
  # ==============================================================================
499
 
500
+ # --- **全新的统一实体识别引擎** ---
 
 
 
501
  entity_match_data = None
502
 
503
+ # 定义多种识别模式,【优先级从高到低】
504
+ entity_patterns = [
505
+ # 模式1: 匹配 "类型 + ID" (e.g., "买家ID C0761N") - 最高优先级
506
+ {'pattern': r"(买家|buyer)\s*(?:id|代號|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.BuyerID', 'type': '買家ID'},
507
+ {'pattern': r"(申请方|申请厂商|applicant)\s*(?:id|代號|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.ApplicantID', 'type': '申請方ID'},
508
+ {'pattern': r"(付款方|付款厂商|invoiceto)\s*(?:id|代號|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.InvoiceToID', 'type': '付款方ID'},
509
+ {'pattern': r"(代理商|agent)\s*(?:id|代號|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.AgentID', 'type': '代理商ID'},
 
 
 
 
 
 
 
 
510
 
511
+ # 模式2: 匹配 "类型 + 名称" (e.g., "买家 Gap")
512
+ {'pattern': r"(买家|buyer|客戶)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.BuyerName', 'type': '買家'},
513
+ {'pattern': r"(申请方|申请厂商|applicant)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.ApplicantName', 'type': '申請方'},
514
+ {'pattern': r"(付款方|付款厂商|invoiceto)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.InvoiceToName', 'type': '付款方'},
515
+ {'pattern': r"(代理商|agent)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.AgentName', 'type': '代理商'},
516
+
517
+ # 模式3: 单独匹配一个 ID (e.g., "c0761n") - 较低优先级
518
+ {'pattern': r"\b([A-Z]\d{4}[A-Z])\b", 'column': 'sd.ApplicantID', 'type': 'ID'} # 默认为 ApplicantID,可以根据业务调整
519
+ ]
520
 
521
+ for p in entity_patterns:
522
+ match = re.search(p['pattern'], question, re.IGNORECASE)
523
+ if match:
524
+ entity_value = match.group(2) if len(match.groups()) > 1 else match.group(1)
525
+ entity_match_data = {
526
+ "type": p['type'],
527
+ "name": entity_value.strip().upper(),
528
+ "column": p['column']
529
+ }
530
+ break
531
+
532
+ # --- 预先检测其他意图 ---
533
+ job_no_match = re.search(r"(?:工單|jobno)\s*'\"?([A-Z]{2,3}\d+)'\"?", question, re.IGNORECASE)
534
+ lab_group_match_data = None
535
+ LAB_GROUP_MAP = {'A':'TA','B':'TB','C':'TC','D':'TD','E':'TE','Y':'TY','TA':'TA','TB':'TB','TC':'TC','TD':'TD','TE':'TE','TY':'TY','WC':'WC','EO':'EO','GCI':'GCI','GCO':'GCO','MI':'MI'}
536
+ lab_group_match = re.findall(r"([A-Z]+)\s*組", question, re.IGNORECASE)
537
+ if lab_group_match:
538
+ codes = [LAB_GROUP_MAP.get(g.upper()) for g in lab_group_match if LAB_GROUP_MAP.get(g.upper())]
539
+ if codes: lab_group_match_data = {"codes": codes, "identifiers": lab_group_match}
540
+
541
+ # --- 判断逻辑: 依优先级进入对应的模板 ---
542
 
 
543
  if any(kw in q_lower for kw in ['報告號碼', '報告清單', '列出報告', 'report number', 'list of reports']):
544
  year_match = re.search(r'(\d{4})\s*年?', question)
545
  month_match = re.search(r'(\d{1,2})\s*月', question)
 
561
  if entity_match_data:
562
  entity_name, column_name = entity_match_data["name"], entity_match_data["column"]
563
  if "JOIN TSR53SampleDescription" not in from_clause: from_clause = "FROM JobTimeline AS jt JOIN TSR53SampleDescription AS sd ON jt.JobNo = sd.JobNo"
564
+ match_operator = "=" if column_name.endswith("ID") else "LIKE"
 
565
  entity_value = f"'{entity_name}'" if match_operator == "=" else f"'%{entity_name}%'"
566
  where_conditions.append(f"{column_name} {match_operator} {entity_value}")
567
  log_parts.append(entity_name)
 
590
  fixed_sql = " " + parsed_sql.strip() + " "
591
  fixes_applied_fallback = []
592
 
593
+ dialect_corrections = {r'YEAR\s*\(([^)]+)\)': r"strftime('%Y', \1)"}
594
+ for pattern, replacement in dialect_corrections.items():
595
+ if re.search(pattern, fixed_sql, re.IGNORECASE):
596
+ fixed_sql = re.sub(pattern, replacement, fixed_sql, flags=re.IGNORECASE)
597
+ fixes_applied_fallback.append(f"修正方言: {pattern}")
598
+
599
+ schema_corrections = {'TSR53Report':'TSR53SampleDescription', 'TSR53InvoiceReportNo':'JobNo', 'TSR53ReportNo':'JobNo', 'TSR53InvoiceNo':'JobNo', 'TSR53InvoiceCreditNoteNo':'InvoiceCreditNoteNo', 'TSR53InvoiceLocalAmount':'LocalAmount', 'Status':'OverallRating', 'ReportStatus':'OverallRating'}
600
+ for wrong, correct in schema_corrections.items():
601
+ pattern = r'\b' + re.escape(wrong) + r'\b'
602
+ if re.search(pattern, fixed_sql, re.IGNORECASE):
603
+ fixed_sql = re.sub(pattern, correct, fixed_sql, flags=re.IGNORECASE)
604
+ fixes_applied_fallback.append(f"映射 Schema: '{wrong}' -> '{correct}'")
605
 
606
  log_msg = "AI 生成並成功修正" if fixes_applied_fallback else "AI 生成且無需修正"
607
  return self._finalize_sql(fixed_sql, log_msg)