Paul720810 committed on
Commit
eb6b709
·
verified ·
1 Parent(s): 335982c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -22
app.py CHANGED
@@ -487,7 +487,7 @@ class TextToSQLSystem:
487
  def _validate_and_fix_sql(self, question: str, raw_response: str) -> Tuple[Optional[str], str]:
488
  """
489
  (V23 / 统一实体识别版)
490
- 一個全面、多層次的 SQL 驗證與生成引擎。
491
  引入了全新的、统一的实体识别引擎,能够准确解析 "买家 Gap", "c0761n",
492
  "买家ID c0761n" 等多种复杂的实体提问模式。
493
  """
@@ -503,19 +503,19 @@ class TextToSQLSystem:
503
  # 定义多种识别模式,【优先级从高到低】
504
  entity_patterns = [
505
  # 模式1: 匹配 "类型 + ID" (e.g., "买家ID C0761N") - 最高优先级
506
- {'pattern': r"(买家|buyer)\s*(?:id|代號|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.BuyerID', 'type': '買家ID'},
507
- {'pattern': r"(申请方|申请厂商|applicant)\s*(?:id|代號|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.ApplicantID', 'type': '申請方ID'},
508
  {'pattern': r"(付款方|付款厂商|invoiceto)\s*(?:id|代號|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.InvoiceToID', 'type': '付款方ID'},
509
  {'pattern': r"(代理商|agent)\s*(?:id|代號|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.AgentID', 'type': '代理商ID'},
510
 
511
  # 模式2: 匹配 "类型 + 名称" (e.g., "买家 Gap")
512
- {'pattern': r"(买家|buyer|客戶)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.BuyerName', 'type': '買家'},
513
- {'pattern': r"(申请方|申请厂商|applicant)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.ApplicantName', 'type': '申請方'},
514
  {'pattern': r"(付款方|付款厂商|invoiceto)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.InvoiceToName', 'type': '付款方'},
515
  {'pattern': r"(代理商|agent)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.AgentName', 'type': '代理商'},
516
 
517
  # 模式3: 单独匹配一个 ID (e.g., "c0761n") - 较低优先级
518
- {'pattern': r"\b([A-Z]\d{4}[A-Z])\b", 'column': 'sd.ApplicantID', 'type': 'ID'} # 默认为 ApplicantID,可以根据业务调整
519
  ]
520
 
521
  for p in entity_patterns:
@@ -528,19 +528,12 @@ class TextToSQLSystem:
528
  "column": p['column']
529
  }
530
  break
531
-
532
  # --- 预先检测其他意图 ---
533
  job_no_match = re.search(r"(?:工單|jobno)\s*'\"?([A-Z]{2,3}\d+)'\"?", question, re.IGNORECASE)
534
- lab_group_match_data = None
535
- LAB_GROUP_MAP = {'A':'TA','B':'TB','C':'TC','D':'TD','E':'TE','Y':'TY','TA':'TA','TB':'TB','TC':'TC','TD':'TD','TE':'TE','TY':'TY','WC':'WC','EO':'EO','GCI':'GCI','GCO':'GCO','MI':'MI'}
536
- lab_group_match = re.findall(r"([A-Z]+)\s*組", question, re.IGNORECASE)
537
- if lab_group_match:
538
- codes = [LAB_GROUP_MAP.get(g.upper()) for g in lab_group_match if LAB_GROUP_MAP.get(g.upper())]
539
- if codes: lab_group_match_data = {"codes": codes, "identifiers": lab_group_match}
540
-
541
  # --- 判断逻辑: 依优先级进入对应的模板 ---
542
-
543
- if any(kw in q_lower for kw in ['報告號碼', '報告清單', '列出報告', 'report number', 'list of reports']):
544
  year_match = re.search(r'(\d{4})\s*年?', question)
545
  month_match = re.search(r'(\d{1,2})\s*月', question)
546
  from_clause = "FROM JobTimeline AS jt"
@@ -590,12 +583,6 @@ class TextToSQLSystem:
590
  fixed_sql = " " + parsed_sql.strip() + " "
591
  fixes_applied_fallback = []
592
 
593
- dialect_corrections = {r'YEAR\s*\(([^)]+)\)': r"strftime('%Y', \1)"}
594
- for pattern, replacement in dialect_corrections.items():
595
- if re.search(pattern, fixed_sql, re.IGNORECASE):
596
- fixed_sql = re.sub(pattern, replacement, fixed_sql, flags=re.IGNORECASE)
597
- fixes_applied_fallback.append(f"修正方言: {pattern}")
598
-
599
  schema_corrections = {'TSR53Report':'TSR53SampleDescription', 'TSR53InvoiceReportNo':'JobNo', 'TSR53ReportNo':'JobNo', 'TSR53InvoiceNo':'JobNo', 'TSR53InvoiceCreditNoteNo':'InvoiceCreditNoteNo', 'TSR53InvoiceLocalAmount':'LocalAmount', 'Status':'OverallRating', 'ReportStatus':'OverallRating'}
600
  for wrong, correct in schema_corrections.items():
601
  pattern = r'\b' + re.escape(wrong) + r'\b'
 
487
  def _validate_and_fix_sql(self, question: str, raw_response: str) -> Tuple[Optional[str], str]:
488
  """
489
  (V23 / 统一实体识别版)
490
+ 一个全面、多层次的 SQL 验证与生成引擎。
491
  引入了全新的、统一的实体识别引擎,能够准确解析 "买家 Gap", "c0761n",
492
  "买家ID c0761n" 等多种复杂的实体提问模式。
493
  """
 
503
  # 定义多种识别模式,【优先级从高到低】
504
  entity_patterns = [
505
  # 模式1: 匹配 "类型 + ID" (e.g., "买家ID C0761N") - 最高优先级
506
+ {'pattern': r"(买家|buyer)\s*(?:id|代號|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.BuyerID', 'type': '买家ID'},
507
+ {'pattern': r"(申请方|申请厂商|applicant)\s*(?:id|代號|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.ApplicantID', 'type': '申请方ID'},
508
  {'pattern': r"(付款方|付款厂商|invoiceto)\s*(?:id|代號|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.InvoiceToID', 'type': '付款方ID'},
509
  {'pattern': r"(代理商|agent)\s*(?:id|代號|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.AgentID', 'type': '代理商ID'},
510
 
511
  # 模式2: 匹配 "类型 + 名称" (e.g., "买家 Gap")
512
+ {'pattern': r"(买家|buyer|客戶)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.BuyerName', 'type': '买家'},
513
+ {'pattern': r"(申请方|申请厂商|applicant)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.ApplicantName', 'type': '申请方'},
514
  {'pattern': r"(付款方|付款厂商|invoiceto)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.InvoiceToName', 'type': '付款方'},
515
  {'pattern': r"(代理商|agent)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.AgentName', 'type': '代理商'},
516
 
517
  # 模式3: 单独匹配一个 ID (e.g., "c0761n") - 较低优先级
518
+ {'pattern': r"\b([A-Z]\d{4}[A-Z])\b", 'column': 'sd.ApplicantID', 'type': 'ID'}
519
  ]
520
 
521
  for p in entity_patterns:
 
528
  "column": p['column']
529
  }
530
  break
531
+
532
  # --- 预先检测其他意图 ---
533
  job_no_match = re.search(r"(?:工單|jobno)\s*'\"?([A-Z]{2,3}\d+)'\"?", question, re.IGNORECASE)
534
+
 
 
 
 
 
 
535
  # --- 判断逻辑: 依优先级进入对应的模板 ---
536
+ if any(kw in q_lower for kw in ['報告號碼', '報告清單', '列出報告']):
 
537
  year_match = re.search(r'(\d{4})\s*年?', question)
538
  month_match = re.search(r'(\d{1,2})\s*月', question)
539
  from_clause = "FROM JobTimeline AS jt"
 
583
  fixed_sql = " " + parsed_sql.strip() + " "
584
  fixes_applied_fallback = []
585
 
 
 
 
 
 
 
586
  schema_corrections = {'TSR53Report':'TSR53SampleDescription', 'TSR53InvoiceReportNo':'JobNo', 'TSR53ReportNo':'JobNo', 'TSR53InvoiceNo':'JobNo', 'TSR53InvoiceCreditNoteNo':'InvoiceCreditNoteNo', 'TSR53InvoiceLocalAmount':'LocalAmount', 'Status':'OverallRating', 'ReportStatus':'OverallRating'}
587
  for wrong, correct in schema_corrections.items():
588
  pattern = r'\b' + re.escape(wrong) + r'\b'