Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -486,11 +486,10 @@ class TextToSQLSystem:
|
|
| 486 |
|
| 487 |
def _validate_and_fix_sql(self, question: str, raw_response: str) -> Tuple[Optional[str], str]:
|
| 488 |
"""
|
| 489 |
-
(
|
| 490 |
一個全面、多層次的 SQL 驗證與生成引擎。
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
並將其用於後續的模板生成中。
|
| 494 |
"""
|
| 495 |
q_lower = question.lower()
|
| 496 |
|
|
@@ -498,41 +497,49 @@ class TextToSQLSystem:
|
|
| 498 |
# 第一層:高價值意圖識別與模板覆寫 (Intent Recognition & Templating)
|
| 499 |
# ==============================================================================
|
| 500 |
|
| 501 |
-
# ---
|
| 502 |
-
job_no_match = re.search(r"(?:工單|jobno)\s*'\"?([A-Z]{2,3}\d+)'\"?", question, re.IGNORECASE)
|
| 503 |
-
|
| 504 |
-
# **新增的、更強大的實體識別模塊**
|
| 505 |
entity_match_data = None
|
| 506 |
|
| 507 |
-
#
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
if 'buyer' in q_lower or '買家' in q_lower:
|
| 515 |
-
column_name, entity_type_log = 'sd.BuyerID', '買家ID'
|
| 516 |
-
elif 'applicant' in q_lower or '申請' in q_lower:
|
| 517 |
-
column_name, entity_type_log = 'sd.ApplicantID', '申請方ID'
|
| 518 |
-
elif 'invoice' in q_lower or '付款' in q_lower:
|
| 519 |
-
column_name, entity_type_log = 'sd.InvoiceToID', '付款方ID'
|
| 520 |
-
elif 'agent' in q_lower or '代理' in q_lower:
|
| 521 |
-
column_name, entity_type_log = 'sd.AgentID', '代理商ID'
|
| 522 |
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
|
| 532 |
|
| 533 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 534 |
|
| 535 |
-
# 意图 1: 报告列表查询 (高优先级)
|
| 536 |
if any(kw in q_lower for kw in ['報告號碼', '報告清單', '列出報告', 'report number', 'list of reports']):
|
| 537 |
year_match = re.search(r'(\d{4})\s*年?', question)
|
| 538 |
month_match = re.search(r'(\d{1,2})\s*月', question)
|
|
@@ -554,8 +561,7 @@ class TextToSQLSystem:
|
|
| 554 |
if entity_match_data:
|
| 555 |
entity_name, column_name = entity_match_data["name"], entity_match_data["column"]
|
| 556 |
if "JOIN TSR53SampleDescription" not in from_clause: from_clause = "FROM JobTimeline AS jt JOIN TSR53SampleDescription AS sd ON jt.JobNo = sd.JobNo"
|
| 557 |
-
|
| 558 |
-
match_operator = "=" if entity_match_data["type"].endswith("ID") else "LIKE"
|
| 559 |
entity_value = f"'{entity_name}'" if match_operator == "=" else f"'%{entity_name}%'"
|
| 560 |
where_conditions.append(f"{column_name} {match_operator} {entity_value}")
|
| 561 |
log_parts.append(entity_name)
|
|
@@ -584,7 +590,18 @@ class TextToSQLSystem:
|
|
| 584 |
fixed_sql = " " + parsed_sql.strip() + " "
|
| 585 |
fixes_applied_fallback = []
|
| 586 |
|
| 587 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 588 |
|
| 589 |
log_msg = "AI 生成並成功修正" if fixes_applied_fallback else "AI 生成且無需修正"
|
| 590 |
return self._finalize_sql(fixed_sql, log_msg)
|
|
|
|
| 486 |
|
| 487 |
def _validate_and_fix_sql(self, question: str, raw_response: str) -> Tuple[Optional[str], str]:
|
| 488 |
"""
|
| 489 |
+
(V23 / 统一实体识别版)
|
| 490 |
一個全面、多層次的 SQL 驗證與生成引擎。
|
| 491 |
+
引入了全新的、统一的实体识别引擎,能够准确解析 "买家 Gap", "c0761n",
|
| 492 |
+
"买家ID c0761n" 等多种复杂的实体提问模式。
|
|
|
|
| 493 |
"""
|
| 494 |
q_lower = question.lower()
|
| 495 |
|
|
|
|
| 497 |
# 第一層:高價值意圖識別與模板覆寫 (Intent Recognition & Templating)
|
| 498 |
# ==============================================================================
|
| 499 |
|
| 500 |
+
# --- **全新的统一实体识别引擎** ---
|
|
|
|
|
|
|
|
|
|
| 501 |
entity_match_data = None
|
| 502 |
|
| 503 |
+
# 定义多种识别模式,【优先级从高到低】
|
| 504 |
+
entity_patterns = [
|
| 505 |
+
# 模式1: 匹配 "类型 + ID" (e.g., "买家ID C0761N") - 最高优先级
|
| 506 |
+
{'pattern': r"(买家|buyer)\s*(?:id|代號|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.BuyerID', 'type': '買家ID'},
|
| 507 |
+
{'pattern': r"(申请方|申请厂商|applicant)\s*(?:id|代號|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.ApplicantID', 'type': '申請方ID'},
|
| 508 |
+
{'pattern': r"(付款方|付款厂商|invoiceto)\s*(?:id|代號|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.InvoiceToID', 'type': '付款方ID'},
|
| 509 |
+
{'pattern': r"(代理商|agent)\s*(?:id|代號|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.AgentID', 'type': '代理商ID'},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 510 |
|
| 511 |
+
# 模式2: 匹配 "类型 + 名称" (e.g., "买家 Gap")
|
| 512 |
+
{'pattern': r"(买家|buyer|客戶)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.BuyerName', 'type': '買家'},
|
| 513 |
+
{'pattern': r"(申请方|申请厂商|applicant)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.ApplicantName', 'type': '申請方'},
|
| 514 |
+
{'pattern': r"(付款方|付款厂商|invoiceto)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.InvoiceToName', 'type': '付款方'},
|
| 515 |
+
{'pattern': r"(代理商|agent)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.AgentName', 'type': '代理商'},
|
| 516 |
+
|
| 517 |
+
# 模式3: 单独匹配一个 ID (e.g., "c0761n") - 较低优先级
|
| 518 |
+
{'pattern': r"\b([A-Z]\d{4}[A-Z])\b", 'column': 'sd.ApplicantID', 'type': 'ID'} # 默认为 ApplicantID,可以根据业务调整
|
| 519 |
+
]
|
| 520 |
|
| 521 |
+
for p in entity_patterns:
|
| 522 |
+
match = re.search(p['pattern'], question, re.IGNORECASE)
|
| 523 |
+
if match:
|
| 524 |
+
entity_value = match.group(2) if len(match.groups()) > 1 else match.group(1)
|
| 525 |
+
entity_match_data = {
|
| 526 |
+
"type": p['type'],
|
| 527 |
+
"name": entity_value.strip().upper(),
|
| 528 |
+
"column": p['column']
|
| 529 |
+
}
|
| 530 |
+
break
|
| 531 |
+
|
| 532 |
+
# --- 预先检测其他意图 ---
|
| 533 |
+
job_no_match = re.search(r"(?:工單|jobno)\s*'\"?([A-Z]{2,3}\d+)'\"?", question, re.IGNORECASE)
|
| 534 |
+
lab_group_match_data = None
|
| 535 |
+
LAB_GROUP_MAP = {'A':'TA','B':'TB','C':'TC','D':'TD','E':'TE','Y':'TY','TA':'TA','TB':'TB','TC':'TC','TD':'TD','TE':'TE','TY':'TY','WC':'WC','EO':'EO','GCI':'GCI','GCO':'GCO','MI':'MI'}
|
| 536 |
+
lab_group_match = re.findall(r"([A-Z]+)\s*組", question, re.IGNORECASE)
|
| 537 |
+
if lab_group_match:
|
| 538 |
+
codes = [LAB_GROUP_MAP.get(g.upper()) for g in lab_group_match if LAB_GROUP_MAP.get(g.upper())]
|
| 539 |
+
if codes: lab_group_match_data = {"codes": codes, "identifiers": lab_group_match}
|
| 540 |
+
|
| 541 |
+
# --- 判断逻辑: 依优先级进入对应的模板 ---
|
| 542 |
|
|
|
|
| 543 |
if any(kw in q_lower for kw in ['報告號碼', '報告清單', '列出報告', 'report number', 'list of reports']):
|
| 544 |
year_match = re.search(r'(\d{4})\s*年?', question)
|
| 545 |
month_match = re.search(r'(\d{1,2})\s*月', question)
|
|
|
|
| 561 |
if entity_match_data:
|
| 562 |
entity_name, column_name = entity_match_data["name"], entity_match_data["column"]
|
| 563 |
if "JOIN TSR53SampleDescription" not in from_clause: from_clause = "FROM JobTimeline AS jt JOIN TSR53SampleDescription AS sd ON jt.JobNo = sd.JobNo"
|
| 564 |
+
match_operator = "=" if column_name.endswith("ID") else "LIKE"
|
|
|
|
| 565 |
entity_value = f"'{entity_name}'" if match_operator == "=" else f"'%{entity_name}%'"
|
| 566 |
where_conditions.append(f"{column_name} {match_operator} {entity_value}")
|
| 567 |
log_parts.append(entity_name)
|
|
|
|
| 590 |
fixed_sql = " " + parsed_sql.strip() + " "
|
| 591 |
fixes_applied_fallback = []
|
| 592 |
|
| 593 |
+
dialect_corrections = {r'YEAR\s*\(([^)]+)\)': r"strftime('%Y', \1)"}
|
| 594 |
+
for pattern, replacement in dialect_corrections.items():
|
| 595 |
+
if re.search(pattern, fixed_sql, re.IGNORECASE):
|
| 596 |
+
fixed_sql = re.sub(pattern, replacement, fixed_sql, flags=re.IGNORECASE)
|
| 597 |
+
fixes_applied_fallback.append(f"修正方言: {pattern}")
|
| 598 |
+
|
| 599 |
+
schema_corrections = {'TSR53Report':'TSR53SampleDescription', 'TSR53InvoiceReportNo':'JobNo', 'TSR53ReportNo':'JobNo', 'TSR53InvoiceNo':'JobNo', 'TSR53InvoiceCreditNoteNo':'InvoiceCreditNoteNo', 'TSR53InvoiceLocalAmount':'LocalAmount', 'Status':'OverallRating', 'ReportStatus':'OverallRating'}
|
| 600 |
+
for wrong, correct in schema_corrections.items():
|
| 601 |
+
pattern = r'\b' + re.escape(wrong) + r'\b'
|
| 602 |
+
if re.search(pattern, fixed_sql, re.IGNORECASE):
|
| 603 |
+
fixed_sql = re.sub(pattern, correct, fixed_sql, flags=re.IGNORECASE)
|
| 604 |
+
fixes_applied_fallback.append(f"映射 Schema: '{wrong}' -> '{correct}'")
|
| 605 |
|
| 606 |
log_msg = "AI 生成並成功修正" if fixes_applied_fallback else "AI 生成且無需修正"
|
| 607 |
return self._finalize_sql(fixed_sql, log_msg)
|