Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -487,7 +487,7 @@ class TextToSQLSystem:
|
|
| 487 |
def _validate_and_fix_sql(self, question: str, raw_response: str) -> Tuple[Optional[str], str]:
|
| 488 |
"""
|
| 489 |
(V23 / 统一实体识别版)
|
| 490 |
-
|
| 491 |
引入了全新的、统一的实体识别引擎,能够准确解析 "买家 Gap", "c0761n",
|
| 492 |
"买家ID c0761n" 等多种复杂的实体提问模式。
|
| 493 |
"""
|
|
@@ -503,19 +503,19 @@ class TextToSQLSystem:
|
|
| 503 |
# 定义多种识别模式,【优先级从高到低】
|
| 504 |
entity_patterns = [
|
| 505 |
# 模式1: 匹配 "类型 + ID" (e.g., "买家ID C0761N") - 最高优先级
|
| 506 |
-
{'pattern': r"(买家|buyer)\s*(?:id|代號|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.BuyerID', 'type': '
|
| 507 |
-
{'pattern': r"(申请方|申请厂商|applicant)\s*(?:id|代號|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.ApplicantID', 'type': '
|
| 508 |
{'pattern': r"(付款方|付款厂商|invoiceto)\s*(?:id|代號|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.InvoiceToID', 'type': '付款方ID'},
|
| 509 |
{'pattern': r"(代理商|agent)\s*(?:id|代號|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.AgentID', 'type': '代理商ID'},
|
| 510 |
|
| 511 |
# 模式2: 匹配 "类型 + 名称" (e.g., "买家 Gap")
|
| 512 |
-
{'pattern': r"(买家|buyer|客戶)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.BuyerName', 'type': '
|
| 513 |
-
{'pattern': r"(申请方|申请厂商|applicant)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.ApplicantName', 'type': '
|
| 514 |
{'pattern': r"(付款方|付款厂商|invoiceto)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.InvoiceToName', 'type': '付款方'},
|
| 515 |
{'pattern': r"(代理商|agent)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.AgentName', 'type': '代理商'},
|
| 516 |
|
| 517 |
# 模式3: 单独匹配一个 ID (e.g., "c0761n") - 较低优先级
|
| 518 |
-
{'pattern': r"\b([A-Z]\d{4}[A-Z])\b", 'column': 'sd.ApplicantID', 'type': 'ID'}
|
| 519 |
]
|
| 520 |
|
| 521 |
for p in entity_patterns:
|
|
@@ -528,19 +528,12 @@ class TextToSQLSystem:
|
|
| 528 |
"column": p['column']
|
| 529 |
}
|
| 530 |
break
|
| 531 |
-
|
| 532 |
# --- 预先检测其他意图 ---
|
| 533 |
job_no_match = re.search(r"(?:工單|jobno)\s*'\"?([A-Z]{2,3}\d+)'\"?", question, re.IGNORECASE)
|
| 534 |
-
|
| 535 |
-
LAB_GROUP_MAP = {'A':'TA','B':'TB','C':'TC','D':'TD','E':'TE','Y':'TY','TA':'TA','TB':'TB','TC':'TC','TD':'TD','TE':'TE','TY':'TY','WC':'WC','EO':'EO','GCI':'GCI','GCO':'GCO','MI':'MI'}
|
| 536 |
-
lab_group_match = re.findall(r"([A-Z]+)\s*組", question, re.IGNORECASE)
|
| 537 |
-
if lab_group_match:
|
| 538 |
-
codes = [LAB_GROUP_MAP.get(g.upper()) for g in lab_group_match if LAB_GROUP_MAP.get(g.upper())]
|
| 539 |
-
if codes: lab_group_match_data = {"codes": codes, "identifiers": lab_group_match}
|
| 540 |
-
|
| 541 |
# --- 判断逻辑: 依优先级进入对应的模板 ---
|
| 542 |
-
|
| 543 |
-
if any(kw in q_lower for kw in ['報告號碼', '報告清單', '列出報告', 'report number', 'list of reports']):
|
| 544 |
year_match = re.search(r'(\d{4})\s*年?', question)
|
| 545 |
month_match = re.search(r'(\d{1,2})\s*月', question)
|
| 546 |
from_clause = "FROM JobTimeline AS jt"
|
|
@@ -590,12 +583,6 @@ class TextToSQLSystem:
|
|
| 590 |
fixed_sql = " " + parsed_sql.strip() + " "
|
| 591 |
fixes_applied_fallback = []
|
| 592 |
|
| 593 |
-
dialect_corrections = {r'YEAR\s*\(([^)]+)\)': r"strftime('%Y', \1)"}
|
| 594 |
-
for pattern, replacement in dialect_corrections.items():
|
| 595 |
-
if re.search(pattern, fixed_sql, re.IGNORECASE):
|
| 596 |
-
fixed_sql = re.sub(pattern, replacement, fixed_sql, flags=re.IGNORECASE)
|
| 597 |
-
fixes_applied_fallback.append(f"修正方言: {pattern}")
|
| 598 |
-
|
| 599 |
schema_corrections = {'TSR53Report':'TSR53SampleDescription', 'TSR53InvoiceReportNo':'JobNo', 'TSR53ReportNo':'JobNo', 'TSR53InvoiceNo':'JobNo', 'TSR53InvoiceCreditNoteNo':'InvoiceCreditNoteNo', 'TSR53InvoiceLocalAmount':'LocalAmount', 'Status':'OverallRating', 'ReportStatus':'OverallRating'}
|
| 600 |
for wrong, correct in schema_corrections.items():
|
| 601 |
pattern = r'\b' + re.escape(wrong) + r'\b'
|
|
|
|
| 487 |
def _validate_and_fix_sql(self, question: str, raw_response: str) -> Tuple[Optional[str], str]:
|
| 488 |
"""
|
| 489 |
(V23 / 统一实体识别版)
|
| 490 |
+
一个全面、多层次的 SQL 验证与生成引擎。
|
| 491 |
引入了全新的、统一的实体识别引擎,能够准确解析 "买家 Gap", "c0761n",
|
| 492 |
"买家ID c0761n" 等多种复杂的实体提问模式。
|
| 493 |
"""
|
|
|
|
| 503 |
# 定义多种识别模式,【优先级从高到低】
|
| 504 |
entity_patterns = [
|
| 505 |
# 模式1: 匹配 "类型 + ID" (e.g., "买家ID C0761N") - 最高优先级
|
| 506 |
+
{'pattern': r"(买家|buyer)\s*(?:id|代號|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.BuyerID', 'type': '买家ID'},
|
| 507 |
+
{'pattern': r"(申请方|申请厂商|applicant)\s*(?:id|代號|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.ApplicantID', 'type': '申请方ID'},
|
| 508 |
{'pattern': r"(付款方|付款厂商|invoiceto)\s*(?:id|代號|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.InvoiceToID', 'type': '付款方ID'},
|
| 509 |
{'pattern': r"(代理商|agent)\s*(?:id|代號|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.AgentID', 'type': '代理商ID'},
|
| 510 |
|
| 511 |
# 模式2: 匹配 "类型 + 名称" (e.g., "买家 Gap")
|
| 512 |
+
{'pattern': r"(买家|buyer|客戶)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.BuyerName', 'type': '买家'},
|
| 513 |
+
{'pattern': r"(申请方|申请厂商|applicant)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.ApplicantName', 'type': '申请方'},
|
| 514 |
{'pattern': r"(付款方|付款厂商|invoiceto)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.InvoiceToName', 'type': '付款方'},
|
| 515 |
{'pattern': r"(代理商|agent)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.AgentName', 'type': '代理商'},
|
| 516 |
|
| 517 |
# 模式3: 单独匹配一个 ID (e.g., "c0761n") - 较低优先级
|
| 518 |
+
{'pattern': r"\b([A-Z]\d{4}[A-Z])\b", 'column': 'sd.ApplicantID', 'type': 'ID'}
|
| 519 |
]
|
| 520 |
|
| 521 |
for p in entity_patterns:
|
|
|
|
| 528 |
"column": p['column']
|
| 529 |
}
|
| 530 |
break
|
| 531 |
+
|
| 532 |
# --- 预先检测其他意图 ---
|
| 533 |
job_no_match = re.search(r"(?:工單|jobno)\s*'\"?([A-Z]{2,3}\d+)'\"?", question, re.IGNORECASE)
|
| 534 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 535 |
# --- 判断逻辑: 依优先级进入对应的模板 ---
|
| 536 |
+
if any(kw in q_lower for kw in ['報告號碼', '報告清單', '列出報告']):
|
|
|
|
| 537 |
year_match = re.search(r'(\d{4})\s*年?', question)
|
| 538 |
month_match = re.search(r'(\d{1,2})\s*月', question)
|
| 539 |
from_clause = "FROM JobTimeline AS jt"
|
|
|
|
| 583 |
fixed_sql = " " + parsed_sql.strip() + " "
|
| 584 |
fixes_applied_fallback = []
|
| 585 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 586 |
schema_corrections = {'TSR53Report':'TSR53SampleDescription', 'TSR53InvoiceReportNo':'JobNo', 'TSR53ReportNo':'JobNo', 'TSR53InvoiceNo':'JobNo', 'TSR53InvoiceCreditNoteNo':'InvoiceCreditNoteNo', 'TSR53InvoiceLocalAmount':'LocalAmount', 'Status':'OverallRating', 'ReportStatus':'OverallRating'}
|
| 587 |
for wrong, correct in schema_corrections.items():
|
| 588 |
pattern = r'\b' + re.escape(wrong) + r'\b'
|