Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -494,12 +494,13 @@ class TextToSQLSystem:
|
|
| 494 |
|
| 495 |
return formatted.strip()
|
| 496 |
|
| 497 |
-
# 在 class TextToSQLSystem 內
|
| 498 |
-
|
| 499 |
def _validate_and_fix_sql(self, question: str, raw_response: str) -> Tuple[Optional[str], str]:
|
| 500 |
"""
|
| 501 |
-
(
|
| 502 |
-
|
|
|
|
| 503 |
"""
|
| 504 |
q_lower = question.lower()
|
| 505 |
|
|
@@ -507,61 +508,51 @@ class TextToSQLSystem:
|
|
| 507 |
# 第零層:統一實體識別引擎 (Unified Entity Recognition Engine)
|
| 508 |
# ==============================================================================
|
| 509 |
entity_match_data = None
|
|
|
|
| 510 |
entity_patterns = [
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
{'pattern': r"(申請方|申请方|申請廠商|申请厂商|applicant)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.ApplicantName', 'type': '申請方'},
|
| 526 |
-
# 增加了繁體的 "付款方", "付款廠商"
|
| 527 |
-
{'pattern': r"(付款方|付款厂商|invoiceto)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.InvoiceToName', 'type': '付款方'},
|
| 528 |
-
# 增加了繁體的 "代理商"
|
| 529 |
-
{'pattern': r"(代理商|agent)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.AgentName', 'type': '代理商'},
|
| 530 |
-
|
| 531 |
-
# 模式3: 单独匹配一个 ID (e.g., "c0761n") - 较低优先级
|
| 532 |
-
{'pattern': r"\b([A-Z]\d{4}[A-Z])\b", 'column': 'sd.ApplicantID', 'type': 'ID'}
|
| 533 |
]
|
| 534 |
|
| 535 |
for p in entity_patterns:
|
| 536 |
match = re.search(p['pattern'], question, re.IGNORECASE)
|
| 537 |
if match:
|
| 538 |
-
# 根據正則表達式捕獲組的數量來決定取哪個值
|
| 539 |
entity_value = match.group(2) if len(match.groups()) > 1 else match.group(1)
|
| 540 |
entity_match_data = {
|
| 541 |
"type": p['type'],
|
| 542 |
-
"name": entity_value.strip().upper(),
|
| 543 |
"column": p['column']
|
| 544 |
}
|
| 545 |
-
# 找到第一個匹配項(最高優先級)後就立刻停止
|
| 546 |
break
|
| 547 |
|
| 548 |
# ==============================================================================
|
| 549 |
-
#
|
| 550 |
# ==============================================================================
|
| 551 |
|
| 552 |
-
# --- 1. 初始化 SQL 組件 ---
|
| 553 |
intents = {}
|
| 554 |
sql_components = {
|
| 555 |
'select': [],
|
| 556 |
-
'from': "
|
| 557 |
'joins': [],
|
| 558 |
-
'where': [
|
| 559 |
'group_by': [],
|
| 560 |
'order_by': [],
|
| 561 |
'log_parts': []
|
| 562 |
}
|
| 563 |
|
| 564 |
-
# ---
|
| 565 |
|
| 566 |
# 偵測器 2.1: 核心動作意圖 (計數 vs. 列表)
|
| 567 |
if any(kw in q_lower for kw in ['幾份', '多少', '數量', '總數', 'how many', 'count']):
|
|
@@ -586,20 +577,15 @@ class TextToSQLSystem:
|
|
| 586 |
sql_components['where'].append(f"strftime('%m', jt.ReportAuthorization) = '{month}'")
|
| 587 |
sql_components['log_parts'].append(f"{month}月")
|
| 588 |
|
| 589 |
-
# 偵測器 2.3: 實體意圖 (
|
| 590 |
if entity_match_data:
|
| 591 |
-
# 確保 JOIN 只添加一次
|
| 592 |
if "TSR53SampleDescription" not in " ".join(sql_components['joins']):
|
| 593 |
sql_components['joins'].append("JOIN TSR53SampleDescription AS sd ON jt.JobNo = sd.JobNo")
|
| 594 |
-
|
| 595 |
entity_name, column_name = entity_match_data["name"], entity_match_data["column"]
|
| 596 |
match_operator = "=" if column_name.endswith("ID") else "LIKE"
|
| 597 |
-
# 對於 LIKE 查詢,在值兩邊加上 %
|
| 598 |
entity_value = f"'%{entity_name}%'" if match_operator == "LIKE" else f"'{entity_name}'"
|
| 599 |
sql_components['where'].append(f"{column_name} {match_operator} {entity_value}")
|
| 600 |
sql_components['log_parts'].append(entity_match_data["type"] + ":" + entity_name)
|
| 601 |
-
|
| 602 |
-
# 如果是列表查詢,我們可以顯示更多實體相關的資訊
|
| 603 |
if intents.get('action') == 'list':
|
| 604 |
sql_components['select'].append("sd.BuyerName")
|
| 605 |
|
|
@@ -614,17 +600,34 @@ class TextToSQLSystem:
|
|
| 614 |
sql_components['joins'].append("JOIN TSR53SampleDescription AS sd ON jt.JobNo = sd.JobNo")
|
| 615 |
sql_components['where'].append("sd.OverallRating = 'Pass'")
|
| 616 |
sql_components['log_parts'].append("Pass")
|
| 617 |
-
|
| 618 |
-
#
|
| 619 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 620 |
if 'action' in intents:
|
| 621 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 622 |
from_clause = sql_components['from']
|
| 623 |
joins_clause = " ".join(sql_components['joins'])
|
| 624 |
-
where_clause = "WHERE " + " AND ".join(sql_components['where'])
|
| 625 |
orderby_clause = "ORDER BY " + ", ".join(sql_components['order_by']) if sql_components['order_by'] else ""
|
| 626 |
|
| 627 |
-
template_sql = f"{
|
| 628 |
|
| 629 |
query_log = " ".join(sql_components['log_parts'])
|
| 630 |
self._log(f"🔄 偵測到組合意圖【{query_log}】,啟用動態模板。", "INFO")
|
|
|
|
| 494 |
|
| 495 |
return formatted.strip()
|
| 496 |
|
| 497 |
+
# 在 class TextToSQLSystem 內
|
| 498 |
+
|
| 499 |
def _validate_and_fix_sql(self, question: str, raw_response: str) -> Tuple[Optional[str], str]:
|
| 500 |
"""
|
| 501 |
+
(V27 / 統一實體識別 + 模組化意圖引擎 + 實驗組偵測 最終版)
|
| 502 |
+
一個多層次的SQL生成引擎。它優先使用基於規則的動態模板生成器,
|
| 503 |
+
如果無法匹配,則回退到解析和修正AI模型的輸出。
|
| 504 |
"""
|
| 505 |
q_lower = question.lower()
|
| 506 |
|
|
|
|
| 508 |
# 第零層:統一實體識別引擎 (Unified Entity Recognition Engine)
|
| 509 |
# ==============================================================================
|
| 510 |
entity_match_data = None
|
| 511 |
+
# 包含了繁簡體兼容的關鍵字
|
| 512 |
entity_patterns = [
|
| 513 |
+
# 模式1: 匹配 "类型 + ID" (e.g., "买家ID C0761N") - 最高优先级
|
| 514 |
+
{'pattern': r"(買家|买家|buyer)\s*(?:id|代號|代碼|代号|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.BuyerID', 'type': '買家ID'},
|
| 515 |
+
{'pattern': r"(申請方|申请方|申請廠商|申请厂商|applicant)\s*(?:id|代號|代碼|代号|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.ApplicantID', 'type': '申請方ID'},
|
| 516 |
+
{'pattern': r"(付款方|付款厂商|invoiceto)\s*(?:id|代號|代碼|代号|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.InvoiceToID', 'type': '付款方ID'},
|
| 517 |
+
{'pattern': r"(代理商|agent)\s*(?:id|代號|代碼|代号|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.AgentID', 'type': '代理商ID'},
|
| 518 |
+
|
| 519 |
+
# 模式2: 匹配 "類型 + 名稱" (e.g., "買家 Gap")
|
| 520 |
+
{'pattern': r"(買家|买家|buyer|客戶)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.BuyerName', 'type': '買家'},
|
| 521 |
+
{'pattern': r"(申請方|申请方|申請廠商|申请厂商|applicant)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.ApplicantName', 'type': '申請方'},
|
| 522 |
+
{'pattern': r"(付款方|付款厂商|invoiceto)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.InvoiceToName', 'type': '付款方'},
|
| 523 |
+
{'pattern': r"(代理商|agent)\s*'\"?([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|$|有)", 'column': 'sd.AgentName', 'type': '代理商'},
|
| 524 |
+
|
| 525 |
+
# 模式3: 单独匹配一个 ID (e.g., "c0761n") - 较低优先级
|
| 526 |
+
{'pattern': r"\b([A-Z]\d{4}[A-Z])\b", 'column': 'sd.ApplicantID', 'type': 'ID'}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 527 |
]
|
| 528 |
|
| 529 |
for p in entity_patterns:
|
| 530 |
match = re.search(p['pattern'], question, re.IGNORECASE)
|
| 531 |
if match:
|
|
|
|
| 532 |
entity_value = match.group(2) if len(match.groups()) > 1 else match.group(1)
|
| 533 |
entity_match_data = {
|
| 534 |
"type": p['type'],
|
| 535 |
+
"name": entity_value.strip().upper(),
|
| 536 |
"column": p['column']
|
| 537 |
}
|
|
|
|
| 538 |
break
|
| 539 |
|
| 540 |
# ==============================================================================
|
| 541 |
+
# 第一層:模組化意圖偵測與動態SQL組合
|
| 542 |
# ==============================================================================
|
| 543 |
|
|
|
|
| 544 |
intents = {}
|
| 545 |
sql_components = {
|
| 546 |
'select': [],
|
| 547 |
+
'from': "",
|
| 548 |
'joins': [],
|
| 549 |
+
'where': [],
|
| 550 |
'group_by': [],
|
| 551 |
'order_by': [],
|
| 552 |
'log_parts': []
|
| 553 |
}
|
| 554 |
|
| 555 |
+
# --- 運行一系列獨立的意圖偵測器 ---
|
| 556 |
|
| 557 |
# 偵測器 2.1: 核心動作意圖 (計數 vs. 列表)
|
| 558 |
if any(kw in q_lower for kw in ['幾份', '多少', '數量', '總數', 'how many', 'count']):
|
|
|
|
| 577 |
sql_components['where'].append(f"strftime('%m', jt.ReportAuthorization) = '{month}'")
|
| 578 |
sql_components['log_parts'].append(f"{month}月")
|
| 579 |
|
| 580 |
+
# 偵測器 2.3: 實體意圖 (使用預處理的結果)
|
| 581 |
if entity_match_data:
|
|
|
|
| 582 |
if "TSR53SampleDescription" not in " ".join(sql_components['joins']):
|
| 583 |
sql_components['joins'].append("JOIN TSR53SampleDescription AS sd ON jt.JobNo = sd.JobNo")
|
|
|
|
| 584 |
entity_name, column_name = entity_match_data["name"], entity_match_data["column"]
|
| 585 |
match_operator = "=" if column_name.endswith("ID") else "LIKE"
|
|
|
|
| 586 |
entity_value = f"'%{entity_name}%'" if match_operator == "LIKE" else f"'{entity_name}'"
|
| 587 |
sql_components['where'].append(f"{column_name} {match_operator} {entity_value}")
|
| 588 |
sql_components['log_parts'].append(entity_match_data["type"] + ":" + entity_name)
|
|
|
|
|
|
|
| 589 |
if intents.get('action') == 'list':
|
| 590 |
sql_components['select'].append("sd.BuyerName")
|
| 591 |
|
|
|
|
| 600 |
sql_components['joins'].append("JOIN TSR53SampleDescription AS sd ON jt.JobNo = sd.JobNo")
|
| 601 |
sql_components['where'].append("sd.OverallRating = 'Pass'")
|
| 602 |
sql_components['log_parts'].append("Pass")
|
| 603 |
+
|
| 604 |
+
# 偵測器 2.5: 實驗組 (LabGroup) 意圖
|
| 605 |
+
lab_group_match = re.search(r'([A-Z]{1,2})組', question, re.IGNORECASE)
|
| 606 |
+
if lab_group_match:
|
| 607 |
+
lab_group = lab_group_match.group(1).upper()
|
| 608 |
+
# 測試項目數量應從 JobItemsInProgress 計算
|
| 609 |
+
sql_components['joins'].append("JOIN JobItemsInProgress AS jip ON jt.JobNo = jip.JobNo")
|
| 610 |
+
sql_components['where'].append(f"jip.LabGroup = '{lab_group}'")
|
| 611 |
+
sql_components['log_parts'].append(f"{lab_group}組")
|
| 612 |
+
# 如果是計數,目標應該是測試項目而不是報告
|
| 613 |
+
if intents.get('action') == 'count':
|
| 614 |
+
sql_components['select'] = ["COUNT(jip.ItemCode) AS item_count"] # 覆蓋掉原有的 SELECT
|
| 615 |
+
sql_components['log_parts'][0] = "測試項目總數" # 更新日誌
|
| 616 |
+
|
| 617 |
+
# --- 3. 判斷是否觸發了模板,並動態組合 SQL ---
|
| 618 |
if 'action' in intents:
|
| 619 |
+
# 動態決定主表和必要的預設條件
|
| 620 |
+
sql_components['from'] = "FROM JobTimeline AS jt"
|
| 621 |
+
sql_components['where'].insert(0, "jt.ReportAuthorization IS NOT NULL")
|
| 622 |
+
|
| 623 |
+
# 組合所有SQL組件
|
| 624 |
+
select_clause = "SELECT " + ", ".join(sorted(list(set(sql_components['select']))))
|
| 625 |
from_clause = sql_components['from']
|
| 626 |
joins_clause = " ".join(sql_components['joins'])
|
| 627 |
+
where_clause = "WHERE " + " AND ".join(sql_components['where']) if sql_components['where'] else ""
|
| 628 |
orderby_clause = "ORDER BY " + ", ".join(sql_components['order_by']) if sql_components['order_by'] else ""
|
| 629 |
|
| 630 |
+
template_sql = f"{select_clause} {from_clause} {joins_clause} {where_clause} {orderby_clause};"
|
| 631 |
|
| 632 |
query_log = " ".join(sql_components['log_parts'])
|
| 633 |
self._log(f"🔄 偵測到組合意圖【{query_log}】,啟用動態模板。", "INFO")
|