Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -494,25 +494,19 @@ class TextToSQLSystem:
|
|
| 494 |
|
| 495 |
return formatted.strip()
|
| 496 |
|
| 497 |
-
|
| 498 |
|
| 499 |
def _validate_and_fix_sql(self, question: str, raw_response: str) -> Tuple[Optional[str], str]:
|
| 500 |
"""
|
| 501 |
-
(
|
| 502 |
-
一個
|
| 503 |
-
引入了全新的、统一的实体识别引擎,能够准确解析 "买家 Gap", "c0761n",
|
| 504 |
-
"买家ID c0761n" 等多种复杂的实体提问模式。
|
| 505 |
"""
|
| 506 |
q_lower = question.lower()
|
| 507 |
-
|
| 508 |
# ==============================================================================
|
| 509 |
-
# 第
|
| 510 |
# ==============================================================================
|
| 511 |
-
|
| 512 |
-
# --- **全新的统一实体识别引擎** ---
|
| 513 |
entity_match_data = None
|
| 514 |
-
|
| 515 |
-
# 定义多种识别模式,【优先级从高到低】
|
| 516 |
entity_patterns = [
|
| 517 |
# 模式1: 匹配 "类型 + ID" (e.g., "买家ID C0761N") - 最高优先级
|
| 518 |
{'pattern': r"(買家|buyer)\s*(?:id|代號|代碼)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.BuyerID', 'type': '買家ID'},
|
|
@@ -533,67 +527,103 @@ class TextToSQLSystem:
|
|
| 533 |
for p in entity_patterns:
|
| 534 |
match = re.search(p['pattern'], question, re.IGNORECASE)
|
| 535 |
if match:
|
|
|
|
| 536 |
entity_value = match.group(2) if len(match.groups()) > 1 else match.group(1)
|
| 537 |
entity_match_data = {
|
| 538 |
"type": p['type'],
|
| 539 |
-
"name": entity_value.strip().upper(),
|
| 540 |
"column": p['column']
|
| 541 |
}
|
|
|
|
| 542 |
break
|
| 543 |
-
|
| 544 |
-
#
|
| 545 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 546 |
|
| 547 |
-
#
|
| 548 |
-
if any(kw in q_lower for kw in ['
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 558 |
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
| 565 |
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
|
| 573 |
-
|
| 574 |
-
|
| 575 |
-
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
else
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
|
|
|
|
|
|
|
| 595 |
# ==============================================================================
|
| 596 |
-
# 第二层:
|
| 597 |
# ==============================================================================
|
| 598 |
self._log("未觸發任何模板,嘗試解析並修正 AI 輸出...", "INFO")
|
| 599 |
|
|
@@ -601,7 +631,7 @@ class TextToSQLSystem:
|
|
| 601 |
if not parsed_sql:
|
| 602 |
self._log(f"❌ 未能從模型回應中解析出任何 SQL。原始回應: {raw_response}", "ERROR")
|
| 603 |
return None, f"無法解析SQL。原始回應:\n{raw_response}"
|
| 604 |
-
|
| 605 |
self._log(f"📊 解析出的原始 SQL: {parsed_sql}", "DEBUG")
|
| 606 |
|
| 607 |
fixed_sql = " " + parsed_sql.strip() + " "
|
|
|
|
| 494 |
|
| 495 |
return formatted.strip()
|
| 496 |
|
| 497 |
+
# 在 class TextToSQLSystem 內
|
| 498 |
|
| 499 |
def _validate_and_fix_sql(self, question: str, raw_response: str) -> Tuple[Optional[str], str]:
|
| 500 |
"""
|
| 501 |
+
(V26 / 統一實體識別 + 模組化意圖引擎版)
|
| 502 |
+
在模組化意圖偵測前,先運行一個帶優先級的統一實體識別引擎。
|
|
|
|
|
|
|
| 503 |
"""
|
| 504 |
q_lower = question.lower()
|
| 505 |
+
|
| 506 |
# ==============================================================================
|
| 507 |
+
# 第零層:統一實體識別引擎 (Unified Entity Recognition Engine)
|
| 508 |
# ==============================================================================
|
|
|
|
|
|
|
| 509 |
entity_match_data = None
|
|
|
|
|
|
|
| 510 |
entity_patterns = [
|
| 511 |
# 模式1: 匹配 "类型 + ID" (e.g., "买家ID C0761N") - 最高优先级
|
| 512 |
{'pattern': r"(買家|buyer)\s*(?:id|代號|代碼)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.BuyerID', 'type': '買家ID'},
|
|
|
|
| 527 |
for p in entity_patterns:
|
| 528 |
match = re.search(p['pattern'], question, re.IGNORECASE)
|
| 529 |
if match:
|
| 530 |
+
# 根據正則表達式捕獲組的數量來決定取哪個組
|
| 531 |
entity_value = match.group(2) if len(match.groups()) > 1 else match.group(1)
|
| 532 |
entity_match_data = {
|
| 533 |
"type": p['type'],
|
| 534 |
+
"name": entity_value.strip().upper(), # 標準化:移除前後空格並轉大寫
|
| 535 |
"column": p['column']
|
| 536 |
}
|
| 537 |
+
# 找到第一個匹配項(最高優先級)後就立刻停止
|
| 538 |
break
|
| 539 |
+
|
| 540 |
+
# ==============================================================================
|
| 541 |
+
# 第一層:模組化意圖偵測 (Modular Intent Detection)
|
| 542 |
+
# ==============================================================================
|
| 543 |
+
|
| 544 |
+
# --- 1. 初始化 SQL 組件 ---
|
| 545 |
+
intents = {}
|
| 546 |
+
sql_components = {
|
| 547 |
+
'select': [],
|
| 548 |
+
'from': "FROM JobTimeline AS jt",
|
| 549 |
+
'joins': [],
|
| 550 |
+
'where': ["jt.ReportAuthorization IS NOT NULL"],
|
| 551 |
+
'group_by': [],
|
| 552 |
+
'order_by': [],
|
| 553 |
+
'log_parts': []
|
| 554 |
+
}
|
| 555 |
+
|
| 556 |
+
# --- 2. 運行一系列獨立的意圖偵測器 ---
|
| 557 |
|
| 558 |
+
# 偵測器 2.1: 核心動作意圖 (計數 vs. 列表)
|
| 559 |
+
if any(kw in q_lower for kw in ['幾份', '多少', '數量', '總數', 'how many', 'count']):
|
| 560 |
+
intents['action'] = 'count'
|
| 561 |
+
sql_components['select'].append("COUNT(DISTINCT jt.JobNo) AS report_count")
|
| 562 |
+
sql_components['log_parts'].append("報告總數")
|
| 563 |
+
elif any(kw in q_lower for kw in ['報告號碼', '報告清單', '列出報告', 'report number', 'list of reports']):
|
| 564 |
+
intents['action'] = 'list'
|
| 565 |
+
sql_components['select'].append("jt.JobNo, jt.ReportAuthorization")
|
| 566 |
+
sql_components['order_by'].append("jt.ReportAuthorization DESC")
|
| 567 |
+
sql_components['log_parts'].append("報告列表")
|
| 568 |
+
|
| 569 |
+
# 偵測器 2.2: 時間意圖
|
| 570 |
+
year_match = re.search(r'(\d{4})\s*年?', question)
|
| 571 |
+
month_match = re.search(r'(\d{1,2})\s*月', question)
|
| 572 |
+
if year_match:
|
| 573 |
+
year = year_match.group(1)
|
| 574 |
+
sql_components['where'].append(f"strftime('%Y', jt.ReportAuthorization) = '{year}'")
|
| 575 |
+
sql_components['log_parts'].append(f"{year}年")
|
| 576 |
+
if month_match:
|
| 577 |
+
month = month_match.group(1).zfill(2)
|
| 578 |
+
sql_components['where'].append(f"strftime('%m', jt.ReportAuthorization) = '{month}'")
|
| 579 |
+
sql_components['log_parts'].append(f"{month}月")
|
| 580 |
+
|
| 581 |
+
# 偵測器 2.3: 實體意圖 (現在使用預處理的結果)
|
| 582 |
+
if entity_match_data:
|
| 583 |
+
# 確保 JOIN 只添加一次
|
| 584 |
+
if "TSR53SampleDescription" not in " ".join(sql_components['joins']):
|
| 585 |
+
sql_components['joins'].append("JOIN TSR53SampleDescription AS sd ON jt.JobNo = sd.JobNo")
|
| 586 |
|
| 587 |
+
entity_name, column_name = entity_match_data["name"], entity_match_data["column"]
|
| 588 |
+
match_operator = "=" if column_name.endswith("ID") else "LIKE"
|
| 589 |
+
# 對於 LIKE 查詢,在值兩邊加上 %
|
| 590 |
+
entity_value = f"'%{entity_name}%'" if match_operator == "LIKE" else f"'{entity_name}'"
|
| 591 |
+
sql_components['where'].append(f"{column_name} {match_operator} {entity_value}")
|
| 592 |
+
sql_components['log_parts'].append(entity_match_data["type"] + ":" + entity_name)
|
| 593 |
|
| 594 |
+
# 如果是列表查詢,我們可以顯示更多實體相關的資訊
|
| 595 |
+
if intents.get('action') == 'list':
|
| 596 |
+
sql_components['select'].append("sd.BuyerName")
|
| 597 |
+
|
| 598 |
+
# 偵測器 2.4: 評級意圖 (Pass/Fail)
|
| 599 |
+
if 'fail' in q_lower or '失敗' in q_lower:
|
| 600 |
+
if "TSR53SampleDescription" not in " ".join(sql_components['joins']):
|
| 601 |
+
sql_components['joins'].append("JOIN TSR53SampleDescription AS sd ON jt.JobNo = sd.JobNo")
|
| 602 |
+
sql_components['where'].append("sd.OverallRating = 'Fail'")
|
| 603 |
+
sql_components['log_parts'].append("Fail")
|
| 604 |
+
elif 'pass' in q_lower or '通過' in q_lower:
|
| 605 |
+
if "TSR53SampleDescription" not in " ".join(sql_components['joins']):
|
| 606 |
+
sql_components['joins'].append("JOIN TSR53SampleDescription AS sd ON jt.JobNo = sd.JobNo")
|
| 607 |
+
sql_components['where'].append("sd.OverallRating = 'Pass'")
|
| 608 |
+
sql_components['log_parts'].append("Pass")
|
| 609 |
+
|
| 610 |
+
# --- 3. 判斷是否觸發了模板,並組合 SQL ---
|
| 611 |
+
|
| 612 |
+
if 'action' in intents:
|
| 613 |
+
final_select = "SELECT " + ", ".join(sorted(list(set(sql_components['select']))))
|
| 614 |
+
from_clause = sql_components['from']
|
| 615 |
+
joins_clause = " ".join(sql_components['joins'])
|
| 616 |
+
where_clause = "WHERE " + " AND ".join(sql_components['where'])
|
| 617 |
+
orderby_clause = "ORDER BY " + ", ".join(sql_components['order_by']) if sql_components['order_by'] else ""
|
| 618 |
+
|
| 619 |
+
template_sql = f"{final_select} {from_clause} {joins_clause} {where_clause} {orderby_clause};"
|
| 620 |
+
|
| 621 |
+
query_log = " ".join(sql_components['log_parts'])
|
| 622 |
+
self._log(f"🔄 偵測到組合意圖【{query_log}】,啟用動態模板。", "INFO")
|
| 623 |
+
return self._finalize_sql(template_sql, f"模板覆寫: {query_log} 查詢")
|
| 624 |
+
|
| 625 |
# ==============================================================================
|
| 626 |
+
# 第二层:AI 生成修正流程 (Fallback)
|
| 627 |
# ==============================================================================
|
| 628 |
self._log("未觸發任何模板,嘗試解析並修正 AI 輸出...", "INFO")
|
| 629 |
|
|
|
|
| 631 |
if not parsed_sql:
|
| 632 |
self._log(f"❌ 未能從模型回應中解析出任何 SQL。原始回應: {raw_response}", "ERROR")
|
| 633 |
return None, f"無法解析SQL。原始回應:\n{raw_response}"
|
| 634 |
+
|
| 635 |
self._log(f"📊 解析出的原始 SQL: {parsed_sql}", "DEBUG")
|
| 636 |
|
| 637 |
fixed_sql = " " + parsed_sql.strip() + " "
|