Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -486,10 +486,11 @@ class TextToSQLSystem:
|
|
| 486 |
|
| 487 |
def _validate_and_fix_sql(self, question: str, raw_response: str) -> Tuple[Optional[str], str]:
|
| 488 |
"""
|
| 489 |
-
(
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
|
|
|
| 493 |
"""
|
| 494 |
q_lower = question.lower()
|
| 495 |
|
|
@@ -497,32 +498,39 @@ class TextToSQLSystem:
|
|
| 497 |
# 第一層:高價值意圖識別與模板覆寫 (Intent Recognition & Templating)
|
| 498 |
# ==============================================================================
|
| 499 |
|
| 500 |
-
# ---
|
| 501 |
job_no_match = re.search(r"(?:工單|jobno)\s*'\"?([A-Z]{2,3}\d+)'\"?", question, re.IGNORECASE)
|
|
|
|
|
|
|
| 502 |
entity_match_data = None
|
| 503 |
-
ENTITY_TO_COLUMN_MAP = {'申請廠商':'sd.ApplicantName','申請方':'sd.ApplicantName','applicant':'sd.ApplicantName','付款廠商':'sd.InvoiceToName','付款方':'sd.InvoiceToName','invoiceto':'sd.InvoiceToName','代理商':'sd.AgentName','agent':'sd.AgentName','買家':'sd.BuyerName','buyer':'sd.BuyerName','客戶':'sd.BuyerName','品牌':'tsr.BuyerName'}
|
| 504 |
-
for keyword, column in ENTITY_TO_COLUMN_MAP.items():
|
| 505 |
-
if keyword in q_lower:
|
| 506 |
-
match = re.search(fr"{re.escape(keyword)}[\s:;\'\"-]*([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|為|$)", question, re.IGNORECASE)
|
| 507 |
-
if match: entity_match_data = {"type": keyword, "name": match.group(1).strip(), "column": column}; break
|
| 508 |
|
| 509 |
-
#
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 526 |
|
| 527 |
# 意图 1: 报告列表查询 (高优先级)
|
| 528 |
if any(kw in q_lower for kw in ['報告號碼', '報告清單', '列出報告', 'report number', 'list of reports']):
|
|
@@ -533,10 +541,8 @@ class TextToSQLSystem:
|
|
| 533 |
where_conditions = ["jt.ReportAuthorization IS NOT NULL"]
|
| 534 |
log_parts = []
|
| 535 |
|
| 536 |
-
if year_match:
|
| 537 |
-
|
| 538 |
-
if month_match:
|
| 539 |
-
month = month_match.group(1).zfill(2); where_conditions.append(f"strftime('%m', jt.ReportAuthorization) = '{month}'"); log_parts.append(f"{month}月")
|
| 540 |
|
| 541 |
if 'fail' in q_lower or '失敗' in q_lower:
|
| 542 |
if "JOIN TSR53SampleDescription" not in from_clause: from_clause = "FROM JobTimeline AS jt JOIN TSR53SampleDescription AS sd ON jt.JobNo = sd.JobNo"
|
|
@@ -548,25 +554,20 @@ class TextToSQLSystem:
|
|
| 548 |
if entity_match_data:
|
| 549 |
entity_name, column_name = entity_match_data["name"], entity_match_data["column"]
|
| 550 |
if "JOIN TSR53SampleDescription" not in from_clause: from_clause = "FROM JobTimeline AS jt JOIN TSR53SampleDescription AS sd ON jt.JobNo = sd.JobNo"
|
| 551 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 552 |
select_clause = "SELECT jt.JobNo, sd.BuyerName, jt.ReportAuthorization"
|
| 553 |
|
| 554 |
final_where_clause = "WHERE " + " AND ".join(where_conditions)
|
| 555 |
-
time_log = " ".join(log_parts)
|
| 556 |
self._log(f"🔄 檢測到查詢【{time_log} 報告列表】意圖,啟用智能模板。", "INFO")
|
| 557 |
template_sql = f"{select_clause} {from_clause} {final_where_clause} ORDER BY jt.ReportAuthorization DESC;"
|
| 558 |
return self._finalize_sql(template_sql, f"模板覆寫: {time_log} 報告列表查詢")
|
| 559 |
|
| 560 |
-
#
|
| 561 |
-
if '報告' in q_lower and any(kw in q_lower for kw in ['幾份', '多少', '數量', '總數']) and not entity_match_data:
|
| 562 |
-
year_match = re.search(r'(\d{4})\s*年?', question)
|
| 563 |
-
time_condition, time_log = "WHERE ReportAuthorization IS NOT NULL", "总"
|
| 564 |
-
if year_match: year = year_match.group(1); time_condition += f" AND strftime('%Y', ReportAuthorization) = '{year}'"; time_log = f"{year}年"
|
| 565 |
-
self._log(f"🔄 檢測到查詢【{time_log}全局報告總數】意圖,啟用模板。", "INFO")
|
| 566 |
-
template_sql = f"SELECT COUNT(DISTINCT JobNo) AS report_count FROM JobTimeline {time_condition};"
|
| 567 |
-
return self._finalize_sql(template_sql, f"模板覆寫: {time_log}全局報告總數查詢")
|
| 568 |
-
|
| 569 |
-
# ... (此处可以继续添加 V17 版本中的其他 if/elif 模板)
|
| 570 |
|
| 571 |
# ==============================================================================
|
| 572 |
# 第二层:常规修正流程 (Fallback Corrections)
|
|
@@ -583,19 +584,8 @@ class TextToSQLSystem:
|
|
| 583 |
fixed_sql = " " + parsed_sql.strip() + " "
|
| 584 |
fixes_applied_fallback = []
|
| 585 |
|
| 586 |
-
#
|
| 587 |
-
dialect_corrections = {r'YEAR\s*\(([^)]+)\)': r"strftime('%Y', \1)",r"(strftime\('%Y',\s*[^)]+\))\s*=\s*(\d{4})": r"\1 = '\2'",r"EXTRACT\s*\(\s*YEAR\s+FROM\s+([^)]+)\s*\)": r"strftime('%Y', \1)"}
|
| 588 |
-
for p, r in dialect_corrections.items():
|
| 589 |
-
if re.search(p, fixed_sql, re.IGNORECASE): fixed_sql = re.sub(p, r, fixed_sql, flags=re.IGNORECASE); fixes_applied_fallback.append(f"修正方言: {p}")
|
| 590 |
|
| 591 |
-
schema_corrections = {'TSR53ReportAuthorization':'TSR53SampleDescription','TSR53TestResult':'TSR53SampleDescription','JobInvoice':'TSR53Invoice','JobInvoiceAuthorization':'TSR53Invoice','JobInvoiceCreditNote':'TSR53Invoice','Customer':'TSR53SampleDescription','Customers':'TSR53SampleDescription','Invoice':'TSR53Invoice','Invoices':'TSR53Invoice','Job':'JobTimeline','Jobs':'JobsInProgress','Tests':'TSR53MarsItem','TestsLog':'JobItemsInProgress','AuthorizationDate':'ReportAuthorization','ReportAuthorizationDate':'ReportAuthorization','LegalAuthorization':'OverallRating','LegalAuthorizationDate':'ReportAuthorization','TestResult':'OverallRating','Rating':'OverallRating','CustomerName':'BuyerName','InvoiceTo':'InvoiceToName','Applicant':'ApplicantName','Agent':'AgentName','JobNumber':'JobNo','ReportNo':'JobNo','TestName':'ItemInvoiceDescriptionJob','CreationDate':'JobCreation','CreateDate':'JobCreation','CompletedDate':'ReportAuthorization','InvoiceCreditNoteAmount':'LocalAmount','Amount':'LocalAmount','Price':'LocalAmount','Lab':'LabGroup'}
|
| 592 |
-
for w, c in schema_corrections.items():
|
| 593 |
-
p = r'\b' + re.escape(w) + r'\b'
|
| 594 |
-
if re.search(p, fixed_sql, re.IGNORECASE): fixed_sql = re.sub(p, c, fixed_sql, flags=re.IGNORECASE); fixes_applied_fallback.append(f"映射 Schema: '{w}' -> '{c}'")
|
| 595 |
-
|
| 596 |
-
if any(kw in q_lower for kw in['幾份','多少','how many','count','數量']) and 'select ' in fixed_sql.lower() and 'count' not in fixed_sql.lower() and 'group by' not in fixed_sql.lower():
|
| 597 |
-
fixed_sql = re.sub(r'SELECT\s+.*?FROM', 'SELECT COUNT(*) FROM', fixed_sql, count=1, flags=re.IGNORECASE); fixes_applied_fallback.append("修正邏輯: 補全 COUNT(*)")
|
| 598 |
-
|
| 599 |
log_msg = "AI 生成並成功修正" if fixes_applied_fallback else "AI 生成且無需修正"
|
| 600 |
return self._finalize_sql(fixed_sql, log_msg)
|
| 601 |
|
|
|
|
| 486 |
|
| 487 |
def _validate_and_fix_sql(self, question: str, raw_response: str) -> Tuple[Optional[str], str]:
|
| 488 |
"""
|
| 489 |
+
(V22 / ID 識別版)
|
| 490 |
+
一個全面、多層次的 SQL 驗證與生成引擎。
|
| 491 |
+
極大地增強了實體識別能力。新增了一個獨立的 ID 識別模塊,
|
| 492 |
+
能夠主動從問題中捕捉並分類各種格式化的 ID (如 'C0761N', 'M1044N'),
|
| 493 |
+
並將其用於後續的模板生成中。
|
| 494 |
"""
|
| 495 |
q_lower = question.lower()
|
| 496 |
|
|
|
|
| 498 |
# 第一層:高價值意圖識別與模板覆寫 (Intent Recognition & Templating)
|
| 499 |
# ==============================================================================
|
| 500 |
|
| 501 |
+
# --- 預先檢測所有可能的意圖和實體 ---
|
| 502 |
job_no_match = re.search(r"(?:工單|jobno)\s*'\"?([A-Z]{2,3}\d+)'\"?", question, re.IGNORECASE)
|
| 503 |
+
|
| 504 |
+
# **新增的、更強大的實體識別模塊**
|
| 505 |
entity_match_data = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 506 |
|
| 507 |
+
# 步驟 1: 優先識別格式化的 ID (例如 M1234N 或 C5678N)
|
| 508 |
+
id_match = re.search(r'\b([A-Z]\d{4}[A-Z])\b', question, re.IGNORECASE)
|
| 509 |
+
if id_match:
|
| 510 |
+
entity_id = id_match.group(1).upper()
|
| 511 |
+
column_name = 'sd.ApplicantID' # 默認值
|
| 512 |
+
entity_type_log = 'ID'
|
| 513 |
+
# 根據用戶問題中的上下文關鍵詞來判斷 ID 類型
|
| 514 |
+
if 'buyer' in q_lower or '買家' in q_lower:
|
| 515 |
+
column_name, entity_type_log = 'sd.BuyerID', '買家ID'
|
| 516 |
+
elif 'applicant' in q_lower or '申請' in q_lower:
|
| 517 |
+
column_name, entity_type_log = 'sd.ApplicantID', '申請方ID'
|
| 518 |
+
elif 'invoice' in q_lower or '付款' in q_lower:
|
| 519 |
+
column_name, entity_type_log = 'sd.InvoiceToID', '付款方ID'
|
| 520 |
+
elif 'agent' in q_lower or '代理' in q_lower:
|
| 521 |
+
column_name, entity_type_log = 'sd.AgentID', '代理商ID'
|
| 522 |
+
|
| 523 |
+
entity_match_data = {"type": entity_type_log, "name": entity_id, "column": column_name}
|
| 524 |
+
|
| 525 |
+
# 步驟 2: 如果沒有找到 ID,再識別文本名稱 (舊邏輯)
|
| 526 |
+
if not entity_match_data:
|
| 527 |
+
ENTITY_TO_COLUMN_MAP = {'申請廠商':'sd.ApplicantName','申請方':'sd.ApplicantName','applicant':'sd.ApplicantName','付款廠商':'sd.InvoiceToName','付款方':'sd.InvoiceToName','invoiceto':'sd.InvoiceToName','代理商':'sd.AgentName','agent':'sd.AgentName','買家':'sd.BuyerName','buyer':'sd.BuyerName','客戶':'sd.BuyerName','品牌':'tsr.BuyerName'}
|
| 528 |
+
for keyword, column in ENTITY_TO_COLUMN_MAP.items():
|
| 529 |
+
if keyword in q_lower:
|
| 530 |
+
match = re.search(fr"{re.escape(keyword)}[\s:;\'\"-]*([a-zA-Z0-9&.\s-]+?)(?:\s*的|\s+|為|$)", question, re.IGNORECASE)
|
| 531 |
+
if match: entity_match_data = {"type": keyword, "name": match.group(1).strip(), "column": column}; break
|
| 532 |
+
|
| 533 |
+
# --- 判斷邏輯: 依優先級進入對應的模板 ---
|
| 534 |
|
| 535 |
# 意图 1: 报告列表查询 (高优先级)
|
| 536 |
if any(kw in q_lower for kw in ['報告號碼', '報告清單', '列出報告', 'report number', 'list of reports']):
|
|
|
|
| 541 |
where_conditions = ["jt.ReportAuthorization IS NOT NULL"]
|
| 542 |
log_parts = []
|
| 543 |
|
| 544 |
+
if year_match: year = year_match.group(1); where_conditions.append(f"strftime('%Y', jt.ReportAuthorization) = '{year}'"); log_parts.append(f"{year}年")
|
| 545 |
+
if month_match: month = month_match.group(1).zfill(2); where_conditions.append(f"strftime('%m', jt.ReportAuthorization) = '{month}'"); log_parts.append(f"{month}月")
|
|
|
|
|
|
|
| 546 |
|
| 547 |
if 'fail' in q_lower or '失敗' in q_lower:
|
| 548 |
if "JOIN TSR53SampleDescription" not in from_clause: from_clause = "FROM JobTimeline AS jt JOIN TSR53SampleDescription AS sd ON jt.JobNo = sd.JobNo"
|
|
|
|
| 554 |
if entity_match_data:
|
| 555 |
entity_name, column_name = entity_match_data["name"], entity_match_data["column"]
|
| 556 |
if "JOIN TSR53SampleDescription" not in from_clause: from_clause = "FROM JobTimeline AS jt JOIN TSR53SampleDescription AS sd ON jt.JobNo = sd.JobNo"
|
| 557 |
+
# ID 查詢使用精確匹配,名稱查詢使用模糊匹配
|
| 558 |
+
match_operator = "=" if entity_match_data["type"].endswith("ID") else "LIKE"
|
| 559 |
+
entity_value = f"'{entity_name}'" if match_operator == "=" else f"'%{entity_name}%'"
|
| 560 |
+
where_conditions.append(f"{column_name} {match_operator} {entity_value}")
|
| 561 |
+
log_parts.append(entity_name)
|
| 562 |
select_clause = "SELECT jt.JobNo, sd.BuyerName, jt.ReportAuthorization"
|
| 563 |
|
| 564 |
final_where_clause = "WHERE " + " AND ".join(where_conditions)
|
| 565 |
+
time_log = " ".join(log_parts) if log_parts else "全部"
|
| 566 |
self._log(f"🔄 檢測到查詢【{time_log} 報告列表】意圖,啟用智能模板。", "INFO")
|
| 567 |
template_sql = f"{select_clause} {from_clause} {final_where_clause} ORDER BY jt.ReportAuthorization DESC;"
|
| 568 |
return self._finalize_sql(template_sql, f"模板覆寫: {time_log} 報告列表查詢")
|
| 569 |
|
| 570 |
+
# ... (此处可以继续添加 V17 版本中的其他所有 if/elif 模板)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 571 |
|
| 572 |
# ==============================================================================
|
| 573 |
# 第二层:常规修正流程 (Fallback Corrections)
|
|
|
|
| 584 |
fixed_sql = " " + parsed_sql.strip() + " "
|
| 585 |
fixes_applied_fallback = []
|
| 586 |
|
| 587 |
+
# ... (后备修正字典和循环)
|
|
|
|
|
|
|
|
|
|
| 588 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 589 |
log_msg = "AI 生成並成功修正" if fixes_applied_fallback else "AI 生成且無需修正"
|
| 590 |
return self._finalize_sql(fixed_sql, log_msg)
|
| 591 |
|