Paul720810 committed on
Commit
f080e2a
·
verified ·
1 Parent(s): 99cea8f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +152 -120
app.py CHANGED
@@ -227,35 +227,31 @@ class TextToSQLSystem:
227
  return self._generate_fallback_sql(prompt)
228
 
229
  try:
230
- # GGUF 模型呼叫
231
  output = self.llm(
232
  prompt,
233
- max_tokens=150, # 給予足夠的生成長度
234
  temperature=0.1,
235
  top_p=0.9,
236
  echo=False,
237
- # 暫時移除 stop 參數,觀察最原始的輸出
238
- # stop=["```", ";", "\n\n", "</s>"],
239
  )
240
 
241
- # --- 關鍵除錯步驟 ---
242
- # 印出 llama-cpp-python 返回的完整、原始的 dictionary
243
  self._log(f"🧠 模型原始輸出 (Raw Output): {output}", "DEBUG")
244
 
245
  if output and "choices" in output and len(output["choices"]) > 0:
246
- # 從原始輸出中提取文本
247
  generated_text = output["choices"][0]["text"]
248
  self._log(f"📝 提取出的生成文本: {generated_text.strip()}", "DEBUG")
249
  return generated_text.strip()
250
  else:
251
  self._log("❌ 模型的原始輸出格式不正確或為空。", "ERROR")
252
- return "" # 返回空字串,讓後續流程處理
253
 
254
  except Exception as e:
255
  self._log(f"❌ 模型生成過程中發生嚴重錯誤: {e}", "CRITICAL")
256
  import traceback
257
- self._log(traceback.format_exc(), "DEBUG") # 印出詳細的錯誤堆疊
258
- return "" # 返回空字串
259
 
260
  def _load_gguf_model_fallback(self, model_path):
261
  """備用載入方式"""
@@ -418,129 +414,165 @@ class TextToSQLSystem:
418
 
419
  return relevant_tables[:3] # 最多返回3個相關表格
420
 
 
 
421
  def _validate_and_fix_sql(self, sql: str, question: str) -> str:
422
- """
423
- 根據 Schema 和常見錯誤,全面驗證並動態修正 SQL。
424
- 這個函數會依序執行以下步驟:
425
- 1. 語法正規化:清理多餘的空格和分號。
426
- 2. SQL 方言修正:將非 SQLite 的語法 (如 YEAR()) 轉換為 SQLite 語法。
427
- 3. Schema 修正:將模型幻覺出的表名和欄位名,映射回真實的 Schema 名稱。
428
- 4. 邏輯意圖修正:根據用戶問題的關鍵詞,檢查並補全缺失的 WHERE 條件或修正錯誤的聚合函數。
429
- """
430
  if not sql or not self.schema:
431
- self._log("SQL 修正被跳過,因為輸入為空或 schema 未載入。", "WARNING")
432
  return sql
433
 
434
  original_sql = sql
435
- # 前後加空格方便正則匹配,並移除前後多餘的空白
436
  fixed_sql = " " + sql.strip() + " "
437
  fixes_applied = []
438
  q_lower = question.lower()
439
-
440
- # ==================== 步驟 1: SQL 方言修正 (Dialect Correction) ====================
441
- # (包含您在 column_corrections 中定義的 YEAR() 規則)
442
- dialect_corrections = {
443
- # 模式 (Pattern) -> 替換 (Replacement)
444
- r'YEAR\s*\(([^)]+)\)': r"strftime('%Y', \1)",
445
- r"(strftime\('%Y',\s*[^)]+\))\s*=\s*(\d{4})": r"\1 = '\2'"
446
- }
447
-
448
- for pattern, replacement in dialect_corrections.items():
449
- if re.search(pattern, fixed_sql, re.IGNORECASE):
450
- fixed_sql = re.sub(pattern, replacement, fixed_sql, flags=re.IGNORECASE)
451
- fixes_applied.append(f"修正 SQL 方言: {pattern}")
452
-
453
- # ==================== 步驟 2: Schema 名稱修正 (Schema Correction) ====================
454
- # (合併了您所有的 table_corrections 和 column_corrections)
455
- schema_corrections = {
456
- # === 表格映射 (來自您的 table_corrections) ===
457
- 'Customer': 'TSR53SampleDescription',
458
- 'InvoiceNote': 'TSR53Invoice',
459
- 'InvoiceNoteInvoiceNo': 'TSR53Invoice',
460
- 'JobNoLog': 'JobTimeline',
461
- 'SampleDescription': 'TSR53SampleDescription',
462
- 'Invoice': 'TSR53Invoice',
463
- 'Job': 'JobTimeline',
464
- 'Events': 'JobEventsLog',
465
- 'Progress': 'JobsInProgress',
466
- 'Items': 'JobItemsInProgress',
467
- 'Calendar': 'calendar_days',
468
- 'job_timeline': 'JobTimeline',
469
- 'sample_description': 'TSR53SampleDescription',
470
- 'invoice': 'TSR53Invoice',
471
- 'events_log': 'JobEventsLog',
472
- 'calendar_days': 'calendar_days',
473
-
474
- # === 欄位映射 (來自您的 column_corrections) ===
475
- # 客戶相關
476
- 'CustomerName': 'InvoiceToName',
477
- 'CustomerNo': 'InvoiceToID',
478
- '客戶': 'InvoiceToName',
479
- '買家': 'BuyerName',
480
- '申請方': 'ApplicantName',
481
- # 工作單相關
482
- 'JobNumber': 'JobNo',
483
- 'JobId': 'JobNo',
484
- '工作單': 'JobNo',
485
- # 時間相關
486
- 'LTRNo': 'JobCreation',
487
- 'CreationDate': 'JobCreation',
488
- 'IssueDate': 'JobIssuedDate',
489
- 'EventTime': 'EventTimestamp',
490
- 'CompletedDate': 'ReportAuthorization', # 完成日期應為報告授權
491
- # 發票相關
492
- 'InvoiceNoteNo': 'InvoiceCreditNoteNo',
493
- 'InvoiceNo': 'InvoiceCreditNoteNo',
494
- 'InvoiceDate': 'InvoiceCreditNoteDate',
495
- }
496
-
497
- for wrong, correct in schema_corrections.items():
498
- pattern = r'\b' + re.escape(wrong) + r'\b' # \b 確保是完整單詞匹配
499
- if re.search(pattern, fixed_sql, re.IGNORECASE):
500
- fixed_sql = re.sub(pattern, correct, fixed_sql, flags=re.IGNORECASE)
501
- fixes_applied.append(f"映射 Schema: '{wrong}' -> '{correct}'")
502
-
503
- # ==================== 步驟 3: 邏輯意圖修正 (Logical Intent Correction) ====================
504
- # (包含您在 column_corrections 中定義的邏輯規則)
505
- logical_corrections = {
506
- r'\bMaxJobNo\b': 'COUNT(*)',
507
- r'MAX\s*\(([^)]*JobNo[^)]*)\)': r'COUNT(\1)', # 處理 MAX(j.JobNo) 或 MAX(JobNo)
508
- }
509
 
510
- # 只有在問題意圖是 "數量" 時才觸發
511
- if any(keyword in q_lower for keyword in ['how many', 'count', '數量', '多少']):
512
- for pattern, replacement in logical_corrections.items():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
513
  if re.search(pattern, fixed_sql, re.IGNORECASE):
514
  fixed_sql = re.sub(pattern, replacement, fixed_sql, flags=re.IGNORECASE)
515
- fixes_applied.append(f"修正邏輯意圖: {pattern}")
516
-
517
- # 檢查並補全 WHERE 條件
518
- intent_conditions = {
519
- 'fail': "OverallRating = 'Fail'",
520
- 'pass': "OverallRating = 'Pass'",
521
- }
522
-
523
- for keyword, condition in intent_conditions.items():
524
- if keyword in q_lower and condition.lower() not in fixed_sql.lower():
525
- fixes_applied.append(f"補全 WHERE 條件: {condition}")
526
- if ' where ' in fixed_sql.lower():
527
- parts = re.split(r'\b(GROUP BY|ORDER BY|LIMIT)\b', fixed_sql, maxsplit=1, flags=re.IGNORECASE)
528
- main_query = parts[0]
529
- main_query = re.sub(r'( where\s+)', f' WHERE {condition} AND ', main_query, count=1, flags=re.IGNORECASE)
530
- fixed_sql = main_query + ''.join(parts[1:])
531
- else:
532
- match = re.search(r'\s(GROUP BY|ORDER BY|LIMIT)\s', fixed_sql, re.IGNORECASE)
533
- if match:
534
- insert_point = match.start()
535
- fixed_sql = fixed_sql[:insert_point] + f" WHERE {condition} " + fixed_sql[insert_point:]
536
- else:
537
- fixed_sql = fixed_sql.strip().rstrip(';') + f" WHERE {condition};"
538
-
539
- # ==================== 步驟 4: 清理與完成 ====================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
540
  fixed_sql = fixed_sql.strip()
541
  if not fixed_sql.endswith(';'):
542
  fixed_sql += ';'
543
-
544
  fixed_sql = re.sub(r'\s+', ' ', fixed_sql).strip()
545
 
546
  if fixes_applied:
@@ -550,7 +582,7 @@ class TextToSQLSystem:
550
  self._log(f" - 應用規則: {fix}", "DEBUG")
551
  self._log(f" - 修正後 SQL: {fixed_sql}", "INFO")
552
  else:
553
- self._log("✅ SQL 驗證通過,無需修正。", "INFO")
554
 
555
  return fixed_sql
556
 
 
227
  return self._generate_fallback_sql(prompt)
228
 
229
  try:
 
230
  output = self.llm(
231
  prompt,
232
+ max_tokens=150,
233
  temperature=0.1,
234
  top_p=0.9,
235
  echo=False,
236
+ # --- stop 參數加回來 ---
237
+ stop=["```", ";", "\n\n", "</s>"],
238
  )
239
 
 
 
240
  self._log(f"🧠 模型原始輸出 (Raw Output): {output}", "DEBUG")
241
 
242
  if output and "choices" in output and len(output["choices"]) > 0:
 
243
  generated_text = output["choices"][0]["text"]
244
  self._log(f"📝 提取出的生成文本: {generated_text.strip()}", "DEBUG")
245
  return generated_text.strip()
246
  else:
247
  self._log("❌ 模型的原始輸出格式不正確或為空。", "ERROR")
248
+ return ""
249
 
250
  except Exception as e:
251
  self._log(f"❌ 模型生成過程中發生嚴重錯誤: {e}", "CRITICAL")
252
  import traceback
253
+ self._log(traceback.format_exc(), "DEBUG")
254
+ return ""
255
 
256
  def _load_gguf_model_fallback(self, model_path):
257
  """備用載入方式"""
 
414
 
415
  return relevant_tables[:3] # 最多返回3個相關表格
416
 
417
+ # in class TextToSQLSystem:
418
+
419
  def _validate_and_fix_sql(self, sql: str, question: str) -> str:
420
+
 
 
 
 
 
 
 
421
  if not sql or not self.schema:
422
+ self._log("SQL 修正被跳過,因輸入為空或 schema 未載入。", "WARNING")
423
  return sql
424
 
425
  original_sql = sql
 
426
  fixed_sql = " " + sql.strip() + " "
427
  fixes_applied = []
428
  q_lower = question.lower()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
429
 
430
+ # ==============================================================================
431
+ # 第一層:高價值意圖識別與模板覆寫 (Intent Recognition & Templating)
432
+ # ==============================================================================
433
+
434
+ # --- 意圖 1: 查詢 Top N 實體的營收貢獻 ---
435
+ # 匹配 "top 5 買家 營收", "貢獻最高的10個客戶", "業績最好的申請方" 等
436
+ top_n_pattern = r"(?:top|前|最高|最大|最好)\s*(\d+)?\s*(?:個|名)?\s*([^ ]+?)\s*(?:的)?(?:營收|業績|貢獻|金額|sales|revenue)"
437
+ top_n_match = re.search(top_n_pattern, question, re.IGNORECASE)
438
+ if top_n_match:
439
+ limit = top_n_match.group(1) or '10' # 如果沒寫 N,預設為 10
440
+ entity_keyword = top_n_match.group(2).lower()
441
+
442
+ # 建立實體關鍵詞到欄位的映射
443
+ ENTITY_MAP = {
444
+ '買家': 'T1.BuyerName', 'buyer': 'T1.BuyerName', '客戶': 'T1.BuyerName',
445
+ '申請廠商': 'T1.ApplicantName', '申請方': 'T1.ApplicantName', 'applicant': 'T1.ApplicantName',
446
+ '付款廠商': 'T1.InvoiceToName', '付款方': 'T1.InvoiceToName', 'invoiceto': 'T1.InvoiceToName',
447
+ }
448
+ column_name = next((v for k, v in ENTITY_MAP.items() if k in entity_keyword), None)
449
+
450
+ if column_name:
451
+ self._log(f"🔄 檢測到【Top {limit} {entity_keyword} 營收】查詢意圖,啟用模板。", "INFO")
452
+ fixed_sql = f"""
453
+ SELECT {column_name} AS entity, SUM(T2.LocalAmount) AS total_revenue
454
+ FROM TSR53SampleDescription AS T1
455
+ JOIN TSR53Invoice AS T2 ON T1.JobNo = T2.JobNo
456
+ WHERE T2.LocalAmount > 0
457
+ GROUP BY entity
458
+ ORDER BY total_revenue DESC
459
+ LIMIT {limit};
460
+ """
461
+ fixes_applied.append(f"模板覆寫: Top {limit} {entity_keyword} 營收查詢")
462
+
463
+ # --- 意圖 2: 查詢特定實體的報告數量 (包含 Pass/Fail 等狀態) ---
464
+ # 匹配 "買家 ABC 有幾份 Fail 的報告", "申請方 XYZ 的 Pass 報告數量"
465
+ elif '報告' in q_lower and ('數量' in q_lower or '幾份' in q_lower or 'count' in q_lower):
466
+ ENTITY_TO_COLUMN_MAP = {
467
+ '買家': 'T1.BuyerName', 'buyer': 'T1.BuyerName', '客戶': 'T1.BuyerName',
468
+ '申請廠商': 'T1.ApplicantName', '申請方': 'T1.ApplicantName', 'applicant': 'T1.ApplicantName',
469
+ '付款廠商': 'T1.InvoiceToName', 'invoiceto': 'T1.InvoiceToName',
470
+ '代理商': 'T1.AgentName', 'agent': 'T1.AgentName',
471
+ }
472
+ entity_keywords_pattern = '|'.join(ENTITY_TO_COLUMN_MAP.keys())
473
+ dynamic_pattern = fr"({entity_keywords_pattern})\s*'\"?([a-zA-Z0-9\s&.-]+)'\"?"
474
+ entity_match = re.search(dynamic_pattern, question, re.IGNORECASE)
475
+
476
+ # 必須匹配到實體,且模型生成了錯誤 SQL (作為觸發器)
477
+ if entity_match and ('tsr53reportauthorization' in fixed_sql.lower() or 'testresult' in fixed_sql.lower()):
478
+ entity_type = entity_match.group(1).lower()
479
+ entity_name = entity_match.group(2).strip()
480
+ column_name = ENTITY_TO_COLUMN_MAP.get(entity_type)
481
+
482
+ # 確定報告狀態 (Fail/Pass)
483
+ status = "'Fail'"
484
+ if 'pass' in q_lower or '通過' in q_lower:
485
+ status = "'Pass'"
486
+
487
+ self._log(f"🔄 檢測到查詢【{entity_type} '{entity_name}' 的 {status} 報告數】意圖,啟用模板。", "INFO")
488
+ fixed_sql = f"""
489
+ SELECT COUNT(T1.JobNo) AS report_count
490
+ FROM TSR53SampleDescription AS T1
491
+ JOIN JobTimeline AS T2 ON T1.JobNo = T2.JobNo
492
+ WHERE {column_name} = '{entity_name}'
493
+ AND T1.OverallRating = {status}
494
+ AND strftime('%Y', T2.ReportAuthorization) = '2024';
495
+ """
496
+ fixes_applied.append(f"模板覆寫: 查詢 {entity_type}='{entity_name}' 的 {status} 報告數")
497
+
498
+ # --- 意圖 3: 計算平均處理時長 (Turnaround Time, TAT) ---
499
+ # 匹配 "平均處理時間", "LabIn 到 LabOut 平均多久", "TAT"
500
+ elif any(k in q_lower for k in ['平均', 'average']) and any(k in q_lower for k in ['時間', '時長', '多久', '天', 'tat', 'turnaround']):
501
+ # 預設計算從 LabIn 到 ReportAuthorization 的總時長
502
+ start_col, end_col = 'T2.LabIn', 'T2.ReportAuthorization'
503
+ log_msg = "總流程平均時長 (天)"
504
+ if 'labin' in q_lower and 'labout' in q_lower:
505
+ start_col, end_col = 'T2.LabIn', 'T2.LabOut'
506
+ log_msg = "實驗室平均處理時長 (天)"
507
+
508
+ self._log(f"🔄 檢測到【{log_msg}】查詢意圖,啟用模板。", "INFO")
509
+ # SQLite 中,JULIANDAY 用於精確計算天數差
510
+ fixed_sql = f"""
511
+ SELECT AVG(JULIANDAY({end_col}) - JULIANDAY({start_col})) AS average_tat_days
512
+ FROM JobTimeline AS T2
513
+ WHERE {start_col} IS NOT NULL AND {end_col} IS NOT NULL AND {end_col} > {start_col};
514
+ """
515
+ fixes_applied.append(f"模板覆寫: {log_msg} 查詢")
516
+
517
+ # 如果沒有任何模板被觸發,則進入常規修正流程
518
+ if not fixes_applied:
519
+ # ==============================================================================
520
+ # 第二層:常規修正流程 (Fallback Corrections)
521
+ # ==============================================================================
522
+ self._log("未觸發任何模板,執行常規修正流程...", "DEBUG")
523
+
524
+ # 步驟 2.1: SQL 方言修正
525
+ dialect_corrections = {
526
+ r'YEAR\s*\(([^)]+)\)': r"strftime('%Y', \1)",
527
+ r"(strftime\('%Y',\s*[^)]+\))\s*=\s*(\d{4})": r"\1 = '\2'"
528
+ }
529
+ for pattern, replacement in dialect_corrections.items():
530
  if re.search(pattern, fixed_sql, re.IGNORECASE):
531
  fixed_sql = re.sub(pattern, replacement, fixed_sql, flags=re.IGNORECASE)
532
+ fixes_applied.append(f"修正方言: {pattern}")
533
+
534
+ # 步驟 2.2: Schema 名稱修正 (一個全面的字典)
535
+ schema_corrections = {
536
+ # --- 常見幻覺表 ---
537
+ 'TSR53ReportAuthorization': 'TSR53SampleDescription',
538
+ 'TSR53TestResult': 'TSR53SampleDescription',
539
+ 'Customer': 'TSR53SampleDescription', 'Customers': 'TSR53SampleDescription',
540
+ 'Invoice': 'TSR53Invoice', 'Invoices': 'TSR53Invoice',
541
+ 'Job': 'JobTimeline', 'Jobs': 'JobsInProgress',
542
+
543
+ # --- 常見幻覺或錯誤欄位 ---
544
+ 'ReportAuthorizationDate': 'ReportAuthorization',
545
+ 'TestResult': 'OverallRating', 'Rating': 'OverallRating',
546
+ 'CustomerName': 'BuyerName', # 優先使用 BuyerName 作為通用客戶名
547
+ 'InvoiceTo': 'InvoiceToName',
548
+ 'Applicant': 'ApplicantName',
549
+ 'Agent': 'AgentName',
550
+ 'JobNumber': 'JobNo',
551
+ 'CreationDate': 'JobCreation', 'CreateDate': 'JobCreation',
552
+ 'CompletedDate': 'ReportAuthorization',
553
+ 'Amount': 'LocalAmount', # 優先使用 LocalAmount 作為金額
554
+ 'Price': 'LocalAmount',
555
+ 'Lab': 'LabGroup'
556
+ }
557
+ for wrong, correct in schema_corrections.items():
558
+ pattern = r'\b' + re.escape(wrong) + r'\b'
559
+ if re.search(pattern, fixed_sql, re.IGNORECASE):
560
+ fixed_sql = re.sub(pattern, correct, fixed_sql, flags=re.IGNORECASE)
561
+ fixes_applied.append(f"映射 Schema: '{wrong}' -> '{correct}'")
562
+
563
+ # 步驟 2.3: 基礎邏輯意圖修正
564
+ if any(kw in q_lower for kw in ['幾份', '多少', 'how many', 'count', '數量']) and 'select ' in fixed_sql.lower() and 'count' not in fixed_sql.lower():
565
+ # 僅在不是分組查詢時替換,避免破壞 GROUP BY
566
+ if 'group by' not in fixed_sql.lower():
567
+ fixed_sql = re.sub(r'SELECT\s+.*?FROM', 'SELECT COUNT(*) FROM', fixed_sql, count=1, flags=re.IGNORECASE)
568
+ fixes_applied.append("修正邏輯: 補全 COUNT(*)")
569
+
570
+ # ==============================================================================
571
+ # 第三層:清理與完成 (Finalization)
572
+ # ==============================================================================
573
  fixed_sql = fixed_sql.strip()
574
  if not fixed_sql.endswith(';'):
575
  fixed_sql += ';'
 
576
  fixed_sql = re.sub(r'\s+', ' ', fixed_sql).strip()
577
 
578
  if fixes_applied:
 
582
  self._log(f" - 應用規則: {fix}", "DEBUG")
583
  self._log(f" - 修正後 SQL: {fixed_sql}", "INFO")
584
  else:
585
+ self._log("✅ SQL 驗證通過,無需常規修正。", "INFO")
586
 
587
  return fixed_sql
588