Paul720810 committed on
Commit
b69d84b
·
verified ·
1 Parent(s): afb724a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +326 -93
app.py CHANGED
@@ -66,26 +66,53 @@ def analyze_question_type(question: str) -> Dict:
66
  analysis = {
67
  "type": "unknown",
68
  "keywords": [],
69
- "has_count": "多少" in question_lower or "幾個" in question_lower or "數量" in question_lower,
70
- "has_date": "時間" in question_lower or "日期" in question_lower or "月份" in question_lower or "年" in question_lower,
71
- "has_group": "每" in question_lower or "各" in question_lower or "分組" in question_lower,
72
  "specific_intent": "general_query" # 新增:具體意圖,預設為通用查詢
73
  }
74
 
75
- # **更精確的意圖識別**
76
- if "每月" in question_lower and ("完成" in question_lower or "報告" in question_lower or "工作單" in question_lower):
77
  analysis["specific_intent"] = "monthly_completion_count"
78
  analysis["type"] = "time_series"
79
- elif ("評級" in question_lower or "pass" in question_lower or "fail" in question_lower) and ("統計" in question_lower or "分佈" in question_lower or "多少" in question_lower):
80
  analysis["specific_intent"] = "rating_distribution"
81
  analysis["type"] = "statistics"
82
- elif "金額" in question_lower and ("最高" in question_lower or "top" in question_lower or "排名" in question_lower):
83
  analysis["specific_intent"] = "amount_ranking"
84
  analysis["type"] = "ranking"
85
- elif ("公司" in question_lower or "客戶" in question_lower or "申請方" in question_lower) and ("統計" in question_lower or "數量" in question_lower or "排名" in question_lower):
86
  analysis["specific_intent"] = "company_statistics"
87
  analysis["type"] = "statistics"
88
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  return analysis
90
 
91
  # ==================== 完整數據加載模塊 ====================
@@ -112,52 +139,100 @@ class CompleteDataLoader:
112
  user_content = item['messages'][0]['content']
113
  assistant_content = item['messages'][1]['content']
114
 
115
- # 改進的問題提取邏輯
 
 
 
116
  question_match = re.search(r'指令:\s*(.*?)(?:\n|$)', user_content)
117
  if question_match:
118
  question = question_match.group(1).strip()
119
- else:
120
- # 如果沒有找到「指令:」格式,嘗試直接使用內容
 
 
 
 
 
 
 
121
  question = user_content.strip()
122
 
123
- # 改進的SQL提取邏輯
 
 
 
124
  sql_match = re.search(r'SQL查詢:\s*(.*?)(?:\n|$)', assistant_content, re.DOTALL)
125
  if sql_match:
126
  sql_query = sql_match.group(1).strip()
127
- else:
128
- # 如果沒有找到「SQL查詢:」格式,嘗試提取SQL代碼塊
 
129
  sql_block_match = re.search(r'```sql\s*(.*?)\s*```', assistant_content, re.DOTALL)
130
  if sql_block_match:
131
  sql_query = sql_block_match.group(1).strip()
132
- else:
133
- sql_query = assistant_content.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
  # 清理SQL查詢
136
- sql_query = re.sub(r'```sql|```', '', sql_query).strip()
 
 
 
137
 
138
- # 驗證數據質量
139
  if not question or len(question.strip()) < 3:
140
  skipped_reasons["empty_question"] += 1
141
  continue
142
 
143
- if not sql_query or len(sql_query.strip()) < 10:
144
  skipped_reasons["empty_sql"] += 1
145
  continue
146
 
147
- # 基本SQL驗證
148
- if "SELECT" not in sql_query.upper():
 
149
  skipped_reasons["invalid_format"] += 1
150
  continue
151
 
152
  self.questions.append(question)
153
  self.sql_answers.append(sql_query)
154
  successful_loads += 1
 
 
 
 
 
 
 
155
  else:
156
  skipped_reasons["invalid_format"] += 1
157
 
158
  except Exception as e:
159
  skipped_reasons["parse_error"] += 1
160
- if idx < 5: # 只顯示前5個錯誤
161
  print(f"跳過第 {idx} 項資料,錯誤: {e}")
162
  continue
163
 
@@ -253,21 +328,109 @@ class CompleteTextToSQLSystem:
253
  year_match = re.search(r'(\d{4})', text)
254
  return year_match.group(1) if year_match else datetime.now().strftime('%Y')
255
 
256
- def generate_sql_from_question(self, question: str, analysis: Dict) -> str:
257
- """通用SQL生成器 (作為最終備用)"""
258
- # 此函數現在作為無法識別具體意圖時的通用後備方案
259
- return f"""-- 通用查詢範本
260
- SELECT
261
- JobNo as 工作單號,
262
- ApplicantName as 申請方,
263
- OverallRating as 評級
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  FROM TSR53SampleDescription
 
265
  LIMIT 20;"""
266
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  def intelligent_repair_sql(self, user_question: str, similar_question: str) -> str:
268
- """智能修復SQL - 基於當前使用者問題的意圖"""
269
  analysis = analyze_question_type(user_question)
270
  intent = analysis["specific_intent"]
 
271
 
272
  if similar_question != "無相似問題":
273
  comment = f"-- 根據類似問題 '{similar_question}' (原SQL無效) 進行智能修復\n"
@@ -277,75 +440,131 @@ LIMIT 20;"""
277
  if intent == "monthly_completion_count":
278
  year = self.extract_year(user_question)
279
  return comment + f"""-- 查詢 {year} 年每月完成的工作單數量
280
- SELECT
281
- strftime('%Y-%m', jt.ReportAuthorization) as 月份,
282
- COUNT(*) as 完成數量
283
- FROM JobTimeline jt
284
- WHERE strftime('%Y', jt.ReportAuthorization) = '{year}'
285
- AND jt.ReportAuthorization IS NOT NULL
286
- GROUP BY strftime('%Y-%m', jt.ReportAuthorization)
287
- ORDER BY 月份;"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
 
289
  elif intent == "rating_distribution":
290
  return comment + """-- 查詢評級分佈統計
291
- SELECT
292
- OverallRating as 評級,
293
- COUNT(*) as 數量,
294
- ROUND(COUNT(*) * 100.0 / (
295
- SELECT COUNT(*)
296
- FROM TSR53SampleDescription
297
- WHERE OverallRating IS NOT NULL
298
- ), 2) as 百分比
299
- FROM TSR53SampleDescription
300
- WHERE OverallRating IS NOT NULL
301
- GROUP BY OverallRating
302
- ORDER BY 數量 DESC;"""
303
 
304
  elif intent == "amount_ranking":
305
  return comment + """-- 查詢工作單金額排名
306
- WITH JobTotalAmount AS (
307
- SELECT JobNo, SUM(LocalAmount) AS TotalAmount
308
- FROM (
309
- SELECT DISTINCT JobNo, InvoiceCreditNoteNo, LocalAmount
310
- FROM TSR53Invoice
311
- WHERE LocalAmount IS NOT NULL
312
- )
313
- GROUP BY JobNo
314
  )
315
- SELECT
316
- jta.JobNo as 工作單號,
317
- sd.ApplicantName as 申請方,
318
- jta.TotalAmount as 總金額
319
- FROM JobTotalAmount jta
320
- JOIN TSR53SampleDescription sd ON sd.JobNo = jta.JobNo
321
- WHERE sd.ApplicantName IS NOT NULL
322
- ORDER BY jta.TotalAmount DESC
323
- LIMIT 10;"""
 
 
324
 
325
  elif intent == "company_statistics":
326
  return comment + """-- 查詢申請方工作單統計
327
- SELECT
328
- ApplicantName as 申請方名稱,
329
- COUNT(*) as 工作單數量
330
- FROM TSR53SampleDescription
331
- WHERE ApplicantName IS NOT NULL
332
- GROUP BY ApplicantName
333
- ORDER BY 工作單數量 DESC
334
- LIMIT 20;"""
335
 
336
  # 通用查詢模板
337
  return comment + """-- 通用查詢範本
338
- SELECT
339
- JobNo as 工作單號,
340
- ApplicantName as 申請方,
341
- BuyerName as 買方,
342
- OverallRating as 評級
343
- FROM TSR53SampleDescription
344
- WHERE ApplicantName IS NOT NULL
345
- LIMIT 20;"""
346
 
347
  def generate_sql(self, user_question: str) -> Tuple[str, str]:
348
- """主流程:生成SQL查詢 (改進版本)"""
349
  log_messages = [f"⏰ {get_current_time()} 開始處理問題: '{user_question[:50]}...'"]
350
 
351
  if not user_question or not user_question.strip():
@@ -366,12 +585,13 @@ LIMIT 20;"""
366
 
367
  log_messages.append(f"🔍 找到相似問題 (相似度: {similarity_score:.3f}): '{similar_question[:50]}...'")
368
 
369
- if similarity_score > SIMILARITY_THRESHOLD:
 
370
  original_sql = self.data_loader.sql_answers[corpus_id]
371
  validation = validate_sql(original_sql)
372
 
373
  if validation["valid"] and validation["is_safe"]:
374
- log_messages.append("✅ 相似度高且原SQL有效,直接採用")
375
  return original_sql, "\n".join(log_messages)
376
  else:
377
  log_messages.append(f"⚠️ 原SQL有問題: {', '.join(validation['issues'])}")
@@ -380,13 +600,26 @@ LIMIT 20;"""
380
  log_messages.append("✅ 智能修復完成")
381
  return repaired_sql, "\n".join(log_messages)
382
  else:
383
- log_messages.append(f"📉 相似度 ({similarity_score:.3f}) 低於閾值 ({SIMILARITY_THRESHOLD})")
 
 
 
 
 
 
 
 
 
 
 
 
384
 
385
- log_messages.append("🤖 未找到合適範本,使用意圖生成")
386
- intelligent_sql = self.intelligent_repair_sql(user_question, "無相似問題")
387
- log_messages.append("✅ 智能生成完成")
 
388
 
389
- return intelligent_sql, "\n".join(log_messages)
390
 
391
  # ==================== 初始化系統 ====================
392
  if HF_TOKEN is None:
 
66
  analysis = {
67
  "type": "unknown",
68
  "keywords": [],
69
+ "has_count": "多少" in question_lower or "幾個" in question_lower or "數量" in question_lower or "count" in question_lower,
70
+ "has_date": "時間" in question_lower or "日期" in question_lower or "月份" in question_lower or "年" in question_lower or "yesterday" in question_lower or "昨天" in question_lower,
71
+ "has_group": "每" in question_lower or "各" in question_lower or "分組" in question_lower or "group" in question_lower,
72
  "specific_intent": "general_query" # 新增:具體意圖,預設為通用查詢
73
  }
74
 
75
+ # **更精確的意圖識別 - 增加更多模式**
76
+ if ("每月" in question_lower or "monthly" in question_lower) and ("完成" in question_lower or "completed" in question_lower or "報告" in question_lower or "工作單" in question_lower):
77
  analysis["specific_intent"] = "monthly_completion_count"
78
  analysis["type"] = "time_series"
79
+ elif ("評級" in question_lower or "pass" in question_lower or "fail" in question_lower or "rating" in question_lower) and ("統計" in question_lower or "分佈" in question_lower or "多少" in question_lower or "distribution" in question_lower):
80
  analysis["specific_intent"] = "rating_distribution"
81
  analysis["type"] = "statistics"
82
+ elif ("金額" in question_lower or "amount" in question_lower or "價格" in question_lower or "費用" in question_lower) and ("最高" in question_lower or "top" in question_lower or "排名" in question_lower or "highest" in question_lower):
83
  analysis["specific_intent"] = "amount_ranking"
84
  analysis["type"] = "ranking"
85
+ elif ("公司" in question_lower or "客戶" in question_lower or "申請方" in question_lower or "company" in question_lower or "client" in question_lower) and ("統計" in question_lower or "數量" in question_lower or "排名" in question_lower or "count" in question_lower):
86
  analysis["specific_intent"] = "company_statistics"
87
  analysis["type"] = "statistics"
88
+ elif ("實驗室" in question_lower or "lab" in question_lower or "組" in question_lower) and ("完成" in question_lower or "completed" in question_lower):
89
+ analysis["specific_intent"] = "lab_completion"
90
+ analysis["type"] = "lab_specific"
91
+ elif ("異常" in question_lower or "超過" in question_lower or "延遲" in question_lower or "slow" in question_lower or "long" in question_lower):
92
+ analysis["specific_intent"] = "anomaly_detection"
93
+ analysis["type"] = "analysis"
94
+ elif ("買方" in question_lower or "buyer" in question_lower) and ("完成" in question_lower or "completed" in question_lower):
95
+ analysis["specific_intent"] = "buyer_specific"
96
+ analysis["type"] = "buyer_analysis"
97
+ elif ("耗時" in question_lower or "時間" in question_lower or "duration" in question_lower or "time" in question_lower) and ("最久" in question_lower or "longest" in question_lower):
98
+ analysis["specific_intent"] = "duration_analysis"
99
+ analysis["type"] = "time_analysis"
100
+
101
+ # 提取關鍵詞以供後續使用
102
+ keywords = []
103
+ # 公司/品牌名稱
104
+ brand_patterns = [r"puma", r"under armour", r"skechers", r"nike", r"adidas"]
105
+ for pattern in brand_patterns:
106
+ if re.search(pattern, question_lower):
107
+ keywords.append(pattern.replace(" ", "_"))
108
+
109
+ # 實驗室組別
110
+ lab_patterns = [r"[a-e]組", r"ta", r"tb", r"tc", r"td", r"te"]
111
+ for pattern in lab_patterns:
112
+ if re.search(pattern, question_lower):
113
+ keywords.append(pattern)
114
+
115
+ analysis["keywords"] = keywords
116
  return analysis
117
 
118
  # ==================== 完整數據加載模塊 ====================
 
139
  user_content = item['messages'][0]['content']
140
  assistant_content = item['messages'][1]['content']
141
 
142
+ # 多種問題提取策略
143
+ question = None
144
+
145
+ # 策略1: 標準「指令:」格式
146
  question_match = re.search(r'指令:\s*(.*?)(?:\n|$)', user_content)
147
  if question_match:
148
  question = question_match.group(1).strip()
149
+
150
+ # 策略2: 如果沒找到,嘗試提取最後一行非空內容
151
+ if not question:
152
+ lines = [line.strip() for line in user_content.split('\n') if line.strip()]
153
+ if lines:
154
+ question = lines[-1]
155
+
156
+ # 策略3: 直接使用整個內容(作為最後手段)
157
+ if not question:
158
  question = user_content.strip()
159
 
160
+ # 多種SQL提取策略
161
+ sql_query = None
162
+
163
+ # 策略1: 標準「SQL查詢:」格式
164
  sql_match = re.search(r'SQL查詢:\s*(.*?)(?:\n|$)', assistant_content, re.DOTALL)
165
  if sql_match:
166
  sql_query = sql_match.group(1).strip()
167
+
168
+ # 策略2: SQL代碼塊格式
169
+ if not sql_query:
170
  sql_block_match = re.search(r'```sql\s*(.*?)\s*```', assistant_content, re.DOTALL)
171
  if sql_block_match:
172
  sql_query = sql_block_match.group(1).strip()
173
+
174
+ # 策略3: 查找任何包含 SELECT 的行
175
+ if not sql_query:
176
+ for line in assistant_content.split('\n'):
177
+ if 'SELECT' in line.upper():
178
+ # 從這行開始提取到最後或到下個非SQL行
179
+ sql_lines = []
180
+ found_start = False
181
+ for l in assistant_content.split('\n'):
182
+ if 'SELECT' in l.upper():
183
+ found_start = True
184
+ if found_start:
185
+ if l.strip() and not l.strip().startswith('```'):
186
+ sql_lines.append(l)
187
+ elif l.strip() == '' and sql_lines:
188
+ continue
189
+ elif found_start and len(sql_lines) > 0:
190
+ break
191
+ if sql_lines:
192
+ sql_query = '\n'.join(sql_lines).strip()
193
+ break
194
+
195
+ # 策略4: 如果還是沒找到,使用整個assistant內容
196
+ if not sql_query:
197
+ sql_query = assistant_content.strip()
198
 
199
  # 清理SQL查詢
200
+ if sql_query:
201
+ sql_query = re.sub(r'```sql|```', '', sql_query).strip()
202
+ sql_query = re.sub(r'^思考過程:.*?\n', '', sql_query, flags=re.MULTILINE).strip()
203
+ sql_query = re.sub(r'^SQL查詢:\s*', '', sql_query, flags=re.MULTILINE).strip()
204
 
205
+ # 數據質量驗證(降低標準以提高利用率)
206
  if not question or len(question.strip()) < 3:
207
  skipped_reasons["empty_question"] += 1
208
  continue
209
 
210
+ if not sql_query or len(sql_query.strip()) < 5: # 降低最小長度要求
211
  skipped_reasons["empty_sql"] += 1
212
  continue
213
 
214
+ # 更寬鬆的SQL驗證
215
+ sql_upper = sql_query.upper()
216
+ if "SELECT" not in sql_upper and "WITH" not in sql_upper:
217
  skipped_reasons["invalid_format"] += 1
218
  continue
219
 
220
  self.questions.append(question)
221
  self.sql_answers.append(sql_query)
222
  successful_loads += 1
223
+
224
+ # 調試:顯示前幾個成功案例
225
+ if successful_loads <= 3:
226
+ print(f"成功案例 {successful_loads}:")
227
+ print(f" 問題: {question[:50]}...")
228
+ print(f" SQL: {sql_query[:50]}...")
229
+
230
  else:
231
  skipped_reasons["invalid_format"] += 1
232
 
233
  except Exception as e:
234
  skipped_reasons["parse_error"] += 1
235
+ if idx < 3: # 只顯示前3個錯誤
236
  print(f"跳過第 {idx} 項資料,錯誤: {e}")
237
  continue
238
 
 
328
  year_match = re.search(r'(\d{4})', text)
329
  return year_match.group(1) if year_match else datetime.now().strftime('%Y')
330
 
331
def call_free_cloud_ai(self, user_question: str) -> str:
    """Ask a free hosted model for SQL, falling back to local generation.

    Builds a prompt from the loaded schema and the user's question, then
    tries each model in turn through the Hugging Face Inference API. The
    first response that yields a SQL statement passing ``validate_sql`` is
    returned, prefixed with a comment naming the model.

    Args:
        user_question: The natural-language question to translate.

    Returns:
        A SQL string. When no model produces usable SQL, or anything goes
        wrong while preparing the request, the result of
        ``self.generate_fallback_sql(user_question)`` is returned instead.
    """
    try:
        # Build a prompt that carries the schema alongside the question.
        schema_info = json.dumps(self.data_loader.schema_data, ensure_ascii=False, indent=2)

        prompt = f"""你是一個SQL專家。根據以下資料庫schema和用戶問題,生成準確的SQL查詢。

資料庫Schema:
{schema_info}

用戶問題: {user_question}

請分析問題並生成對應的SQL查詢。只回傳SQL代碼,不要額外解釋。

SQL查詢:"""

        # The token is optional; without it the request is sent anonymously.
        headers = {"Authorization": f"Bearer {self.hf_token}"} if self.hf_token else {}

        # Candidate models, tried in order until one yields usable SQL.
        models_to_try = [
            "microsoft/DialoGPT-medium",  # conversational model
            "google/flan-t5-large",       # instruction-following model
            "bigscience/bloom-560m",      # general text generation model
        ]

        # Accept a plain SELECT or a CTE ("WITH name AS ("), matching what the
        # data loader treats as SQL. The statement ends at the first semicolon,
        # or at the end of the text when the model omitted one.
        sql_pattern = re.compile(
            r'\b(?:WITH\s+\w+\s+AS\s*\(|SELECT\b).*?(?:;|\Z)',
            re.DOTALL | re.IGNORECASE,
        )

        for model in models_to_try:
            try:
                url = f"https://api-inference.huggingface.co/models/{model}"
                response = requests.post(
                    url,
                    headers=headers,
                    json={"inputs": prompt, "parameters": {"max_length": 512, "temperature": 0.1}},
                    timeout=30
                )

                if response.status_code != 200:
                    continue

                result = response.json()
                if not (isinstance(result, list) and result and isinstance(result[0], dict)):
                    continue

                generated_text = result[0].get('generated_text', '') or ''

                # Generation endpoints may echo the prompt before the answer.
                # Drop it so SQL-looking text in the schema or in the user's
                # question is never mistaken for the model's output.
                if generated_text.startswith(prompt):
                    generated_text = generated_text[len(prompt):]

                sql_match = sql_pattern.search(generated_text)
                if not sql_match:
                    continue

                candidate = sql_match.group(0).strip()

                # Model output is untrusted: apply the same gate that
                # generate_sql applies to SQL taken from the corpus.
                validation = validate_sql(candidate)
                if validation["valid"] and validation["is_safe"]:
                    return f"-- 由免費雲端AI ({model}) 生成\n{candidate}"

                print(f"模型 {model} 生成的SQL未通過驗證")

            except Exception as e:
                # Deliberate best effort: any failure on one model simply
                # moves on to the next candidate.
                print(f"模型 {model} 調用失敗: {e}")
                continue

        # Every model failed or produced nothing usable.
        return self.generate_fallback_sql(user_question)

    except Exception as e:
        # Top-level boundary: never let the cloud path break SQL generation.
        print(f"雲端AI調用失敗: {e}")
        return self.generate_fallback_sql(user_question)
387
+
388
def generate_fallback_sql(self, user_question: str) -> str:
    """Return a canned SQL query chosen by simple keyword matching.

    Used as the last resort when no other generation strategy produced a
    result. The question is lower-cased and checked for keywords in a
    fixed order: job/work-order first, then rating, then amount. The first
    matching topic wins; a generic listing query is returned otherwise.

    Args:
        user_question: The natural-language question to answer.

    Returns:
        A SQL string starting with a comment that names the fallback used.
    """
    # Lower-case once so the English keywords match case-insensitively.
    question_lower = user_question.lower()

    if "工作單" in question_lower or "job" in question_lower:
        # Questions asking "how many" get a count; anything else gets a list.
        if "數量" in question_lower or "多少" in question_lower:
            return """-- 後備方案:工作單數量查詢
SELECT COUNT(*) as 工作單總數
FROM TSR53SampleDescription
WHERE ApplicantName IS NOT NULL;"""
        else:
            return """-- 後備方案:工作單列表查詢
SELECT JobNo, ApplicantName, BuyerName, OverallRating
FROM TSR53SampleDescription
WHERE ApplicantName IS NOT NULL
LIMIT 20;"""

    elif "評級" in question_lower or "rating" in question_lower:
        return """-- 後備方案:評級統計查詢
SELECT OverallRating, COUNT(*) as 數量
FROM TSR53SampleDescription
WHERE OverallRating IS NOT NULL
GROUP BY OverallRating;"""

    elif "金額" in question_lower or "amount" in question_lower:
        return """-- 後備方案:金額統計查詢
SELECT JobNo, LocalAmount
FROM TSR53Invoice
WHERE LocalAmount IS NOT NULL
ORDER BY LocalAmount DESC
LIMIT 10;"""

    # No keyword matched: return the generic listing query.
    return """-- 後備方案:通用查詢
SELECT JobNo, ApplicantName, BuyerName
FROM TSR53SampleDescription
LIMIT 10;"""
428
+
429
  def intelligent_repair_sql(self, user_question: str, similar_question: str) -> str:
430
+ """智能修復SQL - 基於當前使用者問題的意圖 (擴展版本)"""
431
  analysis = analyze_question_type(user_question)
432
  intent = analysis["specific_intent"]
433
+ keywords = analysis["keywords"]
434
 
435
  if similar_question != "無相似問題":
436
  comment = f"-- 根據類似問題 '{similar_question}' (原SQL無效) 進行智能修復\n"
 
440
  if intent == "monthly_completion_count":
441
  year = self.extract_year(user_question)
442
  return comment + f"""-- 查詢 {year} 年每月完成的工作單數量
443
+ SELECT
444
+ strftime('%Y-%m', jt.ReportAuthorization) as 月份,
445
+ COUNT(*) as 完成數量
446
+ FROM JobTimeline jt
447
+ WHERE strftime('%Y', jt.ReportAuthorization) = '{year}'
448
+ AND jt.ReportAuthorization IS NOT NULL
449
+ GROUP BY strftime('%Y-%m', jt.ReportAuthorization)
450
+ ORDER BY 月份;"""
451
+
452
+ elif intent == "lab_completion":
453
+ # 實驗室特定查詢
454
+ lab_mapping = {"a組": "TA", "b組": "TB", "c組": "TC", "d組": "TD", "e組": "TE"}
455
+ lab_code = None
456
+ for chinese, code in lab_mapping.items():
457
+ if chinese in user_question.lower():
458
+ lab_code = code
459
+ break
460
+
461
+ if lab_code:
462
+ return comment + f"""-- 查詢{lab_code}實驗室完成的測試項目
463
+ SELECT COUNT(*) as 完成數量
464
+ FROM JobTimeline_{lab_code}
465
+ WHERE DATE(end_time) = DATE('now','-1 day');"""
466
+ else:
467
+ return comment + """-- 通用實驗室查詢
468
+ SELECT COUNT(*) as 總完成數量
469
+ FROM JobTimeline
470
+ WHERE ReportAuthorization IS NOT NULL;"""
471
+
472
+ elif intent == "buyer_specific":
473
+ # 買方特定查詢
474
+ buyer_name = "Unknown"
475
+ for keyword in keywords:
476
+ if keyword in ["puma", "under_armour", "skechers", "nike", "adidas"]:
477
+ buyer_name = keyword.replace("_", " ").title()
478
+ break
479
+
480
+ return comment + f"""-- 查詢買方 {buyer_name} 的已完成工作單
481
+ SELECT sd.JobNo, sd.BuyerName, jt.ReportAuthorization
482
+ FROM TSR53SampleDescription sd
483
+ JOIN JobTimeline jt ON jt.JobNo = sd.JobNo
484
+ WHERE sd.BuyerName LIKE '%{buyer_name}%'
485
+ AND jt.ReportAuthorization IS NOT NULL
486
+ ORDER BY jt.ReportAuthorization DESC;"""
487
+
488
+ elif intent == "duration_analysis":
489
+ return comment + """-- 查詢從 LabIn 到 LabOut 耗時最久的工作單
490
+ SELECT JobNo,
491
+ ROUND(julianday(LabOut) - julianday(LabIn), 2) AS 耗時天數
492
+ FROM JobTimeline
493
+ WHERE LabIn IS NOT NULL AND LabOut IS NOT NULL
494
+ ORDER BY 耗時天數 DESC
495
+ LIMIT 5;"""
496
+
497
+ elif intent == "anomaly_detection":
498
+ return comment + """-- 查詢從創建到授權超過 14 天的異常工單
499
+ SELECT JobNo,
500
+ ROUND(julianday(ReportAuthorization) - julianday(JobCreation), 2) AS 處理天數
501
+ FROM JobTimeline
502
+ WHERE JobCreation IS NOT NULL
503
+ AND ReportAuthorization IS NOT NULL
504
+ AND (julianday(ReportAuthorization) - julianday(JobCreation)) > 14
505
+ ORDER BY 處理天數 DESC
506
+ LIMIT 20;"""
507
 
508
  elif intent == "rating_distribution":
509
  return comment + """-- 查詢評級分佈統計
510
+ SELECT
511
+ OverallRating as 評級,
512
+ COUNT(*) as 數量,
513
+ ROUND(COUNT(*) * 100.0 / (
514
+ SELECT COUNT(*)
515
+ FROM TSR53SampleDescription
516
+ WHERE OverallRating IS NOT NULL
517
+ ), 2) as 百分比
518
+ FROM TSR53SampleDescription
519
+ WHERE OverallRating IS NOT NULL
520
+ GROUP BY OverallRating
521
+ ORDER BY 數量 DESC;"""
522
 
523
  elif intent == "amount_ranking":
524
  return comment + """-- 查詢工作單金額排名
525
+ WITH JobTotalAmount AS (
526
+ SELECT JobNo, SUM(LocalAmount) AS TotalAmount
527
+ FROM (
528
+ SELECT DISTINCT JobNo, InvoiceCreditNoteNo, LocalAmount
529
+ FROM TSR53Invoice
530
+ WHERE LocalAmount IS NOT NULL
 
 
531
  )
532
+ GROUP BY JobNo
533
+ )
534
+ SELECT
535
+ jta.JobNo as 工作單號,
536
+ sd.ApplicantName as 申請方,
537
+ jta.TotalAmount as 總金額
538
+ FROM JobTotalAmount jta
539
+ JOIN TSR53SampleDescription sd ON sd.JobNo = jta.JobNo
540
+ WHERE sd.ApplicantName IS NOT NULL
541
+ ORDER BY jta.TotalAmount DESC
542
+ LIMIT 10;"""
543
 
544
  elif intent == "company_statistics":
545
  return comment + """-- 查詢申請方工作單統計
546
+ SELECT
547
+ ApplicantName as 申請方名稱,
548
+ COUNT(*) as 工作單數量
549
+ FROM TSR53SampleDescription
550
+ WHERE ApplicantName IS NOT NULL
551
+ GROUP BY ApplicantName
552
+ ORDER BY 工作單數量 DESC
553
+ LIMIT 20;"""
554
 
555
  # 通用查詢模板
556
  return comment + """-- 通用查詢範本
557
+ SELECT
558
+ JobNo as 工作單號,
559
+ ApplicantName as 申請方,
560
+ BuyerName as 買方,
561
+ OverallRating as 評級
562
+ FROM TSR53SampleDescription
563
+ WHERE ApplicantName IS NOT NULL
564
+ LIMIT 20;"""
565
 
566
  def generate_sql(self, user_question: str) -> Tuple[str, str]:
567
+ """主流程:生成SQL查詢 (雲端AI增強版本)"""
568
  log_messages = [f"⏰ {get_current_time()} 開始處理問題: '{user_question[:50]}...'"]
569
 
570
  if not user_question or not user_question.strip():
 
585
 
586
  log_messages.append(f"🔍 找到相似問題 (相似度: {similarity_score:.3f}): '{similar_question[:50]}...'")
587
 
588
+ # 降低相似度閾值,增加匹配機會
589
+ if similarity_score > max(SIMILARITY_THRESHOLD - 0.1, 0.5):
590
  original_sql = self.data_loader.sql_answers[corpus_id]
591
  validation = validate_sql(original_sql)
592
 
593
  if validation["valid"] and validation["is_safe"]:
594
+ log_messages.append("✅ 相似度較高且原SQL有效,直接採用")
595
  return original_sql, "\n".join(log_messages)
596
  else:
597
  log_messages.append(f"⚠️ 原SQL有問題: {', '.join(validation['issues'])}")
 
600
  log_messages.append("✅ 智能修復完成")
601
  return repaired_sql, "\n".join(log_messages)
602
  else:
603
+ log_messages.append(f"📉 相似度 ({similarity_score:.3f}) 較低,嘗試其他方法")
604
+
605
+ # 3. 嘗試基於意圖的本地生成
606
+ if analysis["specific_intent"] != "general_query":
607
+ log_messages.append("🤖 使用意圖導向生成")
608
+ intelligent_sql = self.intelligent_repair_sql(user_question, "無相似問題")
609
+ validation = validate_sql(intelligent_sql)
610
+
611
+ if validation["valid"]:
612
+ log_messages.append("✅ 意圖導向生成成功")
613
+ return intelligent_sql, "\n".join(log_messages)
614
+ else:
615
+ log_messages.append("⚠️ 意圖導向生成結果有問題,嘗試雲端AI")
616
 
617
+ # 4. 調用免費雲端AI(針對未見過的問題)
618
+ log_messages.append("🌐 調用免費雲端AI處理未見過的問題...")
619
+ cloud_sql = self.call_free_cloud_ai(user_question)
620
+ log_messages.append("✅ 雲端AI回應完成")
621
 
622
+ return cloud_sql, "\n".join(log_messages)
623
 
624
  # ==================== 初始化系統 ====================
625
  if HF_TOKEN is None: