Paul720810 committed on
Commit
892956c
·
verified ·
1 Parent(s): 6e5e1b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -11
app.py CHANGED
@@ -147,16 +147,21 @@ class TextToSQLSystem:
147
  # 檢查內存狀況
148
  self._log(check_memory_usage())
149
 
150
- # 1. 載入嵌入模型
151
- self._log(f"載入嵌入模型: {embed_model_name}")
152
- self.embed_tokenizer = AutoTokenizer.from_pretrained(embed_model_name)
153
- self.embed_model = AutoModel.from_pretrained(embed_model_name)
154
- try:
155
- self.embed_model.to(self.embed_device)
156
- self._log(f"嵌入模型設備: {self.embed_device}")
157
- except Exception as e:
158
- self._log(f"將嵌入模型移動到設備失敗: {e}", "WARNING")
159
- self.embed_device = "cpu"
 
 
 
 
 
160
 
161
  # 2. 載入數據庫結構
162
  self.schema = self._load_schema()
@@ -263,7 +268,8 @@ class TextToSQLSystem:
263
  temperature=0.1,
264
  top_p=0.9,
265
  echo=False,
266
- stop=["```", ";", "\n\n", "</s>"],
 
267
  )
268
  elapsed = (datetime.now() - start_ts).total_seconds()
269
  self._log(f"推論耗時: {elapsed:.2f}s", "DEBUG")
@@ -311,6 +317,10 @@ class TextToSQLSystem:
311
  """編碼文本為嵌入向量"""
312
  if isinstance(texts, str):
313
  texts = [texts]
 
 
 
 
314
  inputs = self.embed_tokenizer(texts, padding=True, truncation=True,
315
  return_tensors="pt", max_length=512)
316
  # 移動到對應設備
@@ -641,6 +651,19 @@ SELECT
641
  'group_by': [], 'order_by': [], 'log_parts': []
642
  }
643
 
 
 
 
 
 
 
 
 
 
 
 
 
 
644
  # 動作意圖:count / list
645
  if any(kw in q_lower for kw in ['幾份', '份數', '份数', '多少', '數量', '總數', 'how many', 'count']):
646
  intents['action'] = 'count'
 
147
  # 檢查內存狀況
148
  self._log(check_memory_usage())
149
 
150
+ # 1. 嵌入模型(在禁用索引時略過以節省記憶體)
151
+ if ENABLE_INDEX:
152
+ self._log(f"載入嵌入模型: {embed_model_name}")
153
+ self.embed_tokenizer = AutoTokenizer.from_pretrained(embed_model_name)
154
+ self.embed_model = AutoModel.from_pretrained(embed_model_name)
155
+ try:
156
+ self.embed_model.to(self.embed_device)
157
+ self._log(f"嵌入模型設備: {self.embed_device}")
158
+ except Exception as e:
159
+ self._log(f"將嵌入模型移動到設備失敗: {e}", "WARNING")
160
+ self.embed_device = "cpu"
161
+ else:
162
+ self.embed_tokenizer = None
163
+ self.embed_model = None
164
+ self._log("ENABLE_INDEX=0,略過嵌入模型載入以節省記憶體")
165
 
166
  # 2. 載入數據庫結構
167
  self.schema = self._load_schema()
 
268
  temperature=0.1,
269
  top_p=0.9,
270
  echo=False,
271
+ # 避免在分號處截斷
272
+ stop=["```", "\n\n", "</s>"],
273
  )
274
  elapsed = (datetime.now() - start_ts).total_seconds()
275
  self._log(f"推論耗時: {elapsed:.2f}s", "DEBUG")
 
317
  """編碼文本為嵌入向量"""
318
  if isinstance(texts, str):
319
  texts = [texts]
320
+ if (self.embed_model is None) or (self.embed_tokenizer is None):
321
+ # 在禁用索引情況下不應被呼叫;保險處理
322
+ self._log("嵌入模型未載入(ENABLE_INDEX=0),_encode_texts 被略過。", "WARNING")
323
+ return torch.empty((len(texts), 384)) # 回傳空張量佔位
324
  inputs = self.embed_tokenizer(texts, padding=True, truncation=True,
325
  return_tensors="pt", max_length=512)
326
  # 移動到對應設備
 
651
  'group_by': [], 'order_by': [], 'log_parts': []
652
  }
653
 
654
+ # 先處理多年份比較:如 "2021 與 2022 比較"、"2021年跟2022年對比"
655
+ years = re.findall(r"(20\d{2})\s*年?", q)
656
+ is_compare = re.search(r"比較|對比|對照|compare|versus|vs\.?", q)
657
+ if len(set(years)) >= 2 and is_compare:
658
+ ys = sorted(set(years))[:4]
659
+ want_items = ("測試項目" in q) or ("item" in q_lower)
660
+ select_expr = "COUNT(jip.ItemCode) AS item_count" if want_items else "COUNT(DISTINCT jt.JobNo) AS report_count"
661
+ join_items = "JOIN JobItemsInProgress AS jip ON jt.JobNo = jip.JobNo" if want_items else ""
662
+ where_years = " OR ".join([f"strftime('%Y', jt.ReportAuthorization) = '{y}'" for y in ys])
663
+ template = f"SELECT strftime('%Y', jt.ReportAuthorization) AS 年份, {select_expr} FROM JobTimeline AS jt {join_items} WHERE jt.ReportAuthorization IS NOT NULL AND ({where_years}) GROUP BY 年份 ORDER BY 年份;"
664
+ self._log(f"🔄 多年份比較模板: years={','.join(ys)} items={want_items}")
665
+ return self._finalize_sql(template, f"模板覆寫: {','.join(ys)} 年比較")
666
+
667
  # 動作意圖:count / list
668
  if any(kw in q_lower for kw in ['幾份', '份數', '份数', '多少', '數量', '總數', 'how many', 'count']):
669
  intents['action'] = 'count'