Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -147,16 +147,21 @@ class TextToSQLSystem:
|
|
| 147 |
# 檢查內存狀況
|
| 148 |
self._log(check_memory_usage())
|
| 149 |
|
| 150 |
-
# 1.
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
|
| 161 |
# 2. 載入數據庫結構
|
| 162 |
self.schema = self._load_schema()
|
|
@@ -263,7 +268,8 @@ class TextToSQLSystem:
|
|
| 263 |
temperature=0.1,
|
| 264 |
top_p=0.9,
|
| 265 |
echo=False,
|
| 266 |
-
|
|
|
|
| 267 |
)
|
| 268 |
elapsed = (datetime.now() - start_ts).total_seconds()
|
| 269 |
self._log(f"推論耗時: {elapsed:.2f}s", "DEBUG")
|
|
@@ -311,6 +317,10 @@ class TextToSQLSystem:
|
|
| 311 |
"""編碼文本為嵌入向量"""
|
| 312 |
if isinstance(texts, str):
|
| 313 |
texts = [texts]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
inputs = self.embed_tokenizer(texts, padding=True, truncation=True,
|
| 315 |
return_tensors="pt", max_length=512)
|
| 316 |
# 移動到對應設備
|
|
@@ -641,6 +651,19 @@ SELECT
|
|
| 641 |
'group_by': [], 'order_by': [], 'log_parts': []
|
| 642 |
}
|
| 643 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 644 |
# 動作意圖:count / list
|
| 645 |
if any(kw in q_lower for kw in ['幾份', '份數', '份数', '多少', '數量', '總數', 'how many', 'count']):
|
| 646 |
intents['action'] = 'count'
|
|
|
|
| 147 |
# 檢查內存狀況
|
| 148 |
self._log(check_memory_usage())
|
| 149 |
|
| 150 |
+
# 1. 嵌入模型(在禁用索引時略過以節省記憶體)
|
| 151 |
+
if ENABLE_INDEX:
|
| 152 |
+
self._log(f"載入嵌入模型: {embed_model_name}")
|
| 153 |
+
self.embed_tokenizer = AutoTokenizer.from_pretrained(embed_model_name)
|
| 154 |
+
self.embed_model = AutoModel.from_pretrained(embed_model_name)
|
| 155 |
+
try:
|
| 156 |
+
self.embed_model.to(self.embed_device)
|
| 157 |
+
self._log(f"嵌入模型設備: {self.embed_device}")
|
| 158 |
+
except Exception as e:
|
| 159 |
+
self._log(f"將嵌入模型移動到設備失敗: {e}", "WARNING")
|
| 160 |
+
self.embed_device = "cpu"
|
| 161 |
+
else:
|
| 162 |
+
self.embed_tokenizer = None
|
| 163 |
+
self.embed_model = None
|
| 164 |
+
self._log("ENABLE_INDEX=0,略過嵌入模型載入以節省記憶體")
|
| 165 |
|
| 166 |
# 2. 載入數據庫結構
|
| 167 |
self.schema = self._load_schema()
|
|
|
|
| 268 |
temperature=0.1,
|
| 269 |
top_p=0.9,
|
| 270 |
echo=False,
|
| 271 |
+
# 避免在分號處截斷
|
| 272 |
+
stop=["```", "\n\n", "</s>"],
|
| 273 |
)
|
| 274 |
elapsed = (datetime.now() - start_ts).total_seconds()
|
| 275 |
self._log(f"推論耗時: {elapsed:.2f}s", "DEBUG")
|
|
|
|
| 317 |
"""編碼文本為嵌入向量"""
|
| 318 |
if isinstance(texts, str):
|
| 319 |
texts = [texts]
|
| 320 |
+
if (self.embed_model is None) or (self.embed_tokenizer is None):
|
| 321 |
+
# 在禁用索引情況下不應被呼叫;保險處理
|
| 322 |
+
self._log("嵌入模型未載入(ENABLE_INDEX=0),_encode_texts 被略過。", "WARNING")
|
| 323 |
+
return torch.empty((len(texts), 384)) # 回傳空張量佔位
|
| 324 |
inputs = self.embed_tokenizer(texts, padding=True, truncation=True,
|
| 325 |
return_tensors="pt", max_length=512)
|
| 326 |
# 移動到對應設備
|
|
|
|
| 651 |
'group_by': [], 'order_by': [], 'log_parts': []
|
| 652 |
}
|
| 653 |
|
| 654 |
+
# 先處理多年份比較:如 "2021 與 2022 比較"、"2021年跟2022年對比"
|
| 655 |
+
years = re.findall(r"(20\d{2})\s*年?", q)
|
| 656 |
+
is_compare = re.search(r"比較|對比|對照|compare|versus|vs\.?", q)
|
| 657 |
+
if len(set(years)) >= 2 and is_compare:
|
| 658 |
+
ys = sorted(set(years))[:4]
|
| 659 |
+
want_items = ("測試項目" in q) or ("item" in q_lower)
|
| 660 |
+
select_expr = "COUNT(jip.ItemCode) AS item_count" if want_items else "COUNT(DISTINCT jt.JobNo) AS report_count"
|
| 661 |
+
join_items = "JOIN JobItemsInProgress AS jip ON jt.JobNo = jip.JobNo" if want_items else ""
|
| 662 |
+
where_years = " OR ".join([f"strftime('%Y', jt.ReportAuthorization) = '{y}'" for y in ys])
|
| 663 |
+
template = f"SELECT strftime('%Y', jt.ReportAuthorization) AS 年份, {select_expr} FROM JobTimeline AS jt {join_items} WHERE jt.ReportAuthorization IS NOT NULL AND ({where_years}) GROUP BY 年份 ORDER BY 年份;"
|
| 664 |
+
self._log(f"🔄 多年份比較模板: years={','.join(ys)} items={want_items}")
|
| 665 |
+
return self._finalize_sql(template, f"模板覆寫: {','.join(ys)} 年比較")
|
| 666 |
+
|
| 667 |
# 動作意圖:count / list
|
| 668 |
if any(kw in q_lower for kw in ['幾份', '份數', '份数', '多少', '數量', '總數', 'how many', 'count']):
|
| 669 |
intents['action'] = 'count'
|