Spaces:

Paul720810
/

Softline-SQL-Assistant

Sleeping

App Files Files Community

Paul720810 committed on Sep 5, 2025

Commit

85e2894

verified ·

1 Parent(s): 931be3f

Update app.py

Browse files

Files changed (1) hide show

app.py +94 -29

app.py CHANGED Viewed

@@ -89,41 +89,59 @@ class TextToSQLSystem:
         """載入 GGUF 模型並處理錯誤"""
         try:
             self._log("載入 GGUF 模型...")
             model_path = hf_hub_download(
                 repo_id=GGUF_REPO_ID,
                 filename=GGUF_FILENAME,
-                repo_type="dataset"
             )
-            # 檢查文件完整性
-            file_size = os.path.getsize(model_path)
-            expected_size = 986 * 1024 * 1024  # 986MB
-            if file_size != expected_size:
-                self._log(f"⚠️ 文件大小不匹配: {file_size} != {expected_size}", "WARNING")
-                # 重新下載
-                os.remove(model_path)
                 model_path = hf_hub_download(
                     repo_id=GGUF_REPO_ID,
                     filename=GGUF_FILENAME,
                     repo_type="dataset",
                     force_download=True
                 )
-            # 使用更兼容的參數
             self.llm = Llama(
                 model_path=model_path,
-                n_ctx=1024,
-                n_threads=max(2, os.cpu_count() - 1),  # 留一個核心給系統
-                n_batch=256,
-                verbose=True,  # 開啟詳細日誌
-                n_gpu_layers=0  # 強制使用CPU
             )
-            self._log("✅ GGUF 模型載入成功")
         except Exception as e:
-            self._log(f"❌ GGUF 模型載入失敗: {e}", "ERROR")
-            self._log("嘗試使用備用載入方式...")
-            self._load_gguf_model_fallback(model_path)
     def _load_gguf_model_fallback(self, model_path):
         """備用載入方式"""
         try:
@@ -320,29 +338,76 @@ class TextToSQLSystem:
         return prompt
     def huggingface_api_call(self, prompt: str) -> str:
-        """Generate text with the GGUF model.
-
-        Returns the stripped text of the first choice, the string
-        "模型未載入" when no model is loaded, or "生成失敗: <error>" when
-        generation raises.
-        """
         if self.llm is None:
-            return "模型未載入"
         try:
-            # Cap the prompt at 1800 characters before generation
-            if len(prompt) > 1800:
-                prompt = prompt[:1800] + "..."
             output = self.llm(
                 prompt,
-                max_tokens=256,
-                temperature=0.1,
-                top_p=0.9,
-                stop=["</s>", "```", ";", "\n\n"],
                 echo=False
             )
-            return output["choices"][0]["text"].strip()
         except Exception as e:
             self._log(f"❌ 生成失敗: {e}", "ERROR")
-            return f"生成失敗: {e}"
     def process_question(self, question: str) -> Tuple[str, str]:
         """處理使用者問題"""
         # 檢查緩存

         """載入 GGUF 模型並處理錯誤"""
         try:
             self._log("載入 GGUF 模型...")
+            # 強制重新下載模型
             model_path = hf_hub_download(
                 repo_id=GGUF_REPO_ID,
                 filename=GGUF_FILENAME,
+                repo_type="dataset",
+                force_download=True  # 強制重新下載
             )
+            # 使用驗證方法檢查檔案
+            if not self._validate_model_file(model_path):
+                self._log("❌ 模型檔案驗證失敗，嘗試重新下載", "ERROR")
+                # 刪除損壞的檔案並重新下載
+                if os.path.exists(model_path):
+                    os.remove(model_path)
                 model_path = hf_hub_download(
                     repo_id=GGUF_REPO_ID,
                     filename=GGUF_FILENAME,
                     repo_type="dataset",
                     force_download=True
                 )
+                # 再次驗證
+                if not self._validate_model_file(model_path):
+                    raise ValueError("重新下載後檔案仍然無效")
+            # 使用更保守的參數載入模型
             self.llm = Llama(
                 model_path=model_path,
+                n_ctx=512,          # 減少上下文長度
+                n_threads=4,        # 固定線程數
+                n_batch=128,        # 減少批次大小
+                verbose=False,      # 關閉詳細輸出
+                use_mmap=True,      # 使用記憶體映射
+                use_mlock=False,    # 不鎖定記憶體
+                n_gpu_layers=0      # 強制使用 CPU
             )
+            # 測試模型是否能正常生成
+            test_output = self.llm("SELECT", max_tokens=5, temperature=0.1)
+            if not test_output or 'choices' not in test_output:
+                raise RuntimeError("模型載入後無法正常生成")
+            self._log("✅ GGUF 模型載入並測試成功")
         except Exception as e:
+            self._log(f"❌ GGUF 模型載入失敗: {str(e)}", "ERROR")
+            self._log("嘗試使用替代方案...", "INFO")
+            self.llm = None
+            # 可以在這裡添加使用其他模型的邏輯
+            # 例如使用 Hugging Face Transformers 的備用方案
     def _load_gguf_model_fallback(self, model_path):
         """備用載入方式"""
         try:
         return prompt
     def huggingface_api_call(self, prompt: str) -> str:
+        """Generate SQL text with the GGUF model, falling back to rule-based SQL.
+
+        Args:
+            prompt: Full prompt text. Only the first 1500 characters (plus a
+                trailing "...") are sent to the model; the rule-based
+                fallback always receives the untruncated prompt.
+
+        Returns:
+            The stripped text of the model's first choice, or the result of
+            ``_generate_fallback_sql`` when no model is loaded, the model
+            returns no choices, or generation raises.
+        """
         if self.llm is None:
+            # No model available: use the keyword-based generator.
+            return self._generate_fallback_sql(prompt)
+        # Truncate a copy, not `prompt` itself, so the fallback can still
+        # match keywords located beyond the cut-off.
+        model_prompt = prompt
+        if len(model_prompt) > 1500:
+            model_prompt = model_prompt[:1500] + "..."
         try:
             output = self.llm(
+                model_prompt,
+                max_tokens=128,
+                temperature=0.0,    # deterministic decoding
+                top_p=0.95,
+                stop=["</s>", "```", "\n\n", "問題:"],
                 echo=False
             )
+            if output and 'choices' in output and output['choices']:
+                return output["choices"][0]["text"].strip()
+            # An empty result is a failure too; handle it like the other
+            # failure paths instead of returning a message as if it were SQL.
+            self._log("❌ 生成失敗: 模型未返回任何結果", "ERROR")
         except Exception as e:
             self._log(f"❌ 生成失敗: {e}", "ERROR")
+        return self._generate_fallback_sql(prompt)
+    def _generate_fallback_sql(self, prompt: str) -> str:
+        """Return a canned SQL query chosen by keyword matching on *prompt*.
+
+        Used as the backup generator when the model is unavailable. Chinese
+        keywords are matched against the prompt as given; "pass"/"fail" are
+        matched case-insensitively. When nothing matches, a 10-row sample of
+        ``jobtimeline`` is returned.
+        """
+        lowered = prompt.lower()
+        def mentions(*keywords):
+            # True when any of the keywords occurs in the raw prompt.
+            return any(word in prompt for word in keywords)
+        # Counting questions: per month, per customer, or overall.
+        if mentions("統計", "數量", "多少"):
+            if mentions("月"):
+                return "SELECT strftime('%Y-%m', completed_time) as month, COUNT(*) as count FROM jobtimeline GROUP BY month ORDER BY month;"
+            if mentions("客戶"):
+                return "SELECT applicant, COUNT(*) as count FROM tsr53sampledescription GROUP BY applicant ORDER BY count DESC;"
+            return "SELECT COUNT(*) as total_count FROM jobtimeline WHERE completed_time IS NOT NULL;"
+        # Amount / total questions.
+        if mentions("金額", "總額"):
+            return "SELECT SUM(amount) as total_amount FROM tsr53invoice;"
+        # Rating questions (pass / fail).
+        if mentions("評級") or any(flag in lowered for flag in ("pass", "fail")):
+            return "SELECT rating, COUNT(*) as count FROM tsr53sampledescription GROUP BY rating;"
+        return "SELECT * FROM jobtimeline LIMIT 10;"
+    def _validate_model_file(self, model_path):
+        """Check that *model_path* looks like a usable GGUF model file.
+
+        Returns True only when the path exists, the file is at least 10 MB,
+        and its leading bytes are the ``GGUF`` magic. Any exception raised
+        while checking is swallowed and reported as False.
+        """
+        min_bytes = 10 * 1024 * 1024  # anything smaller is treated as suspect
+        try:
+            missing = not os.path.exists(model_path)
+            if missing or os.path.getsize(model_path) < min_bytes:
+                return False
+            # Inspect the file header for the GGUF signature.
+            with open(model_path, 'rb') as model_file:
+                magic = model_file.read(8)
+            return magic.startswith(b'GGUF')
+        except Exception:
+            return False
     def process_question(self, question: str) -> Tuple[str, str]:
         """處理使用者問題"""
         # 檢查緩存