Paul720810 commited on
Commit
3d52e16
·
verified ·
1 Parent(s): 097112d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +545 -660
app.py CHANGED
@@ -4,6 +4,9 @@ import re
4
  import json
5
  import torch
6
  import numpy as np
 
 
 
7
  from datetime import datetime
8
  from datasets import load_dataset
9
  from huggingface_hub import hf_hub_download
@@ -11,30 +14,51 @@ from llama_cpp import Llama
11
  from typing import List, Dict, Tuple, Optional
12
  import faiss
13
  from functools import lru_cache
14
- import re
15
 
16
  # 使用 transformers 替代 sentence-transformers
17
  from transformers import AutoModel, AutoTokenizer
18
  import torch.nn.functional as F
19
 
20
- # ==================== 配置區 ====================
21
  DATASET_REPO_ID = "Paul720810/Text-to-SQL-Softline"
22
  GGUF_REPO_ID = "Paul720810/gguf-models"
23
- GGUF_FILENAME = "qwen2.5-coder-1.5b-sql-finetuned.q4_k_m.gguf"
24
- #GGUF_FILENAME = "qwen2.5-coder-1.5b-sql-finetuned.q8_0.gguf"
25
-
26
- # 添加這一行:你的原始微調模型路徑
27
- FINETUNED_MODEL_PATH = "Paul720810/qwen2.5-coder-1.5b-sql-finetuned" # ← 新增這行
28
 
29
- FEW_SHOT_EXAMPLES_COUNT = 2
30
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
31
  EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  print("=" * 60)
34
- print("🤖 Text-to-SQL 系統啟動中...")
35
- print(f"📊 數據集: {DATASET_REPO_ID}")
36
- print(f"🤖 嵌入模型: {EMBED_MODEL_NAME}")
37
- print(f"💻 設備: {DEVICE}")
 
 
38
  print("=" * 60)
39
 
40
  # ==================== 工具函數 ====================
@@ -44,57 +68,70 @@ def get_current_time():
44
  def format_log(message: str, level: str = "INFO") -> str:
45
  return f"[{get_current_time()}] [{level.upper()}] {message}"
46
 
47
- def parse_sql_from_response(response_text: str) -> Optional[str]:
48
- """更健壯的 SQL 擷取 (multi-line 安全版)"""
49
- if not response_text:
50
- return None
 
 
51
 
52
- text = response_text.strip()
 
 
 
 
53
 
54
- # 1) 取得所有 ```sql / ``` 區塊,優先使用
55
- code_blocks = re.findall(r"```(?:sql)?\s*\n([\s\S]*?)```", text, flags=re.IGNORECASE)
56
- candidates = []
57
- for block in code_blocks:
58
- b = block.strip()
59
- if 'select' in b.lower():
60
- candidates.append(b)
61
 
62
- # 2) 若無 code block,直接以正則抓第一個 SELECT...; 或到結尾
63
- if not candidates:
64
- m = re.search(r"SELECT\b[\s\S]*?(?:;|$)", text, flags=re.IGNORECASE)
65
- if m:
66
- candidates.append(m.group(0).strip())
67
 
68
- if not candidates:
 
 
69
  return None
70
 
71
- def clean(sql_raw: str) -> str:
72
- # 去除註解行與多餘空白
73
- lines = []
74
- for line in sql_raw.split('\n'):
75
- l = line.strip()
76
- if not l:
77
- continue
78
- if l.startswith('--') or l.startswith('#'):
79
- continue
80
- lines.append(l)
81
- sql_clean = ' '.join(lines)
82
- # 移除多個反引號殘留
83
- sql_clean = sql_clean.replace('```', '').strip()
84
- # 若有多個分號只保留第一個前面內容後加單一分號
85
- if ';' in sql_clean:
86
- first_part = sql_clean.split(';')[0].strip()
87
- sql_clean = first_part
88
- if not sql_clean.lower().startswith('select'):
89
- return ''
90
- if not sql_clean.endswith(';'):
91
- sql_clean += ';'
92
- return sql_clean
93
-
94
- for cand in candidates:
95
- cleaned = clean(cand)
96
- if cleaned:
97
- return cleaned
 
 
 
 
 
 
 
 
 
 
98
  return None
99
 
100
  # ==================== Text-to-SQL 核心類 ====================
@@ -103,15 +140,21 @@ class TextToSQLSystem:
103
  self.log_history = []
104
  self._log("初始化系統...")
105
  self.query_cache = {}
106
- self.backend = None # 'gguf' | 'transformers' | None
107
- self.gguf_llm = None # 實際 llama.cpp 物件
 
 
108
 
109
  # 1. 載入嵌入模型
110
  self._log(f"載入嵌入模型: {embed_model_name}")
111
  self.embed_tokenizer = AutoTokenizer.from_pretrained(embed_model_name)
112
  self.embed_model = AutoModel.from_pretrained(embed_model_name)
113
- if DEVICE == "cuda":
114
- self.embed_model = self.embed_model.cuda()
 
 
 
 
115
 
116
  # 2. 載入數據庫結構
117
  self.schema = self._load_schema()
@@ -119,220 +162,122 @@ class TextToSQLSystem:
119
  # 3. 載入數據集並建立索引
120
  self.dataset, self.faiss_index = self._load_and_index_dataset()
121
 
122
- # 4. 載入 GGUF 模型(添加錯誤處理)
123
  self._load_gguf_model()
124
 
125
- self._log("系統初始化完成")
126
- # 載入數據庫結構
127
- self.schema = self._load_schema()
128
-
129
- # 暫時添加:打印 schema 信息
130
- if self.schema:
131
- print("=" * 50)
132
- print("數據庫 Schema 信息:")
133
- for table_name, columns in self.schema.items():
134
- print(f"\n表格: {table_name}")
135
- print(f"欄位數: {len(columns)}")
136
- print("欄位列表:")
137
- for col in columns[:5]: # 只顯示前5個
138
- print(f" - {col['name']} ({col['type']})")
139
- print("=" * 50)
140
 
141
- # in class TextToSQLSystem:
 
 
142
 
143
  def _load_gguf_model(self):
144
- """載入 GGUF 模型,使用更穩定、簡潔的參數"""
145
  try:
146
- self._log("載入 GGUF 模型 (使用穩定性參數)...")
147
- model_path = hf_hub_download(
148
- repo_id=GGUF_REPO_ID,
149
- filename=GGUF_FILENAME,
150
- repo_type="dataset"
151
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
153
- # 使用一組更基礎、更穩定的參數來載入模型
154
- self.gguf_llm = Llama(
 
 
155
  model_path=model_path,
156
- n_ctx=2048, # 將上下文增加到 2048 以確保 Prompt 不會超長
157
- n_threads=4, # 保持 4 線程
158
- n_batch=512, # 建議值
159
- verbose=False, # 設為 False 避免 llama.cpp 本身的日誌干擾
160
- n_gpu_layers=0 # 確認在 CPU 上運行
 
 
 
161
  )
162
 
163
- # 簡單測試模型是否能回應
164
- self.gguf_llm("你好", max_tokens=3)
165
- self.backend = "gguf"
166
- self._log("✅ GGUF 模型載入成功")
 
 
167
 
168
  except Exception as e:
169
- self._log(f"GGUF 載入失敗: {e}", "ERROR")
170
- self._log("系統將無法生成 SQL。請檢查模型檔案或 llama-cpp-python 安裝。", "CRITICAL")
171
  self.llm = None
172
 
173
- def _try_gguf_loading(self):
174
- """嘗試載入 GGUF"""
175
  try:
176
- model_path = hf_hub_download(
177
- repo_id=GGUF_REPO_ID,
178
- filename=GGUF_FILENAME,
179
- repo_type="dataset"
180
- )
181
 
182
- self.gguf_llm = Llama(
183
- model_path=model_path,
184
- n_ctx=512,
185
- n_threads=4,
186
- verbose=False,
187
- n_gpu_layers=0
188
- )
189
 
190
- # 測試生成
191
- test_result = self.gguf_llm("SELECT", max_tokens=5)
192
- self._log("✅ GGUF 模型載入成功")
193
- return True
 
194
 
195
- except Exception as e:
196
- self._log(f"GGUF 載入失敗: {e}", "WARNING")
197
  return False
198
 
199
- def _load_transformers_model(self):
200
- """使用 Transformers 載入你的微調模型"""
 
 
 
 
201
  try:
202
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
203
- import torch
204
-
205
- self._log(f"載入 Transformers 模型: {FINETUNED_MODEL_PATH}")
206
-
207
- # 載入你的微調模型
208
- self.transformers_tokenizer = AutoTokenizer.from_pretrained(FINETUNED_MODEL_PATH)
209
- self.transformers_model = AutoModelForCausalLM.from_pretrained(
210
- FINETUNED_MODEL_PATH,
211
- torch_dtype=torch.float32, # CPU 使用 float32
212
- device_map="cpu", # 強制使用 CPU
213
- trust_remote_code=True # Qwen 模型可能需要
214
- )
215
 
216
- # 創建生成管道
217
- self.generation_pipeline = pipeline(
218
- "text-generation",
219
- model=self.transformers_model,
220
- tokenizer=self.transformers_tokenizer,
221
- device=-1, # CPU
222
- max_length=512,
223
- do_sample=True,
224
  temperature=0.1,
225
  top_p=0.9,
226
- pad_token_id=self.transformers_tokenizer.eos_token_id
 
227
  )
228
 
229
- # 標記目前後端為 transformers
230
- self.backend = "transformers"
231
- self._log("✅ Transformers 模型載入成功")
232
-
233
- except Exception as e:
234
- self._log(f"❌ Transformers 載入也失敗: {e}", "ERROR")
235
-
236
- def huggingface_api_call(self, prompt: str) -> str:
237
- """生成 SQL:優先使用 transformers,其次 gguf,最後 fallback"""
238
- # transformers 後端
239
- if self.backend == "transformers" and hasattr(self, "generation_pipeline"):
240
- try:
241
- gen = self.generation_pipeline(
242
- prompt,
243
- max_new_tokens=350,
244
- do_sample=True,
245
- temperature=0.05,
246
- top_p=0.9
247
- )
248
- # 盡量從 pipeline 結果提取文字
249
- generated_text = ""
250
- try:
251
- if isinstance(gen, list) and gen:
252
- first = gen[0]
253
- if isinstance(first, dict) and "generated_text" in first:
254
- generated_text = str(first["generated_text"]) # type: ignore[index]
255
- else:
256
- generated_text = str(first)
257
- else:
258
- generated_text = str(gen)
259
- except Exception:
260
- generated_text = str(gen)
261
- # 若包含 prompt,裁切前綴
262
- if isinstance(generated_text, str) and generated_text.startswith(prompt):
263
- generated_text = generated_text[len(prompt):]
264
- self._log(f"📝 提取出的生成文本: {generated_text.strip()}", "DEBUG")
265
-
266
- lines = generated_text.strip().split('\n')
267
- non_comment_lines = [line for line in lines if not line.strip().startswith('--')]
268
- cleaned_text = "\n".join(non_comment_lines).strip()
269
- if cleaned_text != generated_text.strip():
270
- self._log(f"🧹 清理掉註解後的文本: {cleaned_text}", "DEBUG")
271
- if cleaned_text and not re.match(r"^\s*select\b", cleaned_text, flags=re.IGNORECASE):
272
- self._log("⚙️ 補上缺失的 'SELECT ' 起手以形成完整查詢", "DEBUG")
273
- cleaned_text = "SELECT " + cleaned_text.lstrip()
274
- return cleaned_text
275
- except Exception as e:
276
- self._log(f"❌ Transformers 生成失敗: {e}", "ERROR")
277
- return ""
278
 
279
- # gguf 後端
280
- if self.backend == "gguf" and self.gguf_llm is not None and callable(getattr(self.gguf_llm, "__call__", None)):
281
- try:
282
- output = self.gguf_llm(
283
- prompt,
284
- max_tokens=350,
285
- temperature=0.05,
286
- top_p=0.9,
287
- echo=False,
288
- stop=["```"]
289
- )
290
- self._log(f"🧠 模型原始輸出 (Raw Output): {output}", "DEBUG")
291
- if output and "choices" in output and len(output["choices"]) > 0:
292
- generated_text = output["choices"][0]["text"]
293
- self._log(f"📝 提取出的生成文本: {generated_text.strip()}", "DEBUG")
294
- lines = str(generated_text).strip().split('\n')
295
- non_comment_lines = [line for line in lines if not line.strip().startswith('--')]
296
- cleaned_text = "\n".join(non_comment_lines).strip()
297
- if cleaned_text != str(generated_text).strip():
298
- self._log(f"🧹 清理掉註解後的文本: {cleaned_text}", "DEBUG")
299
- if cleaned_text and not re.match(r"^\s*select\b", cleaned_text, flags=re.IGNORECASE):
300
- self._log("⚙️ 補上缺失的 'SELECT ' 起手以形成完整查詢", "DEBUG")
301
- cleaned_text = "SELECT " + cleaned_text.lstrip()
302
- return cleaned_text
303
- else:
304
- self._log("❌ 模型的原始輸出格式不正確或為空。", "ERROR")
305
- return ""
306
- except Exception as e:
307
- self._log(f"❌ GGUF 生成失敗: {e}", "ERROR")
308
  return ""
309
 
310
- # 後備:都不可用時,回退
311
- self._log("模型未載入或不可用,返回 fallback SQL。", "ERROR")
312
- return self._generate_fallback_sql(prompt)
313
-
314
- def _load_gguf_model_fallback(self, model_path):
315
- """備用載入方式"""
316
- try:
317
- # 嘗試不同的參數組合
318
- self.gguf_llm = Llama(
319
- model_path=model_path,
320
- n_ctx=512, # 更小的上下文
321
- n_threads=4,
322
- n_batch=128,
323
- vocab_only=False,
324
- use_mmap=True,
325
- use_mlock=False,
326
- verbose=True
327
- )
328
- self._log("✅ 備用方式載入成功")
329
  except Exception as e:
330
- self._log(f" 備用方式也失敗: {e}", "ERROR")
331
- self.gguf_llm = None
332
-
333
- def _log(self, message: str, level: str = "INFO"):
334
- self.log_history.append(format_log(message, level))
335
- print(format_log(message, level))
336
 
337
  def _load_schema(self) -> Dict:
338
  """載入數據庫結構"""
@@ -340,91 +285,58 @@ class TextToSQLSystem:
340
  schema_path = hf_hub_download(
341
  repo_id=DATASET_REPO_ID,
342
  filename="sqlite_schema_FULL.json",
343
- repo_type="dataset"
 
344
  )
345
  with open(schema_path, "r", encoding="utf-8") as f:
346
  schema_data = json.load(f)
347
 
348
- # 添加調試信息
349
- self._log(f"📊 Schema 載入成功,包含 {len(schema_data)} 個表格:")
350
  for table_name, columns in schema_data.items():
351
  self._log(f" - {table_name}: {len(columns)} 個欄位")
352
- # 顯示前3個欄位作為範例
353
- sample_cols = [col['name'] for col in columns[:3]]
354
- self._log(f" 範例欄位: {', '.join(sample_cols)}")
355
 
356
- self._log("數據庫結構載入完成")
357
  return schema_data
358
 
359
  except Exception as e:
360
- self._log(f"載入 schema 失敗: {e}", "ERROR")
361
  return {}
362
 
363
- # 也可以添加一個方法來檢查生成的 SQL 是否使用了正確的表格和欄位
364
- def _analyze_sql_correctness(self, sql: str) -> Dict:
365
- """分析 SQL 的正確性"""
366
- analysis = {
367
- 'valid_tables': [],
368
- 'invalid_tables': [],
369
- 'valid_columns': [],
370
- 'invalid_columns': [],
371
- 'suggestions': []
372
- }
373
-
374
- if not self.schema:
375
- return analysis
376
-
377
- # 提取 SQL 中的表格名稱
378
- table_pattern = r'FROM\s+(\w+)|JOIN\s+(\w+)'
379
- table_matches = re.findall(table_pattern, sql, re.IGNORECASE)
380
- used_tables = [match[0] or match[1] for match in table_matches]
381
-
382
- # 檢查表格是否存在
383
- valid_tables = list(self.schema.keys())
384
- for table in used_tables:
385
- if table in valid_tables:
386
- analysis['valid_tables'].append(table)
387
- else:
388
- analysis['invalid_tables'].append(table)
389
- # 尋找相似的表格名稱
390
- for valid_table in valid_tables:
391
- if table.lower() in valid_table.lower() or valid_table.lower() in table.lower():
392
- analysis['suggestions'].append(f"{table} -> {valid_table}")
393
-
394
- # 提取欄位名稱(簡單版本)
395
- column_pattern = r'SELECT\s+(.*?)\s+FROM|WHERE\s+(\w+)\s*[=<>]|GROUP BY\s+(\w+)|ORDER BY\s+(\w+)'
396
- column_matches = re.findall(column_pattern, sql, re.IGNORECASE)
397
-
398
- return analysis
399
-
400
  def _encode_texts(self, texts):
401
  """編碼文本為嵌入向量"""
402
  if isinstance(texts, str):
403
  texts = [texts]
404
-
405
  inputs = self.embed_tokenizer(texts, padding=True, truncation=True,
406
- return_tensors="pt", max_length=512)
407
- if DEVICE == "cuda":
408
- inputs = {k: v.cuda() for k, v in inputs.items()}
 
 
 
409
 
410
  with torch.no_grad():
411
  outputs = self.embed_model(**inputs)
412
 
413
  # 使用平均池化
414
  embeddings = outputs.last_hidden_state.mean(dim=1)
415
- return embeddings.cpu()
416
 
417
  def _load_and_index_dataset(self):
418
  """載入數據集並建立 FAISS 索引"""
419
  try:
420
- dataset = load_dataset(DATASET_REPO_ID, data_files="training_data.jsonl", split="train")
 
 
421
 
422
- # 先過濾不完整樣本,避免 messages 長度不足導致索引或檢索報錯
423
- try:
424
- original_count = len(dataset)
425
- except Exception:
426
- original_count = None
 
427
 
 
 
428
  dataset = dataset.filter(
429
  lambda ex: isinstance(ex.get("messages"), list)
430
  and len(ex["messages"]) >= 2
@@ -434,10 +346,7 @@ class TextToSQLSystem:
434
  )
435
  )
436
 
437
- if original_count is not None:
438
- self._log(
439
- f"資料集清理: 原始 {original_count} 筆, 過濾後 {len(dataset)} 筆, 移除 {original_count - len(dataset)} 筆"
440
- )
441
 
442
  if len(dataset) == 0:
443
  self._log("清理後資料集為空,無法建立索引。", "ERROR")
@@ -446,14 +355,19 @@ class TextToSQLSystem:
446
  corpus = [item['messages'][0]['content'] for item in dataset]
447
  self._log(f"正在編碼 {len(corpus)} 個問題...")
448
 
449
- # 批量編碼
450
  embeddings_list = []
451
- batch_size = 32
452
 
453
  for i in range(0, len(corpus), batch_size):
454
  batch_texts = corpus[i:i+batch_size]
455
  batch_embeddings = self._encode_texts(batch_texts)
456
  embeddings_list.append(batch_embeddings)
 
 
 
 
 
457
  self._log(f"已編碼 {min(i+batch_size, len(corpus))}/{len(corpus)}")
458
 
459
  all_embeddings = torch.cat(embeddings_list, dim=0).numpy()
@@ -462,11 +376,15 @@ class TextToSQLSystem:
462
  index = faiss.IndexFlatIP(all_embeddings.shape[1])
463
  index.add(all_embeddings.astype('float32'))
464
 
465
- self._log("✅ 向量索引建立完成")
 
 
 
 
466
  return dataset, index
467
 
468
  except Exception as e:
469
- self._log(f"載入數據失敗: {e}", "ERROR")
470
  return None, None
471
 
472
  def _identify_relevant_tables(self, question: str) -> List[str]:
@@ -497,12 +415,8 @@ class TextToSQLSystem:
497
 
498
  return relevant_tables[:3] # 最多返回3個相關表格
499
 
500
- # 請將這整個函數複製到您的 TextToSQLSystem class 內部
501
-
502
  def _format_relevant_schema(self, table_names: List[str]) -> str:
503
- """
504
- 生成一個簡化的、不易被模型錯誤模仿的 Schema 字符串。
505
- """
506
  if not self.schema:
507
  return "No schema available.\n"
508
 
@@ -522,257 +436,17 @@ class TextToSQLSystem:
522
  formatted = ""
523
  for table in real_table_names:
524
  if table in self.schema:
525
- # 使用簡單的 "Table: ..." 和 "Columns: ..." 格式
526
  formatted += f"Table: {table}\n"
527
  cols_str = []
528
- # 只顯示前 10 個關鍵欄位
529
- for col in self.schema[table][:10]:
530
  col_name = col['name']
531
  col_type = col['type']
532
- col_desc = col.get('description', '').replace('\n', ' ')
533
- # 將描述信息放在括號裡
534
- if col_desc:
535
- cols_str.append(f"{col_name} ({col_type}, {col_desc})")
536
- else:
537
- cols_str.append(f"{col_name} ({col_type})")
538
  formatted += f"Columns: {', '.join(cols_str)}\n\n"
539
 
540
  return formatted.strip()
541
 
542
- # 在 class TextToSQLSystem 內
543
-
544
- def _validate_and_fix_sql(self, question: str, raw_response: str) -> Tuple[Optional[str], str]:
545
- """
546
- (V29 / 穩健正則 + 智能計數 最終版)
547
- 一個多層次的SQL生成引擎。它優先使用基於規則的動態模板生成器,
548
- 如果無法匹配,則回退到解析和修正AI模型的輸出。
549
- - 使用更簡潔、穩健的正則表達式來捕獲實體名稱。
550
- - 根據問題是關於「報告」還是「測試項目」來智能地決定計數目標。
551
- """
552
- q_lower = question.lower()
553
-
554
- # ==============================================================================
555
- # 第零層:統一實體識別引擎 (Unified Entity Recognition Engine)
556
- # ==============================================================================
557
- entity_match_data = None
558
- # 包含了繁簡體兼容和更穩健的模式
559
- entity_patterns = [
560
- # 模式1: 匹配 "类型 + ID" - (保持不變)
561
- {'pattern': r"(買家|买家|buyer)\s*(?:id|代號|代碼|代号|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.BuyerID', 'type': '買家ID'},
562
- {'pattern': r"(申請方|申请方|申請廠商|申请厂商|applicant)\s*(?:id|代號|代碼|代号|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.ApplicantID', 'type': '申請方ID'},
563
- {'pattern': r"(付款方|付款厂商|invoiceto)\s*(?:id|代號|代碼|代号|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.InvoiceToID', 'type': '付款方ID'},
564
- {'pattern': r"(代理商|agent)\s*(?:id|代號|代碼|代号|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.AgentID', 'type': '代理商ID'},
565
-
566
- # 模式2: 匹配 "類型 + 名稱" - (簡化了模式,使其更穩健)
567
- {'pattern': r"(買家|买家|buyer|客戶)\s+([a-zA-Z0-9&.-]+)", 'column': 'sd.BuyerName', 'type': '買家'},
568
- {'pattern': r"(申請方|申请方|申請廠商|申请厂商|applicant)\s+([a-zA-Z0-9&.-]+)", 'column': 'sd.ApplicantName', 'type': '申請方'},
569
- {'pattern': r"(付款方|付款厂商|invoiceto)\s+([a-zA-Z0-9&.-]+)", 'column': 'sd.InvoiceToName', 'type': '付款方'},
570
- {'pattern': r"(代理商|agent)\s+([a-zA-Z0-9&.-]+)", 'column': 'sd.AgentName', 'type': '代理商'},
571
-
572
- # 模式3: 单独匹配一个 ID - (保持不變)
573
- {'pattern': r"\b([A-Z]\d{4}[A-Z])\b", 'column': 'sd.ApplicantID', 'type': 'ID'}
574
- ]
575
-
576
- for p in entity_patterns:
577
- match = re.search(p['pattern'], question, re.IGNORECASE)
578
- if match:
579
- entity_value = match.group(2) if len(match.groups()) > 1 else match.group(1)
580
- entity_match_data = {
581
- "type": p['type'],
582
- "name": entity_value.strip().upper(),
583
- "column": p['column']
584
- }
585
- break
586
-
587
- # ==============================================================================
588
- # 第一層:模組化意圖偵測與動態SQL組合
589
- # ==============================================================================
590
-
591
- intents = {}
592
- sql_components = {
593
- 'select': [], 'from': "", 'joins': [], 'where': [],
594
- 'group_by': [], 'order_by': [], 'log_parts': []
595
- }
596
-
597
- # --- 運行一系列獨立的意圖偵測器 ---
598
-
599
- # 偵測器 2.1: 核心動作意圖
600
- if any(kw in q_lower for kw in ['幾份', '多少', '數量', '總數', 'how many', 'count']):
601
- intents['action'] = 'count'
602
- # 智能決定計數目標
603
- if "測試項目" in question or "test item" in q_lower:
604
- sql_components['select'].append("COUNT(jip.ItemCode) AS item_count")
605
- sql_components['log_parts'].append("測試項目總數")
606
- else: # 預設是計數報告
607
- sql_components['select'].append("COUNT(DISTINCT jt.JobNo) AS report_count")
608
- sql_components['log_parts'].append("報告總數")
609
- elif any(kw in q_lower for kw in ['報告號碼', '報告清單', '列出報告', 'report number', 'list of reports']):
610
- intents['action'] = 'list'
611
- sql_components['select'].append("jt.JobNo, jt.ReportAuthorization")
612
- sql_components['order_by'].append("jt.ReportAuthorization DESC")
613
- sql_components['log_parts'].append("報告列表")
614
-
615
- # 偵測器 2.2: 時間意圖
616
- year_match = re.search(r'(\d{4})\s*年?', question)
617
- month_match = re.search(r'(\d{1,2})\s*月', question)
618
- if year_match:
619
- year = year_match.group(1)
620
- sql_components['where'].append(f"strftime('%Y', jt.ReportAuthorization) = '{year}'")
621
- sql_components['log_parts'].append(f"{year}年")
622
- if month_match:
623
- month = month_match.group(1).zfill(2)
624
- sql_components['where'].append(f"strftime('%m', jt.ReportAuthorization) = '{month}'")
625
- sql_components['log_parts'].append(f"{month}月")
626
-
627
- # 偵測器 2.3: 實體意圖
628
- if entity_match_data:
629
- if "TSR53SampleDescription" not in " ".join(sql_components['joins']):
630
- sql_components['joins'].append("JOIN TSR53SampleDescription AS sd ON jt.JobNo = sd.JobNo")
631
- entity_name, column_name = entity_match_data["name"], entity_match_data["column"]
632
- match_operator = "=" if column_name.endswith("ID") else "LIKE"
633
- entity_value = f"'%{entity_name}%'" if match_operator == "LIKE" else f"'{entity_name}'"
634
- sql_components['where'].append(f"{column_name} {match_operator} {entity_value}")
635
- sql_components['log_parts'].append(entity_match_data["type"] + ":" + entity_name)
636
- if intents.get('action') == 'list':
637
- sql_components['select'].append("sd.BuyerName")
638
-
639
- # 偵測器 2.4: 評級意圖
640
- if 'fail' in q_lower or '失敗' in q_lower:
641
- if "TSR53SampleDescription" not in " ".join(sql_components['joins']):
642
- sql_components['joins'].append("JOIN TSR53SampleDescription AS sd ON jt.JobNo = sd.JobNo")
643
- sql_components['where'].append("sd.OverallRating = 'Fail'")
644
- sql_components['log_parts'].append("Fail")
645
- elif 'pass' in q_lower or '通過' in q_lower:
646
- if "TSR53SampleDescription" not in " ".join(sql_components['joins']):
647
- sql_components['joins'].append("JOIN TSR53SampleDescription AS sd ON jt.JobNo = sd.JobNo")
648
- sql_components['where'].append("sd.OverallRating = 'Pass'")
649
- sql_components['log_parts'].append("Pass")
650
-
651
- # 偵測器 2.5: 實驗組 (LabGroup) 意圖 (帶有別名映射)
652
- lab_group_mapping = {'A': 'TA', 'B': 'TB', 'C': 'TC', 'D': 'TD', 'E': 'TE', 'Y': 'TY'}
653
- lab_group_match = re.search(r'([A-Z]{1,2})組', question, re.IGNORECASE)
654
- if lab_group_match:
655
- user_input_group = lab_group_match.group(1).upper()
656
- db_lab_group = lab_group_mapping.get(user_input_group, user_input_group)
657
- sql_components['joins'].append("JOIN JobItemsInProgress AS jip ON jt.JobNo = jip.JobNo")
658
- sql_components['where'].append(f"jip.LabGroup = '{db_lab_group}'")
659
- sql_components['log_parts'].append(f"{user_input_group}組(->{db_lab_group})")
660
-
661
- # --- 2.6: 兩年份比較模板(優先級:高) ---
662
- # 偵測『比較/vs/對比/相較/相比』字樣,擷取兩個年份與(可選)買家名稱
663
- compare_hit = any(kw in q_lower for kw in ["比較", "對比", "相較", "相比", "vs", "versus"])
664
- years_found = re.findall(r"(20\d{2})", question)
665
- years_unique = []
666
- for y in years_found:
667
- if y not in years_unique:
668
- years_unique.append(y)
669
- if compare_hit and len(years_unique) >= 2:
670
- year_a, year_b = years_unique[0], years_unique[1]
671
- # 嘗試抓買家名稱(英文/數字/符號),若沒有則不加 buyer 條件
672
- buyer_name = None
673
- # 1) 優先解析明確條件:BuyerName LIKE '%...%'
674
- m_like = re.search(r"BuyerName\s+LIKE\s*'%([^']+)%'", question, re.IGNORECASE)
675
- if m_like:
676
- buyer_name = m_like.group(1).strip()
677
- else:
678
- # 2) 解析自然語言:避免 'BuyerName' 被誤判成 'buyer'
679
- buyer_match = re.search(r"(?:買家|买家|客戶|客户|\bbuyer\b(?!name))\s*[::]?\s*([A-Za-z0-9&.\- ]+)", question, re.IGNORECASE)
680
- if buyer_match:
681
- buyer_name = buyer_match.group(1).strip()
682
-
683
- # 判斷偏向金額或件數
684
- amount_intent = any(kw in q_lower for kw in ["金額", "金钱", "amount", "營收", "業績", "營業額", "銷售額", "revenue"])
685
-
686
- if amount_intent:
687
- # 金額版:需要發票表,依架構命名使用 TSR53Invoice 與 LocalAmount;與樣本描述以 JobNo 關聯
688
- sql = (
689
- "SELECT strftime('%Y', jt.ReportAuthorization) AS year, "
690
- "SUM(COALESCE(inv.LocalAmount, 0)) AS total_amount "
691
- "FROM JobTimeline AS jt "
692
- "JOIN TSR53SampleDescription AS sd ON sd.JobNo = jt.JobNo "
693
- "LEFT JOIN TSR53Invoice AS inv ON inv.JobNo = jt.JobNo "
694
- "WHERE jt.ReportAuthorization IS NOT NULL "
695
- f"AND strftime('%Y', jt.ReportAuthorization) IN ('{year_a}', '{year_b}') "
696
- )
697
- if buyer_name:
698
- sql += f"AND sd.BuyerName LIKE '%{buyer_name}%' "
699
- sql += "GROUP BY year ORDER BY year;"
700
- return self._finalize_sql(sql, f"模板覆寫: 兩年份金額比較 {year_a} vs {year_b}" )
701
- else:
702
- # 件數版:以報告數量為主,去重 JobNo
703
- sql = (
704
- "SELECT strftime('%Y', jt.ReportAuthorization) AS year, "
705
- "COUNT(DISTINCT jt.JobNo) AS report_count "
706
- "FROM JobTimeline AS jt "
707
- "JOIN TSR53SampleDescription AS sd ON sd.JobNo = jt.JobNo "
708
- "WHERE jt.ReportAuthorization IS NOT NULL "
709
- f"AND strftime('%Y', jt.ReportAuthorization) IN ('{year_a}', '{year_b}') "
710
- )
711
- if buyer_name:
712
- sql += f"AND sd.BuyerName LIKE '%{buyer_name}%' "
713
- sql += "GROUP BY year ORDER BY year;"
714
- return self._finalize_sql(sql, f"模板覆寫: 兩年份件數比較 {year_a} vs {year_b}" )
715
-
716
- # --- 3. 判斷是否觸發了模板,並動態組合 SQL ---
717
- if 'action' in intents:
718
- sql_components['from'] = "FROM JobTimeline AS jt"
719
- # 只要有任何篩選條件,就加上報告已授權的基礎限制
720
- if sql_components['where']:
721
- sql_components['where'].insert(0, "jt.ReportAuthorization IS NOT NULL")
722
-
723
- select_clause = "SELECT " + ", ".join(sorted(list(set(sql_components['select']))))
724
- from_clause = sql_components['from']
725
- joins_clause = " ".join(sql_components['joins'])
726
- where_clause = "WHERE " + " AND ".join(sql_components['where']) if sql_components['where'] else ""
727
- orderby_clause = "ORDER BY " + ", ".join(sql_components['order_by']) if sql_components['order_by'] else ""
728
-
729
- template_sql = f"{select_clause} {from_clause} {joins_clause} {where_clause} {orderby_clause};"
730
-
731
- query_log = " ".join(sql_components['log_parts'])
732
- self._log(f"🔄 偵測到組合意圖【{query_log}】,啟用動態模板。", "INFO")
733
- return self._finalize_sql(template_sql, f"模板覆寫: {query_log} 查詢")
734
-
735
- # ==============================================================================
736
- # 第二层:AI 生成修正流程 (Fallback)
737
- # ==============================================================================
738
- self._log("未觸發任何模板,嘗試解析並修正 AI 輸出...", "INFO")
739
-
740
- parsed_sql = parse_sql_from_response(raw_response)
741
- if not parsed_sql:
742
- self._log(f"❌ 未能從模型回應中解析出任何 SQL。原始回應: {raw_response}", "ERROR")
743
- return None, f"無法解析SQL。原始回應:\n{raw_response}"
744
-
745
- self._log(f"📊 解析出的原始 SQL: {parsed_sql}", "DEBUG")
746
-
747
- fixed_sql = " " + parsed_sql.strip() + " "
748
- fixes_applied_fallback = []
749
-
750
- dialect_corrections = {r'YEAR\s*\(([^)]+)\)': r"strftime('%Y', \1)"}
751
- for pattern, replacement in dialect_corrections.items():
752
- if re.search(pattern, fixed_sql, re.IGNORECASE):
753
- fixed_sql = re.sub(pattern, replacement, fixed_sql, flags=re.IGNORECASE)
754
- fixes_applied_fallback.append(f"修正方言: {pattern}")
755
-
756
- schema_corrections = {'TSR53Report':'TSR53SampleDescription', 'TSR53InvoiceReportNo':'JobNo', 'TSR53ReportNo':'JobNo', 'TSR53InvoiceNo':'JobNo', 'TSR53InvoiceCreditNoteNo':'InvoiceCreditNoteNo', 'TSR53InvoiceLocalAmount':'LocalAmount', 'Status':'OverallRating', 'ReportStatus':'OverallRating'}
757
- for wrong, correct in schema_corrections.items():
758
- pattern = r'\b' + re.escape(wrong) + r'\b'
759
- if re.search(pattern, fixed_sql, re.IGNORECASE):
760
- fixed_sql = re.sub(pattern, correct, fixed_sql, flags=re.IGNORECASE)
761
- fixes_applied_fallback.append(f"映射 Schema: '{wrong}' -> '{correct}'")
762
-
763
- log_msg = "AI 生成並成功修正" if fixes_applied_fallback else "AI 生成且無需修正"
764
- return self._finalize_sql(fixed_sql, log_msg)
765
-
766
- def _finalize_sql(self, sql: str, log_message: str) -> Tuple[str, str]:
767
- """一個輔助函數,用於清理最終的SQL並記錄成功日誌。"""
768
- final_sql = sql.strip()
769
- if not final_sql.endswith(';'):
770
- final_sql += ';'
771
- final_sql = re.sub(r'\s+', ' ', final_sql).strip()
772
- self._log(f"✅ SQL 已生成 ({log_message})", "INFO")
773
- self._log(f" - 最終 SQL: {final_sql}", "DEBUG")
774
- return final_sql, "生成成功"
775
-
776
  def find_most_similar(self, question: str, top_k: int) -> List[Dict]:
777
  """使用 FAISS 快速檢索相似問題"""
778
  if self.faiss_index is None or self.dataset is None:
@@ -792,16 +466,14 @@ class TextToSQLSystem:
792
  if len(results) >= top_k:
793
  break
794
 
795
- # 修復:將 numpy.int64 轉換為 Python int
796
- idx = int(idx) # ← 添加這行轉換
797
-
798
- if idx >= len(self.dataset): # 確保索引有效
799
  continue
800
 
801
  item = self.dataset[idx]
802
- # 防呆:若樣本不完整則跳過
803
  if not isinstance(item.get('messages'), list) or len(item['messages']) < 2:
804
  continue
 
805
  q_content = (item['messages'][0].get('content') or '').strip()
806
  a_content = (item['messages'][1].get('content') or '').strip()
807
  if not q_content or not a_content:
@@ -824,18 +496,12 @@ class TextToSQLSystem:
824
  return results
825
 
826
  except Exception as e:
827
- self._log(f"檢索失敗: {e}", "ERROR")
828
  return []
829
 
830
- # in class TextToSQLSystem:
831
-
832
  def _build_prompt(self, user_q: str, examples: List[Dict]) -> str:
833
- """
834
- 建立一個高度結構化、以任務為導向的提示詞,使用清晰的標題分隔符。
835
- """
836
  relevant_tables = self._identify_relevant_tables(user_q)
837
-
838
- # 使用我們新的、更簡單的 schema 格式化函數
839
  schema_str = self._format_relevant_schema(relevant_tables)
840
 
841
  example_str = "No example available."
@@ -843,8 +509,9 @@ class TextToSQLSystem:
843
  best_example = examples[0]
844
  example_str = f"Question: {best_example['question']}\nSQL:\n```sql\n{best_example['sql']}\n```"
845
 
846
- # 使用強分隔符和清晰的標題來構建 prompt
847
- prompt = f"""You are a silent SQL query generator. You are physically incapable of producing any text that is not a valid SQLite query. You will be penalized for any explanation or comment. Your entire existence is to translate a user's question into a single SQLite query.
 
848
 
849
  ### SCHEMA ###
850
  {schema_str}
@@ -852,22 +519,241 @@ class TextToSQLSystem:
852
  ### EXAMPLE ###
853
  {example_str}
854
 
855
- ### TASK ###
856
- User question: "{user_q}"
857
- Your single SQLite query response:
 
858
  ```sql
859
  SELECT
860
  """
861
- self._log(f"📏 Prompt 長度: {len(prompt)} 字符")
862
- # 不再需要複雜的長度截斷邏輯,因為 schema 已經被簡化
863
  return prompt
864
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
865
 
866
  def _generate_fallback_sql(self, prompt: str) -> str:
867
  """當模型不可用時的備用 SQL 生成"""
868
  prompt_lower = prompt.lower()
869
 
870
- # 簡單的關鍵詞匹配生成基本 SQL
871
  if "統計" in prompt or "數量" in prompt or "多少" in prompt:
872
  if "月" in prompt:
873
  return "SELECT strftime('%Y-%m', completed_time) as month, COUNT(*) as count FROM jobtimeline GROUP BY month ORDER BY month;"
@@ -875,100 +761,96 @@ SELECT
875
  return "SELECT applicant, COUNT(*) as count FROM tsr53sampledescription GROUP BY applicant ORDER BY count DESC;"
876
  else:
877
  return "SELECT COUNT(*) as total_count FROM jobtimeline WHERE completed_time IS NOT NULL;"
878
-
879
  elif "金額" in prompt or "總額" in prompt:
880
  return "SELECT SUM(amount) as total_amount FROM tsr53invoice;"
881
-
882
  elif "評級" in prompt or "pass" in prompt_lower or "fail" in prompt_lower:
883
  return "SELECT rating, COUNT(*) as count FROM tsr53sampledescription GROUP BY rating;"
884
-
885
  else:
886
  return "SELECT * FROM jobtimeline LIMIT 10;"
887
 
888
- def _validate_model_file(self, model_path):
889
- """驗證模型檔案完整性"""
890
- try:
891
- if not os.path.exists(model_path):
892
- return False
893
-
894
- # 檢查檔案大小(至少應該有幾MB)
895
- file_size = os.path.getsize(model_path)
896
- if file_size < 10 * 1024 * 1024: # 小於 10MB 可能有問題
897
- return False
898
-
899
- # 檢查 GGUF 檔案頭部
900
- with open(model_path, 'rb') as f:
901
- header = f.read(8)
902
- if not header.startswith(b'GGUF'):
903
- return False
904
-
905
- return True
906
- except Exception:
907
- return False
908
-
909
- # in class TextToSQLSystem:
910
-
911
  def process_question(self, question: str) -> Tuple[str, str]:
912
- """處理使用者問題 (V2 / 最終版)"""
913
  # 檢查緩存
914
  if question in self.query_cache:
915
- self._log("使用緩存結果")
916
  return self.query_cache[question]
917
 
918
  self.log_history = []
919
- self._log(f"處理問題: {question}")
920
-
921
-
922
- for attempt in range(2): # --- 新增:最多嘗試 2 次 ---
923
- self._log(f"🚀 開始第 {attempt + 1} 次嘗試...")
924
-
925
- # 1. 檢索相似範例 (第二次嘗試時不再重複)
926
- if attempt == 0:
927
- self._log("🔍 尋找相似範例...")
928
- examples = self.find_most_similar(question, FEW_SHOT_EXAMPLES_COUNT)
929
- if examples: self._log(f"✅ 找到 {len(examples)} 個相似範例")
930
-
931
- # 2. 建立提示詞
932
- self._log("📝 建立 Prompt...")
933
- prompt = self._build_prompt(question, examples)
 
 
 
934
 
935
- # --- 新增:如果是第二次嘗試,加入修正指令 ---
936
- if attempt > 0:
937
- correction_prompt = "\nYour previous attempt failed because you did not provide a valid SQL query. REMEMBER: ONLY output the SQL code inside a ```sql block. DO NOT write comments or explanations.\nSQL:\n```sql\nSELECT "
938
- # 將原本 prompt 的結尾替換成我們的修正指令
939
- prompt = prompt.rsplit("SQL:\n```sql", 1)[0] + correction_prompt
940
 
 
 
 
941
 
942
- # 3. 生成 AI 回應
943
- self._log("🧠 開始生成 AI 回應...")
944
- response = self.huggingface_api_call(prompt)
 
 
945
 
946
- # 4. 驗證與生成
947
- final_sql, status_message = self._validate_and_fix_sql(question, response)
948
 
949
- if final_sql:
950
- self._log(f"✅ 在第 {attempt + 1} 次嘗試成功!", "INFO")
951
- result = (final_sql, status_message)
952
- self.query_cache[question] = result # 緩存成功結果
953
- return result
954
 
955
- self._log(f"❌ 第 {attempt + 1} 次嘗試失敗。原因: {status_message}", "WARNING")
 
956
 
957
- # --- 如果兩次都失敗 ---
958
- self._log("❌ 所有嘗試均失敗,返回錯誤訊息。", "ERROR")
959
- final_fallback_message = "模型多次嘗試後仍無法生成有效的SQL。"
960
- return (final_fallback_message, "生成失敗")
961
 
962
- # ==================== Gradio 介面 ====================
 
963
  text_to_sql_system = TextToSQLSystem()
964
 
965
- def process_query(q: str):
966
- if not q.strip():
967
- return "", "等待輸入", "請輸入問題"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
968
 
969
  sql, status = text_to_sql_system.process_question(q)
970
- logs = "\n".join(text_to_sql_system.log_history[-10:]) # 只顯示最後10條日誌
971
-
972
  return sql, status, logs
973
 
974
  # 範例問題
@@ -980,36 +862,39 @@ examples = [
980
  "A組昨天完成了多少個測試項目?"
981
  ]
982
 
983
- with gr.Blocks(theme=gr.themes.Soft(), title="Text-to-SQL 智能助手") as demo:
984
- gr.Markdown("# Text-to-SQL 智能助手")
985
- gr.Markdown("輸入自然語言問題,自動生成SQL查詢語句")
986
 
987
  with gr.Row():
988
  with gr.Column(scale=2):
989
- inp = gr.Textbox(lines=3, label="💬 您的問題", placeholder="例如:2024年每月完成多少份報告?")
990
- btn = gr.Button("🚀 生成 SQL", variant="primary")
991
  status = gr.Textbox(label="狀態", interactive=False)
 
 
992
 
993
  with gr.Column(scale=3):
994
- sql_out = gr.Code(label="🤖 生成的 SQL", language="sql", lines=8)
995
 
996
- with gr.Accordion("📋 處理日誌", open=False):
997
- logs = gr.Textbox(lines=8, label="日誌", interactive=False)
998
 
999
  # 範例區
1000
  gr.Examples(
1001
  examples=examples,
1002
  inputs=inp,
1003
- label="💡 點擊試用範例問題"
1004
  )
1005
 
1006
  # 綁定事件
1007
- btn.click(process_query, inputs=[inp], outputs=[sql_out, status, logs])
1008
- inp.submit(process_query, inputs=[inp], outputs=[sql_out, status, logs])
1009
 
1010
  if __name__ == "__main__":
1011
  demo.launch(
1012
  server_name="0.0.0.0",
1013
  server_port=7860,
1014
- share=False
 
1015
  )
 
4
  import json
5
  import torch
6
  import numpy as np
7
+ import psutil
8
+ import gc
9
+ import tempfile
10
  from datetime import datetime
11
  from datasets import load_dataset
12
  from huggingface_hub import hf_hub_download
 
14
  from typing import List, Dict, Tuple, Optional
15
  import faiss
16
  from functools import lru_cache
 
17
 
18
  # 使用 transformers 替代 sentence-transformers
19
  from transformers import AutoModel, AutoTokenizer
20
  import torch.nn.functional as F
21
 
22
+ # ==================== 配置參數 ====================
23
  DATASET_REPO_ID = "Paul720810/Text-to-SQL-Softline"
24
  GGUF_REPO_ID = "Paul720810/gguf-models"
25
+ GGUF_FILENAME = "qwen2-7b-instruct-sql-finetuned-stable.q4_k_m.gguf"
 
 
 
 
26
 
 
 
27
  EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
28
 
29
+ # 可配置 GPU(HF 免費方案通常只有 CPU)
30
+ USE_GPU = str(os.getenv("USE_GPU", "0")).lower() in {"1", "true", "yes", "y"}
31
+ try:
32
+ N_GPU_LAYERS = int(os.getenv("N_GPU_LAYERS", "0"))
33
+ except Exception:
34
+ N_GPU_LAYERS = 0
35
+ DEVICE = "cuda" if (USE_GPU and torch.cuda.is_available()) else "cpu"
36
+
37
+ # CPU 專用優化(可由環境變數覆蓋)
38
+ def _int_env(name: str, default_val: int) -> int:
39
+ try:
40
+ return int(os.getenv(name, str(default_val)))
41
+ except Exception:
42
+ return default_val
43
+
44
+ THREADS = _int_env("THREADS", min(4, os.cpu_count() or 2)) # llama.cpp 執行緒數
45
+ CTX = _int_env("CTX", 768 if DEVICE == "cpu" else 1024) # 上下文長度
46
+ MAX_TOKENS = _int_env("MAX_TOKENS", 60) # 生成 token 上限
47
+ FEW_SHOT_EXAMPLES_COUNT = _int_env("FEW_SHOT", 0 if DEVICE == "cpu" else 1)
48
+ ENABLE_INDEX = str(os.getenv("ENABLE_INDEX", "0" if DEVICE == "cpu" else "1")).lower() in {"1", "true", "yes", "y"}
49
+ EMBED_BATCH = _int_env("EMBED_BATCH", 8 if DEVICE == "cpu" else 16)
50
+
51
+ # 使用 /tmp 作為暫存目錄
52
+ TEMP_DIR = "/tmp/text_to_sql_cache"
53
+ os.makedirs(TEMP_DIR, exist_ok=True)
54
+
55
  print("=" * 60)
56
+ print("Text-to-SQL 系統啟動中 (HF 版本)...")
57
+ print(f"數據集: {DATASET_REPO_ID}")
58
+ print(f"嵌入模型: {EMBED_MODEL_NAME}")
59
+ print(f"設備: {DEVICE} (USE_GPU={USE_GPU}, N_GPU_LAYERS={N_GPU_LAYERS})")
60
+ print(f"THREADS={THREADS}, CTX={CTX}, MAX_TOKENS={MAX_TOKENS}, FEW_SHOT={FEW_SHOT_EXAMPLES_COUNT}, ENABLE_INDEX={ENABLE_INDEX}, EMBED_BATCH={EMBED_BATCH}")
61
+ print(f"暫存目錄: {TEMP_DIR}")
62
  print("=" * 60)
63
 
64
  # ==================== 工具函數 ====================
 
68
  def format_log(message: str, level: str = "INFO") -> str:
69
  return f"[{get_current_time()}] [{level.upper()}] {message}"
70
 
71
def check_memory_usage():
    """Return a human-readable summary of system memory usage.

    Reads /proc/meminfo directly (Linux only) so no psutil dependency is
    required. Falls back to a generic message on any failure, e.g. on
    non-Linux hosts where /proc/meminfo does not exist.

    Returns:
        str: e.g. "內存使用率: 42.0% (可用: 3.5GB/8.0GB)", or a fallback
        message when the information cannot be obtained.
    """
    try:
        with open('/proc/meminfo', 'r') as f:
            lines = f.readlines()

        mem_info = {}
        for line in lines:
            if line.startswith(('MemTotal:', 'MemFree:', 'MemAvailable:')):
                # Lines look like "MemTotal:  8057564 kB"; keep the kB number.
                # split(':', 1) guards against any extra ':' in the value part.
                key, value = line.split(':', 1)
                mem_info[key.strip()] = int(value.strip().split()[0])

        # /proc/meminfo reports kB, so kB / 1024**2 gives GB.
        total_gb = mem_info.get('MemTotal', 0) / (1024**2)
        available_gb = mem_info.get('MemAvailable', mem_info.get('MemFree', 0)) / (1024**2)
        used_percent = ((total_gb - available_gb) / total_gb * 100) if total_gb > 0 else 0

        return f"內存使用率: {used_percent:.1f}% (可用: {available_gb:.1f}GB/{total_gb:.1f}GB)"
    except (OSError, ValueError, IndexError):
        # A bare "except:" would also swallow KeyboardInterrupt/SystemExit;
        # only expected I/O and parse failures are handled here.
        return "內存信息: 無法獲取詳細信息"
 
92
 
93
def parse_sql_from_response(response_text: str) -> Optional[str]:
    """Extract an SQL statement from raw model output.

    Tries progressively looser strategies: a fenced ```sql block, any fenced
    block containing a SELECT, a semicolon-terminated SELECT, an
    unterminated SELECT (semicolon appended), and finally any single line
    beginning with SELECT. Returns None when nothing usable is found.
    """
    if not response_text:
        return None

    text = response_text.strip()

    # Strategy 1: explicit ```sql fenced block.
    fenced_sql = re.search(r"```sql\s*\n(.*?)\n```", text, re.DOTALL | re.IGNORECASE)
    if fenced_sql:
        return fenced_sql.group(1).strip()

    # Strategy 2: any fenced block whose content looks like a SELECT.
    fenced_any = re.search(r"```\s*\n?(.*?)\n?```", text, re.DOTALL)
    if fenced_any:
        candidate = fenced_any.group(1).strip()
        if candidate.upper().startswith('SELECT'):
            return candidate

    # Strategy 3: a semicolon-terminated SELECT statement anywhere in the text.
    terminated = re.search(r"(SELECT\s+.*?;)", text, re.DOTALL | re.IGNORECASE)
    if terminated:
        return terminated.group(1).strip()

    # Strategy 4: an unterminated SELECT; append the missing semicolon.
    unterminated = re.search(
        r"(SELECT\s+.*?)(?=\n\n|\n```|$|\n[^,\s])", text, re.DOTALL | re.IGNORECASE
    )
    if unterminated:
        candidate = unterminated.group(1).strip()
        return candidate if candidate.endswith(';') else candidate + ';'

    # Strategy 5: fall back to the first line that begins with SELECT.
    if 'SELECT' in text.upper():
        for raw_line in text.split('\n'):
            stripped = raw_line.strip()
            if stripped.upper().startswith('SELECT'):
                return stripped if stripped.endswith(';') else stripped + ';'

    return None
136
 
137
  # ==================== Text-to-SQL 核心類 ====================
 
140
  self.log_history = []
141
  self._log("初始化系統...")
142
  self.query_cache = {}
143
+ self.embed_device = DEVICE
144
+
145
+ # 檢查內存狀況
146
+ self._log(check_memory_usage())
147
 
148
  # 1. 載入嵌入模型
149
  self._log(f"載入嵌入模型: {embed_model_name}")
150
  self.embed_tokenizer = AutoTokenizer.from_pretrained(embed_model_name)
151
  self.embed_model = AutoModel.from_pretrained(embed_model_name)
152
+ try:
153
+ self.embed_model.to(self.embed_device)
154
+ self._log(f"嵌入模型設備: {self.embed_device}")
155
+ except Exception as e:
156
+ self._log(f"將嵌入模型移動到設備失敗: {e}", "WARNING")
157
+ self.embed_device = "cpu"
158
 
159
  # 2. 載入數據庫結構
160
  self.schema = self._load_schema()
 
162
  # 3. 載入數據集並建立索引
163
  self.dataset, self.faiss_index = self._load_and_index_dataset()
164
 
165
+ # 4. 載入 GGUF 模型(新增錯誤處理)
166
  self._load_gguf_model()
167
 
168
+ self._log("系統初始化完成")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
 
170
+ def _log(self, message: str, level: str = "INFO"):
171
+ self.log_history.append(format_log(message, level))
172
+ print(format_log(message, level))
173
 
174
  def _load_gguf_model(self):
175
+ """載入 GGUF 模型,針對 Paperspace 環境優化"""
176
  try:
177
+ self._log("開始下載 GGUF 模���到 /tmp...")
178
+
179
+ # 檢查模型是否已存在於 /tmp
180
+ model_cache_path = os.path.join(TEMP_DIR, GGUF_FILENAME)
181
+
182
+ if os.path.exists(model_cache_path) and self._validate_model_file(model_cache_path):
183
+ self._log(f"發現快取模型: {model_cache_path}")
184
+ model_path = model_cache_path
185
+ else:
186
+ self._log("下載新模型...")
187
+ model_path = hf_hub_download(
188
+ repo_id=GGUF_REPO_ID,
189
+ filename=GGUF_FILENAME,
190
+ repo_type="dataset",
191
+ cache_dir=TEMP_DIR,
192
+ resume_download=True
193
+ )
194
+ self._log(f"模型下載完成: {model_path}")
195
+
196
+ # 檢查內存情況
197
+ self._log(check_memory_usage())
198
 
199
+ # 使用 CPU 友好的參數載入模型(可選 GPU layers)
200
+ ngl = N_GPU_LAYERS if (DEVICE == "cuda" and N_GPU_LAYERS > 0) else 0
201
+ self._log(f"載入 GGUF 模型 (n_gpu_layers={ngl}, n_threads={THREADS}, n_ctx={CTX})...")
202
+ self.llm = Llama(
203
  model_path=model_path,
204
+ n_ctx=CTX, # 上下文長度(CPU 默認更小)
205
+ n_threads=THREADS, # 使用多執行緒
206
+ n_batch=256, # 批處理大小
207
+ verbose=False,
208
+ n_gpu_layers=ngl, # 可選 GPU 加速
209
+ use_mmap=True, # 使用內存映射減少內存占用
210
+ use_mlock=False, # 不鎖定內存
211
+ low_vram=True # 啟用低內存模式
212
  )
213
 
214
+ # 簡單測試模型
215
+ test_result = self.llm("SELECT", max_tokens=3)
216
+ self._log("GGUF 模型載入成功")
217
+
218
+ # 再次檢查內存
219
+ self._log(check_memory_usage())
220
 
221
  except Exception as e:
222
+ self._log(f"GGUF 載入失敗: {e}", "ERROR")
223
+ self._log("系統將無法生成 SQL。請檢查模型檔案或內存情況。", "CRITICAL")
224
  self.llm = None
225
 
226
+ def _validate_model_file(self, model_path):
227
+ """驗證模型檔案完整性"""
228
  try:
229
+ if not os.path.exists(model_path):
230
+ return False
 
 
 
231
 
232
+ # 檢查檔案大小(至少應該有幾百MB)
233
+ file_size = os.path.getsize(model_path)
234
+ if file_size < 50 * 1024 * 1024: # 小於 50MB 可能有問題
235
+ return False
 
 
 
236
 
237
+ # 檢查 GGUF 檔案頭部
238
+ with open(model_path, 'rb') as f:
239
+ header = f.read(8)
240
+ if not header.startswith(b'GGUF'):
241
+ return False
242
 
243
+ return True
244
+ except Exception:
245
  return False
246
 
247
+ def huggingface_api_call(self, prompt: str) -> str:
248
+ """調用 GGUF 模型,並加入詳細的原始輸出日誌"""
249
+ if self.llm is None:
250
+ self._log("模型未載入,返回 fallback SQL。", "ERROR")
251
+ return self._generate_fallback_sql(prompt)
252
+
253
  try:
254
+ # 清理垃圾收集
255
+ gc.collect()
 
 
 
 
 
 
 
 
 
 
 
256
 
257
+ output = self.llm(
258
+ prompt,
259
+ max_tokens=MAX_TOKENS, # 生成長度可配置
 
 
 
 
 
260
  temperature=0.1,
261
  top_p=0.9,
262
+ echo=False,
263
+ stop=["```", ";", "\n\n", "</s>"],
264
  )
265
 
266
+ self._log(f"模型原始輸出: {str(output)[:200]}...", "DEBUG")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
 
268
+ if output and "choices" in output and len(output["choices"]) > 0:
269
+ generated_text = output["choices"][0]["text"]
270
+ self._log(f"提取出的生成文本: {generated_text.strip()}", "DEBUG")
271
+ return generated_text.strip()
272
+ else:
273
+ self._log("模型的原始輸出格式不正確或為空。", "ERROR")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
  return ""
275
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  except Exception as e:
277
+ self._log(f"模型生成過程中發生嚴重錯誤: {e}", "CRITICAL")
278
+ import traceback
279
+ self._log(traceback.format_exc(), "DEBUG")
280
+ return ""
 
 
281
 
282
  def _load_schema(self) -> Dict:
283
  """載入數據庫結構"""
 
285
  schema_path = hf_hub_download(
286
  repo_id=DATASET_REPO_ID,
287
  filename="sqlite_schema_FULL.json",
288
+ repo_type="dataset",
289
+ cache_dir=TEMP_DIR
290
  )
291
  with open(schema_path, "r", encoding="utf-8") as f:
292
  schema_data = json.load(f)
293
 
294
+ self._log(f"Schema 載入成功,包含 {len(schema_data)} 個表格:")
 
295
  for table_name, columns in schema_data.items():
296
  self._log(f" - {table_name}: {len(columns)} 個欄位")
 
 
 
297
 
298
+ self._log("數據庫結構載入完成")
299
  return schema_data
300
 
301
  except Exception as e:
302
+ self._log(f"載入 schema 失敗: {e}", "ERROR")
303
  return {}
304
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
  def _encode_texts(self, texts):
306
  """編碼文本為嵌入向量"""
307
  if isinstance(texts, str):
308
  texts = [texts]
 
309
  inputs = self.embed_tokenizer(texts, padding=True, truncation=True,
310
+ return_tensors="pt", max_length=512)
311
+ # 移動到對應設備
312
+ try:
313
+ inputs = {k: v.to(self.embed_device) for k, v in inputs.items()}
314
+ except Exception:
315
+ pass
316
 
317
  with torch.no_grad():
318
  outputs = self.embed_model(**inputs)
319
 
320
  # 使用平均池化
321
  embeddings = outputs.last_hidden_state.mean(dim=1)
322
+ return embeddings.detach().cpu()
323
 
324
  def _load_and_index_dataset(self):
325
  """載入數據集並建立 FAISS 索引"""
326
  try:
327
+ if not ENABLE_INDEX:
328
+ self._log("已禁用相似範例索引(ENABLE_INDEX=0)。啟動更快,將不使用 few-shot。")
329
+ return None, None
330
 
331
+ dataset = load_dataset(
332
+ DATASET_REPO_ID,
333
+ data_files="training_data.jsonl",
334
+ split="train",
335
+ cache_dir=TEMP_DIR
336
+ )
337
 
338
+ # 過濾不完整樣本
339
+ original_count = len(dataset)
340
  dataset = dataset.filter(
341
  lambda ex: isinstance(ex.get("messages"), list)
342
  and len(ex["messages"]) >= 2
 
346
  )
347
  )
348
 
349
+ self._log(f"資料集清理: 原始 {original_count} 筆, 過濾後 {len(dataset)} 筆")
 
 
 
350
 
351
  if len(dataset) == 0:
352
  self._log("清理後資料集為空,無法建立索引。", "ERROR")
 
355
  corpus = [item['messages'][0]['content'] for item in dataset]
356
  self._log(f"正在編碼 {len(corpus)} 個問題...")
357
 
358
+ # 批量編碼以節省內存
359
  embeddings_list = []
360
+ batch_size = EMBED_BATCH # 可配置的批次大小(CPU 預設更小)
361
 
362
  for i in range(0, len(corpus), batch_size):
363
  batch_texts = corpus[i:i+batch_size]
364
  batch_embeddings = self._encode_texts(batch_texts)
365
  embeddings_list.append(batch_embeddings)
366
+
367
+ # 清理內存
368
+ if i % (batch_size * 4) == 0:
369
+ gc.collect()
370
+
371
  self._log(f"已編碼 {min(i+batch_size, len(corpus))}/{len(corpus)}")
372
 
373
  all_embeddings = torch.cat(embeddings_list, dim=0).numpy()
 
376
  index = faiss.IndexFlatIP(all_embeddings.shape[1])
377
  index.add(all_embeddings.astype('float32'))
378
 
379
+ # 清理內存
380
+ del embeddings_list, all_embeddings
381
+ gc.collect()
382
+
383
+ self._log("向量索引建立完成")
384
  return dataset, index
385
 
386
  except Exception as e:
387
+ self._log(f"載入數據失敗: {e}", "ERROR")
388
  return None, None
389
 
390
  def _identify_relevant_tables(self, question: str) -> List[str]:
 
415
 
416
  return relevant_tables[:3] # 最多返回3個相關表格
417
 
 
 
418
  def _format_relevant_schema(self, table_names: List[str]) -> str:
419
+ """生成一個簡化的 Schema 字符串"""
 
 
420
  if not self.schema:
421
  return "No schema available.\n"
422
 
 
436
  formatted = ""
437
  for table in real_table_names:
438
  if table in self.schema:
 
439
  formatted += f"Table: {table}\n"
440
  cols_str = []
441
+ # 只顯示前 8 個關鍵欄位以節省內存
442
+ for col in self.schema[table][:8]:
443
  col_name = col['name']
444
  col_type = col['type']
445
+ cols_str.append(f"{col_name} ({col_type})")
 
 
 
 
 
446
  formatted += f"Columns: {', '.join(cols_str)}\n\n"
447
 
448
  return formatted.strip()
449
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
450
  def find_most_similar(self, question: str, top_k: int) -> List[Dict]:
451
  """使用 FAISS 快速檢索相似問題"""
452
  if self.faiss_index is None or self.dataset is None:
 
466
  if len(results) >= top_k:
467
  break
468
 
469
+ idx = int(idx)
470
+ if idx >= len(self.dataset):
 
 
471
  continue
472
 
473
  item = self.dataset[idx]
 
474
  if not isinstance(item.get('messages'), list) or len(item['messages']) < 2:
475
  continue
476
+
477
  q_content = (item['messages'][0].get('content') or '').strip()
478
  a_content = (item['messages'][1].get('content') or '').strip()
479
  if not q_content or not a_content:
 
496
  return results
497
 
498
  except Exception as e:
499
+ self._log(f"檢索失敗: {e}", "ERROR")
500
  return []
501
 
 
 
502
  def _build_prompt(self, user_q: str, examples: List[Dict]) -> str:
503
+ """建立簡化的提示詞"""
 
 
504
  relevant_tables = self._identify_relevant_tables(user_q)
 
 
505
  schema_str = self._format_relevant_schema(relevant_tables)
506
 
507
  example_str = "No example available."
 
509
  best_example = examples[0]
510
  example_str = f"Question: {best_example['question']}\nSQL:\n```sql\n{best_example['sql']}\n```"
511
 
512
+ # 簡化的 prompt,減少 token 使用
513
+ prompt = f"""### TASK ###
514
+ Generate SQLite query for the question below.
515
 
516
  ### SCHEMA ###
517
  {schema_str}
 
519
  ### EXAMPLE ###
520
  {example_str}
521
 
522
+ ### QUESTION ###
523
+ {user_q}
524
+
525
+ SQL:
526
  ```sql
527
  SELECT
528
  """
 
 
529
  return prompt
530
 
531
+ def _rule_based_sql(self, question: str) -> Optional[str]:
532
+ """規則先行:對常見查詢用模板直接生成 SQL,繞過 LLM。"""
533
+ q = (question or "").strip()
534
+ q_lower = q.lower()
535
+
536
+ # 兩年比較(完成數量、每月)
537
+ m = re.search(r"(20\d{2}).{0,6}(?:與|和|跟)\s*(20\d{2}).{0,10}(比較|對比).{0,10}(完成|報告|數量|件|工單)", q)
538
+ if m:
539
+ y1, y2 = m.group(1), m.group(2)
540
+ return (
541
+ "SELECT strftime('%Y-%m', completed_time) AS month, "
542
+ f"SUM(CASE WHEN strftime('%Y', completed_time)='{y1}' THEN 1 ELSE 0 END) AS count_{y1}, "
543
+ f"SUM(CASE WHEN strftime('%Y', completed_time)='{y2}' THEN 1 ELSE 0 END) AS count_{y2} "
544
+ "FROM jobtimeline "
545
+ f"WHERE strftime('%Y', completed_time) IN ('{y1}','{y2}') "
546
+ "GROUP BY month ORDER BY month;"
547
+ )
548
+
549
+ # 指定年份每月完成數量
550
+ m = re.search(r"(20\d{2})年.*每月.*(完成|報告|數量|件|工單)", q)
551
+ if m:
552
+ year = m.group(1)
553
+ return (
554
+ "SELECT strftime('%Y-%m', completed_time) AS month, COUNT(*) AS count "
555
+ "FROM jobtimeline "
556
+ f"WHERE strftime('%Y', completed_time)='{year}' "
557
+ "GROUP BY month ORDER BY month;"
558
+ )
559
+
560
+ # 評級分布(Pass/Fail)
561
+ if ("評級" in q) or ("pass" in q_lower) or ("fail" in q_lower):
562
+ return "SELECT rating, COUNT(*) AS count FROM tsr53sampledescription GROUP BY rating;"
563
+
564
+ # 金額最高 Top N(預設 10)
565
+ m = re.search(r"金額.*?(?:最高|前|top)\s*(\d+)?", q_lower)
566
+ if m:
567
+ n = m.group(1) or "10"
568
+ return f"SELECT * FROM tsr53invoice ORDER BY amount DESC LIMIT {n};"
569
+
570
+ # 客戶工作單數量最多 Top N
571
+ m = re.search(r"客戶.*?(?:最多|top|前)\s*(\d+)?", q_lower)
572
+ if m:
573
+ n = m.group(1) or "10"
574
+ return f"SELECT applicant, COUNT(*) AS count FROM tsr53sampledescription GROUP BY applicant ORDER BY count DESC LIMIT {n};"
575
+
576
+ # 昨天完成多少
577
+ if "昨天" in q:
578
+ return (
579
+ "SELECT COUNT(*) AS count FROM jobtimeline "
580
+ "WHERE date(completed_time)=date('now','-1 day');"
581
+ )
582
+
583
+ return None
584
+
585
+ def _finalize_sql(self, sql_text: str, status: str) -> Tuple[str, str]:
586
+ """最終整理 SQL:補分號、去除多餘空白並回傳 (sql, 狀態)。"""
587
+ try:
588
+ sql_clean = (sql_text or "").strip()
589
+ if sql_clean and not sql_clean.endswith(";"):
590
+ sql_clean += ";"
591
+ return sql_clean, status
592
+ except Exception as e:
593
+ self._log(f"最終整理 SQL 失敗: {e}", "ERROR")
594
+ return (sql_text or ""), status
595
+
596
+ def _validate_and_fix_sql(self, question: str, raw_response: str) -> Tuple[Optional[str], str]:
597
+ """
598
+ (V29 / 穩健正則 + 智能計數) 多層次 SQL 生成:
599
+ 1) 嘗試規則/模板動態組合
600
+ 2) 失敗則解析 AI 輸出並做方言/Schema 修正
601
+ 回傳: (sql 或 None, 狀態描述)
602
+ """
603
+ q = question or ""
604
+ q_lower = q.lower()
605
+
606
+ # 先嘗試內建的規則先行器
607
+ rb = self._rule_based_sql(q)
608
+ if rb:
609
+ self._log("_validate_and_fix_sql 命中規則模板")
610
+ return self._finalize_sql(rb, "規則生成")
611
+
612
+ # 統一實體識別(簡化版)
613
+ entity_match_data = None
614
+ entity_patterns = [
615
+ {'pattern': r"(買家|买家|buyer)\s*(?:id|代號|代碼|代号|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.BuyerID', 'type': '買家ID'},
616
+ {'pattern': r"(申請方|申请方|申請廠商|申请厂商|applicant)\s*(?:id|代號|代碼|代号|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.ApplicantID', 'type': '申請方ID'},
617
+ {'pattern': r"(付款方|付款厂商|invoiceto)\s*(?:id|代號|代碼|代号|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.InvoiceToID', 'type': '付款方ID'},
618
+ {'pattern': r"(代理商|agent)\s*(?:id|代號|代碼|代号|代码)\s*'\"?\b([A-Z]\d{4}[A-Z])\b'\"?", 'column': 'sd.AgentID', 'type': '代理商ID'},
619
+ {'pattern': r"(買家|买家|buyer|客戶)\s+([a-zA-Z0-9&.-]+)", 'column': 'sd.BuyerName', 'type': '買家'},
620
+ {'pattern': r"(申請方|申请方|申請廠商|申请厂商|applicant)\s+([a-zA-Z0-9&.-]+)", 'column': 'sd.ApplicantName', 'type': '申請方'},
621
+ {'pattern': r"(付款方|付款厂商|invoiceto)\s+([a-zA-Z0-9&.-]+)", 'column': 'sd.InvoiceToName', 'type': '付款方'},
622
+ {'pattern': r"(代理商|agent)\s+([a-zA-Z0-9&.-]+)", 'column': 'sd.AgentName', 'type': '代理商'},
623
+ {'pattern': r"\b([A-Z]\d{4}[A-Z])\b", 'column': 'sd.ApplicantID', 'type': 'ID'}
624
+ ]
625
+ for p in entity_patterns:
626
+ m = re.search(p['pattern'], q, re.IGNORECASE)
627
+ if m:
628
+ entity_value = m.group(2) if len(m.groups()) > 1 else m.group(1)
629
+ entity_match_data = {"type": p['type'], "name": entity_value.strip().upper(), "column": p['column']}
630
+ break
631
+
632
+ # 模組化意圖偵測與動態 SQL 組合
633
+ intents: Dict[str, str] = {}
634
+ sql = {
635
+ 'select': [], 'from': '', 'joins': [], 'where': [],
636
+ 'group_by': [], 'order_by': [], 'log_parts': []
637
+ }
638
+
639
+ # 動作意圖:count / list
640
+ if any(kw in q_lower for kw in ['幾份', '多少', '數量', '總數', 'how many', 'count']):
641
+ intents['action'] = 'count'
642
+ if ("測試項目" in q) or ("test item" in q_lower):
643
+ sql['select'].append("COUNT(jip.ItemCode) AS item_count")
644
+ sql['log_parts'].append("測試項目總數")
645
+ else:
646
+ sql['select'].append("COUNT(DISTINCT jt.JobNo) AS report_count")
647
+ sql['log_parts'].append("報告總數")
648
+ elif any(kw in q_lower for kw in ['報告號碼', '報告清單', '列出報告', 'report number', 'list of reports']):
649
+ intents['action'] = 'list'
650
+ sql['select'].append("jt.JobNo, jt.ReportAuthorization")
651
+ sql['order_by'].append("jt.ReportAuthorization DESC")
652
+ sql['log_parts'].append("報告列表")
653
+
654
+ # 時間意圖:年/月
655
+ ym = re.search(r'(\d{4})\s*年?', q)
656
+ mm = re.search(r'(\d{1,2})\s*月', q)
657
+ if ym:
658
+ year = ym.group(1)
659
+ sql['where'].append(f"strftime('%Y', jt.ReportAuthorization) = '{year}'")
660
+ sql['log_parts'].append(f"{year}年")
661
+ if mm:
662
+ month = mm.group(1).zfill(2)
663
+ sql['where'].append(f"strftime('%m', jt.ReportAuthorization) = '{month}'")
664
+ sql['log_parts'].append(f"{month}月")
665
+
666
+ # 實體意圖
667
+ if entity_match_data:
668
+ if "TSR53SampleDescription" not in " ".join(sql['joins']):
669
+ sql['joins'].append("JOIN TSR53SampleDescription AS sd ON jt.JobNo = sd.JobNo")
670
+ entity_name, column_name = entity_match_data['name'], entity_match_data['column']
671
+ match_op = '=' if column_name.endswith('ID') else 'LIKE'
672
+ entity_val = f"'%{entity_name}%'" if match_op == 'LIKE' else f"'{entity_name}'"
673
+ sql['where'].append(f"{column_name} {match_op} {entity_val}")
674
+ sql['log_parts'].append(entity_match_data['type'] + ":" + entity_name)
675
+ if intents.get('action') == 'list':
676
+ sql['select'].append("sd.BuyerName")
677
+
678
+ # 評級意圖
679
+ if ('fail' in q_lower) or ('失敗' in q_lower):
680
+ if "TSR53SampleDescription" not in " ".join(sql['joins']):
681
+ sql['joins'].append("JOIN TSR53SampleDescription AS sd ON jt.JobNo = sd.JobNo")
682
+ sql['where'].append("sd.OverallRating = 'Fail'")
683
+ sql['log_parts'].append("Fail")
684
+ elif ('pass' in q_lower) or ('通過' in q_lower):
685
+ if "TSR53SampleDescription" not in " ".join(sql['joins']):
686
+ sql['joins'].append("JOIN TSR53SampleDescription AS sd ON jt.JobNo = sd.JobNo")
687
+ sql['where'].append("sd.OverallRating = 'Pass'")
688
+ sql['log_parts'].append("Pass")
689
+
690
+ # 實驗組 (LabGroup)
691
+ lab_group_mapping = {'A': 'TA', 'B': 'TB', 'C': 'TC', 'D': 'TD', 'E': 'TE', 'Y': 'TY'}
692
+ lgm = re.search(r'([A-Z]{1,2})組', q, re.IGNORECASE)
693
+ if lgm:
694
+ user_group = lgm.group(1).upper()
695
+ db_group = lab_group_mapping.get(user_group, user_group)
696
+ sql['joins'].append("JOIN JobItemsInProgress AS jip ON jt.JobNo = jip.JobNo")
697
+ sql['where'].append(f"jip.LabGroup = '{db_group}'")
698
+ sql['log_parts'].append(f"{user_group}組(->{db_group})")
699
+
700
+ # 若動作已決定,組裝模板 SQL
701
+ if 'action' in intents:
702
+ sql['from'] = "FROM JobTimeline AS jt"
703
+ if sql['where']:
704
+ sql['where'].insert(0, "jt.ReportAuthorization IS NOT NULL")
705
+ select_clause = "SELECT " + ", ".join(sorted(list(set(sql['select'])))) if sql['select'] else "SELECT *"
706
+ from_clause = sql['from']
707
+ joins_clause = " ".join(sql['joins'])
708
+ where_clause = ("WHERE " + " AND ".join(sql['where'])) if sql['where'] else ""
709
+ orderby_clause = ("ORDER BY " + ", ".join(sql['order_by'])) if sql['order_by'] else ""
710
+ template_sql = f"{select_clause} {from_clause} {joins_clause} {where_clause} {orderby_clause};"
711
+ query_log = " ".join(sql['log_parts'])
712
+ self._log(f"🔄 偵測到組合意圖【{query_log}】,啟用動態模板。")
713
+ return self._finalize_sql(template_sql, f"模板覆寫: {query_log} 查詢")
714
+
715
+ # 第二層:解析 AI 輸出並修正
716
+ self._log("未觸發任何模板,嘗試解析並修正 AI 輸出…")
717
+ parsed_sql = parse_sql_from_response(raw_response)
718
+ if not parsed_sql:
719
+ self._log(f"❌ 未能從模型回應中解析出任何 SQL。原始回應: {raw_response}", "ERROR")
720
+ return None, f"無法解析SQL。原始回應:\n{raw_response}"
721
+
722
+ self._log(f"📊 解析出的原始 SQL: {parsed_sql}", "DEBUG")
723
+ fixed_sql = " " + parsed_sql.strip() + " "
724
+ fixes_applied = []
725
+
726
+ # 方言修正
727
+ dialect_corrections = {r'YEAR\s*\(([^)]+)\)': r"strftime('%Y', \1)"}
728
+ for pat, rep in dialect_corrections.items():
729
+ if re.search(pat, fixed_sql, re.IGNORECASE):
730
+ fixed_sql = re.sub(pat, rep, fixed_sql, flags=re.IGNORECASE)
731
+ fixes_applied.append(f"修正方言: {pat}")
732
+
733
+ # Schema 名稱修正(常見別名 => 真實欄位)
734
+ schema_map = {
735
+ 'TSR53Report':'TSR53SampleDescription',
736
+ 'TSR53InvoiceReportNo':'JobNo',
737
+ 'TSR53ReportNo':'JobNo',
738
+ 'TSR53InvoiceNo':'JobNo',
739
+ 'TSR53InvoiceCreditNoteNo':'InvoiceCreditNoteNo',
740
+ 'TSR53InvoiceLocalAmount':'LocalAmount',
741
+ 'Status':'OverallRating',
742
+ 'ReportStatus':'OverallRating'
743
+ }
744
+ for wrong, correct in schema_map.items():
745
+ pat = r'\b' + re.escape(wrong) + r'\b'
746
+ if re.search(pat, fixed_sql, re.IGNORECASE):
747
+ fixed_sql = re.sub(pat, correct, fixed_sql, flags=re.IGNORECASE)
748
+ fixes_applied.append(f"映射 Schema: '{wrong}' -> '{correct}'")
749
+
750
+ status = "AI 生成並成功修正" if fixes_applied else "AI 生成且無需修正"
751
+ return self._finalize_sql(fixed_sql, status)
752
 
753
  def _generate_fallback_sql(self, prompt: str) -> str:
754
  """當模型不可用時的備用 SQL 生成"""
755
  prompt_lower = prompt.lower()
756
 
 
757
  if "統計" in prompt or "數量" in prompt or "多少" in prompt:
758
  if "月" in prompt:
759
  return "SELECT strftime('%Y-%m', completed_time) as month, COUNT(*) as count FROM jobtimeline GROUP BY month ORDER BY month;"
 
761
  return "SELECT applicant, COUNT(*) as count FROM tsr53sampledescription GROUP BY applicant ORDER BY count DESC;"
762
  else:
763
  return "SELECT COUNT(*) as total_count FROM jobtimeline WHERE completed_time IS NOT NULL;"
 
764
  elif "金額" in prompt or "總額" in prompt:
765
  return "SELECT SUM(amount) as total_amount FROM tsr53invoice;"
 
766
  elif "評級" in prompt or "pass" in prompt_lower or "fail" in prompt_lower:
767
  return "SELECT rating, COUNT(*) as count FROM tsr53sampledescription GROUP BY rating;"
 
768
  else:
769
  return "SELECT * FROM jobtimeline LIMIT 10;"
770
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
771
  def process_question(self, question: str) -> Tuple[str, str]:
772
+ """處理使用者問題"""
773
  # 檢查緩存
774
  if question in self.query_cache:
775
+ self._log("使用緩存結果")
776
  return self.query_cache[question]
777
 
778
  self.log_history = []
779
+ self._log(f"處理問題: {question}")
780
+ self._log(check_memory_usage())
781
+
782
+ # 0. 規則先行(命中則直接返回)
783
+ rb = self._rule_based_sql(question)
784
+ if rb:
785
+ self._log("規則命中,直接生成 SQL(跳過 LLM)")
786
+ self._log(f"最終 SQL: {rb}")
787
+ result = (rb, "規則生成")
788
+ self.query_cache[question] = result
789
+ gc.collect()
790
+ return result
791
+
792
+ # 1. 檢索相似範例
793
+ self._log("尋找相似範例...")
794
+ examples = self.find_most_similar(question, FEW_SHOT_EXAMPLES_COUNT)
795
+ if examples:
796
+ self._log(f"找到 {len(examples)} 個相似範例")
797
 
798
+ # 2. 建立提示詞
799
+ self._log("建立 Prompt...")
800
+ prompt = self._build_prompt(question, examples)
 
 
801
 
802
+ # 3. 生成 AI 回應
803
+ self._log("開始生成 AI 回應...")
804
+ response = self.huggingface_api_call(prompt)
805
 
806
+ # 4. 驗證/修正 SQL
807
+ fixed_sql, status_message = self._validate_and_fix_sql(question, response)
808
+ if not fixed_sql:
809
+ fixed_sql = "SELECT '未能生成有效的SQL,請嘗試換個問題描述';"
810
+ status_message = status_message or "生成失敗"
811
 
812
+ self._log(f"最終 SQL: {fixed_sql}")
813
+ result = (fixed_sql, status_message)
814
 
815
+ # 緩存結果
816
+ self.query_cache[question] = result
 
 
 
817
 
818
+ # 清理內存
819
+ gc.collect()
820
 
821
+ return result
 
 
 
822
 
823
+ # ==================== Gradio 介面與 API ====================
824
+ print("正在初始化 Text-to-SQL 系統...")
825
  text_to_sql_system = TextToSQLSystem()
826
 
827
def process_query(q: str, prompt_override: str = ""):
    """Gradio handler: map (question, optional prompt override) to a 3-tuple.

    Args:
        q: the user's natural-language question.
        prompt_override: optional; either a ready-made SQL statement
            (returned verbatim) or a complete prompt sent straight to the LLM.

    Returns:
        (sql, status, logs) — generated SQL, a status string, and the tail
        of the system's processing log.
    """
    # Guard against missing input. Strip each field separately: the old
    # `(q or prompt_override).strip()` short-circuited on a whitespace-only
    # question and wrongly rejected a valid prompt_override. Also None-safe.
    if not ((q or "").strip() or (prompt_override or "").strip()):
        return "", "等待輸入", "請輸入問題或提供 prompt_override"

    # Override path (used by desktop clients via the hidden textbox).
    if prompt_override and prompt_override.strip():
        po = prompt_override.strip()
        # If the override is already SQL, return it verbatim (terminated).
        if po.upper().startswith("SELECT"):
            if not po.endswith(";"):
                po += ";"
            text_to_sql_system._log("使用 prompt_override 直接回傳 SQL")
            logs = "\n".join(text_to_sql_system.log_history[-15:])
            return po, "override", logs
        # Otherwise treat the override as a complete prompt for the LLM.
        text_to_sql_system._log("使用 prompt_override 直接調用 LLM")
        response = text_to_sql_system.huggingface_api_call(po)
        fixed_sql, status_message = text_to_sql_system._validate_and_fix_sql(q or "", response)
        if not fixed_sql:
            fixed_sql = text_to_sql_system._generate_fallback_sql(po)
            status_message = status_message or "override 回退"
        text_to_sql_system._log(f"最終 SQL: {fixed_sql}")
        logs = "\n".join(text_to_sql_system.log_history[-15:])
        return fixed_sql, "override", logs

    # Normal path: full retrieval + generation pipeline.
    sql, status = text_to_sql_system.process_question(q)
    logs = "\n".join(text_to_sql_system.log_history[-15:])  # last 15 log lines
    return sql, status, logs
855
 
856
  # 範例問題
 
862
  "A組昨天完成了多少個測試項目?"
863
  ]
864
 
865
# Build the Gradio UI: question input + button on the left, generated SQL on
# the right, processing logs in a collapsible accordion.
with gr.Blocks(theme=gr.themes.Soft(), title="Text-to-SQL 智能助手 (HF Space)") as demo:
    gr.Markdown("# Text-to-SQL 智能助手 (Hugging Face Space)")
    gr.Markdown("輸入自然語言問題,自動生成SQL查詢語句。使用 /tmp 暫存,每次啟動重新下載模型。支援桌面端透過 /predict API 呼叫。")

    with gr.Row():
        with gr.Column(scale=2):
            inp = gr.Textbox(lines=3, label="您的問題", placeholder="例如:2024年每月完成多少份報告?")
            btn = gr.Button("生成 SQL", variant="primary")
            status = gr.Textbox(label="狀態", interactive=False)
            # Hidden prompt_override field so desktop clients can pass a raw
            # SQL statement or full prompt through the /predict API.
            prompt_override = gr.Textbox(label="prompt_override", visible=False)

        with gr.Column(scale=3):
            sql_out = gr.Code(label="生成的 SQL", language="sql", lines=8)

    with gr.Accordion("處理日誌", open=False):
        logs = gr.Textbox(lines=10, label="日誌", interactive=False)

    # Clickable example questions (the `examples` list is defined above).
    gr.Examples(
        examples=examples,
        inputs=inp,
        label="點擊試用範例問題"
    )

    # Wire events; the button click is also exposed as the /predict endpoint.
    btn.click(process_query, inputs=[inp, prompt_override], outputs=[sql_out, status, logs], api_name="/predict")
    inp.submit(process_query, inputs=[inp, prompt_override], outputs=[sql_out, status, logs])
893
 
894
if __name__ == "__main__":
    # Launch the Gradio server.
    # NOTE(review): share=True is typically ignored (with a warning) when
    # running on Hugging Face Spaces — confirm it is intended for local runs.
    demo.launch(
        server_name="0.0.0.0",  # listen on all interfaces (container-friendly)
        server_port=7860,       # standard HF Spaces port
        share=True,
        show_error=True
    )