Spaces:

Paul720810
/

Softline-SQL-Assistant

Sleeping

App Files Files Community

Paul720810 committed on Sep 3, 2025

Commit

7adb5ab

verified ·

1 Parent(s): 0481392

Update app.py

Browse files

Files changed (1) hide show

app.py +260 -827

app.py CHANGED Viewed

@@ -12,9 +12,15 @@ from typing import List, Dict, Tuple, Optional
 import numpy as np
 # ==================== 配置區 ====================
-HF_TOKEN = os.environ.get("HF_TOKEN", None) # 建議從環境變數讀取
 DATASET_REPO_ID = "Paul720810/Text-to-SQL-Softline"
-SIMILARITY_THRESHOLD = 0.65 # 適度提高閾值，確保檢索到的問題意圖更一致
 # 雲端環境檢測
 IS_SPACES = os.environ.get("SPACE_ID") is not None
@@ -30,831 +36,271 @@ print("=" * 60)
 # ==================== 獨立工具函數 (不依賴類別實例) ====================
 def get_current_time():
     """獲取當前時間字串"""
-    return datetime.now().strftime("%H:%M:%S")
-def validate_sql(sql_query: str) -> Dict:
-    """驗證SQL語句的語法和安全性"""
-    if not sql_query or not sql_query.strip():
-        return {"valid": False, "issues": ["SQL語句為空"], "is_safe": False, "empty": True}
-    sql_clean = sql_query.strip()
-    if len(sql_clean) < 5:
-        return {"valid": False, "issues": ["SQL過短"], "is_safe": False, "empty": True}
-    security_issues = []
-    sql_upper = sql_clean.upper()
-    dangerous_keywords = ['DROP', 'DELETE', 'INSERT', 'UPDATE', 'ALTER', 'TRUNCATE', 'EXEC', 'EXECUTE']
-    for keyword in dangerous_keywords:
-        if f" {keyword} " in f" {sql_upper} ":
-            security_issues.append(f"危險操作: {keyword}")
-    if "SELECT" not in sql_upper:
-        security_issues.append("缺少SELECT")
-    if "FROM" not in sql_upper:
-        security_issues.append("缺少FROM")
-    is_valid = not security_issues
-    is_safe = all('危險' not in issue for issue in security_issues)
-    return {"valid": is_valid, "issues": security_issues, "is_safe": is_safe, "empty": False}
-def analyze_question_type(question: str) -> Dict:
-    """增強的問題分析 - 更精確的意圖識別"""
-    question_lower = question.lower()
-    analysis = {
-        "type": "unknown",
-        "keywords": [],
-        "has_count": "多少" in question_lower or "幾個" in question_lower or "數量" in question_lower or "count" in question_lower,
-        "has_date": "時間" in question_lower or "日期" in question_lower or "月份" in question_lower or "年" in question_lower or "yesterday" in question_lower or "昨天" in question_lower,
-        "has_group": "每" in question_lower or "各" in question_lower or "分組" in question_lower or "group" in question_lower,
-        "specific_intent": "general_query"  # 新增：具體意圖，預設為通用查詢
-    }
-    # **更精確的意圖識別 - 增加更多模式**
-    if ("每月" in question_lower or "monthly" in question_lower) and ("完成" in question_lower or "completed" in question_lower or "報告" in question_lower or "工作單" in question_lower):
-        analysis["specific_intent"] = "monthly_completion_count"
-        analysis["type"] = "time_series"
-    elif ("評級" in question_lower or "pass" in question_lower or "fail" in question_lower or "rating" in question_lower) and ("統計" in question_lower or "分佈" in question_lower or "多少" in question_lower or "distribution" in question_lower):
-        analysis["specific_intent"] = "rating_distribution"
-        analysis["type"] = "statistics"
-    elif ("金額" in question_lower or "amount" in question_lower or "價格" in question_lower or "費用" in question_lower) and ("最高" in question_lower or "top" in question_lower or "排名" in question_lower or "highest" in question_lower):
-        analysis["specific_intent"] = "amount_ranking"
-        analysis["type"] = "ranking"
-    elif ("公司" in question_lower or "客戶" in question_lower or "申請方" in question_lower or "company" in question_lower or "client" in question_lower) and ("統計" in question_lower or "數量" in question_lower or "排名" in question_lower or "count" in question_lower):
-        analysis["specific_intent"] = "company_statistics"
-        analysis["type"] = "statistics"
-    elif ("實驗室" in question_lower or "lab" in question_lower or "組" in question_lower) and ("完成" in question_lower or "completed" in question_lower):
-        analysis["specific_intent"] = "lab_completion"
-        analysis["type"] = "lab_specific"
-    elif ("異常" in question_lower or "超過" in question_lower or "延遲" in question_lower or "slow" in question_lower or "long" in question_lower):
-        analysis["specific_intent"] = "anomaly_detection"
-        analysis["type"] = "analysis"
-    elif ("買方" in question_lower or "buyer" in question_lower) and ("完成" in question_lower or "completed" in question_lower):
-        analysis["specific_intent"] = "buyer_specific"
-        analysis["type"] = "buyer_analysis"
-    elif ("耗時" in question_lower or "時間" in question_lower or "duration" in question_lower or "time" in question_lower) and ("最久" in question_lower or "longest" in question_lower):
-        analysis["specific_intent"] = "duration_analysis"
-        analysis["type"] = "time_analysis"
-    # 提取關鍵詞以供後續使用
-    keywords = []
-    # 公司/品牌名稱
-    brand_patterns = [r"puma", r"under armour", r"skechers", r"nike", r"adidas"]
-    for pattern in brand_patterns:
-        if re.search(pattern, question_lower):
-            keywords.append(pattern.replace(" ", "_"))
-    # 實驗室組別
-    lab_patterns = [r"[a-e]組", r"ta", r"tb", r"tc", r"td", r"te"]
-    for pattern in lab_patterns:
-        if re.search(pattern, question_lower):
-            keywords.append(pattern)
-    analysis["keywords"] = keywords
-    return analysis
-# ==================== 完整數據加載模塊 ====================
-class CompleteDataLoader:
-    def __init__(self, hf_token: str):
-        self.hf_token = hf_token
-        self.questions = []
-        self.sql_answers = []
-        self.sql_quality = []
-        self.schema_data = {}
-    def preview_dataset_structure(self, sample_size: int = 5) -> None:
-        """預覽數據集結構以幫助調試"""
-        try:
-            print(f"📋 預覽數據集結構 (前 {sample_size} 個範例)...")
-            raw_dataset = load_dataset(DATASET_REPO_ID, token=self.hf_token)['train']
-            for i in range(min(sample_size, len(raw_dataset))):
-                item = raw_dataset[i]
-                print(f"\n--- 範例 {i+1} ---")
-                if 'messages' in item:
-                    user_content = item['messages'][0]['content']
-                    assistant_content = item['messages'][1]['content']
-                    print(f"User: {user_content[:120]}...")
-                    print(f"Assistant: {assistant_content[:120]}...")
-                    # 檢查SQL代碼塊
-                    sql_block_match = re.search(r'```sql\s*(.*?)\s*```', assistant_content, re.DOTALL)
-                    if sql_block_match:
-                        sql_content = sql_block_match.group(1).strip()
-                        print(f"✅ 找到SQL代碼塊: {sql_content[:60]}...")
-                    else:
-                        print("❌ 未找到SQL代碼塊")
-                        # 檢查是否有其他SQL格式
-                        if 'SELECT' in assistant_content.upper():
-                            print("⚠️ 但包含SELECT關鍵字")
-                        if 'SQL查詢:' in assistant_content:
-                            print("⚠️ 但包含'SQL查詢:'標記")
-                    # 檢查是否為JSON格式
-                    if assistant_content.strip().startswith('{'):
-                        try:
-                            json_data = json.loads(assistant_content)
-                            print(f"JSON Keys: {list(json_data.keys())}")
-                        except:
-                            print("JSON解析失敗")
-                else:
-                    print(f"無messages字段: {list(item.keys())}")
-            print(f"\n總數據量: {len(raw_dataset)} 項")
-        except Exception as e:
-            print(f"預覽失敗: {e}")
-    def diagnose_data_issues(self, sample_size: int = 20) -> None:
-        """診斷數據問題"""
         try:
-            print(f"🔍 診斷數據問題 (檢查前 {sample_size} 個可能有問題的項目)...")
-            raw_dataset = load_dataset(DATASET_REPO_ID, token=self.hf_token)['train']
-            issues_found = {"no_sql_block": 0, "empty_assistant": 0, "parsing_error": 0, "other": 0}
-            for i in range(min(sample_size, len(raw_dataset))):
-                item = raw_dataset[i]
-                try:
-                    if 'messages' in item and len(item['messages']) >= 2:
-                        assistant_content = item['messages'][1]['content']
-                        # 檢查SQL代碼塊
-                        sql_block_match = re.search(r'```sql\s*(.*?)\s*```', assistant_content, re.DOTALL)
-                        if not sql_block_match:
-                            issues_found["no_sql_block"] += 1
-                            if issues_found["no_sql_block"] <= 3:
-                                print(f"\n❌ 無SQL代碼塊 #{i}: {assistant_content[:200]}...")
-                        if not assistant_content.strip():
-                            issues_found["empty_assistant"] += 1
-                except Exception as e:
-                    issues_found["parsing_error"] += 1
-                    if issues_found["parsing_error"] <= 2:
-                        print(f"\n💥 解析錯誤 #{i}: {e}")
-            print(f"\n📊 診斷結果:")
-            for issue, count in issues_found.items():
-                print(f"  {issue}: {count}")
         except Exception as e:
-            print(f"診斷失敗: {e}")
-    def load_complete_dataset(self) -> bool:
-        try:
-            print(f"[{get_current_time()}] 正在加載完整數據集 '{DATASET_REPO_ID}'...")
-            raw_dataset = load_dataset(DATASET_REPO_ID, token=self.hf_token)['train']
-            successful_loads = 0
-            total_items = len(raw_dataset)
-            skipped_reasons = {"empty_question": 0, "empty_sql": 0, "parse_error": 0, "invalid_format": 0, "json_parse_error": 0}
-            for idx, item in enumerate(raw_dataset):
-                try:
-                    if 'messages' in item and len(item['messages']) >= 2:
-                        user_content = item['messages'][0]['content']
-                        assistant_content = item['messages'][1]['content']
-                        # 多種問題提取策略
-                        question = None
-                        # 策略1: 檢查是否為JSON格式的回應
-                        try:
-                            if assistant_content.strip().startswith('{'):
-                                json_data = json.loads(assistant_content)
-                                if 'sql' in json_data:
-                                    sql_query = json_data['sql']
-                                elif 'query' in json_data:
-                                    sql_query = json_data['query']
-                                else:
-                                    sql_query = None
-                                # 從JSON中提取問題 (如果有的話)
-                                if 'question' in json_data:
-                                    question = json_data['question']
-                                elif 'user_query' in json_data:
-                                    question = json_data['user_query']
-                            else:
-                                sql_query = None
-                        except json.JSONDecodeError:
-                            sql_query = None
-                        # 策略2: 標準「指令:」格式
-                        if not question:
-                            question_match = re.search(r'指令:\s*(.*?)(?:\n|$)', user_content)
-                            if question_match:
-                                question = question_match.group(1).strip()
-                        # 策略3: 如果沒找到，嘗試提取最後一行非空內容
-                        if not question:
-                            lines = [line.strip() for line in user_content.split('\n') if line.strip() and not line.startswith('#')]
-                            if lines:
-                                # 過濾掉看起來像標題的行
-                                for line in reversed(lines):
-                                    if not line.startswith('###') and '?' in line and len(line) > 5:
-                                        question = line
-                                        break
-                                if not question and lines:
-                                    question = lines[-1]
-                        # 策略4: 直接使用整個內容（作為最後手段）
-                        if not question:
-                            question = user_content.strip()
-                        # SQL提取邏輯（如果還沒從JSON中獲得）
-                        if not sql_query:
-                            # 策略1: SQL代碼塊格式（最常見）
-                            sql_block_match = re.search(r'```sql\s*(.*?)\s*```', assistant_content, re.DOTALL)
-                            if sql_block_match:
-                                sql_query = sql_block_match.group(1).strip()
-                            # 策略2: 標準「SQL查詢:」格式
-                            if not sql_query:
-                                sql_match = re.search(r'SQL查詢:\s*(.*?)(?:\n\n|$)', assistant_content, re.DOTALL)
-                                if sql_match:
-                                    sql_query = sql_match.group(1).strip()
-                                    # 清理可能的代碼塊標記
-                                    sql_query = re.sub(r'```sql|```', '', sql_query).strip()
-                            # 策略3: 查找任何包含 SELECT 或 WITH 的多行內容
-                            if not sql_query:
-                                lines = assistant_content.split('\n')
-                                sql_lines = []
-                                in_sql_block = False
-                                for line in lines:
-                                    line_upper = line.upper().strip()
-                                    # 開始條件：找到SQL關鍵字
-                                    if not in_sql_block and (line_upper.startswith('SELECT') or line_upper.startswith('WITH')):
-                                        in_sql_block = True
-                                        sql_lines.append(line)
-                                    # 繼續條件：在SQL塊中
-                                    elif in_sql_block:
-                                        # 結束條件：空行或看起來不像SQL的行
-                                        if not line.strip():
-                                            break
-                                        elif line.strip().startswith('```') and len(sql_lines) > 0:
-                                            break
-                                        elif line_upper.startswith('思考過程:') or line_upper.startswith('上下文:'):
-                                            break
-                                        else:
-                                            sql_lines.append(line)
-                                if sql_lines:
-                                    sql_query = '\n'.join(sql_lines).strip()
-                            # 策略4: 如果還是沒找到，嘗試更寬鬆的匹配
-                            if not sql_query:
-                                # 查找所有可能的SQL片段
-                                sql_patterns = [
-                                    r'(SELECT.*?FROM.*?)(?:\n\n|$)',
-                                    r'(WITH.*?SELECT.*?)(?:\n\n|$)',
-                                    r'SQL查詢:\s*\n(.*?)(?:\n\n|$)'
-                                ]
-                                for pattern in sql_patterns:
-                                    match = re.search(pattern, assistant_content, re.DOTALL | re.IGNORECASE)
-                                    if match:
-                                        candidate = match.group(1).strip()
-                                        # 基本驗證
-                                        if len(candidate) > 10 and ('SELECT' in candidate.upper() or 'WITH' in candidate.upper()):
-                                            sql_query = candidate
-                                            break
-                        # 清理SQL查詢
-                        if sql_query:
-                            # 移除各種標記
-                            sql_query = re.sub(r'```sql|```', '', sql_query).strip()
-                            sql_query = re.sub(r'^思考過程:.*?\n', '', sql_query, flags=re.MULTILINE).strip()
-                            sql_query = re.sub(r'^SQL查詢:\s*', '', sql_query, flags=re.MULTILINE).strip()
-                            # 移除多餘的空行
-                            sql_query = re.sub(r'\n\s*\n', '\n', sql_query).strip()
-                            # 確保SQL完整性 - 如果以分號結尾且內容合理，保留
-                            if not sql_query.endswith(';') and len(sql_query) > 20:
-                                # 檢查是否看起來像完整的SQL
-                                if 'FROM' in sql_query.upper() and sql_query.count('(') == sql_query.count(')'):
-                                    sql_query += ';'
-                        # 清理問題文本
-                        if question:
-                            question = re.sub(r'^###\s*', '', question).strip()
-                            question = re.sub(r'Your JSON Response.*', '', question).strip()
-                            # 移除多餘的上下文信息
-                            question = re.sub(r'\n上下文:.*', '', question, flags=re.DOTALL).strip()
-                        # 數據質量驗證（降低標準以提高利用率）
-                        if not question or len(question.strip()) < 3:
-                            skipped_reasons["empty_question"] += 1
-                            continue
-                        if not sql_query or len(sql_query.strip()) < 8:  # 進一步降低最小長度要求
-                            skipped_reasons["empty_sql"] += 1
-                            if idx < 10:  # 調試：顯示前10個被跳過的SQL為空的案例
-                                print(f"SQL為空案例 {idx}: 原始助手回應前100字符: {assistant_content[:100]}...")
-                            continue
-                        # 更寬鬆的SQL驗證
-                        sql_upper = sql_query.upper()
-                        if "SELECT" not in sql_upper and "WITH" not in sql_upper and "CREATE" not in sql_upper:
-                            skipped_reasons["invalid_format"] += 1
-                            if idx < 5:  # 調試：顯示前5個格式錯誤的案例
-                                print(f"格式錯誤案例 {idx}: SQL內容: {sql_query[:100]}...")
-                            continue
-                        self.questions.append(question)
-                        self.sql_answers.append(sql_query)
-                        successful_loads += 1
-                        # 調試：顯示前5個成功案例
-                        if successful_loads <= 5:
-                            print(f"✅ 成功案例 {successful_loads}:")
-                            print(f"  問題: {question[:80]}...")
-                            print(f"  SQL: {sql_query[:80]}...")
-                    else:
-                        skipped_reasons["invalid_format"] += 1
-                except json.JSONDecodeError as e:
-                    skipped_reasons["json_parse_error"] += 1
-                    continue
-                except Exception as e:
-                    skipped_reasons["parse_error"] += 1
-                    if idx < 3:  # 只顯示前3個錯誤
-                        print(f"跳過第 {idx} 項資料，錯誤: {e}")
-                    continue
-            print(f"數據加載完成: 成功載入 {successful_loads}/{total_items} 項")
-            print(f"跳過原因統計: 問題為空({skipped_reasons['empty_question']}) | SQL為空({skipped_reasons['empty_sql']}) | 格式錯誤({skipped_reasons['invalid_format']}) | JSON錯誤({skipped_reasons['json_parse_error']}) | 解析錯誤({skipped_reasons['parse_error']})")
-            return successful_loads > 0
-        except Exception as e:
-            print(f"數據集加載失敗: {e}")
-            return False
-    def load_schema(self) -> bool:
         try:
-            schema_file_path = hf_hub_download(repo_id=DATASET_REPO_ID, filename="sqlite_schema_FULL.json", repo_type='dataset', token=self.hf_token)
-            with open(schema_file_path, 'r', encoding='utf-8') as f:
-                self.schema_data = json.load(f)
-            print("Schema加載成功")
-            return True
         except Exception as e:
-            print(f"Schema加載失敗: {e}")
-            return False
-# ==================== 檢索系統 ====================
-class RetrievalSystem:
-    def __init__(self):
-        try:
-            # 根據環境選擇設備
-            device = DEVICE if 'DEVICE' in globals() else 'cpu'
-            print(f"🔧 初始化 SentenceTransformer (設備: {device})...")
-            self.embedder = SentenceTransformer('all-MiniLM-L6-v2', device=device)
-            self.question_embeddings = None
-            print("✅ SentenceTransformer 模型加載成功")
-        except Exception as e:
-            print(f"❌ SentenceTransformer 模型加載失敗: {e}")
-            self.embedder = None
-    def compute_embeddings(self, questions: List[str]):
-        if self.embedder and questions:
-            print(f"正在為 {len(questions)} 個問題計算向量...")
-            try:
-                # 雲端環境優化：分批處理以節省記憶體
-                batch_size = 32 if IS_SPACES else 64
-                self.question_embeddings = self.embedder.encode(
-                    questions,
-                    convert_to_tensor=True,
-                    show_progress_bar=True,
-                    batch_size=batch_size
-                )
-                print("向量計算完成")
-            except Exception as e:
-                print(f"向量計算失敗: {e}")
-                # 降級處理：使用更小的批次大小
-                try:
-                    print("嘗試使用較小批次大小重新計算...")
-                    self.question_embeddings = self.embedder.encode(
-                        questions,
-                        convert_to_tensor=True,
-                        show_progress_bar=True,
-                        batch_size=16
-                    )
-                    print("向量計算完成（降級模式）")
-                except Exception as e2:
-                    print(f"向量計算徹底失敗: {e2}")
-                    self.question_embeddings = None
-    def retrieve_similar(self, user_question: str, top_k: int = 1) -> List[Dict]:
-        if self.embedder is None or self.question_embeddings is None: return []
-        try:
-            question_embedding = self.embedder.encode(user_question, convert_to_tensor=True)
-            hits = util.semantic_search(question_embedding, self.question_embeddings, top_k=top_k)
-            return hits[0] if hits else []
-        except Exception as e:
-            print(f"檢索錯誤: {e}")
             return []
-# ==================== 主系統 ====================
-class CompleteTextToSQLSystem:
-    def __init__(self, hf_token: str):
-        self.hf_token = hf_token
-        self.data_loader = CompleteDataLoader(hf_token)
-        self.retrieval_system = RetrievalSystem()
-        self.initialize_system()
-    def diagnose_data_issues(self, sample_size: int = 20) -> None:
-        """診斷數據問題"""
-        try:
-            print(f"🔍 診斷數據問題 (檢查前 {sample_size} 個可能有問題的項目)...")
-            raw_dataset = load_dataset(DATASET_REPO_ID, token=self.hf_token)['train']
-            issues_found = {"no_sql_block": 0, "empty_assistant": 0, "parsing_error": 0, "other": 0}
-            for i in range(min(sample_size, len(raw_dataset))):
-                item = raw_dataset[i]
-                try:
-                    if 'messages' in item and len(item['messages']) >= 2:
-                        assistant_content = item['messages'][1]['content']
-                        # 檢查SQL代碼塊
-                        sql_block_match = re.search(r'```sql\s*(.*?)\s*```', assistant_content, re.DOTALL)
-                        if not sql_block_match:
-                            issues_found["no_sql_block"] += 1
-                            if issues_found["no_sql_block"] <= 3:
-                                print(f"\n❌ 無SQL代碼塊 #{i}: {assistant_content[:200]}...")
-                        if not assistant_content.strip():
-                            issues_found["empty_assistant"] += 1
-                except Exception as e:
-                    issues_found["parsing_error"] += 1
-                    if issues_found["parsing_error"] <= 2:
-                        print(f"\n💥 解析錯誤 #{i}: {e}")
-            print(f"\n📊 診斷結果:")
-            for issue, count in issues_found.items():
-                print(f"  {issue}: {count}")
-        except Exception as e:
-            print(f"診斷失敗: {e}")
-    def initialize_system(self):
-        print("正在初始化完整數據系統...")
-        # 首先預覽數據結構
-        self.data_loader.preview_dataset_structure(3)
-        # 診斷數據問題
-        self.data_loader.diagnose_data_issues(10)
-        # 然後加載數據
-        self.data_loader.load_complete_dataset()
-        self.data_loader.load_schema()
-        if self.data_loader.questions:
-            self.retrieval_system.compute_embeddings(self.data_loader.questions)
-        print(f"系統初始化完成，載入問題總數: {len(self.data_loader.questions)}")
-    def extract_year(self, text: str) -> str:
-        """從文字中提取年份，若無則返回當年"""
-        year_match = re.search(r'(\d{4})', text)
-        return year_match.group(1) if year_match else datetime.now().strftime('%Y')
-    def call_free_cloud_ai(self, user_question: str) -> str:
-        """調用免費雲端AI生成SQL - 當本地方法無法處理時的備選方案"""
         try:
-            # 構建包含schema的prompt
-            schema_info = json.dumps(self.data_loader.schema_data, ensure_ascii=False, indent=2)
-            prompt = f"""你是一個SQL專家。根據以下資料庫schema和用戶問題，生成準確的SQL查詢。
-資料庫Schema:
-{schema_info}
-用戶問題: {user_question}
-請分析問題並生成對應的SQL查詢。只回傳SQL代碼，不要額外解釋。
-SQL查詢:"""
-            # 使用 Hugging Face 免費 Inference API
-            headers = {"Authorization": f"Bearer {self.hf_token}"} if self.hf_token else {}
-            # 嘗試多個免費模型
-            models_to_try = [
-                "microsoft/DialoGPT-medium",  # 對話模型
-                "google/flan-t5-large",       # 指令跟隨模型
-                "bigscience/bloom-560m"       # 通用生成模型
-            ]
-            for model in models_to_try:
-                try:
-                    url = f"https://api-inference.huggingface.co/models/{model}"
-                    response = requests.post(
-                        url,
-                        headers=headers,
-                        json={"inputs": prompt, "parameters": {"max_length": 512, "temperature": 0.1}},
-                        timeout=30
-                    )
-                    if response.status_code == 200:
-                        result = response.json()
-                        if isinstance(result, list) and len(result) > 0:
-                            generated_text = result[0].get('generated_text', '')
-                            # 提取SQL部分
-                            sql_match = re.search(r'SELECT.*?;', generated_text, re.DOTALL | re.IGNORECASE)
-                            if sql_match:
-                                return f"-- 由免費雲端AI ({model}) 生成\n{sql_match.group(0)}"
-                except Exception as e:
-                    print(f"模型 {model} 調用失敗: {e}")
-                    continue
-            # 如果所有模型都失敗，返回基於意圖的本地生成
-            return self.generate_fallback_sql(user_question)
-        except Exception as e:
-            print(f"雲端AI調用失敗: {e}")
-            return self.generate_fallback_sql(user_question)
-    def generate_fallback_sql(self, user_question: str) -> str:
-        """當所有方法都失敗時的後備SQL生成"""
-        analysis = analyze_question_type(user_question)
-        # 基於關鍵詞的簡單SQL生成
-        question_lower = user_question.lower()
-        if "工作單" in question_lower or "job" in question_lower:
-            if "數量" in question_lower or "多少" in question_lower:
-                return """-- 後備方案：工作單數量查詢
-SELECT COUNT(*) as 工作單總數
-FROM TSR53SampleDescription
-WHERE ApplicantName IS NOT NULL;"""
-            else:
-                return """-- 後備方案：工作單列表查詢
-SELECT JobNo, ApplicantName, BuyerName, OverallRating
-FROM TSR53SampleDescription
-WHERE ApplicantName IS NOT NULL
-LIMIT 20;"""
-        elif "評級" in question_lower or "rating" in question_lower:
-            return """-- 後備方案：評級統計查詢
-SELECT OverallRating, COUNT(*) as 數量
-FROM TSR53SampleDescription
-WHERE OverallRating IS NOT NULL
-GROUP BY OverallRating;"""
-        elif "金額" in question_lower or "amount" in question_lower:
-            return """-- 後備方案：金額統計查詢
-SELECT JobNo, LocalAmount
-FROM TSR53Invoice
-WHERE LocalAmount IS NOT NULL
-ORDER BY LocalAmount DESC
-LIMIT 10;"""
-        # 默認通用查詢
-        return """-- 後備方案：通用查詢
-SELECT JobNo, ApplicantName, BuyerName
-FROM TSR53SampleDescription
-LIMIT 10;"""
-    def intelligent_repair_sql(self, user_question: str, similar_question: str) -> str:
-        """智能修復SQL - 基於當前使用者問題的意圖 (擴展版本)"""
-        analysis = analyze_question_type(user_question)
-        intent = analysis["specific_intent"]
-        keywords = analysis["keywords"]
-        if similar_question != "無相似問題":
-            comment = f"-- 根據類似問題 '{similar_question}' (原SQL無效) 進行智能修復\n"
         else:
-            comment = f"-- 根據問題意圖 '{intent}' 智能生成SQL\n"
-        if intent == "monthly_completion_count":
-            year = self.extract_year(user_question)
-            return comment + f"""-- 查詢 {year} 年每月完成的工作單數量
-SELECT
-    strftime('%Y-%m', jt.ReportAuthorization) as 月份,
-    COUNT(*) as 完成數量
-FROM JobTimeline jt
-WHERE strftime('%Y', jt.ReportAuthorization) = '{year}'
-    AND jt.ReportAuthorization IS NOT NULL
-GROUP BY strftime('%Y-%m', jt.ReportAuthorization)
-ORDER BY 月份;"""
-        elif intent == "lab_completion":
-            # 實驗室特定查詢
-            lab_mapping = {"a組": "TA", "b組": "TB", "c組": "TC", "d組": "TD", "e組": "TE"}
-            lab_code = None
-            for chinese, code in lab_mapping.items():
-                if chinese in user_question.lower():
-                    lab_code = code
-                    break
-            if lab_code:
-                return comment + f"""-- 查詢{lab_code}實驗室完成的測試項目
-SELECT COUNT(*) as 完成數量
-FROM JobTimeline_{lab_code}
-WHERE DATE(end_time) = DATE('now','-1 day');"""
-            else:
-                return comment + """-- 通用實驗室查詢
-SELECT COUNT(*) as 總完成數量
-FROM JobTimeline
-WHERE ReportAuthorization IS NOT NULL;"""
-        elif intent == "buyer_specific":
-            # 買方特定查詢
-            buyer_name = "Unknown"
-            for keyword in keywords:
-                if keyword in ["puma", "under_armour", "skechers", "nike", "adidas"]:
-                    buyer_name = keyword.replace("_", " ").title()
-                    break
-            return comment + f"""-- 查詢買方 {buyer_name} 的已完成工作單
-SELECT sd.JobNo, sd.BuyerName, jt.ReportAuthorization
-FROM TSR53SampleDescription sd
-JOIN JobTimeline jt ON jt.JobNo = sd.JobNo
-WHERE sd.BuyerName LIKE '%{buyer_name}%'
-    AND jt.ReportAuthorization IS NOT NULL
-ORDER BY jt.ReportAuthorization DESC;"""
-        elif intent == "duration_analysis":
-            return comment + """-- 查詢從 LabIn 到 LabOut 耗時最久的工作單
-SELECT JobNo,
-       ROUND(julianday(LabOut) - julianday(LabIn), 2) AS 耗時天數
-FROM JobTimeline
-WHERE LabIn IS NOT NULL AND LabOut IS NOT NULL
-ORDER BY 耗時天數 DESC
-LIMIT 5;"""
-        elif intent == "anomaly_detection":
-            return comment + """-- 查詢從創建到授權超過 14 天的異常工單
-SELECT JobNo,
-       ROUND(julianday(ReportAuthorization) - julianday(JobCreation), 2) AS 處理天數
-FROM JobTimeline
-WHERE JobCreation IS NOT NULL
-    AND ReportAuthorization IS NOT NULL
-    AND (julianday(ReportAuthorization) - julianday(JobCreation)) > 14
-ORDER BY 處理天數 DESC
-LIMIT 20;"""
-        elif intent == "rating_distribution":
-            return comment + """-- 查詢評級分佈統計
-SELECT
-    OverallRating as 評級,
-    COUNT(*) as 數量,
-    ROUND(COUNT(*) * 100.0 / (
-        SELECT COUNT(*)
-        FROM TSR53SampleDescription
-        WHERE OverallRating IS NOT NULL
-    ), 2) as 百分比
-FROM TSR53SampleDescription
-WHERE OverallRating IS NOT NULL
-GROUP BY OverallRating
-ORDER BY 數量 DESC;"""
-        elif intent == "amount_ranking":
-            return comment + """-- 查詢工作單金額排名
-WITH JobTotalAmount AS (
-    SELECT JobNo, SUM(LocalAmount) AS TotalAmount
-    FROM (
-        SELECT DISTINCT JobNo, InvoiceCreditNoteNo, LocalAmount
-        FROM TSR53Invoice
-        WHERE LocalAmount IS NOT NULL
-    )
-    GROUP BY JobNo
-)
-SELECT
-    jta.JobNo as 工作單號,
-    sd.ApplicantName as 申請方,
-    jta.TotalAmount as 總金額
-FROM JobTotalAmount jta
-JOIN TSR53SampleDescription sd ON sd.JobNo = jta.JobNo
-WHERE sd.ApplicantName IS NOT NULL
-ORDER BY jta.TotalAmount DESC
-LIMIT 10;"""
-        elif intent == "company_statistics":
-            return comment + """-- 查詢申請方工作單統計
-SELECT
-    ApplicantName as 申請方名稱,
-    COUNT(*) as 工作單數量
-FROM TSR53SampleDescription
-WHERE ApplicantName IS NOT NULL
-GROUP BY ApplicantName
-ORDER BY 工作單數量 DESC
-LIMIT 20;"""
-        # 通用查詢模板
-        return comment + """-- 通用查詢範本
-SELECT
-    JobNo as 工作單號,
-    ApplicantName as 申請方,
-    BuyerName as 買方,
-    OverallRating as 評級
-FROM TSR53SampleDescription
-WHERE ApplicantName IS NOT NULL
-LIMIT 20;"""
-    def generate_sql(self, user_question: str) -> Tuple[str, str]:
-        """主流程：生成SQL查詢 (雲端AI增強版本)"""
-        log_messages = [f"⏰ {get_current_time()} 開始處理問題: '{user_question[:50]}...'"]
-        if not user_question or not user_question.strip():
-            return "-- 錯誤: 請輸入有效問題\nSELECT '請輸入您的問題' as 錯誤信息;", "錯誤: 問題為空"
-        # 1. 問題分析
-        analysis = analyze_question_type(user_question)
-        log_messages.append(f"📋 問題分析 - 意圖: {analysis['specific_intent']}, 類型: {analysis['type']}")
-        # 2. 檢索最相似的問題
-        hits = self.retrieval_system.retrieve_similar(user_question)
-        if hits:
-            best_hit = hits[0]
-            similarity_score = best_hit['score']
-            corpus_id = best_hit['corpus_id']
-            similar_question = self.data_loader.questions[corpus_id]
-            log_messages.append(f"🔍 找到相似問題 (相似度: {similarity_score:.3f}): '{similar_question[:50]}...'")
-            # 降低相似度閾值，增加匹配機會
-            if similarity_score > max(SIMILARITY_THRESHOLD - 0.1, 0.5):
-                original_sql = self.data_loader.sql_answers[corpus_id]
-                validation = validate_sql(original_sql)
-                if validation["valid"] and validation["is_safe"]:
-                    log_messages.append("✅ 相似度較高且原SQL有效，直接採用")
-                    return original_sql, "\n".join(log_messages)
                 else:
-                    log_messages.append(f"⚠️ 原SQL有問題: {', '.join(validation['issues'])}")
-                    log_messages.append("🛠️ 啟用智能修復...")
-                    repaired_sql = self.intelligent_repair_sql(user_question, similar_question)
-                    log_messages.append("✅ 智能修復完成")
-                    return repaired_sql, "\n".join(log_messages)
-            else:
-                log_messages.append(f"📉 相似度 ({similarity_score:.3f}) 較低，嘗試其他方法")
-        # 3. 嘗試基於意圖的本地生成
-        if analysis["specific_intent"] != "general_query":
-            log_messages.append("🤖 使用意圖導向生成")
-            intelligent_sql = self.intelligent_repair_sql(user_question, "無相似問題")
-            validation = validate_sql(intelligent_sql)
-            if validation["valid"]:
-                log_messages.append("✅ 意圖導向生成成功")
-                return intelligent_sql, "\n".join(log_messages)
-            else:
-                log_messages.append("⚠️ 意圖導向生成結果有問題，嘗試雲端AI")
-        # 4. 調用免費雲端AI（針對未見過的問題）
-        log_messages.append("🌐 調用免費雲端AI處理未見過的問題...")
-        cloud_sql = self.call_free_cloud_ai(user_question)
-        log_messages.append("✅ 雲端AI回應完成")
-        return cloud_sql, "\n".join(log_messages)
-# ==================== 初始化系統 ====================
-if HF_TOKEN is None:
-    print("\n" + "="*60 + "\n⚠️ 警告: Hugging Face Token 未設置。\n" + "="*60 + "\n")
-    text_to_sql_system = None
-else:
-    text_to_sql_system = CompleteTextToSQLSystem(HF_TOKEN)
-# ==================== Gradio界面 ====================
-def process_query(user_question: str) -> Tuple[str, str, str]:
-    if text_to_sql_system is None:
-        error_msg = "系統因缺少 Hugging Face Token 而未成功初始化。"
-        return "系統未初始化", error_msg, error_msg
-    sql_result, log_message = text_to_sql_system.generate_sql(user_question)
-    return sql_result, "✅ 處理完成", log_message
-with gr.Blocks(title="智慧Text-to-SQL系統", theme=gr.themes.Soft()) as demo:
-    # 環境資訊顯示
-    env_info = f"🌐 運行環境: {'Hugging Face Spaces' if IS_SPACES else '本地環境'} | 💻 設備: {DEVICE}"
-    system_status = f"📊 已載入 {len(text_to_sql_system.data_loader.questions) if text_to_sql_system else 0} 個問答範例"
-    gr.Markdown("# 🚀 智慧 Text-to-SQL 系統 (雲端版)")
-    gr.Markdown("📊 **模式**: 結合「檢索驗證」與「意圖導向生成」，即使資料庫範本有誤也能提供準確查詢。")
-    gr.Markdown(f"ℹ️ {env_info} | {system_status}")
     with gr.Row():
-        question_input = gr.Textbox(
-            label="📝 請在此輸入您的問題",
-            placeholder="例如：2024年每月完成多少份報告？",
-            lines=3,
-            scale=4
-        )
-        submit_btn = gr.Button("🚀 生成SQL", variant="primary", scale=1)
-    with gr.Accordion("🔍 結果與日誌", open=True):
-        sql_output = gr.Code(label="📊 生成的SQL查詢", language="sql", lines=10)
-        status_output = gr.Textbox(label="🔍 執行狀態", interactive=False)
-        log_output = gr.Textbox(label="📋 詳細日誌", lines=6, interactive=False)
-    # 雲端環境優化的範例
     gr.Examples(
         examples=[
             "2024年每月完成多少份報告？",
@@ -892,21 +338,8 @@ if __name__ == "__main__":
             demo.launch(
                 server_name="0.0.0.0",
                 server_port=7860,
-                share=False,
-                show_error=True,
-                quiet=False
             )
         else:
             # 本地環境
-            print("🏠 在本地環境中啟動...")
-            demo.launch(
-                server_name="127.0.0.1",
-                server_port=7860,
-                share=True,  # 本地環境可以選擇分享
-                show_error=True
-            )
-    else:
-        print("❌ 無法啟動 Gradio，因為系統初始化失敗。")
-        if IS_SPACES:
-            print("💡 請檢查 Hugging Face Spaces 的環境變數設置。")

 import numpy as np
 # ==================== 配置區 ====================
+HF_TOKEN = os.environ.get("HF_TOKEN", None)
 DATASET_REPO_ID = "Paul720810/Text-to-SQL-Softline"
+# === Change begins ===
+# A hard similarity threshold is no longer needed: retrieved examples are now
+# used as references for generation instead of being adopted directly.
+# SIMILARITY_THRESHOLD = 0.65
+# Number of retrieved examples to include as references in the prompt.
+FEW_SHOT_EXAMPLES_COUNT = 2 # retrieve the 2 most similar examples
+# === Change ends ===
 # 雲端環境檢測
 IS_SPACES = os.environ.get("SPACE_ID") is not None
 # ==================== 獨立工具函數 (不依賴類別實例) ====================
 def get_current_time():
+    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
+    timestamp_format = '%Y-%m-%d %H:%M:%S'
+    now = datetime.now()
+    return now.strftime(timestamp_format)
+def format_log(message: str, level: str = "INFO") -> str:
+    """Build one log line of the form ``[timestamp] [LEVEL] message``."""
+    timestamp = get_current_time()
+    severity = level.upper()
+    return f"[{timestamp}] [{severity}] {message}"
+def parse_sql_from_response(response_text: str) -> Optional[str]:
+    """Extract the SQL statement from a model/API response.
+    Two layouts are recognised: a JSON object whose "SQL查詢" value holds a
+    fenced sql block, and free text that contains a fenced sql block.
+    Args:
+        response_text: Raw text returned by the model or stored in the dataset.
+    Returns:
+        The fenced SQL with surrounding whitespace stripped, or ``None`` when
+        the input is empty, is not a string, or contains no fenced sql block.
+    """
+    if not isinstance(response_text, str) or not response_text.strip():
+        return None
+    def _extract_fenced(text: str) -> Optional[str]:
+        # The strict pattern is tried first so every input that was parsed
+        # before yields exactly the same result. The lenient pattern then
+        # accepts CRLF line endings, an upper-case tag, and a fence that is
+        # opened or closed on the same line as the SQL.
+        fence_patterns = (
+            (r"```sql\n(.*?)\n```", re.DOTALL),
+            (r"```sql\s+(.*?)```", re.DOTALL | re.IGNORECASE),
+        )
+        for pattern, flags in fence_patterns:
+            match = re.search(pattern, text, flags)
+            if match:
+                return match.group(1).strip()
+        return None
+    try:
+        data = json.loads(response_text)
+    except json.JSONDecodeError:
+        # Not JSON at all: look for the fence in the raw text.
+        return _extract_fenced(response_text)
+    # Valid JSON: newlines inside JSON strings are escaped, so the fence has
+    # to be searched in the decoded value rather than in the raw text.
+    if isinstance(data, dict):
+        candidate = data.get("SQL查詢")
+        if isinstance(candidate, str):
+            return _extract_fenced(candidate)
+    return None
+# ==================== 核心 Text-to-SQL 系統類別 ====================
+class TextToSQLSystem:
+    """Retrieval-augmented Text-to-SQL generator.
+    At construction time the schema file and the example dataset are fetched
+    from the ``DATASET_REPO_ID`` repository and every example question is
+    embedded with a SentenceTransformer model. ``process_question`` retrieves
+    the most similar examples, builds a few-shot prompt and sends it to the
+    hosted model at ``API_URL``.
+    """
+    # Inference endpoint queried by huggingface_api_call(). It must be a plain
+    # URL; a pasted markdown link here makes every request fail.
+    API_URL = "https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1"
+    def __init__(self, model_name='sentence-transformers/paraphrase-multilingual-mpnet-base-v2'):
+        """Load the schema, the embedding model and the encoded example corpus.
+        Args:
+            model_name: SentenceTransformer model used to embed questions.
+        """
+        self.log_history = []
+        self._log("初始化系統...")
+        self.schema = self._load_schema()
+        self.model = SentenceTransformer(model_name, device=DEVICE)
+        self.dataset, self.corpus_embeddings = self._load_and_encode_dataset()
+        self._log("✅ 系統初始化完成，已準備就緒。")
+    def _log(self, message: str, level: str = "INFO"):
+        """Append one formatted log line to ``log_history`` and print it."""
+        # Format once so the stored and the printed line carry the same timestamp.
+        entry = format_log(message, level)
+        self.log_history.append(entry)
+        print(entry)
+    def _load_schema(self) -> Dict:
+        """Download and parse ``sqlite_schema_FULL.json`` from the dataset repo.
+        Returns:
+            The parsed JSON object, or an empty dict when downloading or
+            parsing fails (the failure is logged, not raised).
+        """
         try:
+            schema_path = hf_hub_download(repo_id=DATASET_REPO_ID, filename="sqlite_schema_FULL.json", repo_type="dataset")
+            with open(schema_path, 'r', encoding='utf-8') as f:
+                schema = json.load(f)
         except Exception as e:
+            self._log(f"❌ 載入資料庫結構失敗: {e}", "ERROR")
+            return {}
+        # Report success only after the file has actually been parsed.
+        self._log("成功載入資料庫結構 (sqlite_schema_FULL.json)")
+        return schema
+    def _format_schema_for_prompt(self) -> str:
+        """Render ``self.schema`` as plain text for inclusion in the prompt.
+        Iterates the schema as a mapping of table name to a list of column
+        dicts and reads the ``name``, ``type`` and ``description`` keys;
+        missing name/type are shown as ``N/A``, a missing description as empty.
+        """
+        parts = ["資料庫結構 (Database Schema):\n"]
+        for table_name, columns in self.schema.items():
+            parts.append(f"Table: {table_name}\n")
+            for col in columns:
+                col_name = col.get('name', 'N/A')
+                col_type = col.get('type', 'N/A')
+                col_desc = col.get('description', '')
+                parts.append(f"  - {col_name} ({col_type}) # {col_desc}\n")
+            parts.append("\n")
+        return "".join(parts)
+    def _load_and_encode_dataset(self) -> Tuple[Optional[List[Dict]], Optional[torch.Tensor]]:
+        """Load ``training_data.jsonl`` and embed the question of every record.
+        Returns:
+            ``(dataset, embeddings)`` on success, ``(None, None)`` when loading
+            or encoding fails (the failure is logged, not raised).
+        """
         try:
+            dataset = load_dataset(DATASET_REPO_ID, data_files="training_data.jsonl", split="train")
+            # The content of the first message of each record is the corpus entry
+            # (the user turn, per the original author's note).
+            corpus = [item['messages'][0]['content'] for item in dataset]
+            self._log(f"正在對 {len(corpus)} 個範例問題進行編碼...")
+            embeddings = self.model.encode(corpus, convert_to_tensor=True, device=DEVICE)
+            self._log("✅ 範例問題編碼完成。")
+            return dataset, embeddings
         except Exception as e:
+            self._log(f"❌ 載入或編碼數據集失敗: {e}", "ERROR")
+            return None, None
+    def find_most_similar(self, question: str, top_k: int) -> List[Dict]:
+        """Return the ``top_k`` examples whose question is closest to ``question``.
+        Args:
+            question: The user's natural-language question.
+            top_k: Maximum number of examples to return (capped at corpus size).
+        Returns:
+            A list of dicts with keys ``similarity`` (cosine score), ``question``
+            and ``sql``; empty when the dataset or its embeddings are unavailable.
+        """
+        if self.corpus_embeddings is None or self.dataset is None:
             return []
+        question_embedding = self.model.encode(question, convert_to_tensor=True, device=DEVICE)
+        cos_scores = util.cos_sim(question_embedding, self.corpus_embeddings)[0]
+        top_results = torch.topk(cos_scores, k=min(top_k, len(self.corpus_embeddings)))
+        similar_examples = []
+        for score, idx in zip(top_results[0], top_results[1]):
+            item = self.dataset[idx.item()]
+            user_content = item['messages'][0]['content']
+            assistant_content = item['messages'][1]['content']
+            # Keep only the bare SQL from the stored assistant answer.
+            sql_query = parse_sql_from_response(assistant_content)
+            if not sql_query:
+                # Parsing failed (probably a formatting problem); keep a placeholder.
+                sql_query = "無法解析範例SQL"
+            similar_examples.append({
+                "similarity": score.item(),
+                "question": user_content,
+                "sql": sql_query
+            })
+        return similar_examples
+    def huggingface_api_call(self, prompt: str) -> str:
+        """Send ``prompt`` to the Hugging Face Inference API and return the text.
+        Returns:
+            The generated text, or a string starting with ``API 錯誤:`` when the
+            request fails or the response does not have the expected shape.
+        """
+        # Without a token, send no Authorization header instead of "Bearer None".
+        headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
+        payload = {
+            "inputs": prompt,
+            "parameters": {
+                "max_new_tokens": 1024,
+                "return_full_text": False
+            }
+        }
         try:
+            self._log("正在呼叫 Hugging Face API...")
+            response = requests.post(self.API_URL, headers=headers, json=payload, timeout=60)
+            response.raise_for_status()
+            generated_text = response.json()[0]['generated_text']
+        except requests.exceptions.RequestException as e:
+            self._log(f"❌ API 呼叫失敗: {e}", "ERROR")
+            return f"API 錯誤: {e}"
+        except (ValueError, KeyError, IndexError, TypeError) as e:
+            # A 2xx body that is not the expected [{"generated_text": ...}] list.
+            self._log(f"❌ API 回應格式不符預期: {e}", "ERROR")
+            return f"API 錯誤: {e}"
+        self._log("✅ API 成功回應。")
+        return generated_text
+    # === Change begins: core processing logic rewritten ===
+    def _build_prompt_for_generation(self, user_question: str, examples: List[Dict]) -> str:
+        """Assemble the full retrieve-augment-generate prompt.
+        The prompt consists of the task instruction, the formatted schema, the
+        retrieved examples and finally the user's question. The assembled
+        prompt is also written to the log.
+        """
+        # 1. Task instruction: tells the model its role and the output format.
+        system_instruction = (
+            "你是一位頂尖的資料庫專家，精通 SQLite。你的任務是根據使用者提出的問題，"
+            "參考提供的資料庫結構和相似的 SQL 查詢範例，生成一個精確、高效的 SQLite 查詢語法。\n"
+            "請將最終的 SQL 查詢語法包裝在 ```sql ... ``` 區塊中。"
+        )
+        # 2. Database schema: which tables and columns are available.
+        schema_string = self._format_schema_for_prompt()
+        # 3. Few-shot examples: earlier question/SQL pairs to imitate.
+        examples_string = "--- 參考範例 ---\n"
+        if not examples:
+            examples_string += "無\n"
         else:
+            for i, example in enumerate(examples, 1):
+                # Show only the part after "指令:" to keep the prompt compact.
+                clean_question = re.search(r"指令:\s*(.*)", example['question'])
+                if clean_question:
+                    question_to_show = clean_question.group(1).strip()
                 else:
+                    question_to_show = example['question'] # unexpected format: show as is
+                examples_string += f"範例 {i}:\n"
+                examples_string += f"  - 使用者問題: \"{question_to_show}\"\n"
+                examples_string += f"  - SQL 查詢:\n```sql\n{example['sql']}\n```\n\n"
+        # 4. The new user question the model has to answer.
+        final_question_section = (
+            "--- 任務開始 ---\n"
+            "請根據以上的資料庫結構和參考範例，為以下使用者問題生成 SQL 查詢：\n"
+            f"使用者問題: \"{user_question}\""
+        )
+        # Combine all sections into the final prompt.
+        full_prompt = (
+            f"{system_instruction}\n\n"
+            f"{schema_string}\n"
+            f"{examples_string}"
+            f"{final_question_section}"
+        )
+        self._log("已建立給 AI 的完整提示 (Prompt):\n" + "="*20 + f"\n{full_prompt}\n" + "="*20)
+        return full_prompt
+    def process_question(self, question: str) -> Tuple[str, str]:
+        """Answer ``question`` with the retrieve-augment-generate pipeline.
+        Returns:
+            ``(sql, "生成成功")`` when SQL could be extracted from the model
+            response, otherwise ``(error_text_with_raw_response, "生成失敗")``.
+        """
+        self.log_history = [] # drop the log of the previous request
+        self._log(f"⏰ 開始處理問題: '{question}'")
+        # Step 1: retrieval - always look up the most similar examples first.
+        # The dataset is None when loading failed; report 0 instead of crashing.
+        example_count = len(self.dataset) if self.dataset is not None else 0
+        self._log(f"🔍 正在從 {example_count} 個範例中尋找最相似的 {FEW_SHOT_EXAMPLES_COUNT} 個參考...")
+        similar_examples = self.find_most_similar(question, top_k=FEW_SHOT_EXAMPLES_COUNT)
+        if similar_examples:
+            for ex in similar_examples:
+                self._log(f"  - 找到相似範例 (相似度: {ex['similarity']:.3f}): '{ex['question'][:50]}...'")
+        else:
+            self._log("  - 未找到相似範例。", "WARNING")
+        # Step 2: augmentation - build a prompt with all the gathered context.
+        self._log("📝 正在建立給 AI 的完整提示 (Prompt)...")
+        prompt = self._build_prompt_for_generation(question, similar_examples)
+        # Step 3: generation - let the model produce the SQL from that context.
+        self._log("🧠 將判斷權交給 AI，開始生成 SQL...")
+        api_response = self.huggingface_api_call(prompt)
+        # Extract the SQL and report the outcome.
+        sql_query = parse_sql_from_response(api_response)
+        if sql_query:
+            self._log("✅ 成功從 AI 回應中解析出 SQL！")
+            status = "生成成功"
+            return sql_query, status
+        else:
+            self._log("❌ 未能從 AI 回應中解析出有效的 SQL。", "ERROR")
+            self._log(f"  - AI 原始回應: {api_response}", "DEBUG")
+            status = "生成失敗"
+            return f"無法從 AI 的回應中提取 SQL。\n\n原始回應:\n{api_response}", status
+    # === Change ends ===
+# ==================== Gradio 介面設定 ====================
+# Build the shared system instance once at import time. If construction
+# fails the name is still bound (to None) so later code can test it.
+try:
+    text_to_sql_system = TextToSQLSystem()
+except Exception as init_error:
+    text_to_sql_system = None
+    print(f"初始化 TextToSQLSystem 失敗: {init_error}")
+def process_query(question: str) -> Tuple[str, str, str]:
+    """Gradio callback that turns a natural-language question into SQL.
+    Args:
+        question: Text entered by the user; may be empty or None.
+    Returns:
+        A ``(sql_or_message, status, log_text)`` tuple. When the system failed
+        to initialise, or the question is blank, a fixed message is returned
+        and the model is not called.
+    """
+    if not text_to_sql_system:
+        error_msg = "系統初始化失敗，無法處理請求。"
+        return error_msg, "失敗", error_msg
+    # Treat a missing value (None) like a blank question instead of raising
+    # AttributeError on .strip().
+    if not question or not question.strip():
+        return "", "等待輸入", "請輸入您的問題。"
+    sql_result, status = text_to_sql_system.process_question(question)
+    log_output = "\n".join(text_to_sql_system.log_history)
+    return sql_result, status, log_output
+# Gradio 介面佈局
+with gr.Blocks(theme=gr.themes.Soft(), title="Text-to-SQL 智能查詢系統") as demo:
+    gr.Markdown("# 📊 Text-to-SQL 智能查詢系統")
+    gr.Markdown("輸入您的自然語言問題，系統將自動轉換為 SQL 查詢語法。")
     with gr.Row():
+        with gr.Column(scale=2):
+            question_input = gr.Textbox(
+                lines=3,
+                label="💬 您的問題",
+                placeholder="例如：2024年每月完成了多少份報告？"
+            )
+            submit_btn = gr.Button("🚀 生成 SQL", variant="primary")
+            status_output = gr.Textbox(label="處理狀態", interactive=False)
+        with gr.Column(scale=3):
+            sql_output = gr.Code(label="🤖 生成的 SQL 查詢", language="sql")
+    with gr.Accordion("🔍 顯示詳細處理日誌", open=False):
+        log_output = gr.Textbox(lines=15, label="日誌", interactive=False)
+    # 優化的範例
     gr.Examples(
         examples=[
             "2024年每月完成多少份報告？",
             demo.launch(
                 server_name="0.0.0.0",
                 server_port=7860,
             )
         else:
             # 本地環境
+            print("🏠 在本地環境中啟動 (http://127.0.0.1:7860)...")
+            demo.launch()