Spaces:

Paul720810
/

Softline-SQL-Assistant

Sleeping

App Files Files Community

Paul720810 committed on Sep 3, 2025

Commit

b69d84b

verified ·

1 Parent(s): afb724a

Update app.py

Browse files

Files changed (1) hide show

app.py +326 -93

app.py CHANGED Viewed

@@ -66,26 +66,53 @@ def analyze_question_type(question: str) -> Dict:
     analysis = {
         "type": "unknown",
         "keywords": [],
-        "has_count": "多少" in question_lower or "幾個" in question_lower or "數量" in question_lower,
-        "has_date": "時間" in question_lower or "日期" in question_lower or "月份" in question_lower or "年" in question_lower,
-        "has_group": "每" in question_lower or "各" in question_lower or "分組" in question_lower,
         "specific_intent": "general_query"  # 新增：具體意圖，預設為通用查詢
     }
-    # **更精確的意圖識別**
-    if "每月" in question_lower and ("完成" in question_lower or "報告" in question_lower or "工作單" in question_lower):
         analysis["specific_intent"] = "monthly_completion_count"
         analysis["type"] = "time_series"
-    elif ("評級" in question_lower or "pass" in question_lower or "fail" in question_lower) and ("統計" in question_lower or "分佈" in question_lower or "多少" in question_lower):
         analysis["specific_intent"] = "rating_distribution"
         analysis["type"] = "statistics"
-    elif "金額" in question_lower and ("最高" in question_lower or "top" in question_lower or "排名" in question_lower):
         analysis["specific_intent"] = "amount_ranking"
         analysis["type"] = "ranking"
-    elif ("公司" in question_lower or "客戶" in question_lower or "申請方" in question_lower) and ("統計" in question_lower or "數量" in question_lower or "排名" in question_lower):
         analysis["specific_intent"] = "company_statistics"
         analysis["type"] = "statistics"
     return analysis
 # ==================== 完整數據加載模塊 ====================
@@ -112,52 +139,100 @@ class CompleteDataLoader:
                         user_content = item['messages'][0]['content']
                         assistant_content = item['messages'][1]['content']
-                        # 改進的問題提取邏輯
                         question_match = re.search(r'指令:\s*(.*?)(?:\n|$)', user_content)
                         if question_match:
                             question = question_match.group(1).strip()
-                        else:
-                            # 如果沒有找到「指令:」格式，嘗試直接使用內容
                             question = user_content.strip()
-                        # 改進的SQL提取邏輯
                         sql_match = re.search(r'SQL查詢:\s*(.*?)(?:\n|$)', assistant_content, re.DOTALL)
                         if sql_match:
                             sql_query = sql_match.group(1).strip()
-                        else:
-                            # 如果沒有找到「SQL查詢:」格式，嘗試提取SQL代碼塊
                             sql_block_match = re.search(r'```sql\s*(.*?)\s*```', assistant_content, re.DOTALL)
                             if sql_block_match:
                                 sql_query = sql_block_match.group(1).strip()
-                            else:
-                                sql_query = assistant_content.strip()
                         # 清理SQL查詢
-                        sql_query = re.sub(r'```sql|```', '', sql_query).strip()
-                        # 驗證數據質量
                         if not question or len(question.strip()) < 3:
                             skipped_reasons["empty_question"] += 1
                             continue
-                        if not sql_query or len(sql_query.strip()) < 10:
                             skipped_reasons["empty_sql"] += 1
                             continue
-                        # 基本SQL驗證
-                        if "SELECT" not in sql_query.upper():
                             skipped_reasons["invalid_format"] += 1
                             continue
                         self.questions.append(question)
                         self.sql_answers.append(sql_query)
                         successful_loads += 1
                     else:
                         skipped_reasons["invalid_format"] += 1
                 except Exception as e:
                     skipped_reasons["parse_error"] += 1
-                    if idx < 5:  # 只顯示前5個錯誤
                         print(f"跳過第 {idx} 項資料，錯誤: {e}")
                     continue
@@ -253,21 +328,109 @@ class CompleteTextToSQLSystem:
         year_match = re.search(r'(\d{4})', text)
         return year_match.group(1) if year_match else datetime.now().strftime('%Y')
-    def generate_sql_from_question(self, question: str, analysis: Dict) -> str:
-        """通用SQL生成器 (作為最終備用)"""
-        # 此函數現在作為無法識別具體意圖時的通用後備方案
-        return f"""-- 通用查詢範本
-SELECT
-    JobNo as 工作單號,
-    ApplicantName as 申請方,
-    OverallRating as 評級
 FROM TSR53SampleDescription
 LIMIT 20;"""
     def intelligent_repair_sql(self, user_question: str, similar_question: str) -> str:
-        """智能修復SQL - 基於當前使用者問題的意圖"""
         analysis = analyze_question_type(user_question)
         intent = analysis["specific_intent"]
         if similar_question != "無相似問題":
             comment = f"-- 根據類似問題 '{similar_question}' (原SQL無效) 進行智能修復\n"
@@ -277,75 +440,131 @@ LIMIT 20;"""
         if intent == "monthly_completion_count":
             year = self.extract_year(user_question)
             return comment + f"""-- 查詢 {year} 年每月完成的工作單數量
-    SELECT
-        strftime('%Y-%m', jt.ReportAuthorization) as 月份,
-        COUNT(*) as 完成數量
-    FROM JobTimeline jt
-    WHERE strftime('%Y', jt.ReportAuthorization) = '{year}'
-        AND jt.ReportAuthorization IS NOT NULL
-    GROUP BY strftime('%Y-%m', jt.ReportAuthorization)
-    ORDER BY 月份;"""
         elif intent == "rating_distribution":
             return comment + """-- 查詢評級分佈統計
-    SELECT
-        OverallRating as 評級,
-        COUNT(*) as 數量,
-        ROUND(COUNT(*) * 100.0 / (
-            SELECT COUNT(*)
-            FROM TSR53SampleDescription
-            WHERE OverallRating IS NOT NULL
-        ), 2) as 百分比
-    FROM TSR53SampleDescription
-    WHERE OverallRating IS NOT NULL
-    GROUP BY OverallRating
-    ORDER BY 數量 DESC;"""
         elif intent == "amount_ranking":
             return comment + """-- 查詢工作單金額排名
-    WITH JobTotalAmount AS (
-        SELECT JobNo, SUM(LocalAmount) AS TotalAmount
-        FROM (
-            SELECT DISTINCT JobNo, InvoiceCreditNoteNo, LocalAmount
-            FROM TSR53Invoice
-            WHERE LocalAmount IS NOT NULL
-        )
-        GROUP BY JobNo
     )
-    SELECT
-        jta.JobNo as 工作單號,
-        sd.ApplicantName as 申請方,
-        jta.TotalAmount as 總金額
-    FROM JobTotalAmount jta
-    JOIN TSR53SampleDescription sd ON sd.JobNo = jta.JobNo
-    WHERE sd.ApplicantName IS NOT NULL
-    ORDER BY jta.TotalAmount DESC
-    LIMIT 10;"""
         elif intent == "company_statistics":
             return comment + """-- 查詢申請方工作單統計
-    SELECT
-        ApplicantName as 申請方名稱,
-        COUNT(*) as 工作單數量
-    FROM TSR53SampleDescription
-    WHERE ApplicantName IS NOT NULL
-    GROUP BY ApplicantName
-    ORDER BY 工作單數量 DESC
-    LIMIT 20;"""
         # 通用查詢模板
         return comment + """-- 通用查詢範本
-    SELECT
-        JobNo as 工作單號,
-        ApplicantName as 申請方,
-        BuyerName as 買方,
-        OverallRating as 評級
-    FROM TSR53SampleDescription
-    WHERE ApplicantName IS NOT NULL
-    LIMIT 20;"""
     def generate_sql(self, user_question: str) -> Tuple[str, str]:
-        """主流程：生成SQL查詢 (改進版本)"""
         log_messages = [f"⏰ {get_current_time()} 開始處理問題: '{user_question[:50]}...'"]
         if not user_question or not user_question.strip():
@@ -366,12 +585,13 @@ LIMIT 20;"""
             log_messages.append(f"🔍 找到相似問題 (相似度: {similarity_score:.3f}): '{similar_question[:50]}...'")
-            if similarity_score > SIMILARITY_THRESHOLD:
                 original_sql = self.data_loader.sql_answers[corpus_id]
                 validation = validate_sql(original_sql)
                 if validation["valid"] and validation["is_safe"]:
-                    log_messages.append("✅ 相似度高且原SQL有效，直接採用")
                     return original_sql, "\n".join(log_messages)
                 else:
                     log_messages.append(f"⚠️ 原SQL有問題: {', '.join(validation['issues'])}")
@@ -380,13 +600,26 @@ LIMIT 20;"""
                     log_messages.append("✅ 智能修復完成")
                     return repaired_sql, "\n".join(log_messages)
             else:
-                log_messages.append(f"📉 相似度 ({similarity_score:.3f}) 低於閾值 ({SIMILARITY_THRESHOLD})")
-        log_messages.append("🤖 未找到合適範本，使用意圖生成")
-        intelligent_sql = self.intelligent_repair_sql(user_question, "無相似問題")
-        log_messages.append("✅ 智能生成完成")
-        return intelligent_sql, "\n".join(log_messages)
 # ==================== 初始化系統 ====================
 if HF_TOKEN is None:

     analysis = {
         "type": "unknown",
         "keywords": [],
+        "has_count": "多少" in question_lower or "幾個" in question_lower or "數量" in question_lower or "count" in question_lower,
+        "has_date": "時間" in question_lower or "日期" in question_lower or "月份" in question_lower or "年" in question_lower or "yesterday" in question_lower or "昨天" in question_lower,
+        "has_group": "每" in question_lower or "各" in question_lower or "分組" in question_lower or "group" in question_lower,
         "specific_intent": "general_query"  # 新增：具體意圖，預設為通用查詢
     }
+    # **更精確的意圖識別 - 增加更多模式**
+    if ("每月" in question_lower or "monthly" in question_lower) and ("完成" in question_lower or "completed" in question_lower or "報告" in question_lower or "工作單" in question_lower):
         analysis["specific_intent"] = "monthly_completion_count"
         analysis["type"] = "time_series"
+    elif ("評級" in question_lower or "pass" in question_lower or "fail" in question_lower or "rating" in question_lower) and ("統計" in question_lower or "分佈" in question_lower or "多少" in question_lower or "distribution" in question_lower):
         analysis["specific_intent"] = "rating_distribution"
         analysis["type"] = "statistics"
+    elif ("金額" in question_lower or "amount" in question_lower or "價格" in question_lower or "費用" in question_lower) and ("最高" in question_lower or "top" in question_lower or "排名" in question_lower or "highest" in question_lower):
         analysis["specific_intent"] = "amount_ranking"
         analysis["type"] = "ranking"
+    elif ("公司" in question_lower or "客戶" in question_lower or "申請方" in question_lower or "company" in question_lower or "client" in question_lower) and ("統計" in question_lower or "數量" in question_lower or "排名" in question_lower or "count" in question_lower):
         analysis["specific_intent"] = "company_statistics"
         analysis["type"] = "statistics"
+    elif ("實驗室" in question_lower or "lab" in question_lower or "組" in question_lower) and ("完成" in question_lower or "completed" in question_lower):
+        analysis["specific_intent"] = "lab_completion"
+        analysis["type"] = "lab_specific"
+    elif ("異常" in question_lower or "超過" in question_lower or "延遲" in question_lower or "slow" in question_lower or "long" in question_lower):
+        analysis["specific_intent"] = "anomaly_detection"
+        analysis["type"] = "analysis"
+    elif ("買方" in question_lower or "buyer" in question_lower) and ("完成" in question_lower or "completed" in question_lower):
+        analysis["specific_intent"] = "buyer_specific"
+        analysis["type"] = "buyer_analysis"
+    elif ("耗時" in question_lower or "時間" in question_lower or "duration" in question_lower or "time" in question_lower) and ("最久" in question_lower or "longest" in question_lower):
+        analysis["specific_intent"] = "duration_analysis"
+        analysis["type"] = "time_analysis"
+    # 提取關鍵詞以供後續使用
+    keywords = []
+    # 公司/品牌名稱
+    brand_patterns = [r"puma", r"under armour", r"skechers", r"nike", r"adidas"]
+    for pattern in brand_patterns:
+        if re.search(pattern, question_lower):
+            keywords.append(pattern.replace(" ", "_"))
+    # 實驗室組別
+    lab_patterns = [r"[a-e]組", r"ta", r"tb", r"tc", r"td", r"te"]
+    for pattern in lab_patterns:
+        if re.search(pattern, question_lower):
+            keywords.append(pattern)
+    analysis["keywords"] = keywords
     return analysis
 # ==================== 完整數據加載模塊 ====================
                         user_content = item['messages'][0]['content']
                         assistant_content = item['messages'][1]['content']
+                        # 多種問題提取策略
+                        question = None
+                        # 策略1: 標準「指令:」格式
                         question_match = re.search(r'指令:\s*(.*?)(?:\n|$)', user_content)
                         if question_match:
                             question = question_match.group(1).strip()
+                        # 策略2: 如果沒找到，嘗試提取最後一行非空內容
+                        if not question:
+                            lines = [line.strip() for line in user_content.split('\n') if line.strip()]
+                            if lines:
+                                question = lines[-1]
+                        # 策略3: 直接使用整個內容（作為最後手段）
+                        if not question:
                             question = user_content.strip()
+                        # 多種SQL提取策略
+                        sql_query = None
+                        # 策略1: 標準「SQL查詢:」格式
                         sql_match = re.search(r'SQL查詢:\s*(.*?)(?:\n|$)', assistant_content, re.DOTALL)
                         if sql_match:
                             sql_query = sql_match.group(1).strip()
+                        # 策略2: SQL代碼塊格式
+                        if not sql_query:
                             sql_block_match = re.search(r'```sql\s*(.*?)\s*```', assistant_content, re.DOTALL)
                             if sql_block_match:
                                 sql_query = sql_block_match.group(1).strip()
+                        # 策略3: 查找任何包含 SELECT 的行
+                        if not sql_query:
+                            for line in assistant_content.split('\n'):
+                                if 'SELECT' in line.upper():
+                                    # 從這行開始提取到最後或到下個非SQL行
+                                    sql_lines = []
+                                    found_start = False
+                                    for l in assistant_content.split('\n'):
+                                        if 'SELECT' in l.upper():
+                                            found_start = True
+                                        if found_start:
+                                            if l.strip() and not l.strip().startswith('```'):
+                                                sql_lines.append(l)
+                                            elif l.strip() == '' and sql_lines:
+                                                continue
+                                            elif found_start and len(sql_lines) > 0:
+                                                break
+                                    if sql_lines:
+                                        sql_query = '\n'.join(sql_lines).strip()
+                                        break
+                        # 策略4: 如果還是沒找到，使用整個assistant內容
+                        if not sql_query:
+                            sql_query = assistant_content.strip()
                         # 清理SQL查詢
+                        if sql_query:
+                            sql_query = re.sub(r'```sql|```', '', sql_query).strip()
+                            sql_query = re.sub(r'^思考過程:.*?\n', '', sql_query, flags=re.MULTILINE).strip()
+                            sql_query = re.sub(r'^SQL查詢:\s*', '', sql_query, flags=re.MULTILINE).strip()
+                        # 數據質量驗證（降低標準以提高利用率）
                         if not question or len(question.strip()) < 3:
                             skipped_reasons["empty_question"] += 1
                             continue
+                        if not sql_query or len(sql_query.strip()) < 5:  # 降低最小長度要求
                             skipped_reasons["empty_sql"] += 1
                             continue
+                        # 更寬鬆的SQL驗證
+                        sql_upper = sql_query.upper()
+                        if "SELECT" not in sql_upper and "WITH" not in sql_upper:
                             skipped_reasons["invalid_format"] += 1
                             continue
                         self.questions.append(question)
                         self.sql_answers.append(sql_query)
                         successful_loads += 1
+                        # 調試：顯示前幾個成功案例
+                        if successful_loads <= 3:
+                            print(f"成功案例 {successful_loads}:")
+                            print(f"  問題: {question[:50]}...")
+                            print(f"  SQL: {sql_query[:50]}...")
                     else:
                         skipped_reasons["invalid_format"] += 1
                 except Exception as e:
                     skipped_reasons["parse_error"] += 1
+                    if idx < 3:  # 只顯示前3個錯誤
                         print(f"跳過第 {idx} 項資料，錯誤: {e}")
                     continue
         year_match = re.search(r'(\d{4})', text)
         return year_match.group(1) if year_match else datetime.now().strftime('%Y')
+    def call_free_cloud_ai(self, user_question: str) -> str:
+        """調用免費雲端AI生成SQL - 當本地方法無法處理時的備選方案"""
+        try:
+            # 構建包含schema的prompt
+            schema_info = json.dumps(self.data_loader.schema_data, ensure_ascii=False, indent=2)
+            prompt = f"""你是一個SQL專家。根據以下資料庫schema和用戶問題，生成準確的SQL查詢。
+資料庫Schema:
+{schema_info}
+用戶問題: {user_question}
+請分析問題並生成對應的SQL查詢。只回傳SQL代碼，不要額外解釋。
+SQL查詢:"""
+            # 使用 Hugging Face 免費 Inference API
+            headers = {"Authorization": f"Bearer {self.hf_token}"} if self.hf_token else {}
+            # 嘗試多個免費模型
+            models_to_try = [
+                "microsoft/DialoGPT-medium",  # 對話模型
+                "google/flan-t5-large",       # 指令跟隨模型
+                "bigscience/bloom-560m"       # 通用生成模型
+            ]
+            for model in models_to_try:
+                try:
+                    url = f"https://api-inference.huggingface.co/models/{model}"
+                    response = requests.post(
+                        url,
+                        headers=headers,
+                        json={"inputs": prompt, "parameters": {"max_length": 512, "temperature": 0.1}},
+                        timeout=30
+                    )
+                    if response.status_code == 200:
+                        result = response.json()
+                        if isinstance(result, list) and len(result) > 0:
+                            generated_text = result[0].get('generated_text', '')
+                            # 提取SQL部分
+                            sql_match = re.search(r'SELECT.*?;', generated_text, re.DOTALL | re.IGNORECASE)
+                            if sql_match:
+                                return f"-- 由免費雲端AI ({model}) 生成\n{sql_match.group(0)}"
+                except Exception as e:
+                    print(f"模型 {model} 調用失敗: {e}")
+                    continue
+            # 如果所有模型都失敗，返回基於意圖的本地生成
+            return self.generate_fallback_sql(user_question)
+        except Exception as e:
+            print(f"雲端AI調用失敗: {e}")
+            return self.generate_fallback_sql(user_question)
+    def generate_fallback_sql(self, user_question: str) -> str:
+        """當所有方法都失敗時的後備SQL生成"""
+        analysis = analyze_question_type(user_question)
+        # 基於關鍵詞的簡單SQL生成
+        question_lower = user_question.lower()
+        if "工作單" in question_lower or "job" in question_lower:
+            if "數量" in question_lower or "多少" in question_lower:
+                return """-- 後備方案：工作單數量查詢
+SELECT COUNT(*) as 工作單總數
+FROM TSR53SampleDescription
+WHERE ApplicantName IS NOT NULL;"""
+            else:
+                return """-- 後備方案：工作單列表查詢
+SELECT JobNo, ApplicantName, BuyerName, OverallRating
 FROM TSR53SampleDescription
+WHERE ApplicantName IS NOT NULL
 LIMIT 20;"""
+        elif "評級" in question_lower or "rating" in question_lower:
+            return """-- 後備方案：評級統計查詢
+SELECT OverallRating, COUNT(*) as 數量
+FROM TSR53SampleDescription
+WHERE OverallRating IS NOT NULL
+GROUP BY OverallRating;"""
+        elif "金額" in question_lower or "amount" in question_lower:
+            return """-- 後備方案：金額統計查詢
+SELECT JobNo, LocalAmount
+FROM TSR53Invoice
+WHERE LocalAmount IS NOT NULL
+ORDER BY LocalAmount DESC
+LIMIT 10;"""
+        # 默認通用查詢
+        return """-- 後備方案：通用查詢
+SELECT JobNo, ApplicantName, BuyerName
+FROM TSR53SampleDescription
+LIMIT 10;"""
     def intelligent_repair_sql(self, user_question: str, similar_question: str) -> str:
+        """智能修復SQL - 基於當前使用者問題的意圖 (擴展版本)"""
         analysis = analyze_question_type(user_question)
         intent = analysis["specific_intent"]
+        keywords = analysis["keywords"]
         if similar_question != "無相似問題":
             comment = f"-- 根據類似問題 '{similar_question}' (原SQL無效) 進行智能修復\n"
         if intent == "monthly_completion_count":
             year = self.extract_year(user_question)
             return comment + f"""-- 查詢 {year} 年每月完成的工作單數量
+SELECT
+    strftime('%Y-%m', jt.ReportAuthorization) as 月份,
+    COUNT(*) as 完成數量
+FROM JobTimeline jt
+WHERE strftime('%Y', jt.ReportAuthorization) = '{year}'
+    AND jt.ReportAuthorization IS NOT NULL
+GROUP BY strftime('%Y-%m', jt.ReportAuthorization)
+ORDER BY 月份;"""
+        elif intent == "lab_completion":
+            # 實驗室特定查詢
+            lab_mapping = {"a組": "TA", "b組": "TB", "c組": "TC", "d組": "TD", "e組": "TE"}
+            lab_code = None
+            for chinese, code in lab_mapping.items():
+                if chinese in user_question.lower():
+                    lab_code = code
+                    break
+            if lab_code:
+                return comment + f"""-- 查詢{lab_code}實驗室完成的測試項目
+SELECT COUNT(*) as 完成數量
+FROM JobTimeline_{lab_code}
+WHERE DATE(end_time) = DATE('now','-1 day');"""
+            else:
+                return comment + """-- 通用實驗室查詢
+SELECT COUNT(*) as 總完成數量
+FROM JobTimeline
+WHERE ReportAuthorization IS NOT NULL;"""
+        elif intent == "buyer_specific":
+            # 買方特定查詢
+            buyer_name = "Unknown"
+            for keyword in keywords:
+                if keyword in ["puma", "under_armour", "skechers", "nike", "adidas"]:
+                    buyer_name = keyword.replace("_", " ").title()
+                    break
+            return comment + f"""-- 查詢買方 {buyer_name} 的已完成工作單
+SELECT sd.JobNo, sd.BuyerName, jt.ReportAuthorization
+FROM TSR53SampleDescription sd
+JOIN JobTimeline jt ON jt.JobNo = sd.JobNo
+WHERE sd.BuyerName LIKE '%{buyer_name}%'
+    AND jt.ReportAuthorization IS NOT NULL
+ORDER BY jt.ReportAuthorization DESC;"""
+        elif intent == "duration_analysis":
+            return comment + """-- 查詢從 LabIn 到 LabOut 耗時最久的工作單
+SELECT JobNo,
+       ROUND(julianday(LabOut) - julianday(LabIn), 2) AS 耗時天數
+FROM JobTimeline
+WHERE LabIn IS NOT NULL AND LabOut IS NOT NULL
+ORDER BY 耗時天數 DESC
+LIMIT 5;"""
+        elif intent == "anomaly_detection":
+            return comment + """-- 查詢從創建到授權超過 14 天的異常工單
+SELECT JobNo,
+       ROUND(julianday(ReportAuthorization) - julianday(JobCreation), 2) AS 處理天數
+FROM JobTimeline
+WHERE JobCreation IS NOT NULL
+    AND ReportAuthorization IS NOT NULL
+    AND (julianday(ReportAuthorization) - julianday(JobCreation)) > 14
+ORDER BY 處理天數 DESC
+LIMIT 20;"""
         elif intent == "rating_distribution":
             return comment + """-- 查詢評級分佈統計
+SELECT
+    OverallRating as 評級,
+    COUNT(*) as 數量,
+    ROUND(COUNT(*) * 100.0 / (
+        SELECT COUNT(*)
+        FROM TSR53SampleDescription
+        WHERE OverallRating IS NOT NULL
+    ), 2) as 百分比
+FROM TSR53SampleDescription
+WHERE OverallRating IS NOT NULL
+GROUP BY OverallRating
+ORDER BY 數量 DESC;"""
         elif intent == "amount_ranking":
             return comment + """-- 查詢工作單金額排名
+WITH JobTotalAmount AS (
+    SELECT JobNo, SUM(LocalAmount) AS TotalAmount
+    FROM (
+        SELECT DISTINCT JobNo, InvoiceCreditNoteNo, LocalAmount
+        FROM TSR53Invoice
+        WHERE LocalAmount IS NOT NULL
     )
+    GROUP BY JobNo
+)
+SELECT
+    jta.JobNo as 工作單號,
+    sd.ApplicantName as 申請方,
+    jta.TotalAmount as 總金額
+FROM JobTotalAmount jta
+JOIN TSR53SampleDescription sd ON sd.JobNo = jta.JobNo
+WHERE sd.ApplicantName IS NOT NULL
+ORDER BY jta.TotalAmount DESC
+LIMIT 10;"""
         elif intent == "company_statistics":
             return comment + """-- 查詢申請方工作單統計
+SELECT
+    ApplicantName as 申請方名稱,
+    COUNT(*) as 工作單數量
+FROM TSR53SampleDescription
+WHERE ApplicantName IS NOT NULL
+GROUP BY ApplicantName
+ORDER BY 工作單數量 DESC
+LIMIT 20;"""
         # 通用查詢模板
         return comment + """-- 通用查詢範本
+SELECT
+    JobNo as 工作單號,
+    ApplicantName as 申請方,
+    BuyerName as 買方,
+    OverallRating as 評級
+FROM TSR53SampleDescription
+WHERE ApplicantName IS NOT NULL
+LIMIT 20;"""
     def generate_sql(self, user_question: str) -> Tuple[str, str]:
+        """主流程：生成SQL查詢 (雲端AI增強版本)"""
         log_messages = [f"⏰ {get_current_time()} 開始處理問題: '{user_question[:50]}...'"]
         if not user_question or not user_question.strip():
             log_messages.append(f"🔍 找到相似問題 (相似度: {similarity_score:.3f}): '{similar_question[:50]}...'")
+            # 降低相似度閾值，增加匹配機會
+            if similarity_score > max(SIMILARITY_THRESHOLD - 0.1, 0.5):
                 original_sql = self.data_loader.sql_answers[corpus_id]
                 validation = validate_sql(original_sql)
                 if validation["valid"] and validation["is_safe"]:
+                    log_messages.append("✅ 相似度較高且原SQL有效，直接採用")
                     return original_sql, "\n".join(log_messages)
                 else:
                     log_messages.append(f"⚠️ 原SQL有問題: {', '.join(validation['issues'])}")
                     log_messages.append("✅ 智能修復完成")
                     return repaired_sql, "\n".join(log_messages)
             else:
+                log_messages.append(f"📉 相似度 ({similarity_score:.3f}) 較低，嘗試其他方法")
+        # 3. 嘗試基於意圖的本地生成
+        if analysis["specific_intent"] != "general_query":
+            log_messages.append("🤖 使用意圖導向生成")
+            intelligent_sql = self.intelligent_repair_sql(user_question, "無相似問題")
+            validation = validate_sql(intelligent_sql)
+            if validation["valid"]:
+                log_messages.append("✅ 意圖導向生成成功")
+                return intelligent_sql, "\n".join(log_messages)
+            else:
+                log_messages.append("⚠️ 意圖導向生成結果有問題，嘗試雲端AI")
+        # 4. 調用免費雲端AI（針對未見過的問題）
+        log_messages.append("🌐 調用免費雲端AI處理未見過的問題...")
+        cloud_sql = self.call_free_cloud_ai(user_question)
+        log_messages.append("✅ 雲端AI回應完成")
+        return cloud_sql, "\n".join(log_messages)
 # ==================== 初始化系統 ====================
 if HF_TOKEN is None: