Spaces:

Paul720810
/

Softline-SQL-Assistant

Sleeping

App Files Files Community

Paul720810 committed on Sep 5, 2025

Commit

d254318

verified ·

1 Parent(s): 89729e6

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -74

app.py CHANGED Viewed

@@ -589,61 +589,45 @@ class TextToSQLSystem:
         return fixed_sql
-    # 更新 _format_relevant_schema 以提供更準確的信息
     def _format_relevant_schema(self, table_names: List[str]) -> str:
-        """根據實際 Schema 格式化相關表格結構，並加入 SQL 註解"""
         if not self.schema:
-            return "-- No database schema available.\n"
-         # 建立一個從全小寫表名到實際大小寫表名的映射
         actual_table_names_map = {name.lower(): name for name in self.schema.keys()}
         real_table_names = []
         for table in table_names:
-            # 嘗試用小寫名稱去映射，找到正確的大小寫
             actual_name = actual_table_names_map.get(table.lower())
             if actual_name:
                 real_table_names.append(actual_name)
-            # 如果映射失敗，但原始名稱剛好存在，也加入 (作為備用)
             elif table in self.schema:
                 real_table_names.append(table)
-        # 如果根據問題分析後，沒有找到任何相關的表格，則使用預設的核心表格
         if not real_table_names:
             self._log("未識別到相關表格，使用預設核心表格。", "WARNING")
             real_table_names = ['TSR53SampleDescription', 'JobTimeline', 'JobsInProgress']
-        # --- END: 修正 NameError 的關鍵程式碼 ---
-        formatted = "## Relevant Table Schema:\n"
-        formatted += "-- Use ONLY the following tables and columns for the query.\n\n"
-        # 現在這個迴圈可以正常執行了，因為 real_table_names 已經被定義
         for table in real_table_names:
             if table in self.schema:
-                # 使用 CREATE TABLE 語法，模型更熟悉
-                formatted += f"CREATE TABLE {table} (\n"
-                # 只顯示前 15 個最關鍵的欄位，避免過多雜訊
-                for col in self.schema[table][:15]:
                     col_name = col['name']
                     col_type = col['type']
-                    # 清理描述中的換行符，避免破壞格式
-                    col_desc = col.get('description', '').replace('\n', ' ').replace('\r', '')
-                    # 將欄位描述變成行內註解
-                    formatted += f"    {col_name} {col_type}, -- {col_desc}\n"
-                formatted += ");\n\n"
-        # 針對性的重要提示
-        formatted += """-- Important Notes:
--- - Customer name is in `TSR53SampleDescription.InvoiceToName`.
--- - Buyer name is in `JobsInProgress.BuyerName` or `TSR53SampleDescription.BuyerName`.
--- - To get the year from a date, use `strftime('%Y', date_column) = '2024'`.
--- - Report completion is determined by `JobTimeline.ReportAuthorization`.
--- - Report rating (Pass/Fail) is in `TSR53SampleDescription.OverallRating`.
-"""
-        return formatted
     def find_most_similar(self, question: str, top_k: int) -> List[Dict]:
         """使用 FAISS 快速檢索相似問題"""
@@ -694,58 +678,44 @@ class TextToSQLSystem:
             self._log(f"❌ 檢索失敗: {e}", "ERROR")
             return []
     def _build_prompt(self, user_q: str, examples: List[Dict]) -> str:
         """
-        建立結構更清晰的提示詞，明確區分上下文和最終指令，避免模型混淆。(已修正 TypeError)
         """
         relevant_tables = self._identify_relevant_tables(user_q)
-        # 1. 提供背景上下文 (Context)
-        system_context = "You are an expert AI assistant that generates SQLite queries based on a database schema and a user's question."
         schema_str = self._format_relevant_schema(relevant_tables)
-        # 2. 提供一個清晰的範例 (Few-shot Example)
-        ex_str = ""
-        # 檢查 examples 列表是否為空，避免出錯
         if examples:
-            # --- START: 這裡是修正的地方 ---
-            # 從列表中取出第一個元素 (它是一個字典)
-            best_example = examples[0]
-            # --- END: 修正完成 ---
-            # 使用 [QUESTION] 和 [SQL] 標籤來強化結構
-            ex_str = f"Here is an example of how to answer:\n[QUESTION]: {best_example['question']}\n[SQL]:\n```sql\n{best_example['sql']}\n```\n\n---\n\n"
-        # 3. 給出最終的、不可混淆的指令 (Final, Imperative Instruction)
-        final_task_instruction = f"""Now, based on the schema and the new user question below, generate a single, valid SQLite query.
-**CRITICAL RULES TO FOLLOW:**
-- You **must** only use the tables and columns provided in the schema above.
-- You **must** use SQLite syntax (e.g., use `strftime('%Y', date_column)` for years).
-- You **must** output **nothing** else, only the SQL query inside a single ```sql code block.
-[QUESTION]: {user_q}
-[SQL]:
 ```sql
 """
-        # 4. 組合最終的 Prompt
-        prompt = f"""{system_context}
-{schema_str}
-{ex_str}
-{final_task_instruction}
-"""
-        # 5. 限制總長度 (這個邏輯保持不變)
-        # 確保在截斷時，最後的指令部分是完整的
-        if len(prompt) > 1500:
-            # 找到 final_task_instruction 在 prompt 中的起始位置
-            instruction_start_index = prompt.find("Now, based on the schema")
-            # 保留 schema 和一部分範例，然後接上完整的最終指令
-            allowed_context_len = 1500 - len(final_task_instruction)
-            prompt = prompt[:allowed_context_len] + "...\n\n" + final_task_instruction
         return prompt

         return fixed_sql
     def _format_relevant_schema(self, table_names: List[str]) -> str:
+        """
+        生成一個簡化的、不易被模型錯誤模仿的 Schema 字符串。
+        """
         if not self.schema:
+            return "No schema available.\n"
         actual_table_names_map = {name.lower(): name for name in self.schema.keys()}
         real_table_names = []
         for table in table_names:
             actual_name = actual_table_names_map.get(table.lower())
             if actual_name:
                 real_table_names.append(actual_name)
             elif table in self.schema:
                 real_table_names.append(table)
         if not real_table_names:
             self._log("未識別到相關表格，使用預設核心表格。", "WARNING")
             real_table_names = ['TSR53SampleDescription', 'JobTimeline', 'JobsInProgress']
+        formatted = ""
         for table in real_table_names:
             if table in self.schema:
+                # 使用簡單的 "Table: ..." 和 "Columns: ..." 格式
+                formatted += f"Table: {table}\n"
+                cols_str = []
+                # 只顯示前 10 個關鍵欄位
+                for col in self.schema[table][:10]:
                     col_name = col['name']
                     col_type = col['type']
+                    col_desc = col.get('description', '').replace('\n', ' ')
+                    # 將描述信息放在括號裡
+                    if col_desc:
+                        cols_str.append(f"{col_name} ({col_type}, {col_desc})")
+                    else:
+                        cols_str.append(f"{col_name} ({col_type})")
+                formatted += f"Columns: {', '.join(cols_str)}\n\n"
+        return formatted.strip()
     def find_most_similar(self, question: str, top_k: int) -> List[Dict]:
         """使用 FAISS 快速檢索相似問題"""
             self._log(f"❌ 檢索失敗: {e}", "ERROR")
             return []
+    # in class TextToSQLSystem:
     def _build_prompt(self, user_q: str, examples: List[Dict]) -> str:
         """
+        建立一個高度結構化、以任務為導向的提示詞，使用清晰的標題分隔符。
         """
         relevant_tables = self._identify_relevant_tables(user_q)
+        # 使用我們新的、更簡單的 schema 格式化函數
         schema_str = self._format_relevant_schema(relevant_tables)
+        example_str = "No example available."
         if examples:
+            best_example = examples[0]
+            example_str = f"Question: {best_example['question']}\nSQL:\n```sql\n{best_example['sql']}\n```"
+        # 使用強分隔符和清晰的標題來構建 prompt
+        prompt = f"""### INSTRUCTIONS ###
+You are a SQLite expert. Your only job is to generate a single, valid SQLite query based on the provided schema and question.
+- ONLY use the tables and columns from the schema below.
+- ALWAYS use SQLite syntax (e.g., `strftime('%Y', date_column)` for years).
+- The report completion date is the `ReportAuthorization` column in the `JobTimeline` table.
+- Your output MUST be ONLY the SQL query inside a ```sql code block.
+### SCHEMA ###
+{schema_str}
+### EXAMPLE ###
+{example_str}
+### TASK ###
+Generate a SQLite query for the following question.
+Question: {user_q}
+SQL:
 ```sql
 """
+        self._log(f"📏 Prompt 長度: {len(prompt)} 字符")
+        # 不再需要複雜的長度截斷邏輯，因為 schema 已經被簡化
         return prompt