Spaces:

cormort
/

apec-query-system

Sleeping

App Files Files Community

cormort committed on Jan 29

Commit

aac27e4

verified ·

1 Parent(s): e133104

Update app.py

Browse files

Files changed (1) hide show

app.py +162 -150

app.py CHANGED Viewed

@@ -15,7 +15,7 @@ from pdfminer.high_level import extract_text
 from docx import Document
 from bs4 import BeautifulSoup
-# --- 新增：翻譯套件引用 (容錯處理) ---
 try:
     from deep_translator import GoogleTranslator
     HAS_TRANSLATOR = True
@@ -23,7 +23,7 @@ except ImportError:
     HAS_TRANSLATOR = False
     print("Warning: deep-translator not installed. Translation features will be limited.")
-# Global lock for file access to prevent race conditions during log/load
 data_lock = threading.Lock()
 # Config
@@ -31,7 +31,10 @@ DATA_FSMM = "fsmm_data.json"
 DATA_PROPOSALS = "proposals_data.json"
 DATA_QUERIES = "user_queries.json"
-# --- FSMM Extraction Logic ---
 def extract_from_pdf(file_path):
     try:
         text = extract_text(file_path)
@@ -48,14 +51,76 @@ def extract_from_docx(file_path):
         print(f"Error reading DOCX {file_path}: {e}")
         return ""
-def parse_fsmm_filename(filename):
     """
-    解析檔名以獲取年份與類型。
-    包含針對特定重要文件的硬編碼對照表。
     """
-    fn_lower = filename.lower()
-    # 1. 針對已知的重要 APEC 文件建立關鍵字對照表
     known_docs = {
         "putrajaya vision": (2020, "Leaders' Declaration"),
         "trujillo principles": (2024, "FSMM Principles"),
@@ -66,35 +131,30 @@ def parse_fsmm_filename(filename):
         "bangkok goals": (2022, "Leaders' Declaration")
     }
-    # 檢查是否命中特定文件
     for key, (year, doc_type) in known_docs.items():
         if key in fn_lower:
             return year, doc_type
-    # 2. 原有的正規表達式邏輯 (YY_fsmm_type)
     match = re.search(r'(\d+)_fsmm_(\w+)', filename)
     if match:
         year_short = match.group(1)
         type_code = match.group(2)
         year = int(year_short) + 2000 if len(year_short) == 2 else int(year_short)
         type_map = {
-            'jms': 'Joint Ministerial Statement (聯合部長聲明)',
-            'stmt': 'Statement (聲明)',
-            'declaration': 'Declaration (宣言)',
-            'roadmap': 'Roadmap (路徑圖)'
         }
         type_name = type_map.get(type_code.lower(), type_code.upper())
         return year, type_name
-    # 3. 如果都失敗，嘗試抓取年份作為最後手段
     year_match = re.search(r'(20\d{2})|19\d{2}', filename)
     if year_match:
         return int(year_match.group(0)), "Other Document"
     return None, None
-# --- Proposal Extraction Logic ---
 def parse_proposal(file_path):
     try:
         with open(file_path, 'r', encoding='utf-8') as f:
@@ -115,7 +175,10 @@ def parse_proposal(file_path):
         print(f"Error parsing proposal {file_path}: {e}")
         return None
-# --- Data Management ---
 def load_json(filepath):
     with data_lock:
         if not os.path.exists(filepath):
@@ -136,13 +199,13 @@ def log_query(query):
         return
     logs = load_json(DATA_QUERIES)
     now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-    logs.append({
-        "timestamp": now,
-        "query": str(query).strip()
-    })
     save_json(DATA_QUERIES, logs)
-# --- Upload Logic (Enhanced for complex APEC docs) ---
 def handle_unified_upload(file_objs):
     if not file_objs:
         return "請選擇檔案上傳。"
@@ -172,7 +235,6 @@ def handle_unified_upload(file_objs):
         # --- 處理文件 (PDF/DOCX) ---
         else:
             year, doc_type = parse_fsmm_filename(original_filename)
             if not year:
                 results.append(f"⚠️ {original_filename}: 無法辨識年份，已跳過。")
                 continue
@@ -190,40 +252,8 @@ def handle_unified_upload(file_objs):
                 results.append(f"❌ {original_filename}: 內容提取失敗。")
                 continue
-            # === [升級版] 文字清理與切分邏輯 ===
-            # 1. 清理頁碼與雜訊
-            content = re.sub(r'(?m)^\s*\d+\s*$', '', content)
-            content = re.sub(r'--- PAGE \d+ ---', '', content)
-            # 2. 統一換行符號
-            lines = content.split('\n')
-            clean_lines = [l.strip() for l in lines if l.strip()]
-            full_text = '\n'.join(clean_lines)
-            # 3. 智慧切分 Pattern
-            split_pattern = r'(?m)^((?:PART\s+[A-Z]+|SECTION\s+[A-Z]|[IVX]+\.|[a-z]\.|(?<!\d)\d{1,2}\.)\s+)'
-            parts = re.split(split_pattern, full_text)
-            composed_paras = []
-            if parts[0].strip():
-                composed_paras.append(parts[0].strip())
-            i = 1
-            while i < len(parts) - 1:
-                header_marker = parts[i].strip()
-                body_text = parts[i+1].strip() if i + 1 < len(parts) else ""
-                full_para = f"{header_marker} {body_text}"
-                composed_paras.append(full_para)
-                i += 2
-            # 4. Fallback slicing
-            if len(composed_paras) < 3 and len(full_text) > 1000:
-                chunk_size = 800
-                composed_paras = [full_text[j:j+chunk_size] for j in range(0, len(full_text), chunk_size)]
-                composed_paras = [f"[Auto-Segment {idx+1}] {t}" for idx, t in enumerate(composed_paras)]
             new_entries = []
             for idx, para in enumerate(composed_paras):
@@ -244,23 +274,18 @@ def handle_unified_upload(file_objs):
     return "\n".join(results)
-# --- Translation Logic (New) ---
 def perform_translation(text, target_lang='zh-TW'):
-    """
-    執行翻譯功能。如果 deep-translator 未安裝，返回提示。
-    """
-    if not text:
-        return ""
-    # 建立 Google Translate 連結 (Fallback)
     encoded_text = urllib.parse.quote(text)
     google_trans_url = f"https://translate.google.com/?sl=auto&tl={target_lang}&text={encoded_text}&op=translate"
     trans_result = ""
     if HAS_TRANSLATOR:
         try:
-            # 限制長度以防 API 報錯，若太長則建議用連結
             if len(text) > 4500:
                  trans_result = "⚠️ 文本過長，請使用下方按鈕前往 Google 翻譯。"
             else:
@@ -270,42 +295,44 @@ def perform_translation(text, target_lang='zh-TW'):
             trans_result = f"⚠️ 翻譯服務暫時不可用 ({str(e)})。請使用下方按鈕。"
     else:
         trans_result = "⚠️ 伺服器未安裝 deep-translator 套件。請使用下方按鈕。"
     return trans_result, google_trans_url
 def translate_ui_action(text):
     t_text, t_url = perform_translation(text)
-    # 返回: 翻譯結果文本, 顯示按鈕(HTML)
-    btn_html = f"""
-    <div style="margin-top: 10px;">
-        <a href="{t_url}" target="_blank" style="
-            background-color: #4285F4; color: white; padding: 8px 16px;
-            text-decoration: none; border-radius: 4px; font-weight: bold;
-            display: inline-block;">
-            🌍 在 Google 翻譯中開啟
-        </a>
-    </div>
-    """
     return t_text, btn_html
-# --- Search Logic ---
 def search_proposals(query, year, economy):
     log_query(f"Proposals Q:{query}|Y:{year}|E:{economy}")
     data = load_json(DATA_PROPOSALS)
     filtered = []
-    q = query.lower() if query else ""
     y = str(year) if year else ""
     e = str(economy) if economy else ""
     for item in data:
-        if q:
-            found_q = False
-            for k, v in item.items():
-                if q in str(v).lower():
-                    found_q = True
-                    break
-            if not found_q: continue
         if y and str(item.get('Project Year', '')).strip() != y:
             continue
@@ -319,9 +346,19 @@ def search_proposals(query, year, economy):
     html = ""
     for p in filtered[:20]:
         html += f"""
         <div style="border: 1px solid #cbd5e1; padding: 15px; border-radius: 8px; margin-bottom: 15px; background: #fff; box-shadow: 0 1px 3px rgba(0,0,0,0.1);">
-            <div style="font-weight: bold; color: #1e293b; margin-bottom: 8px; font-size: 1.1em;">{p.get('Project Title', '無標題')}</div>
             <div style="display: flex; gap: 8px; flex-wrap: wrap; font-size: 0.85em; margin-bottom: 10px;">
                 <span style="background: #e2e8f0; padding: 2px 8px; border-radius: 4px;">{p.get('Project No.', '無編號')}</span>
                 <span style="background: #f1f5f9; padding: 2px 8px; border-radius: 4px;">{p.get('Proposing Economy(ies)', '未知經濟體')}</span>
@@ -329,7 +366,7 @@ def search_proposals(query, year, economy):
             <details style="font-size: 0.9em; color: #475569;">
                 <summary style="cursor: pointer; color: #2563eb;">詳細資訊</summary>
                 <div style="margin-top: 10px; display: grid; grid-template-columns: 1fr 2fr; gap:5px;">
-                    {"".join([f"<b>{k}:</b> <div>{v}</div>" for k, v in p.items() if k not in ['Project Title', 'Project No.']])}
                 </div>
             </details>
         </div>
@@ -337,13 +374,24 @@ def search_proposals(query, year, economy):
     return html
 def search_fsmm(query, year, doc_type, filename_filter):
     log_query(f"FSMM {query}")
     data = load_json(DATA_FSMM)
     filtered = []
-    q = query.lower() if query else ""
     for item in data:
-        if q and q not in item['content'].lower(): continue
         if year and str(item['year']) != year: continue
         if doc_type and item['type'] != doc_type: continue
         if filename_filter and item['filename'] != filename_filter: continue
@@ -356,18 +404,11 @@ def search_fsmm(query, year, doc_type, filename_filter):
     else:
         filtered.sort(key=lambda x: (x['year'], x['type'], x['paragraph_index']), reverse=True)
-    def highlight_text(text, keyword):
-        if not keyword:
-            return text
-        pattern = re.compile(re.escape(keyword), re.IGNORECASE)
-        return pattern.sub(
-            f'<span style="background: #fef08a; padding: 1px 3px; border-radius: 3px; font-weight: bold;">{keyword}</span>',
-            text
-        )
     html = ""
     for item in filtered[:100]:
-        content = highlight_text(item['content'], query) if query else item['content']
         html += f"""
         <div style="border: 1px solid #e2e8f0; padding: 15px; border-radius: 8px; margin-bottom: 15px; background: white; box-shadow: 0 1px 3px rgba(0,0,0,0.1);">
             <div style="display: flex; gap: 8px; margin-bottom: 8px; flex-wrap: wrap;">
@@ -397,7 +438,10 @@ def get_fsmm_full_text(filename):
     return full_text
-# --- Dashboard Logic ---
 def get_dashboard_stats():
     proposals = load_json(DATA_PROPOSALS)
     fsmm = load_json(DATA_FSMM)
@@ -465,12 +509,10 @@ def get_dashboard_stats():
     return summary_html, fig_p_year, fig_p_econ, fig_queries, df_ye_stats, df_ec_stats, df_qu_stats
-# --- UI Helpers ---
 def get_p_choices_raw(year_filter=None):
     try:
         data = load_json(DATA_PROPOSALS)
-        if not data:
-            return ([""],), ([""],)
         years = sorted(list(set(str(it.get('Project Year', '')).strip() for it in data if it.get('Project Year'))), reverse=True)
         if year_filter:
             econs = sorted(list(set(str(it.get('Proposing Economy(ies)', '')).strip()
@@ -478,21 +520,18 @@ def get_p_choices_raw(year_filter=None):
         else:
             econs = sorted(list(set(str(it.get('Proposing Economy(ies)', '')).strip() for it in data if it.get('Proposing Economy(ies)'))))
         return ([""] + years), ([""] + econs)
-    except Exception as e:
-        print(f"Error in get_p_choices_raw: {e}")
         return ([""],), ([""],)
 def get_fsmm_choices_raw():
     try:
         data = load_json(DATA_FSMM)
-        if not data:
-            return ([""],), ([""],), ([""],)
         years = sorted(list(set(str(it.get('year', '')) for it in data if it.get('year'))), reverse=True)
         types = sorted(list(set(str(it.get('type', '')) for it in data if it.get('type'))))
         filenames = sorted(list(set(str(it.get('filename', '')) for it in data if it.get('filename'))), reverse=True)
         return ([""] + years), ([""] + types), ([""] + filenames)
-    except Exception as e:
-        print(f"Error in get_fsmm_choices_raw: {e}")
         return ([""],), ([""],), ([""],)
 def refresh_p_choices(year_filter=None):
@@ -517,42 +556,22 @@ def handle_drilldown(evt: gr.SelectData):
         res_fsmm = search_fsmm(selected_val, "", "", "")
         return gr.update(selected="fsmm_tab"), res_fsmm, ""
-    except Exception as e:
-        print(f"Error in handle_drilldown: {e}")
         return gr.update(), "", ""
-# --- UI UI UI ---
 DASHBOARD_CSS = """
-    .drilldown-df table tr:hover {
-        cursor: pointer !important;
-        background-color: #f0f9ff !important;
-        position: relative;
-    }
-    .drilldown-df table tr:hover::after {
-        content: "🖱️ 點擊跳轉查詢";
-        position: absolute;
-        right: 10px;
-        top: 50%;
-        transform: translateY(-50%);
-        font-size: 0.8em;
-        color: #3b82f6;
-        background: #fff;
-        padding: 2px 6px;
-        border-radius: 4px;
-        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
-        pointer-events: none;
-    }
-    .drilldown-df table tr:hover td {
-        color: #2563eb !important;
-        font-weight: bold;
-    }
 """
 with gr.Blocks(title="APEC 綜合查詢系統", css=DASHBOARD_CSS) as demo:
     gr.Markdown("# 🌐 APEC 綜合查詢系統")
     gr.Markdown("整合 APEC 提案項目與重要宣言內容的統一檢索平台。")
-    # --- 新增：快速翻譯工具區塊 ---
     with gr.Accordion("🛠️ 快速翻譯工具箱 (Translation Tool)", open=False):
         gr.Markdown("將英文內容貼在此處，可快速翻譯成繁體中文。若內容過長建議使用 Google 翻譯按鈕。")
         with gr.Row():
@@ -561,9 +580,7 @@ with gr.Blocks(title="APEC 綜合查詢系統", css=DASHBOARD_CSS) as demo:
         with gr.Row():
             t_btn = gr.Button("🔄 執行翻譯 (Translate)", variant="secondary")
             t_link_html = gr.HTML()
         t_btn.click(translate_ui_action, inputs=[t_input], outputs=[t_output, t_link_html])
-    # ---------------------------
     with gr.Tabs() as tabs:
         with gr.Tab("📊 統計儀表板") as dash_tab:
@@ -588,7 +605,7 @@ with gr.Blocks(title="APEC 綜合查詢系統", css=DASHBOARD_CSS) as demo:
         with gr.Tab("🍎 政策文件查詢", id="fsmm_tab") as fsmm_tab:
             gr.Markdown("### 步驟 1：設定篩選條件 (FSMM / 領袖宣言 / 路線圖)")
             with gr.Row():
-                f_query = gr.Textbox(label="🔍 關鍵字搜尋", placeholder="例如：resilience, climate, women...")
                 f_year = gr.Dropdown(label="📅 年份", choices=[""])
                 f_type = gr.Dropdown(label="📝 類型", choices=[""])
                 f_doc = gr.Dropdown(label="📄 特定檔案 (必選以查看全文)", choices=[""])
@@ -601,7 +618,6 @@ with gr.Blocks(title="APEC 綜合查詢系統", css=DASHBOARD_CSS) as demo:
             gr.Markdown("---")
             f_out = gr.HTML(label="輸出區域")
-            # Events
             f_btn.click(search_fsmm, inputs=[f_query, f_year, f_type, f_doc], outputs=[f_out])
             f_full_btn.click(get_fsmm_full_text, inputs=[f_doc], outputs=[f_out])
             f_refresh_btn.click(refresh_fsmm_choices, outputs=[f_year, f_type, f_doc])
@@ -609,7 +625,7 @@ with gr.Blocks(title="APEC 綜合查詢系統", css=DASHBOARD_CSS) as demo:
         with gr.Tab("📊 APEC 提案查詢", id="proposal_tab") as proposal_tab:
             with gr.Row():
-                p_query = gr.Textbox(label="關鍵字搜尋所有欄位", placeholder="例如：Thailand, agriculture, energy...")
                 p_year = gr.Dropdown(label="📅 年份", choices=[""])
                 p_econ = gr.Dropdown(label="Proposing Economy", choices=[""])
@@ -619,7 +635,6 @@ with gr.Blocks(title="APEC 綜合查詢系統", css=DASHBOARD_CSS) as demo:
             p_out = gr.HTML()
-            # Events
             p_btn.click(search_proposals, inputs=[p_query, p_year, p_econ], outputs=[p_out])
             p_refresh_btn.click(refresh_p_choices, outputs=[p_year, p_econ])
             p_year.change(lambda y: refresh_p_choices(y)[1], inputs=[p_year], outputs=[p_econ])
@@ -634,19 +649,16 @@ with gr.Blocks(title="APEC 綜合查詢系統", css=DASHBOARD_CSS) as demo:
             u_status = gr.Textbox(label="處理結果")
             u_btn.click(handle_unified_upload, inputs=[u_file], outputs=[u_status])
-            # Drill-down Events: outputs=[tabs, f_out, p_out]
             d_ye_df.select(handle_drilldown, outputs=[tabs, f_out, p_out])
             d_ec_df.select(handle_drilldown, outputs=[tabs, f_out, p_out])
             d_qu_df.select(handle_drilldown, outputs=[tabs, f_out, p_out])
-    # Initial load of choices
     def init_choices():
         try:
             fc = refresh_fsmm_choices()
             pc = refresh_p_choices()
             return fc + pc
-        except Exception as e:
-            print(f"Error in init_choices: {e}")
             return (gr.update(), gr.update(), gr.update(), gr.update(), gr.update())
     demo.load(init_choices, outputs=[f_year, f_type, f_doc, p_year, p_econ])

 from docx import Document
 from bs4 import BeautifulSoup
+# --- 翻譯套件引用 (容錯處理) ---
 try:
     from deep_translator import GoogleTranslator
     HAS_TRANSLATOR = True
     HAS_TRANSLATOR = False
     print("Warning: deep-translator not installed. Translation features will be limited.")
+# Global lock for file access
 data_lock = threading.Lock()
 # Config
 DATA_PROPOSALS = "proposals_data.json"
 DATA_QUERIES = "user_queries.json"
+# ==========================================
+# 核心邏輯區：文字處理與智能分段
+# ==========================================
 def extract_from_pdf(file_path):
     try:
         text = extract_text(file_path)
         print(f"Error reading DOCX {file_path}: {e}")
         return ""
def process_and_segment_text(raw_text):
    """Clean extracted PDF/Word text and cut it into searchable segments.

    Steps:
      1. Drop lines that contain only digits (page numbers) and
         ``--- PAGE n ---`` separators.
      2. Re-join lines that were hard-wrapped mid-sentence, undoing
         end-of-line hyphenation.
      3. Split on structural markers at the start of a line
         (``PART X``, ``SECTION X``, ``I.``, ``1.``, ``(a)``); when that
         yields fewer than three segments, fall back to one segment per
         merged line.

    Args:
        raw_text: Text extracted from a document; may be empty or ``None``.

    Returns:
        A list of segment strings (empty list for empty input).
    """
    if not raw_text:
        return []

    # 1. Basic noise removal.
    text = re.sub(r'(?m)^\s*\d+\s*$', '', raw_text)  # digit-only lines (page numbers)
    text = re.sub(r'--- PAGE \d+ ---', '', text)

    # Structural marker shared by the line-merging step and the splitter.
    marker = r'(?:PART\s+[A-Z]+|SECTION\s+[A-Z]|[IVX]+\.|(?:\d{1,2}\.)|(?:\([a-z]\)))'
    starts_with_marker = re.compile(marker + r'(?:\s|$)').match
    sentence_endings = ('.', '!', '?', ':', ';', '"', '”', '’')

    # 2. Undo hard line breaks: a line that does not end with terminal
    #    punctuation is assumed to continue on the next line.
    merged_lines = []
    buffer = ""
    for line in text.split('\n'):
        line = line.strip()
        if not line:
            # A blank line closes the paragraph collected so far.
            if buffer:
                merged_lines.append(buffer)
                buffer = ""
            continue
        if not buffer:
            buffer = line
        elif buffer.endswith(sentence_endings) or starts_with_marker(line):
            # Either the previous sentence is finished, or this line opens a
            # new numbered item. Without the marker check, "1. ..." would be
            # glued onto an unpunctuated heading above it and the splitter
            # below (anchored at line start) would never see the marker.
            merged_lines.append(buffer)
            buffer = line
        elif buffer.endswith('-'):
            # End-of-line hyphenation: "co-" + "operate" -> "cooperate".
            buffer = buffer[:-1] + line
        else:
            buffer += " " + line
    if buffer:
        merged_lines.append(buffer)

    # 3. Structural split. The capturing group keeps each marker line
    #    (marker plus the rest of that line) as its own element of `parts`.
    full_text = "\n".join(merged_lines)
    split_pattern = r'(?m)^(' + marker + r'\s+.*)'
    parts = re.split(split_pattern, full_text)

    final_segments = []
    preamble = parts[0].strip()
    if preamble:
        final_segments.append(preamble)
    # Pieces of 10 characters or fewer are treated as noise.
    final_segments.extend(
        s for s in (p.strip() for p in parts[1:]) if len(s) > 10
    )

    # Too few segments means the marker split did not work for this
    # document; fall back to one segment per merged line (single newline).
    if len(final_segments) < 3:
        final_segments = [
            p.strip() for p in full_text.split('\n') if len(p.strip()) > 20
        ]
    return final_segments
+def parse_fsmm_filename(filename):
+    """解析檔名以獲取年份與類型"""
+    fn_lower = filename.lower()
     known_docs = {
         "putrajaya vision": (2020, "Leaders' Declaration"),
         "trujillo principles": (2024, "FSMM Principles"),
         "bangkok goals": (2022, "Leaders' Declaration")
     }
     for key, (year, doc_type) in known_docs.items():
         if key in fn_lower:
             return year, doc_type
     match = re.search(r'(\d+)_fsmm_(\w+)', filename)
     if match:
         year_short = match.group(1)
         type_code = match.group(2)
         year = int(year_short) + 2000 if len(year_short) == 2 else int(year_short)
         type_map = {
+            'jms': 'Joint Ministerial Statement',
+            'stmt': 'Statement',
+            'declaration': 'Declaration',
+            'roadmap': 'Roadmap'
         }
         type_name = type_map.get(type_code.lower(), type_code.upper())
         return year, type_name
     year_match = re.search(r'(20\d{2})|19\d{2}', filename)
     if year_match:
         return int(year_match.group(0)), "Other Document"
     return None, None
 def parse_proposal(file_path):
     try:
         with open(file_path, 'r', encoding='utf-8') as f:
         print(f"Error parsing proposal {file_path}: {e}")
         return None
+# ==========================================
+# 資料存取區
+# ==========================================
 def load_json(filepath):
     with data_lock:
         if not os.path.exists(filepath):
         return
     logs = load_json(DATA_QUERIES)
     now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    logs.append({"timestamp": now, "query": str(query).strip()})
     save_json(DATA_QUERIES, logs)
+# ==========================================
+# 上傳處理區 (應用智能分段)
+# ==========================================
 def handle_unified_upload(file_objs):
     if not file_objs:
         return "請選擇檔案上傳。"
         # --- 處理文件 (PDF/DOCX) ---
         else:
             year, doc_type = parse_fsmm_filename(original_filename)
             if not year:
                 results.append(f"⚠️ {original_filename}: 無法辨識年份，已跳過。")
                 continue
                 results.append(f"❌ {original_filename}: 內容提取失敗。")
                 continue
+            # 使用新的智能分段邏輯
+            composed_paras = process_and_segment_text(content)
             new_entries = []
             for idx, para in enumerate(composed_paras):
     return "\n".join(results)
+# ==========================================
+# 搜尋與工具邏輯 (多關鍵字支援)
+# ==========================================
 def perform_translation(text, target_lang='zh-TW'):
+    if not text: return ""
     encoded_text = urllib.parse.quote(text)
     google_trans_url = f"https://translate.google.com/?sl=auto&tl={target_lang}&text={encoded_text}&op=translate"
     trans_result = ""
     if HAS_TRANSLATOR:
         try:
             if len(text) > 4500:
                  trans_result = "⚠️ 文本過長，請使用下方按鈕前往 Google 翻譯。"
             else:
             trans_result = f"⚠️ 翻譯服務暫時不可用 ({str(e)})。請使用下方按鈕。"
     else:
         trans_result = "⚠️ 伺服器未安裝 deep-translator 套件。請使用下方按鈕。"
     return trans_result, google_trans_url
 def translate_ui_action(text):
     """Translate ``text`` and build the "open in Google Translate" button.

     Args:
         text: The text entered in the translation box; may be empty or None.

     Returns:
         A ``(translated_text, button_html)`` tuple. Both elements are empty
         strings when there is nothing to translate.
     """
     # perform_translation() returns a bare "" for empty input, which cannot
     # be unpacked into two values; short-circuit before calling it.
     if not text:
         return "", ""
     t_text, t_url = perform_translation(text)
     btn_html = f"""<div style="margin-top: 10px;"><a href="{t_url}" target="_blank" style="background-color: #4285F4; color: white; padding: 8px 16px; text-decoration: none; border-radius: 4px; font-weight: bold; display: inline-block;">🌍 在 Google 翻譯中開啟</a></div>"""
     return t_text, btn_html
def highlight_keywords(text, keywords):
    """Wrap each case-insensitive keyword occurrence in a yellow ``<span>``.

    All keywords are matched in a single pass so that markup inserted for
    one keyword can never be matched by another (e.g. a keyword "span" or
    "font" must not rewrite the highlight tags themselves).

    Args:
        text: Value to highlight; converted with ``str()`` before matching.
        keywords: Iterable of literal search terms (not regular expressions).

    Returns:
        ``text`` unchanged when ``text`` or ``keywords`` is empty/falsy;
        otherwise the string with every match wrapped in a highlight span.
        The matched text keeps its original casing.
    """
    if not keywords or not text:
        return text
    val = str(text)
    # Drop empty terms (an empty pattern matches at every position) and try
    # longer terms first so "climate" wins over "mate" at the same position.
    terms = sorted({str(k) for k in keywords if k}, key=lambda t: (-len(t), t))
    if not terms:
        return val
    pattern = re.compile("|".join(re.escape(t) for t in terms), re.IGNORECASE)
    # NOTE(review): `text` is inserted into HTML unescaped, as before —
    # confirm whether callers expect pre-escaped content.
    return pattern.sub(
        lambda m: f'<span style="background: #fef08a; font-weight: bold;">{m.group(0)}</span>',
        val,
    )
 def search_proposals(query, year, economy):
+    """
+    提案搜尋：支援多關鍵字 AND 搜尋
+    例如輸入 "Thailand agriculture" -> 找出同時包含 Thailand 和 agriculture 的提案
+    """
     log_query(f"Proposals Q:{query}|Y:{year}|E:{economy}")
     data = load_json(DATA_PROPOSALS)
     filtered = []
+    # 1. 關鍵字預處理
+    keywords = query.lower().split() if query else []
     y = str(year) if year else ""
     e = str(economy) if economy else ""
     for item in data:
+        # 建立全文字串以供檢查
+        full_text_search_content = " ".join([str(v) for v in item.values()]).lower()
+        # 2. 關鍵字 AND 邏輯檢查
+        if keywords:
+            if not all(k in full_text_search_content for k in keywords):
+                continue
         if y and str(item.get('Project Year', '')).strip() != y:
             continue
     html = ""
     for p in filtered[:20]:
+        # 3. 顯示時 Highlight 標題
+        title = highlight_keywords(p.get('Project Title', '無標題'), keywords)
+        # 4. 組裝詳細資訊 HTML (含 Highlight)
+        details_html = ""
+        for k, v in p.items():
+            if k not in ['Project Title', 'Project No.']:
+                val_highlighted = highlight_keywords(str(v), keywords)
+                details_html += f"<b>{k}:</b> <div>{val_highlighted}</div>"
         html += f"""
         <div style="border: 1px solid #cbd5e1; padding: 15px; border-radius: 8px; margin-bottom: 15px; background: #fff; box-shadow: 0 1px 3px rgba(0,0,0,0.1);">
+            <div style="font-weight: bold; color: #1e293b; margin-bottom: 8px; font-size: 1.1em;">{title}</div>
             <div style="display: flex; gap: 8px; flex-wrap: wrap; font-size: 0.85em; margin-bottom: 10px;">
                 <span style="background: #e2e8f0; padding: 2px 8px; border-radius: 4px;">{p.get('Project No.', '無編號')}</span>
                 <span style="background: #f1f5f9; padding: 2px 8px; border-radius: 4px;">{p.get('Proposing Economy(ies)', '未知經濟體')}</span>
             <details style="font-size: 0.9em; color: #475569;">
                 <summary style="cursor: pointer; color: #2563eb;">詳細資訊</summary>
                 <div style="margin-top: 10px; display: grid; grid-template-columns: 1fr 2fr; gap:5px;">
+                    {details_html}
                 </div>
             </details>
         </div>
     return html
 def search_fsmm(query, year, doc_type, filename_filter):
+    """
+    文件搜尋：支援多關鍵字 AND 搜尋
+    """
     log_query(f"FSMM {query}")
     data = load_json(DATA_FSMM)
     filtered = []
+    # 1. 關鍵字預處理
+    keywords = query.lower().split() if query else []
     for item in data:
+        content_lower = item['content'].lower()
+        # 2. AND 邏輯檢查
+        if keywords:
+            if not all(k in content_lower for k in keywords):
+                continue
         if year and str(item['year']) != year: continue
         if doc_type and item['type'] != doc_type: continue
         if filename_filter and item['filename'] != filename_filter: continue
     else:
         filtered.sort(key=lambda x: (x['year'], x['type'], x['paragraph_index']), reverse=True)
     html = ""
     for item in filtered[:100]:
+        # 3. Highlight 內容
+        content = highlight_keywords(item['content'], keywords)
         html += f"""
         <div style="border: 1px solid #e2e8f0; padding: 15px; border-radius: 8px; margin-bottom: 15px; background: white; box-shadow: 0 1px 3px rgba(0,0,0,0.1);">
             <div style="display: flex; gap: 8px; margin-bottom: 8px; flex-wrap: wrap;">
     return full_text
+# ==========================================
+# Dashboard & UI Helpers
+# ==========================================
 def get_dashboard_stats():
     proposals = load_json(DATA_PROPOSALS)
     fsmm = load_json(DATA_FSMM)
     return summary_html, fig_p_year, fig_p_econ, fig_queries, df_ye_stats, df_ec_stats, df_qu_stats
 def get_p_choices_raw(year_filter=None):
     try:
         data = load_json(DATA_PROPOSALS)
+        if not data: return ([""],), ([""],)
         years = sorted(list(set(str(it.get('Project Year', '')).strip() for it in data if it.get('Project Year'))), reverse=True)
         if year_filter:
             econs = sorted(list(set(str(it.get('Proposing Economy(ies)', '')).strip()
         else:
             econs = sorted(list(set(str(it.get('Proposing Economy(ies)', '')).strip() for it in data if it.get('Proposing Economy(ies)'))))
         return ([""] + years), ([""] + econs)
+    except:
         return ([""],), ([""],)
 def get_fsmm_choices_raw():
     """Collect the distinct years, types and filenames stored in DATA_FSMM.

     Returns:
         A 3-tuple of lists ``(years, types, filenames)``. Each list starts
         with ``""`` followed by the distinct non-empty values as strings;
         years and filenames are sorted descending, types ascending. When no
         data is available or loading fails, each list is just ``[""]``.
     """
     try:
         data = load_json(DATA_FSMM)
         if not data:
             # Same shape as the normal return: three flat lists. The earlier
             # `([""],)` form wrapped each list in a 1-tuple.
             return [""], [""], [""]
         years = sorted({str(it.get('year', '')) for it in data if it.get('year')}, reverse=True)
         types = sorted({str(it.get('type', '')) for it in data if it.get('type')})
         filenames = sorted({str(it.get('filename', '')) for it in data if it.get('filename')}, reverse=True)
         return ([""] + years), ([""] + types), ([""] + filenames)
     except Exception as e:
         # Best-effort: report the failure and fall back to empty choices
         # without trapping KeyboardInterrupt/SystemExit.
         print(f"Error in get_fsmm_choices_raw: {e}")
         return [""], [""], [""]
 def refresh_p_choices(year_filter=None):
         res_fsmm = search_fsmm(selected_val, "", "", "")
         return gr.update(selected="fsmm_tab"), res_fsmm, ""
+    except:
         return gr.update(), "", ""
+# ==========================================
+# Main UI
+# ==========================================
 DASHBOARD_CSS = """
+    .drilldown-df table tr:hover { cursor: pointer !important; background-color: #f0f9ff !important; position: relative; }
+    .drilldown-df table tr:hover::after { content: "🖱️ 點擊跳轉查詢"; position: absolute; right: 10px; top: 50%; transform: translateY(-50%); font-size: 0.8em; color: #3b82f6; background: #fff; padding: 2px 6px; border-radius: 4px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); pointer-events: none; }
+    .drilldown-df table tr:hover td { color: #2563eb !important; font-weight: bold; }
 """
 with gr.Blocks(title="APEC 綜合查詢系統", css=DASHBOARD_CSS) as demo:
     gr.Markdown("# 🌐 APEC 綜合查詢系統")
     gr.Markdown("整合 APEC 提案項目與重要宣言內容的統一檢索平台。")
     with gr.Accordion("🛠️ 快速翻譯工具箱 (Translation Tool)", open=False):
         gr.Markdown("將英文內容貼在此處，可快速翻譯成繁體中文。若內容過長建議使用 Google 翻譯按鈕。")
         with gr.Row():
         with gr.Row():
             t_btn = gr.Button("🔄 執行翻譯 (Translate)", variant="secondary")
             t_link_html = gr.HTML()
         t_btn.click(translate_ui_action, inputs=[t_input], outputs=[t_output, t_link_html])
     with gr.Tabs() as tabs:
         with gr.Tab("📊 統計儀表板") as dash_tab:
         with gr.Tab("🍎 政策文件查詢", id="fsmm_tab") as fsmm_tab:
             gr.Markdown("### 步驟 1：設定篩選條件 (FSMM / 領袖宣言 / 路線圖)")
             with gr.Row():
+                f_query = gr.Textbox(label="🔍 關鍵字搜尋 (支援多詞組, 如: climate resilience)", placeholder="例如：resilience climate women...")
                 f_year = gr.Dropdown(label="📅 年份", choices=[""])
                 f_type = gr.Dropdown(label="📝 類型", choices=[""])
                 f_doc = gr.Dropdown(label="📄 特定檔案 (必選以查看全文)", choices=[""])
             gr.Markdown("---")
             f_out = gr.HTML(label="輸出區域")
             f_btn.click(search_fsmm, inputs=[f_query, f_year, f_type, f_doc], outputs=[f_out])
             f_full_btn.click(get_fsmm_full_text, inputs=[f_doc], outputs=[f_out])
             f_refresh_btn.click(refresh_fsmm_choices, outputs=[f_year, f_type, f_doc])
         with gr.Tab("📊 APEC 提案查詢", id="proposal_tab") as proposal_tab:
             with gr.Row():
+                p_query = gr.Textbox(label="關鍵字搜尋 (支援多詞組, 如: Thailand agriculture)", placeholder="例如：Thailand agriculture energy...")
                 p_year = gr.Dropdown(label="📅 年份", choices=[""])
                 p_econ = gr.Dropdown(label="Proposing Economy", choices=[""])
             p_out = gr.HTML()
             p_btn.click(search_proposals, inputs=[p_query, p_year, p_econ], outputs=[p_out])
             p_refresh_btn.click(refresh_p_choices, outputs=[p_year, p_econ])
             p_year.change(lambda y: refresh_p_choices(y)[1], inputs=[p_year], outputs=[p_econ])
             u_status = gr.Textbox(label="處理結果")
             u_btn.click(handle_unified_upload, inputs=[u_file], outputs=[u_status])
             d_ye_df.select(handle_drilldown, outputs=[tabs, f_out, p_out])
             d_ec_df.select(handle_drilldown, outputs=[tabs, f_out, p_out])
             d_qu_df.select(handle_drilldown, outputs=[tabs, f_out, p_out])
     def init_choices():
         """Build the initial values for the five filter dropdowns.

         Concatenates the results of refresh_fsmm_choices() and
         refresh_p_choices(); on any error, returns five no-op
         ``gr.update()`` objects so the components are left unchanged.
         """
         try:
             fc = refresh_fsmm_choices()
             pc = refresh_p_choices()
             # Assumes both helpers return sequences of the same type so that
             # `+` concatenates them — TODO confirm against their definitions.
             return fc + pc
         except Exception as e:
             # Report instead of swallowing silently; a bare `except:` would
             # also trap KeyboardInterrupt/SystemExit.
             print(f"Error in init_choices: {e}")
             return (gr.update(), gr.update(), gr.update(), gr.update(), gr.update())
     demo.load(init_choices, outputs=[f_year, f_type, f_doc, p_year, p_econ])