cormort committed on
Commit
02b318d
·
verified ·
1 Parent(s): 6a40f18

Upload 5 files

Browse files
Files changed (5) hide show
  1. README.md +28 -6
  2. app.py +546 -0
  3. fsmm_data.json +0 -0
  4. proposals_data.json +0 -0
  5. requirements.txt +7 -0
README.md CHANGED
@@ -1,12 +1,34 @@
1
  ---
2
- title: Apec Query System
3
- emoji: 🔥
4
- colorFrom: indigo
5
- colorTo: gray
6
  sdk: gradio
7
- sdk_version: 6.2.0
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: APEC 綜合查詢系統
3
+ emoji: 🌐
4
+ colorFrom: blue
5
+ colorTo: green
6
  sdk: gradio
7
+ sdk_version: 5.9.1
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
  ---
12
 
13
+ # 🌐 APEC 綜合查詢系統
14
+
15
+ 整合 APEC 提案項目與 FSMM 宣言內容的統一檢索平台。
16
+
17
+ ## 功能特色
18
+
19
+ - **📊 統計儀表板**:即時顯示提案統計、經濟體分佈、熱門搜尋關鍵字
20
+ - **🍎 FSMM 宣言查詢**:搜尋歷年 FSMM 部長級會議宣言與聲明
21
+ - **📋 APEC 提案查詢**:依年份、經濟體、關鍵字搜尋 APEC 提案
22
+ - **☁️ 上傳更新**:支援批次上傳 PDF/DOCX/HTML 文件自動解析
23
+
24
+ ## 技術架構
25
+
26
+ - **前端**: Gradio
27
+ - **資料處理**: pandas, pdfminer, python-docx
28
+ - **視覺化**: Plotly
29
+
30
+ ## 使用方式
31
+
32
+ 1. 點擊「統計儀表板」查看整體數據概覽
33
+ 2. 點擊儀表板中的年份或經濟體可直接跳轉查詢
34
+ 3. 使用搜尋功能輸入關鍵字進行全文檢索
app.py ADDED
@@ -0,0 +1,546 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import re
4
+ import gradio as gr
5
+ import pandas as pd
6
+ import plotly.express as px
7
+ import plotly.graph_objects as go
8
+ from datetime import datetime
9
+ import threading
10
+ from pdfminer.high_level import extract_text
11
+ from docx import Document
12
+ from bs4 import BeautifulSoup
13
+
14
+ # Global lock for file access to prevent race conditions during log/load
15
+ data_lock = threading.Lock()
16
+
17
+ # Config
18
+ DATA_FSMM = "fsmm_data.json"
19
+ DATA_PROPOSALS = "proposals_data.json"
20
+ DATA_QUERIES = "user_queries.json"
21
+
22
+ # --- FSMM Extraction Logic ---
23
def extract_from_pdf(file_path):
    """Return the text extracted from the PDF at ``file_path``.

    Any exception raised by ``extract_text`` is reported on stdout and
    swallowed; in that case an empty string is returned.
    """
    try:
        return extract_text(file_path)
    except Exception as err:
        print(f"Error reading PDF {file_path}: {err}")
    return ""
30
+
31
def extract_from_docx(file_path):
    """Return the paragraphs of the DOCX at ``file_path`` joined by newlines.

    Any exception raised while opening or reading the document is reported
    on stdout and swallowed; in that case an empty string is returned.
    """
    try:
        paragraphs = Document(file_path).paragraphs
        return "\n".join(paragraph.text for paragraph in paragraphs)
    except Exception as err:
        print(f"Error reading DOCX {file_path}: {err}")
    return ""
38
+
39
def parse_fsmm_filename(filename):
    """Extract the year and document type from an FSMM file name.

    The name must contain ``<digits>_fsmm_<type>``, e.g. ``23_fsmm_jms.pdf``.

    Args:
        filename: File name (not a full path is required, but one is accepted).

    Returns:
        ``(year, type_name)`` where ``year`` is an ``int`` and ``type_name`` is
        the display label for the type code, or the upper-cased code when it is
        not one of the known codes. ``(None, None)`` when the pattern is absent.
    """
    match = re.search(r'(\d+)_fsmm_(\w+)', filename)
    if not match:
        return None, None

    year_digits, type_code = match.groups()
    year = int(year_digits)
    # A four-digit prefix is already a full year; only short prefixes such as
    # "23" are offsets from 2000. Adding 2000 unconditionally gave 4023.
    if len(year_digits) != 4:
        year += 2000

    type_map = {
        'jms': 'Joint Ministerial Statement (聯合部長聲明)',
        'stmt': 'Statement (聲明)',
        'declaration': 'Declaration (宣言)',
    }
    type_name = type_map.get(type_code.lower(), type_code.upper())
    return year, type_name
54
+
55
+ # --- Proposal Extraction Logic ---
56
def parse_proposal(file_path):
    """Parse a saved proposal HTML page into a flat ``label -> value`` dict.

    Every table row that has both a ``td.ms-formlabel`` and a
    ``td.ms-formbody`` cell contributes one entry: the label text with colons
    removed, mapped to the body text with whitespace runs collapsed. The
    result also carries ``file_id``, the file's base name without its
    ``.html`` extension.

    Args:
        file_path: Path of a UTF-8 encoded HTML file.

    Returns:
        The parsed dict, or ``None`` if the file cannot be read or parsed
        (the error is printed).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            html_content = f.read()
        soup = BeautifulSoup(html_content, 'html.parser')

        # Strip only a trailing ".html"; str.replace would also remove the
        # substring from the middle of the name.
        file_id = os.path.basename(file_path)
        if file_id.endswith('.html'):
            file_id = file_id[:-len('.html')]
        data = {'file_id': file_id}

        for row in soup.find_all('tr'):
            label_td = row.find('td', class_='ms-formlabel')
            value_td = row.find('td', class_='ms-formbody')
            if not (label_td and value_td):
                continue
            label = label_td.get_text(strip=True).replace(':', '')
            value = value_td.get_text(separator=' ', strip=True)
            data[label] = re.sub(r'\s+', ' ', value)
        return data
    except Exception as e:
        print(f"Error parsing proposal {file_path}: {e}")
        return None
75
+
76
+ # --- Data Management ---
77
def load_json(filepath):
    """Load and return the JSON document stored at ``filepath``.

    The read happens while holding ``data_lock``.

    Returns:
        The decoded JSON value, or an empty list when the file does not exist,
        cannot be read, or does not contain valid JSON.
    """
    with data_lock:
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                return json.load(f)
        except FileNotFoundError:
            # A missing data file simply means "no records yet".
            return []
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            # Keep the best-effort contract but make the failure visible.
            print(f"Error loading JSON {filepath}: {e}")
            return []
86
+
87
def save_json(filepath, data):
    """Serialise ``data`` as indented UTF-8 JSON and store it at ``filepath``.

    The data is serialised before any file is opened and written to a sibling
    ``<filepath>.tmp`` file that then replaces the target, so a serialisation
    or write failure leaves the previous file contents intact.

    Raises:
        TypeError / ValueError: if ``data`` is not JSON serialisable.
        OSError: if the file cannot be written or replaced.
    """
    payload = json.dumps(data, ensure_ascii=False, indent=2)
    tmp_path = f"{filepath}.tmp"
    with data_lock:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            f.write(payload)
        os.replace(tmp_path, filepath)
91
+
92
def log_query(query):
    """Append ``query`` with a timestamp to the query log (``DATA_QUERIES``).

    Falsy or whitespace-only queries are ignored. The whole
    read-modify-write cycle runs under a single ``data_lock`` acquisition so
    that concurrent searches cannot overwrite each other's entries.
    """
    if not query or not str(query).strip():
        return

    entry = {
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "query": str(query).strip(),
    }

    # load_json/save_json each take data_lock themselves, so the file is
    # accessed directly here to keep the lock held across both steps.
    with data_lock:
        logs = []
        try:
            with open(DATA_QUERIES, 'r', encoding='utf-8') as f:
                logs = json.load(f)
        except (OSError, ValueError):
            logs = []
        if not isinstance(logs, list):
            logs = []
        logs.append(entry)
        with open(DATA_QUERIES, 'w', encoding='utf-8') as f:
            json.dump(logs, f, ensure_ascii=False, indent=2)
102
+
103
+ # --- Upload Logic ---
104
def _split_numbered_paragraphs(content):
    """Re-assemble extracted text into its numbered paragraphs.

    All non-blank lines are joined with single spaces, then the text is split
    at markers of the form ``<number>. ``. Text before the first marker is
    kept as a preamble only when it is longer than 20 characters.
    """
    full_text = ' '.join(
        line.strip() for line in content.split('\n') if line.strip()
    )

    # One capturing group => [preamble, num1, text1, num2, text2, ...].
    # Paragraph numbers are limited to three digits so that a sentence ending
    # in a year ("... in 2023. We ...") is not mistaken for a new paragraph.
    parts = re.split(r'\b(\d{1,3})\.\s', full_text)

    paragraphs = []
    preamble = parts[0].strip()
    if len(preamble) > 20:
        paragraphs.append(preamble)
    for number, body in zip(parts[1::2], parts[2::2]):
        paragraphs.append(f"{number}. {body.strip()}")
    return paragraphs


def handle_unified_upload(file_objs):
    """Parse uploaded files and merge them into the proposal / FSMM stores.

    Files named ``Proposal_*.html`` are parsed as proposals and replace any
    stored proposal with the same ``file_id``. Files whose name contains
    ``_fsmm_`` and ends in ``.pdf`` or ``.docx`` (any letter case) are split
    into paragraphs that replace any stored entries for the same file name.

    Args:
        file_objs: A single uploaded file object or a list of them; each must
            expose its path as ``.name``.

    Returns:
        A newline-joined status line per file, or a prompt when nothing was
        uploaded.
    """
    if not file_objs:
        return "請選擇檔案上傳。"

    # Gradio might return a single file or a list depending on configuration
    if not isinstance(file_objs, list):
        file_objs = [file_objs]

    results = []

    for file_obj in file_objs:
        filename = os.path.basename(file_obj.name)
        extension = os.path.splitext(filename)[1].lower()

        if filename.startswith('Proposal_') and filename.endswith('.html'):
            data = parse_proposal(file_obj.name)
            if not data:
                results.append(f"❌ {filename}: 提案解析失敗。")
                continue

            proposals = load_json(DATA_PROPOSALS)
            proposals = [p for p in proposals if p.get('file_id') != data.get('file_id')]
            proposals.append(data)
            save_json(DATA_PROPOSALS, proposals)
            results.append(f"✅ {filename}: 已加入提案 {data.get('Project Title', filename)}")

        elif '_fsmm_' in filename:
            year, doc_type = parse_fsmm_filename(filename)
            if not year:
                results.append(f"❌ {filename}: 無法解析 FSMM 檔名格式。")
                continue

            if extension == '.pdf':
                content = extract_from_pdf(file_obj.name)
            elif extension == '.docx':
                content = extract_from_docx(file_obj.name)
            else:
                results.append(f"❌ {filename}: 僅支援 .pdf 或 .docx")
                continue

            if not content:
                results.append(f"❌ {filename}: 內容提取失敗。")
                continue

            # paragraph_index counts every re-assembled paragraph, including
            # the ones skipped below for being too short.
            new_entries = [
                {
                    'year': year,
                    'type': doc_type,
                    'content': para,
                    'paragraph_index': idx + 1,
                    'filename': filename,
                }
                for idx, para in enumerate(_split_numbered_paragraphs(content))
                if len(para) >= 10
            ]

            fsmm_data = load_json(DATA_FSMM)
            fsmm_data = [item for item in fsmm_data if item.get('filename') != filename]
            fsmm_data.extend(new_entries)
            save_json(DATA_FSMM, fsmm_data)
            results.append(f"✅ {filename}: 整合了 {len(new_entries)} 個完整段落。")
        else:
            results.append(f"❓ {filename}: 不支援的檔案格式或命名。")

    return "\n".join(results)
197
+
198
+ # --- Search Logic ---
199
def search_proposals(query, year, economy):
    """Search stored proposals and render up to 20 matches as HTML cards.

    Args:
        query: Case-insensitive substring matched against every field value;
            empty means no keyword filter.
        year: Exact match on the stripped ``Project Year`` field; empty means
            no year filter.
        economy: Exact match on the stripped ``Proposing Economy(ies)`` field;
            empty means no economy filter.

    Returns:
        An HTML string of result cards, or a "not found" message when nothing
        matches. The search is also recorded through ``log_query``.
    """
    # Imported locally under a distinct name so it cannot clash with any
    # variable called ``html``.
    from html import escape

    log_query(f"Proposals Q:{query}|Y:{year}|E:{economy}")
    data = load_json(DATA_PROPOSALS)

    q = query.lower() if query else ""
    y = str(year) if year else ""
    e = str(economy) if economy else ""

    def matches(item):
        if q and not any(q in str(v).lower() for v in item.values()):
            return False
        if y and str(item.get('Project Year', '')).strip() != y:
            return False
        if e and str(item.get('Proposing Economy(ies)', '')).strip() != e:
            return False
        return True

    filtered = [item for item in data if matches(item)]
    if not filtered:
        return "找不到符合的提案。"

    def text(value):
        # Field values come from uploaded files; escape them so markup in the
        # data is displayed as text instead of being injected into the page.
        return escape(str(value))

    cards = []
    for p in filtered[:20]:
        details = "".join(
            f"<b>{text(k)}:</b> <div>{text(v)}</div>"
            for k, v in p.items()
            if k not in ('Project Title', 'Project No.')
        )
        cards.append(f"""
        <div style="border: 1px solid #cbd5e1; padding: 15px; border-radius: 8px; margin-bottom: 15px; background: #fff; box-shadow: 0 1px 3px rgba(0,0,0,0.1);">
            <div style="font-weight: bold; color: #1e293b; margin-bottom: 8px; font-size: 1.1em;">{text(p.get('Project Title', '無標題'))}</div>
            <div style="display: flex; gap: 8px; flex-wrap: wrap; font-size: 0.85em; margin-bottom: 10px;">
                <span style="background: #e2e8f0; padding: 2px 8px; border-radius: 4px;">{text(p.get('Project No.', '無編號'))}</span>
                <span style="background: #f1f5f9; padding: 2px 8px; border-radius: 4px;">{text(p.get('Proposing Economy(ies)', '未知經濟體'))}</span>
            </div>
            <details style="font-size: 0.9em; color: #475569;">
                <summary style="cursor: pointer; color: #2563eb;">詳細資訊</summary>
                <div style="margin-top: 10px; display: grid; grid-template-columns: 1fr 2fr; gap:5px;">
                    {details}
                </div>
            </details>
        </div>
        """)
    return "".join(cards)
248
+
249
def search_fsmm(query, year, doc_type, filename_filter):
    """Search stored FSMM paragraphs and render up to 100 matches as HTML.

    Args:
        query: Case-insensitive substring matched against paragraph content;
            empty means no keyword filter.
        year: Year to match (compared as a string); empty means any year.
        doc_type: Exact document-type label to match; empty means any type.
        filename_filter: Exact source file name to match; empty means any file.

    Returns:
        An HTML string of paragraph cards, each with a citation, or a
        "not found" message when nothing matches. When a file is selected the
        cards are in paragraph order; otherwise they are sorted by year, type
        and paragraph index, descending. The keyword is recorded through
        ``log_query``.
    """
    # Imported locally under a distinct name so it cannot clash with any
    # variable called ``html``.
    from html import escape

    log_query(f"FSMM {query}")
    data = load_json(DATA_FSMM)
    q = query.lower() if query else ""
    # Normalise so a numeric year still matches the stringified stored year.
    wanted_year = str(year) if year else ""

    filtered = []
    for item in data:
        if q and q not in item['content'].lower():
            continue
        if wanted_year and str(item['year']) != wanted_year:
            continue
        if doc_type and item['type'] != doc_type:
            continue
        if filename_filter and item['filename'] != filename_filter:
            continue
        filtered.append(item)

    if not filtered:
        return "找不到符合的宣言段落。"

    # Sort by paragraph index if a specific file is selected, otherwise by year
    if filename_filter:
        filtered.sort(key=lambda x: x['paragraph_index'])
    else:
        filtered.sort(key=lambda x: (x['year'], x['type'], x['paragraph_index']), reverse=True)

    cards = []
    for item in filtered[:100]:
        # Stored text comes from uploaded documents; escape it before it is
        # embedded in markup.
        year_txt = escape(str(item['year']))
        type_txt = escape(str(item['type']))
        index_txt = escape(str(item['paragraph_index']))
        citation = f"(APEC FSMM, {year_txt}, {type_txt}, Para {index_txt})"
        cards.append(f"""
        <div style="border: 1px solid #e2e8f0; padding: 15px; border-radius: 8px; margin-bottom: 15px; background: white; box-shadow: 0 1px 3px rgba(0,0,0,0.1);">
            <div style="display: flex; gap: 8px; margin-bottom: 8px;">
                <span style="background: #e0e7ff; color: #4338ca; padding: 2px 10px; border-radius: 15px; font-size: 0.8em; font-weight: bold;">{year_txt}</span>
                <span style="background: #fef3c7; color: #92400e; padding: 2px 10px; border-radius: 15px; font-size: 0.8em; font-weight: bold;">{type_txt}</span>
                <span style="background: #f1f5f9; color: #475569; padding: 2px 10px; border-radius: 15px; font-size: 0.8em; font-weight: bold;">Para {index_txt}</span>
            </div>
            <p style="margin-bottom: 12px; line-height: 1.6;">{escape(str(item['content']))}</p>
            <div style="text-align: right; font-size: 0.85em; color: #64748b; margin-top: 5px;">
                檔案: {escape(str(item['filename']))} | <code style="background: #f4f4f5; padding: 2px 6px; border-radius: 4px;">{citation}</code>
            </div>
        </div>
        """)
    return "".join(cards)
287
+
288
def get_fsmm_full_text(filename):
    """Render every stored paragraph of one FSMM file, in order, as HTML.

    Args:
        filename: Source file name to show; empty yields a prompt message.

    Returns:
        An HTML string with a heading and one ``<p>`` per paragraph, prefixed
        by its paragraph index, or a message when no file is selected or no
        paragraphs are stored for it.
    """
    # Imported locally under a distinct name so it cannot clash with any
    # variable called ``html``.
    from html import escape

    if not filename:
        return "請先選擇一個檔案。"

    data = load_json(DATA_FSMM)
    doc_paras = sorted(
        (item for item in data if item['filename'] == filename),
        key=lambda x: x['paragraph_index'],
    )
    if not doc_paras:
        return "找不到該檔案的文件內容。"

    # File names and paragraph text originate from uploads; escape them
    # before embedding in markup.
    pieces = [f"<h3>{escape(str(filename))} 全文內容</h3><hr>"]
    pieces.extend(
        f"<p style='margin-bottom: 10px; line-height: 1.8;'>[{p['paragraph_index']}] {escape(str(p['content']))}</p>"
        for p in doc_paras
    )
    return "".join(pieces)
301
+
302
+ # --- Dashboard Logic ---
303
def get_dashboard_stats():
    """Build every output shown on the dashboard tab.

    Returns:
        A 7-tuple: summary HTML, proposals-per-year bar figure,
        proposals-per-economy pie figure, top-10 query bar figure, and the
        three count tables (year, economy, query) backing those figures.
        Figures and tables are empty when the corresponding data is missing.
    """
    proposals = load_json(DATA_PROPOSALS)
    fsmm = load_json(DATA_FSMM)
    queries = load_json(DATA_QUERIES)

    # --- Summary cards ---
    total_proposals = len(proposals)
    # FSMM is stored one record per paragraph, so documents are counted by
    # distinct file name.
    unique_fsmm_files = len({entry['filename'] for entry in fsmm})
    total_fsmm_paras = len(fsmm)

    summary_html = f"""
    <div style="display: flex; gap: 20px; justify-content: space-around; margin-bottom: 20px;">
        <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 12px; flex: 1; text-align: center; box-shadow: 0 4px 6px rgba(0,0,0,0.1);">
            <div style="font-size: 0.9em; opacity: 0.9;">總提案數量</div>
            <div style="font-size: 2.5em; font-weight: bold;">{total_proposals}</div>
        </div>
        <div style="background: linear-gradient(135deg, #2af598 0%, #009efd 100%); color: white; padding: 20px; border-radius: 12px; flex: 1; text-align: center; box-shadow: 0 4px 6px rgba(0,0,0,0.1);">
            <div style="font-size: 0.9em; opacity: 0.9;">FSMM 文件數</div>
            <div style="font-size: 2.5em; font-weight: bold;">{unique_fsmm_files}</div>
        </div>
        <div style="background: linear-gradient(135deg, #ff9a9e 0%, #fecfef 100%); color: #4338ca; padding: 20px; border-radius: 12px; flex: 1; text-align: center; box-shadow: 0 4px 6px rgba(0,0,0,0.1);">
            <div style="font-size: 0.9em; opacity: 0.9;">FSMM 總段落</div>
            <div style="font-size: 2.5em; font-weight: bold;">{total_fsmm_paras}</div>
        </div>
    </div>
    """

    df_p = pd.DataFrame(proposals)
    has_year = not df_p.empty and 'Project Year' in df_p.columns
    has_econ = not df_p.empty and 'Proposing Economy(ies)' in df_p.columns

    # --- Proposals per year (bar) ---
    fig_p_year = go.Figure()
    df_ye_stats = pd.DataFrame()
    if has_year:
        year_counts = df_p['Project Year'].value_counts().reset_index()
        year_counts.columns = ['年份', '提案數量']
        year_counts['年份'] = year_counts['年份'].astype(str)
        year_counts = year_counts.sort_values('年份')
        fig_p_year = px.bar(
            year_counts, x='年份', y='提案數量', title="APEC 提案歷年數量分佈",
            labels={'提案數量': '數量', '年份': '年份'},
            color_discrete_sequence=['#6366f1'],
        )
        fig_p_year.update_layout(template="plotly_white")
        df_ye_stats = pd.DataFrame(year_counts)

    # --- Proposals per economy (pie) ---
    fig_p_econ = go.Figure()
    df_ec_stats = pd.DataFrame()
    if has_econ:
        econ_counts = df_p['Proposing Economy(ies)'].value_counts().reset_index()
        econ_counts.columns = ['經濟體', '提案數量']
        # Keep the ten largest and fold the remainder into a single row.
        if len(econ_counts) > 10:
            remainder = econ_counts.iloc[10:]['提案數量'].sum()
            econ_counts = pd.concat([
                econ_counts.head(10),
                pd.DataFrame([{'經濟體': '其他', '提案數量': remainder}]),
            ])
        fig_p_econ = px.pie(
            econ_counts, values='提案數量', names='經濟體', title="提案主辦經濟體分佈",
            color_discrete_sequence=px.colors.sequential.RdBu,
        )
        fig_p_econ.update_layout(template="plotly_white")
        df_ec_stats = pd.DataFrame(econ_counts)

    # --- Most frequent logged queries (horizontal bar) ---
    fig_queries = go.Figure()
    df_qu_stats = pd.DataFrame()
    if queries:
        q_counts = pd.DataFrame(queries)['query'].value_counts().head(10).reset_index()
        q_counts.columns = ['關鍵字', '搜尋次數']
        fig_queries = px.bar(
            q_counts, x='搜尋次數', y='關鍵字', orientation='h', title="熱門搜尋關鍵字 TOP 10",
            labels={'搜尋次數': '次數', '關鍵字': '關鍵字'}, color='搜尋次數',
            color_continuous_scale='Viridis',
        )
        fig_queries.update_layout(template="plotly_white", yaxis={'categoryorder': 'total ascending'})
        df_qu_stats = pd.DataFrame(q_counts)

    return summary_html, fig_p_year, fig_p_econ, fig_queries, df_ye_stats, df_ec_stats, df_qu_stats
376
+
377
+ # --- UI Helpers ---
378
def get_p_choices_raw(year_filter=None):
    """Return the dropdown choices for the proposal search.

    Args:
        year_filter: When truthy, only economies of proposals whose stripped
            ``Project Year`` equals ``str(year_filter)`` are listed.

    Returns:
        ``(years, economies)``: two lists of strings, each starting with an
        empty "no filter" choice; years are sorted descending, economies
        ascending. Blank values are never listed as separate choices.
    """
    data = load_json(DATA_PROPOSALS)
    wanted_year = str(year_filter) if year_filter else None

    years = {str(it.get('Project Year', '')).strip() for it in data if it.get('Project Year')}
    # The same non-empty filter is applied with and without a year; without
    # it, proposals lacking an economy added a second blank choice.
    econs = {
        str(it.get('Proposing Economy(ies)', '')).strip()
        for it in data
        if it.get('Proposing Economy(ies)')
        and (wanted_year is None or str(it.get('Project Year', '')).strip() == wanted_year)
    }
    # Whitespace-only values strip down to "", which is already the first choice.
    years.discard('')
    econs.discard('')

    return [""] + sorted(years, reverse=True), [""] + sorted(econs)
387
+
388
def get_fsmm_choices_raw():
    """Return the dropdown choices for the FSMM search.

    Returns:
        ``(years, types, filenames)``: three lists, each starting with an
        empty "no filter" choice. Years (as strings) and file names are
        sorted descending, types ascending.
    """
    records = load_json(DATA_FSMM)

    year_set = {str(rec['year']) for rec in records}
    type_set = {rec['type'] for rec in records}
    file_set = {rec['filename'] for rec in records}

    years = [""] + sorted(year_set, reverse=True)
    types = [""] + sorted(type_set)
    filenames = [""] + sorted(file_set, reverse=True)
    return years, types, filenames
394
+
395
def refresh_p_choices(year_filter=None):
    """Return Gradio updates for the proposal year and economy dropdowns.

    The choices come from ``get_p_choices_raw(year_filter)``.
    """
    year_choices, econ_choices = get_p_choices_raw(year_filter)
    year_update = gr.update(choices=year_choices)
    econ_update = gr.update(choices=econ_choices)
    return year_update, econ_update
398
+
399
def refresh_fsmm_choices():
    """Return Gradio updates for the FSMM year, type and file dropdowns.

    The choices come from ``get_fsmm_choices_raw()``.
    """
    return tuple(gr.update(choices=options) for options in get_fsmm_choices_raw())
402
+
403
def handle_drilldown(evt: gr.SelectData):
    """Route a click on a dashboard table cell to the matching search tab.

    The clicked cell value is classified as:
      * a four-digit year  -> FSMM search for that year;
      * a known economy    -> proposal search for that economy;
      * anything else      -> FSMM keyword search.

    Returns:
        ``(tabs_update, fsmm_html, proposal_html)``; the HTML of the tab that
        is not targeted is an empty string. On any error the tab selection is
        left unchanged and both HTML outputs are empty.
    """
    # Names that are always treated as economies, even when no stored
    # proposal currently uses them.
    fallback_economies = {
        "China", "Chinese Taipei", "Australia", "Thailand", "United States",
        "Korea", "Japan", "Malaysia", "Viet Nam", "Philippines", "Others", "其他",
    }
    try:
        selected_val = str(evt.value).strip()
        print(f"Drill-down triggered: {selected_val}")

        # Check if it's a Year (4 digits)
        # NOTE(review): the year table is built from proposal data but this
        # branch opens the FSMM tab — confirm that is the intended target.
        if re.match(r'^\d{4}$', selected_val):
            print("Detected: Year -> FSMM search")
            res_fsmm = search_fsmm("", selected_val, "", "")
            return gr.update(selected="fsmm_tab"), res_fsmm, ""

        # Any economy present in the stored proposals can appear in the
        # ranking table, so recognise those in addition to the fixed list.
        known_economies = fallback_economies | {
            str(p.get('Proposing Economy(ies)', '')).strip()
            for p in load_json(DATA_PROPOSALS)
        }
        known_economies.discard('')
        if selected_val in known_economies:
            print("Detected: Economy -> Proposals search")
            res_p = search_proposals("", "", selected_val)
            return gr.update(selected="proposal_tab"), "", res_p

        # Fallback: Keyword search on FSMM
        print("Detected: Keyword -> FSMM search")
        res_fsmm = search_fsmm(selected_val, "", "", "")
        return gr.update(selected="fsmm_tab"), res_fsmm, ""
    except Exception as e:
        print(f"Error in handle_drilldown: {e}")
        import traceback
        traceback.print_exc()
        return gr.update(), "", ""
430
+
431
+ # --- UI UI UI ---
432
# Styles for the clickable dashboard tables (elements with class "drilldown-df"):
# highlight the hovered row and show a "click to search" hint.
DASHBOARD_CSS = """
.drilldown-df table tr:hover {
    cursor: pointer !important;
    background-color: #f0f9ff !important;
    position: relative;
}
.drilldown-df table tr:hover::after {
    content: "🖱️ 點擊跳轉查詢";
    position: absolute;
    right: 10px;
    top: 50%;
    transform: translateY(-50%);
    font-size: 0.8em;
    color: #3b82f6;
    background: #fff;
    padding: 2px 6px;
    border-radius: 4px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    pointer-events: none;
}
.drilldown-df table tr:hover td {
    color: #2563eb !important;
    font-weight: bold;
}
"""

# The stylesheet is passed to gr.Blocks: with the Gradio 5.x release pinned in
# the README, launch() has no ``css`` parameter.
with gr.Blocks(title="APEC 綜合查詢系統", css=DASHBOARD_CSS) as demo:
    gr.Markdown("# 🌐 APEC 綜合查詢系統")
    gr.Markdown("整合 APEC 提案項目與 FSMM 宣言內容的統一檢索平台。")

    with gr.Tabs() as tabs:
        # --- Dashboard tab: summary cards, charts and clickable tables ---
        with gr.Tab("📊 統計儀表板") as dash_tab:
            gr.Markdown("### 📈 案件統計與搜尋熱度分析")
            d_refresh_btn = gr.Button("🔄 重新整理數據", variant="primary")
            d_summary = gr.HTML()
            with gr.Row():
                with gr.Column():
                    d_p_year = gr.Plot(label="歷年分佈")
                    d_ye_df = gr.Dataframe(label="📅 年度排行榜 (點擊可下鑽查詢)", interactive=False, elem_classes=["drilldown-df"])
                with gr.Column():
                    d_p_econ = gr.Plot(label="經濟體分佈")
                    d_ec_df = gr.Dataframe(label="🌍 經濟體排行榜 (點擊可下鑽查詢)", interactive=False, elem_classes=["drilldown-df"])
            with gr.Row():
                with gr.Column():
                    d_queries = gr.Plot(label="熱門關鍵字")
                    d_qu_df = gr.Dataframe(label="🔍 熱門關鍵字 (點擊可下鑽查詢)", interactive=False, elem_classes=["drilldown-df"])

            dash_tab.select(get_dashboard_stats, outputs=[d_summary, d_p_year, d_p_econ, d_queries, d_ye_df, d_ec_df, d_qu_df])
            d_refresh_btn.click(get_dashboard_stats, outputs=[d_summary, d_p_year, d_p_econ, d_queries, d_ye_df, d_ec_df, d_qu_df])

        # --- FSMM declaration search tab ---
        with gr.Tab("🍎 FSMM 宣言查詢", id="fsmm_tab") as fsmm_tab:
            gr.Markdown("### 步驟 1:設定篩選條件 (可選擇年份、類型或特定檔案)")
            with gr.Row():
                f_query = gr.Textbox(label="🔍 關鍵字搜尋", placeholder="例如:resilience, climate...")
                f_year = gr.Dropdown(label="📅 年份", choices=[""], allow_custom_value=True)
                f_type = gr.Dropdown(label="📝 類型", choices=[""], allow_custom_value=True)
                f_doc = gr.Dropdown(label="📄 特定檔案 (必選以查看全文)", choices=[""], allow_custom_value=True)

            with gr.Row():
                f_btn = gr.Button("🔎 查詢相關段落", variant="primary")
                f_full_btn = gr.Button("📖 查看所選檔案全文")
                f_refresh_btn = gr.Button("🔄 重新整理選單")

            gr.Markdown("---")
            f_out = gr.HTML(label="輸出區域")

            # Events
            f_btn.click(search_fsmm, inputs=[f_query, f_year, f_type, f_doc], outputs=[f_out])
            f_full_btn.click(get_fsmm_full_text, inputs=[f_doc], outputs=[f_out])
            f_refresh_btn.click(refresh_fsmm_choices, outputs=[f_year, f_type, f_doc])
            fsmm_tab.select(refresh_fsmm_choices, outputs=[f_year, f_type, f_doc])

        # --- Proposal search tab ---
        with gr.Tab("📊 APEC 提案查詢", id="proposal_tab") as proposal_tab:
            with gr.Row():
                p_query = gr.Textbox(label="關鍵字搜尋所有欄位", placeholder="例如:Thailand, agriculture, energy...")
                p_year = gr.Dropdown(label="📅 年份", choices=[""], allow_custom_value=True)
                p_econ = gr.Dropdown(label="Proposing Economy", choices=[""], allow_custom_value=True)

            with gr.Row():
                p_btn = gr.Button("搜尋提案", variant="primary")
                p_refresh_btn = gr.Button("🔄 重新整理選單")

            p_out = gr.HTML()

            # Events
            p_btn.click(search_proposals, inputs=[p_query, p_year, p_econ], outputs=[p_out])
            p_refresh_btn.click(refresh_p_choices, outputs=[p_year, p_econ])
            # Changing the year narrows the economy choices to that year.
            p_year.change(lambda y: refresh_p_choices(y)[1], inputs=[p_year], outputs=[p_econ])
            proposal_tab.select(refresh_p_choices, outputs=[p_year, p_econ])

        # --- Upload tab ---
        with gr.Tab("☁️ 上傳更新"):
            gr.Markdown("### 上傳文件自動更新資料庫")
            gr.Markdown("- **提案 (Proposals)**: 上傳 `Proposal_*.html` 檔案。")
            gr.Markdown("- **宣言 (FSMM)**: 上傳 `YY_fsmm_*.pdf` 或 `.docx` 檔案。")
            u_file = gr.File(label="拖曳或選擇多個檔案", file_count="multiple")
            u_btn = gr.Button("上傳解析")
            u_status = gr.Textbox(label="處理結果")
            u_btn.click(handle_unified_upload, inputs=[u_file], outputs=[u_status])

    # Drill-down Events: outputs=[tabs, f_out, p_out]
    d_ye_df.select(handle_drilldown, outputs=[tabs, f_out, p_out])
    d_ec_df.select(handle_drilldown, outputs=[tabs, f_out, p_out])
    d_qu_df.select(handle_drilldown, outputs=[tabs, f_out, p_out])

    # Initial load of choices for all tabs and dashboard
    def init_all():
        """Return the initial dropdown updates followed by the dashboard outputs."""
        fc = refresh_fsmm_choices()
        pc = refresh_p_choices()
        db = get_dashboard_stats()
        return fc + pc + db

    demo.load(init_all, outputs=[f_year, f_type, f_doc, p_year, p_econ, d_summary, d_p_year, d_p_econ, d_queries, d_ye_df, d_ec_df, d_qu_df])

if __name__ == "__main__":
    demo.launch()
fsmm_data.json ADDED
The diff for this file is too large to render. See raw diff
 
proposals_data.json ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio
2
+ pdfminer.six
3
+ python-docx
4
+ pandas
5
+ beautifulsoup4
6
+ plotly
7
+