lianghsun committed on
Commit
9754d99
·
1 Parent(s): 681d5f0

Build w/ love

Browse files
Files changed (2) hide show
  1. requirements.txt +2 -1
  2. src/streamlit_app.py +162 -37
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  altair
2
  pandas
3
- streamlit
 
 
1
  altair
2
  pandas
3
+ streamlit
4
+ requests
src/streamlit_app.py CHANGED
@@ -1,40 +1,165 @@
1
- import altair as alt
2
- import numpy as np
3
  import pandas as pd
4
  import streamlit as st
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import requests
3
  import pandas as pd
4
  import streamlit as st
5
 
6
+ API_URL = "https://taic.moda.gov.tw/api/v1/dataset.search.export"
7
+
8
+ st.set_page_config(page_title="TAIC Pulse", layout="wide")
9
+
10
+ st.title("臺灣主權AI訓練語料庫 Explorer")
11
+ st.caption("⚡ 即時資料:本頁面會在啟動/手動刷新時從來源 API 抓取最新 JSON,並提供互動式篩選與檢視。(不會背景持續輪詢)")
12
+
13
+ # ---------------------------
14
+ # Sidebar
15
+ # ---------------------------
16
+ st.sidebar.header("資料設定")
17
+ timeout_sec = st.sidebar.slider("API timeout(秒)", 5, 60, 20)
18
+ st.sidebar.caption("提示:本工具不會持續打 API,只在首次載入或你按下刷新時抓一次。")
19
+
20
+ # ---------------------------
21
+ # Fetch + cache (one-time per session unless refreshed)
22
+ # ---------------------------
23
+
24
+
25
+ @st.cache_data(show_spinner=False)
26
+ def fetch_json_once(timeout: int):
27
+ r = requests.get(API_URL, timeout=timeout)
28
+ r.raise_for_status()
29
+ return r.json()
30
+
31
+
32
+ def load_data():
33
+ with st.spinner("從來源 API 抓取資料中..."):
34
+ data = fetch_json_once(timeout_sec)
35
+ fetched_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
36
+ return data, fetched_at
37
+
38
+
39
+ # Buttons
40
+ colA, colB, colC = st.columns([1, 1, 2])
41
+ with colA:
42
+ refresh = st.button("🔄 手動刷新(重新抓取)")
43
+ with colB:
44
+ clear_cache = st.button("🧹 清除快取")
45
+
46
+ if clear_cache:
47
+ st.cache_data.clear()
48
+ st.toast("已清除快取", icon="🧹")
49
+
50
+ # 用 session_state 記錄「這個 session 已載入過」
51
+ if "payload" not in st.session_state or refresh:
52
+ if refresh:
53
+ # 只清掉這個 function 的 cache(簡單作法是全清)
54
+ st.cache_data.clear()
55
+ payload, fetched_at = load_data()
56
+ st.session_state["payload"] = payload
57
+ st.session_state["fetched_at"] = fetched_at
58
+
59
+ data = st.session_state["payload"]
60
+ fetched_at = st.session_state["fetched_at"]
61
+
62
+ with colC:
63
+ st.metric("最後更新時間", fetched_at)
64
+
65
+ st.divider()
66
+
67
+ # ---------------------------
68
+ # Normalize JSON to dataframe
69
+ # ---------------------------
70
+ items = data if isinstance(data, list) else data.get("data", data)
71
+ df = pd.json_normalize(items)
72
+
73
+ # ---------------------------
74
+ # Dynamic filters
75
+ # ---------------------------
76
+ st.sidebar.header("篩選條件(選單)")
77
+
78
+ # 優先嘗試常見欄位
79
+ common_names = {"category", "theme", "publisher",
80
+ "organization", "org", "format", "license", "city"}
81
+ candidate_fields = [c for c in df.columns if c.lower() in common_names]
82
+
83
+ if not candidate_fields:
84
+ candidate_fields = st.sidebar.multiselect(
85
+ "選擇要做成下拉選單的欄位",
86
+ options=df.columns.tolist(),
87
+ default=df.columns[:2].tolist() if len(
88
+ df.columns) >= 2 else df.columns.tolist()
89
+ )
90
+
91
+ filters = {}
92
+ for field in candidate_fields:
93
+ values = sorted(
94
+ [v for v in df[field].dropna().astype(str).unique().tolist()])
95
+ if not values:
96
+ continue
97
+ choice = st.sidebar.selectbox(f"{field} 篩選", ["(全部)"] + values, index=0)
98
+ if choice != "(全部)":
99
+ filters[field] = choice
100
+
101
+ filtered = df.copy()
102
+ for k, v in filters.items():
103
+ filtered = filtered[filtered[k].astype(str) == v]
104
+
105
+ q = st.sidebar.text_input("全文關鍵字(contains)", "")
106
+ if q.strip():
107
+ mask = filtered.astype(str).apply(
108
+ lambda row: row.str.contains(q, case=False, na=False)
109
+ ).any(axis=1)
110
+ filtered = filtered[mask]
111
+
112
+ # ---------------------------
113
+ # Preview
114
+ # ---------------------------
115
+ st.subheader("資料預覽")
116
+ st.write(f"共 {len(filtered):,} 筆(原始 {len(df):,} 筆)")
117
+ st.dataframe(filtered, use_container_width=True)
118
+
119
+ # ---------------------------
120
+ # Download UX (prepare first, then download)
121
+ # ---------------------------
122
+ st.subheader("下載")
123
+
124
+ # 用 session_state 暫存已生成的檔案 bytes,避免重算
125
+ if "csv_bytes" not in st.session_state:
126
+ st.session_state["csv_bytes"] = None
127
+ st.session_state["csv_name"] = None
128
+
129
+ prep_col1, prep_col2 = st.columns([1, 2])
130
+
131
+ with prep_col1:
132
+ prepare = st.button("📦 準備下載 CSV(含進度)")
133
+
134
+ with prep_col2:
135
+ st.caption("下載按鈕會在「準備完成」後出現。")
136
+
137
+ if prepare:
138
+ # 進度條是「體感進度」:用幾個步驟讓 UX 更順
139
+ progress = st.progress(0)
140
+ with st.spinner("正在準備檔案..."):
141
+ progress.progress(10)
142
+ time.sleep(0.15)
143
+
144
+ progress.progress(35)
145
+ # 生成 CSV bytes(這步是主要成本)
146
+ csv_bytes = filtered.to_csv(index=False).encode("utf-8-sig")
147
+ time.sleep(0.1)
148
+
149
+ progress.progress(75)
150
+ # 你也可以在這裡加上壓縮、欄位整理等
151
+ time.sleep(0.1)
152
+
153
+ progress.progress(100)
154
+
155
+ st.session_state["csv_bytes"] = csv_bytes
156
+ st.session_state["csv_name"] = f"taic_pulse_filtered_{time.strftime('%Y%m%d_%H%M%S')}.csv"
157
+ st.success("✅ 檔案已準備完成,請使用下方按鈕下載。")
158
+
159
+ if st.session_state["csv_bytes"] is not None:
160
+ st.download_button(
161
+ "⬇️ 下載篩選後 CSV",
162
+ data=st.session_state["csv_bytes"],
163
+ file_name=st.session_state["csv_name"],
164
+ mime="text/csv",
165
+ )