Kung-Hsun committed on
Commit
1b27451
·
verified ·
1 Parent(s): 653d65b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -23
app.py CHANGED
@@ -17,27 +17,17 @@ def letters_to_index_zero_based(letter: str) -> int:
17
  TARGET_INDICES = [letters_to_index_zero_based(L) for L in EXCEL_LETTERS]
18
 
19
  def get_lower_name(file_input: Union[str, os.PathLike, io.BytesIO, bytes, object]) -> str:
20
- """
21
- 取得副檔名判斷用的檔名(若是路徑取 basename;若是物件則取 .name,否則空字串)
22
- """
23
  if isinstance(file_input, (str, os.PathLike)):
24
  return os.path.basename(str(file_input)).lower()
25
- # Gradio 某些情況會傳回 NamedString(str 子類),上一行已涵蓋
26
  name_attr = getattr(file_input, "name", None)
27
  if isinstance(name_attr, (str, os.PathLike)):
28
  return os.path.basename(str(name_attr)).lower()
29
  return ""
30
 
31
  def load_dataframe(file_input) -> pd.DataFrame:
32
- """
33
- 同時支援:
34
- - 路徑字串(Gradio 預設)
35
- - 檔案物件(具 .read())
36
- - Bytes(較少見)
37
- """
38
  lower_name = get_lower_name(file_input)
39
 
40
- # 1) 若是路徑(含 NamedString),直接交給 pandas
41
  if isinstance(file_input, (str, os.PathLike)):
42
  path = str(file_input)
43
  if lower_name.endswith((".xlsx", ".xls")):
@@ -48,7 +38,6 @@ def load_dataframe(file_input) -> pd.DataFrame:
48
  except Exception:
49
  return pd.read_csv(path)
50
  else:
51
- # 不確定副檔名時,先嘗試 Excel 再 CSV
52
  try:
53
  return pd.read_excel(path, engine="openpyxl")
54
  except Exception:
@@ -57,7 +46,7 @@ def load_dataframe(file_input) -> pd.DataFrame:
57
  except Exception:
58
  return pd.read_csv(path)
59
 
60
- # 2) 若是檔案物件(具 .read)
61
  if hasattr(file_input, "read"):
62
  raw = file_input.read()
63
  bio = io.BytesIO(raw)
@@ -72,7 +61,6 @@ def load_dataframe(file_input) -> pd.DataFrame:
72
  bio.seek(0)
73
  return pd.read_csv(bio)
74
  else:
75
- # 嘗試 Excel -> CSV
76
  try:
77
  bio.seek(0)
78
  return pd.read_excel(bio, engine="openpyxl")
@@ -84,10 +72,9 @@ def load_dataframe(file_input) -> pd.DataFrame:
84
  bio.seek(0)
85
  return pd.read_csv(bio)
86
 
87
- # 3) 其他類型(例如 bytes
88
  if isinstance(file_input, (bytes, bytearray)):
89
  bio = io.BytesIO(file_input)
90
- # 不知道副檔名時,比照上
91
  try:
92
  bio.seek(0)
93
  return pd.read_excel(bio, engine="openpyxl")
@@ -117,18 +104,70 @@ def extract_and_rename(df: pd.DataFrame) -> pd.DataFrame:
117
  out.columns = name_map
118
  return out
119
 
120
- with gr.Blocks(title="Excel/CSV 指定欄位擷取器") as demo:
121
- gr.Markdown("### 指定欄位擷取(A,B,K,L,M,V,W,X,Y)→ 重新命名data,time,⊿Ptop,⊿Pmid,⊿Pbot,H2%,CO%,CO2%,CH4%")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
- # 指定 type="filepath" 讓輸入穩定為路徑字串(NamedString)
124
  inp = gr.File(label="上傳 .xlsx 或 .csv 檔案", file_types=[".xlsx", ".csv"], type="filepath")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  run_btn = gr.Button("開始處理", variant="primary")
126
 
127
  file_out = gr.File(label="下載處理後的 Excel", visible=False)
128
  msg = gr.Markdown()
129
  preview = gr.Dataframe(label="預覽(前 20 列)", wrap=True)
130
 
131
- def run_pipeline(file_path_str):
132
  if not file_path_str:
133
  return gr.update(visible=False), "請先上傳檔案。", pd.DataFrame()
134
 
@@ -138,13 +177,54 @@ with gr.Blocks(title="Excel/CSV 指定欄位擷取器") as demo:
138
  except Exception as e:
139
  return gr.update(visible=False), f"處理失敗:{e}", pd.DataFrame()
140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  ts = datetime.now().strftime("%Y%m%d_%H%M%S")
142
  out_path = f"/tmp/extracted_columns_{ts}.xlsx"
143
- out.to_excel(out_path, index=False, engine="openpyxl")
 
 
 
 
 
 
 
 
 
144
 
145
- return gr.update(value=out_path, visible=True), "完成!下方預覽、右側可下載 Excel。", out.head(20)
146
 
147
- run_btn.click(run_pipeline, inputs=[inp], outputs=[file_out, msg, preview])
 
 
 
 
148
 
149
  if __name__ == "__main__":
150
  demo.launch()
 
17
  TARGET_INDICES = [letters_to_index_zero_based(L) for L in EXCEL_LETTERS]
18
 
19
def get_lower_name(file_input: Union[str, os.PathLike, io.BytesIO, bytes, object]) -> str:
    """Return the lower-cased base file name used for extension sniffing.

    A ``str`` / ``os.PathLike`` input is used directly; any other object
    contributes its ``name`` attribute when that attribute is itself a
    ``str`` / ``os.PathLike``. In every other case an empty string is
    returned.
    """
    path_like = (str, os.PathLike)

    # Pick the value that may carry a file name: the input itself, or its .name.
    if isinstance(file_input, path_like):
        candidate = file_input
    else:
        candidate = getattr(file_input, "name", None)

    if not isinstance(candidate, path_like):
        return ""
    return os.path.basename(str(candidate)).lower()
26
 
27
  def load_dataframe(file_input) -> pd.DataFrame:
 
 
 
 
 
 
28
  lower_name = get_lower_name(file_input)
29
 
30
+ # 1) 路徑(含 NamedString)
31
  if isinstance(file_input, (str, os.PathLike)):
32
  path = str(file_input)
33
  if lower_name.endswith((".xlsx", ".xls")):
 
38
  except Exception:
39
  return pd.read_csv(path)
40
  else:
 
41
  try:
42
  return pd.read_excel(path, engine="openpyxl")
43
  except Exception:
 
46
  except Exception:
47
  return pd.read_csv(path)
48
 
49
+ # 2) 檔案物件(具 .read)
50
  if hasattr(file_input, "read"):
51
  raw = file_input.read()
52
  bio = io.BytesIO(raw)
 
61
  bio.seek(0)
62
  return pd.read_csv(bio)
63
  else:
 
64
  try:
65
  bio.seek(0)
66
  return pd.read_excel(bio, engine="openpyxl")
 
72
  bio.seek(0)
73
  return pd.read_csv(bio)
74
 
75
+ # 3) bytes
76
  if isinstance(file_input, (bytes, bytearray)):
77
  bio = io.BytesIO(file_input)
 
78
  try:
79
  bio.seek(0)
80
  return pd.read_excel(bio, engine="openpyxl")
 
104
  out.columns = name_map
105
  return out
106
 
107
def clamp_int(x, lo, hi):
    """Coerce *x* to an ``int`` and clamp it into the range ``[lo, hi]``.

    Args:
        x: Value to convert. ``None`` and blank/whitespace-only strings
            mean "not provided".
        lo: Inclusive lower bound.
        hi: Inclusive upper bound.

    Returns:
        ``None`` when *x* is not provided; otherwise ``int(float(x))``
        (fractional part truncated) limited to ``[lo, hi]``.

    Raises:
        ValueError: If *x* cannot be interpreted as a number. The original
            conversion error is attached as ``__cause__``.
    """
    if x is None or (isinstance(x, str) and not x.strip()):
        return None
    try:
        value = int(float(x))
    except (TypeError, ValueError, OverflowError) as err:
        # TypeError: non-numeric object; ValueError: bad string or NaN;
        # OverflowError: infinity. All are reported uniformly to the caller.
        raise ValueError("時間欄位需為數字(整數)") from err
    return max(lo, min(hi, value))
116
+
117
def parse_time_to_seconds(h, m, s):
    """Convert an ``(h, m, s)`` triple into seconds since midnight.

    Each component is passed through ``clamp_int`` (hours to 0-23, minutes
    and seconds to 0-59). All three components are validated before the
    result is decided, so an invalid value raises even if another field is
    empty. If any component comes back as ``None`` the function returns
    ``None``, which the caller treats as "time filtering disabled".
    """
    upper_bounds = (23, 59, 59)
    parts = [
        clamp_int(raw, 0, upper)
        for raw, upper in zip((h, m, s), upper_bounds)
    ]
    if any(part is None for part in parts):
        return None
    hours, minutes, secs = parts
    return hours * 3600 + minutes * 60 + secs
125
+
126
def series_time_to_seconds_of_day(series: pd.Series) -> pd.Series:
    """Convert a 'time' column into seconds since midnight (0-86399).

    Supported inputs:
      - datetime64 columns and datetime / time strings (via ``pd.to_datetime``);
      - ``datetime.time`` objects;
      - Excel time fractions, i.e. numbers in ``[0, 1)`` meaning a fraction
        of one day (also when given as numeric strings such as ``"0.5"``).

    Numbers are classified *before* datetime parsing because
    ``pd.to_datetime`` reads bare numbers as nanoseconds since the epoch,
    which would turn every Excel fraction into 00:00:00 instead of NaT.
    Real numbers outside ``[0, 1)`` are treated as unparseable.

    Args:
        series: The raw time column.

    Returns:
        A float64 Series with the same index as *series*; entries that
        cannot be interpreted are NaN.
    """
    from datetime import time as _time  # local: only needed by this function

    def _hms_to_seconds(parsed: pd.Series) -> pd.Series:
        """Seconds of day for a datetime64 Series (NaT becomes NaN)."""
        total = parsed.dt.hour * 3600 + parsed.dt.minute * 60 + parsed.dt.second
        return total.astype("float64")

    # Work on a positional index so mask-based assignment stays safe even if
    # the caller's index has duplicates; the original index is restored below.
    values = series.reset_index(drop=True)

    if pd.api.types.is_datetime64_any_dtype(values):
        seconds = _hms_to_seconds(values)
        seconds.index = series.index
        return seconds

    seconds = pd.Series(float("nan"), index=values.index, dtype="float64")

    # 1) Excel day fractions.
    num = pd.to_numeric(values, errors="coerce")
    is_fraction = num.notna() & (num >= 0) & (num < 1.0)
    if is_fraction.any():
        # Rounding 0.99999... can reach 86400; keep it inside the same day.
        frac_seconds = (num[is_fraction] * 86400).round().clip(upper=86399)
        seconds.loc[is_fraction] = frac_seconds.astype("float64")

    # 2) Everything that is not a real number: time objects and strings.
    #    Numeric-looking strings outside [0, 1) still get a datetime parse.
    if pd.api.types.is_numeric_dtype(values):
        is_text = pd.Series(False, index=values.index)
    else:
        is_text = values.map(lambda v: isinstance(v, str)).astype(bool)
    needs_parse = ~is_fraction & (num.isna() | is_text)

    if needs_parse.any():
        pending = values[needs_parse]

        # datetime.time objects are not understood by pd.to_datetime,
        # so they are converted directly.
        is_clock = pending.map(lambda v: isinstance(v, _time)).astype(bool)
        if is_clock.any():
            clock = pending[is_clock]
            seconds.loc[clock.index] = clock.map(
                lambda t: t.hour * 3600 + t.minute * 60 + t.second
            ).astype("float64")

        other = pending[~is_clock]
        if not other.empty:
            parsed = pd.to_datetime(other, errors="coerce")
            seconds.loc[other.index] = _hms_to_seconds(parsed)

    seconds.index = series.index
    return seconds  # may contain NaN
144
+
145
+ with gr.Blocks(title="Excel/CSV 指定欄位擷取器(含時間區段)") as demo:
146
+ gr.Markdown("### 指定欄位擷取(A,B,K,L,M,V,W,X,Y)→ 重新命名為 data,time,⊿Ptop,⊿Pmid,⊿Pbot,H2%,CO%,CO2%,CH4% ;可依 **時間區段 (hh:mm:ss)** 過濾。")
147
 
 
148
  inp = gr.File(label="上傳 .xlsx 或 .csv 檔案", file_types=[".xlsx", ".csv"], type="filepath")
149
+
150
+ with gr.Row():
151
+ gr.Markdown("**開始時間 (hh:mm:ss)** — 三個欄位:")
152
+ with gr.Row():
153
+ sh = gr.Number(label="Start HH (0-23)", value=None)
154
+ sm = gr.Number(label="Start MM (0-59)", value=None)
155
+ ss = gr.Number(label="Start SS (0-59)", value=None)
156
+
157
+ with gr.Row():
158
+ gr.Markdown("**結束時間 (hh:mm:ss)** — 三個欄位:")
159
+ with gr.Row():
160
+ eh = gr.Number(label="End HH (0-23)", value=None)
161
+ em = gr.Number(label="End MM (0-59)", value=None)
162
+ es = gr.Number(label="End SS (0-59)", value=None)
163
+
164
  run_btn = gr.Button("開始處理", variant="primary")
165
 
166
  file_out = gr.File(label="下載處理後的 Excel", visible=False)
167
  msg = gr.Markdown()
168
  preview = gr.Dataframe(label="預覽(前 20 列)", wrap=True)
169
 
170
+ def run_pipeline(file_path_str, sh_, sm_, ss_, eh_, em_, es_):
171
  if not file_path_str:
172
  return gr.update(visible=False), "請先上傳檔案。", pd.DataFrame()
173
 
 
177
  except Exception as e:
178
  return gr.update(visible=False), f"處理失敗:{e}", pd.DataFrame()
179
 
180
+ # 嘗試解析時間區段
181
+ try:
182
+ start_sec = parse_time_to_seconds(sh_, sm_, ss_)
183
+ end_sec = parse_time_to_seconds(eh_, em_, es_)
184
+ except Exception as e:
185
+ return gr.update(visible=False), f"時間輸入錯誤:{e}", pd.DataFrame()
186
+
187
+ # 若兩端都有填,才進行過濾;否則略過過濾
188
+ if (start_sec is not None) and (end_sec is not None):
189
+ if "time" not in out.columns:
190
+ return gr.update(visible=False), "找不到 'time' 欄,無法做時間過濾。", pd.DataFrame()
191
+
192
+ secs = series_time_to_seconds_of_day(out["time"])
193
+
194
+ # 無法解析時間的列,不納入過濾(視為 False)
195
+ valid_mask = secs.notna()
196
+ secs_valid = secs.where(valid_mask, other=-1) # -1 代表無效
197
+
198
+ if start_sec <= end_sec:
199
+ # 一般區段:start ~ end
200
+ keep = valid_mask & (secs_valid >= start_sec) & (secs_valid <= end_sec)
201
+ else:
202
+ # 跨午夜:例如 23:30:00 → 00:30:00
203
+ keep = valid_mask & ((secs_valid >= start_sec) | (secs_valid <= end_sec))
204
+
205
+ out = out.loc[keep].reset_index(drop=True)
206
+
207
+ # 寫檔輸出
208
  ts = datetime.now().strftime("%Y%m%d_%H%M%S")
209
  out_path = f"/tmp/extracted_columns_{ts}.xlsx"
210
+ try:
211
+ out.to_excel(out_path, index=False, engine="openpyxl")
212
+ except Exception as e:
213
+ return gr.update(visible=False), f"輸出 Excel 失敗:{e}", pd.DataFrame()
214
+
215
+ # 訊息摘要
216
+ note = "完成!"
217
+ if (start_sec is not None) and (end_sec is not None):
218
+ note += f" 已套用時間過濾({sh_}:{sm_}:{ss_} → {eh_}:{em_}:{es_})。"
219
+ note += " 下方預覽、右側可下載 Excel。"
220
 
221
+ return gr.update(value=out_path, visible=True), note, out.head(20)
222
 
223
+ run_btn.click(
224
+ run_pipeline,
225
+ inputs=[inp, sh, sm, ss, eh, em, es],
226
+ outputs=[file_out, msg, preview]
227
+ )
228
 
229
  if __name__ == "__main__":
230
  demo.launch()