Kung-Hsun committed on
Commit
dc15dde
·
verified ·
1 Parent(s): 1b27451

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -26
app.py CHANGED
@@ -1,8 +1,9 @@
1
  import gradio as gr
2
  import pandas as pd
 
3
  import io
4
  import os
5
- from datetime import datetime
6
  from typing import Union
7
 
8
  EXCEL_LETTERS = ["A", "B", "K", "L", "M", "V", "W", "X", "Y"]
@@ -105,7 +106,6 @@ def extract_and_rename(df: pd.DataFrame) -> pd.DataFrame:
105
  return out
106
 
107
  def clamp_int(x, lo, hi):
108
- """將輸入轉為 int,並夾在 [lo, hi] 範圍;若為 None/空字串則回傳 None。"""
109
  if x is None or (isinstance(x, str) and x.strip() == ""):
110
  return None
111
  try:
@@ -115,7 +115,6 @@ def clamp_int(x, lo, hi):
115
  return max(lo, min(hi, xi))
116
 
117
  def parse_time_to_seconds(h, m, s):
118
- """將 (h, m, s) 轉成一天內的秒數;若任一為 None,回傳 None(表示不啟用過濾)。"""
119
  h = clamp_int(h, 0, 23)
120
  m = clamp_int(m, 0, 59)
121
  s = clamp_int(s, 0, 59)
@@ -123,24 +122,83 @@ def parse_time_to_seconds(h, m, s):
123
  return None
124
  return h * 3600 + m * 60 + s
125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  def series_time_to_seconds_of_day(series: pd.Series) -> pd.Series:
127
  """
128
  將 'time' 欄位轉成 0~86399 的秒數。
129
  支援:
130
- - datetime / 時間字串(使用 to_datetime 解析)
131
- - Excel time 小數(0~1 表示一天的小數)
132
- 解析失敗者回傳 NaN。
 
 
 
 
133
  """
134
- dt = pd.to_datetime(series, errors="coerce", infer_datetime_format=True)
135
- seconds = dt.dt.hour * 3600 + dt.dt.minute * 60 + dt.dt.second
136
-
137
- # 對於解析失敗且為 0~1 的數值(Excel 時間),轉成秒
138
- num = pd.to_numeric(series, errors="coerce")
139
- mask_frac = seconds.isna() & num.notna() & (num >= 0) & (num < 1.0)
140
- if mask_frac.any():
141
- seconds.loc[mask_frac] = (num.loc[mask_frac] * 86400).round().astype(int)
142
-
143
- return seconds # 可能含 NaN
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
  with gr.Blocks(title="Excel/CSV 指定欄位擷取器(含時間區段)") as demo:
146
  gr.Markdown("### 指定欄位擷取(A,B,K,L,M,V,W,X,Y)→ 重新命名為 data,time,⊿Ptop,⊿Pmid,⊿Pbot,H2%,CO%,CO2%,CH4% ;可依 **時間區段 (hh:mm:ss)** 過濾。")
@@ -177,29 +235,29 @@ with gr.Blocks(title="Excel/CSV 指定欄位擷取器(含時間區段)") as
177
  except Exception as e:
178
  return gr.update(visible=False), f"處理失敗:{e}", pd.DataFrame()
179
 
180
- # 嘗試解析時間區段
 
 
181
  try:
182
  start_sec = parse_time_to_seconds(sh_, sm_, ss_)
183
  end_sec = parse_time_to_seconds(eh_, em_, es_)
184
  except Exception as e:
185
  return gr.update(visible=False), f"時間輸入錯誤:{e}", pd.DataFrame()
186
 
187
- # 若兩端都有填,才進行過濾;否則略過過濾
188
  if (start_sec is not None) and (end_sec is not None):
189
  if "time" not in out.columns:
190
  return gr.update(visible=False), "找不到 'time' 欄,無法做時間過濾。", pd.DataFrame()
191
 
192
  secs = series_time_to_seconds_of_day(out["time"])
 
193
 
194
- # 無法解析時間的列,不納入過濾(視為 False)
195
  valid_mask = secs.notna()
196
- secs_valid = secs.where(valid_mask, other=-1) # -1 代表無效
197
 
198
  if start_sec <= end_sec:
199
- # 一般區段:start ~ end
200
  keep = valid_mask & (secs_valid >= start_sec) & (secs_valid <= end_sec)
201
  else:
202
- # 跨午夜:例如 23:30:00 → 00:30:00
203
  keep = valid_mask & ((secs_valid >= start_sec) | (secs_valid <= end_sec))
204
 
205
  out = out.loc[keep].reset_index(drop=True)
@@ -213,10 +271,13 @@ with gr.Blocks(title="Excel/CSV 指定欄位擷取器(含時間區段)") as
213
  return gr.update(visible=False), f"輸出 Excel 失敗:{e}", pd.DataFrame()
214
 
215
  # 訊息摘要
216
- note = "完成!"
217
- if (start_sec is not None) and (end_sec is not None):
218
- note += f" 已套用時間過濾({sh_}:{sm_}:{ss_} → {eh_}:{em_}:{es_})。"
219
- note += " 下方預覽、右側可下載 Excel。"
 
 
 
220
 
221
  return gr.update(value=out_path, visible=True), note, out.head(20)
222
 
 
1
  import gradio as gr
2
  import pandas as pd
3
+ import numpy as np
4
  import io
5
  import os
6
+ from datetime import datetime, time, timedelta
7
  from typing import Union
8
 
9
  EXCEL_LETTERS = ["A", "B", "K", "L", "M", "V", "W", "X", "Y"]
 
106
  return out
107
 
108
  def clamp_int(x, lo, hi):
 
109
  if x is None or (isinstance(x, str) and x.strip() == ""):
110
  return None
111
  try:
 
115
  return max(lo, min(hi, xi))
116
 
117
  def parse_time_to_seconds(h, m, s):
 
118
  h = clamp_int(h, 0, 23)
119
  m = clamp_int(m, 0, 59)
120
  s = clamp_int(s, 0, 59)
 
122
  return None
123
  return h * 3600 + m * 60 + s
124
 
125
def _hhmmss_int_to_seconds(n: int):
    """Convert an integer written as HHMMSS (e.g. 93005 -> 09:30:05) to seconds.

    Returns ``pd.NA`` when ``n`` is negative, larger than 235959, or when its
    minute/second digit pairs are out of range.
    """
    if n < 0 or n > 235959:
        return pd.NA
    hh, rest = divmod(n, 10000)
    mm, ss = divmod(rest, 100)
    if hh <= 23 and mm <= 59 and ss <= 59:
        return hh * 3600 + mm * 60 + ss
    return pd.NA


def series_time_to_seconds_of_day(series: pd.Series) -> pd.Series:
    """Convert a 'time' column to seconds since midnight (0-86399).

    Supported inputs:
      - datetime64[ns] / datetime64[ns, tz] columns
      - timedelta64[ns] columns (taken modulo one day)
      - Python ``datetime.time`` objects
      - numbers (or numeric strings):
          * integer-valued -> read as HHMMSS (93005 -> 09:30:05) when valid
          * otherwise      -> Excel serial, fractional day * 86400
            (45213.5 -> 43200); an integer that is not a valid HHMMSS is
            treated as a date-only serial, i.e. midnight
      - text such as 'YYYY-mm-dd HH:MM:SS', 'HH:MM:SS(.fff)', or AM/PM forms

    Returns:
        A float Series on the same index as ``series``; entries that could
        not be interpreted are NaN.
    """
    # Native datetime64 / timedelta64 columns need no per-value inspection.
    if pd.api.types.is_datetime64_any_dtype(series):
        clock = series.dt
        return (clock.hour * 3600 + clock.minute * 60 + clock.second).astype("float")
    if pd.api.types.is_timedelta64_dtype(series):
        return (series.dt.total_seconds() % 86400).astype("float")

    # Work positionally so duplicate or non-default indexes cannot misalign.
    values = list(series)
    seconds = np.full(len(values), np.nan, dtype="float64")

    # datetime.time objects are handled first; neither to_numeric nor
    # to_datetime understands them.
    is_clock = np.array([isinstance(v, time) for v in values], dtype=bool)
    for pos in np.flatnonzero(is_clock):
        t = values[pos]
        seconds[pos] = t.hour * 3600 + t.minute * 60 + t.second

    # Numbers must be classified BEFORE calling to_datetime: to_datetime reads
    # a bare number as nanoseconds since the epoch, which "succeeds" and would
    # report 0 seconds for every Excel serial / HHMMSS value.
    numeric = pd.to_numeric(series, errors="coerce").astype("float64").to_numpy()
    numeric = np.where(np.isfinite(numeric), numeric, np.nan)  # drop +/-inf
    is_number = ~np.isnan(numeric) & ~is_clock

    # Excel serial: keep only the fractional day. The trailing modulo keeps a
    # fraction that rounds up to a full day inside the 0-86399 range.
    with np.errstate(invalid="ignore"):
        excel_seconds = np.round((numeric % 1) * 86400) % 86400
    seconds[is_number] = excel_seconds[is_number]

    # Integer-valued numbers are preferably read as HHMMSS; the Excel value
    # written above (midnight) stays in place when the digits are not a time.
    is_whole = is_number & (numeric == np.floor(numeric))
    for pos in np.flatnonzero(is_whole):
        hhmmss = _hhmmss_int_to_seconds(int(numeric[pos]))
        if hhmmss is not pd.NA:
            seconds[pos] = float(hhmmss)

    # Everything else (strings, datetime objects, missing values) goes through
    # the general datetime parser; failures become NaT -> NaN.
    todo = ~is_number & ~is_clock
    if todo.any():
        parsed = pd.to_datetime(series.iloc[np.flatnonzero(todo)], errors="coerce")
        clock = parsed.dt
        seconds[todo] = (
            (clock.hour * 3600 + clock.minute * 60 + clock.second)
            .astype("float")
            .to_numpy()
        )

    return pd.Series(seconds, index=series.index, dtype="float")
197
+
198
def pad_time(h, m, s):
    """Render hour/minute/second as ``HH:MM:SS`` for the summary note.

    Each part is converted with ``int()`` and zero-padded to two digits;
    a part that is ``None`` is shown as ``??``.
    """
    pieces = []
    for part in (h, m, s):
        if part is None:
            pieces.append("??")
        else:
            pieces.append(format(int(part), "02d"))
    return ":".join(pieces)
202
 
203
  with gr.Blocks(title="Excel/CSV 指定欄位擷取器(含時間區段)") as demo:
204
  gr.Markdown("### 指定欄位擷取(A,B,K,L,M,V,W,X,Y)→ 重新命名為 data,time,⊿Ptop,⊿Pmid,⊿Pbot,H2%,CO%,CO2%,CH4% ;可依 **時間區段 (hh:mm:ss)** 過濾。")
 
235
  except Exception as e:
236
  return gr.update(visible=False), f"處理失敗:{e}", pd.DataFrame()
237
 
238
+ original_rows = len(out)
239
+
240
+ # 嘗試時間過濾
241
  try:
242
  start_sec = parse_time_to_seconds(sh_, sm_, ss_)
243
  end_sec = parse_time_to_seconds(eh_, em_, es_)
244
  except Exception as e:
245
  return gr.update(visible=False), f"時間輸入錯誤:{e}", pd.DataFrame()
246
 
247
+ parsed_ok = None
248
  if (start_sec is not None) and (end_sec is not None):
249
  if "time" not in out.columns:
250
  return gr.update(visible=False), "找不到 'time' 欄,無法做時間過濾。", pd.DataFrame()
251
 
252
  secs = series_time_to_seconds_of_day(out["time"])
253
+ parsed_ok = int(secs.notna().sum())
254
 
 
255
  valid_mask = secs.notna()
256
+ secs_valid = secs.where(valid_mask, other=-1)
257
 
258
  if start_sec <= end_sec:
 
259
  keep = valid_mask & (secs_valid >= start_sec) & (secs_valid <= end_sec)
260
  else:
 
261
  keep = valid_mask & ((secs_valid >= start_sec) | (secs_valid <= end_sec))
262
 
263
  out = out.loc[keep].reset_index(drop=True)
 
271
  return gr.update(visible=False), f"輸出 Excel 失敗:{e}", pd.DataFrame()
272
 
273
  # 訊息摘要
274
+ note_lines = [f"完成!原始列數:**{original_rows}**"]
275
+ if parsed_ok is not None:
276
+ note_lines.append(f"可解析時間列數:**{parsed_ok}**")
277
+ note_lines.append(f"時間區段:**{pad_time(sh_, sm_, ss_)} {pad_time(eh_, em_, es_)}**")
278
+ note_lines.append(f"輸出列數:**{len(out)}**")
279
+ note_lines.append("下方預覽、右側可下載 Excel。")
280
+ note = "|".join(note_lines)
281
 
282
  return gr.update(value=out_path, visible=True), note, out.head(20)
283