Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
|
|
|
| 3 |
import io
|
| 4 |
import os
|
| 5 |
-
from datetime import datetime
|
| 6 |
from typing import Union
|
| 7 |
|
| 8 |
EXCEL_LETTERS = ["A", "B", "K", "L", "M", "V", "W", "X", "Y"]
|
|
@@ -105,7 +106,6 @@ def extract_and_rename(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 105 |
return out
|
| 106 |
|
| 107 |
def clamp_int(x, lo, hi):
|
| 108 |
-
"""將輸入轉為 int,並夾在 [lo, hi] 範圍;若為 None/空字串則回傳 None。"""
|
| 109 |
if x is None or (isinstance(x, str) and x.strip() == ""):
|
| 110 |
return None
|
| 111 |
try:
|
|
@@ -115,7 +115,6 @@ def clamp_int(x, lo, hi):
|
|
| 115 |
return max(lo, min(hi, xi))
|
| 116 |
|
| 117 |
def parse_time_to_seconds(h, m, s):
|
| 118 |
-
"""將 (h, m, s) 轉成一天內的秒數;若任一為 None,回傳 None(表示不啟用過濾)。"""
|
| 119 |
h = clamp_int(h, 0, 23)
|
| 120 |
m = clamp_int(m, 0, 59)
|
| 121 |
s = clamp_int(s, 0, 59)
|
|
@@ -123,24 +122,83 @@ def parse_time_to_seconds(h, m, s):
|
|
| 123 |
return None
|
| 124 |
return h * 3600 + m * 60 + s
|
| 125 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
def series_time_to_seconds_of_day(series: pd.Series) -> pd.Series:
|
| 127 |
"""
|
| 128 |
將 'time' 欄位轉成 0~86399 的秒數。
|
| 129 |
支援:
|
| 130 |
-
-
|
| 131 |
-
-
|
| 132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
"""
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
|
| 145 |
with gr.Blocks(title="Excel/CSV 指定欄位擷取器(含時間區段)") as demo:
|
| 146 |
gr.Markdown("### 指定欄位擷取(A,B,K,L,M,V,W,X,Y)→ 重新命名為 data,time,⊿Ptop,⊿Pmid,⊿Pbot,H2%,CO%,CO2%,CH4% ;可依 **時間區段 (hh:mm:ss)** 過濾。")
|
|
@@ -177,29 +235,29 @@ with gr.Blocks(title="Excel/CSV 指定欄位擷取器(含時間區段)") as
|
|
| 177 |
except Exception as e:
|
| 178 |
return gr.update(visible=False), f"處理失敗:{e}", pd.DataFrame()
|
| 179 |
|
| 180 |
-
|
|
|
|
|
|
|
| 181 |
try:
|
| 182 |
start_sec = parse_time_to_seconds(sh_, sm_, ss_)
|
| 183 |
end_sec = parse_time_to_seconds(eh_, em_, es_)
|
| 184 |
except Exception as e:
|
| 185 |
return gr.update(visible=False), f"時間輸入錯誤:{e}", pd.DataFrame()
|
| 186 |
|
| 187 |
-
|
| 188 |
if (start_sec is not None) and (end_sec is not None):
|
| 189 |
if "time" not in out.columns:
|
| 190 |
return gr.update(visible=False), "找不到 'time' 欄,無法做時間過濾。", pd.DataFrame()
|
| 191 |
|
| 192 |
secs = series_time_to_seconds_of_day(out["time"])
|
|
|
|
| 193 |
|
| 194 |
-
# 無法解析時間的列,不納入過濾(視為 False)
|
| 195 |
valid_mask = secs.notna()
|
| 196 |
-
secs_valid = secs.where(valid_mask, other=-1)
|
| 197 |
|
| 198 |
if start_sec <= end_sec:
|
| 199 |
-
# 一般區段:start ~ end
|
| 200 |
keep = valid_mask & (secs_valid >= start_sec) & (secs_valid <= end_sec)
|
| 201 |
else:
|
| 202 |
-
# 跨午夜:例如 23:30:00 → 00:30:00
|
| 203 |
keep = valid_mask & ((secs_valid >= start_sec) | (secs_valid <= end_sec))
|
| 204 |
|
| 205 |
out = out.loc[keep].reset_index(drop=True)
|
|
@@ -213,10 +271,13 @@ with gr.Blocks(title="Excel/CSV 指定欄位擷取器(含時間區段)") as
|
|
| 213 |
return gr.update(visible=False), f"輸出 Excel 失敗:{e}", pd.DataFrame()
|
| 214 |
|
| 215 |
# 訊息摘要
|
| 216 |
-
|
| 217 |
-
if
|
| 218 |
-
|
| 219 |
-
|
|
|
|
|
|
|
|
|
|
| 220 |
|
| 221 |
return gr.update(value=out_path, visible=True), note, out.head(20)
|
| 222 |
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
import io
|
| 5 |
import os
|
| 6 |
+
from datetime import datetime, time, timedelta
|
| 7 |
from typing import Union
|
| 8 |
|
| 9 |
EXCEL_LETTERS = ["A", "B", "K", "L", "M", "V", "W", "X", "Y"]
|
|
|
|
| 106 |
return out
|
| 107 |
|
| 108 |
def clamp_int(x, lo, hi):
|
|
|
|
| 109 |
if x is None or (isinstance(x, str) and x.strip() == ""):
|
| 110 |
return None
|
| 111 |
try:
|
|
|
|
| 115 |
return max(lo, min(hi, xi))
|
| 116 |
|
| 117 |
def parse_time_to_seconds(h, m, s):
|
|
|
|
| 118 |
h = clamp_int(h, 0, 23)
|
| 119 |
m = clamp_int(m, 0, 59)
|
| 120 |
s = clamp_int(s, 0, 59)
|
|
|
|
| 122 |
return None
|
| 123 |
return h * 3600 + m * 60 + s
|
| 124 |
|
| 125 |
+
def _hhmmss_int_to_seconds(n: int):
    """Convert an integer written as HHMMSS (e.g. 93005) to seconds of day.

    Returns ``pd.NA`` when *n* is outside 0..235959 or when its minute or
    second field is greater than 59.
    """
    if n < 0 or n > 235959:
        return pd.NA
    rest, ss = divmod(n, 100)
    hh, mm = divmod(rest, 100)
    # hh cannot exceed 23 here because n <= 235959; the check is kept so the
    # function stays correct if the upper bound above is ever relaxed.
    if hh <= 23 and mm <= 59 and ss <= 59:
        return hh * 3600 + mm * 60 + ss
    return pd.NA


def _scalar_time_of_day(value) -> float:
    """Parse one datetime-like value; return seconds of day or NaN."""
    try:
        ts = pd.to_datetime(value, errors="coerce")
    except (TypeError, ValueError, OverflowError):
        return np.nan
    if not isinstance(ts, pd.Timestamp) or ts is pd.NaT:
        return np.nan
    return float(ts.hour * 3600 + ts.minute * 60 + ts.second)


def _datetime_like_to_seconds(values: pd.Series) -> np.ndarray:
    """Parse strings / datetime objects to seconds of day (NaN on failure).

    A vectorized parse is tried first. Elements it could not parse are then
    retried one by one, because a vectorized parse may settle on the format
    of the first element and reject rows written in a different format.
    """
    try:
        parsed = pd.to_datetime(values, errors="coerce")
    except (TypeError, ValueError, OverflowError):
        parsed = None

    if parsed is not None and pd.api.types.is_datetime64_any_dtype(parsed):
        secs = (
            parsed.dt.hour * 3600 + parsed.dt.minute * 60 + parsed.dt.second
        ).to_numpy(dtype="float64", copy=True)
    else:
        secs = np.full(len(values), np.nan, dtype="float64")

    missing = np.isnan(secs)
    if missing.any():
        secs[missing] = [_scalar_time_of_day(v) for v in values[missing]]
    return secs


def series_time_to_seconds_of_day(series: pd.Series) -> pd.Series:
    """Convert a 'time' column to seconds since midnight (0..86399).

    Supported inputs:
      - datetime64[ns] / datetime64[ns, tz] columns
      - timedelta64[ns] columns (wrapped into a single day)
      - Python ``datetime.time`` objects
      - numbers with a fractional part: Excel serials such as 45213.5,
        where the fraction is the fraction of a day
      - whole numbers: read as HHMMSS (93005 -> 09:30:05)
      - text such as 'YYYY-mm-dd HH:MM:SS', 'HH:MM:SS(.fff)' or AM/PM times

    Values that cannot be interpreted become NaN. Sub-second precision is
    dropped. The returned float Series keeps the index of *series*.

    Numbers are classified BEFORE any datetime parsing: ``pd.to_datetime``
    reads bare numbers as nanoseconds since the epoch, which would turn
    every numeric cell into roughly 00:00:00.
    """
    s = series

    # Whole-column fast paths for native pandas dtypes.
    if pd.api.types.is_datetime64_any_dtype(s):
        return (s.dt.hour * 3600 + s.dt.minute * 60 + s.dt.second).astype("float")

    if pd.api.types.is_timedelta64_dtype(s):
        # Floor first so this path drops sub-seconds like the others do.
        return (np.floor(s.dt.total_seconds()) % 86400).astype("float")

    size = len(s)
    out = np.full(size, np.nan, dtype="float64")

    # 1) datetime.time objects (only possible in object-dtype columns).
    if pd.api.types.is_object_dtype(s):
        is_time = np.fromiter(
            (isinstance(v, time) for v in s), dtype=bool, count=size
        )
    else:
        is_time = np.zeros(size, dtype=bool)
    if is_time.any():
        out[is_time] = [
            v.hour * 3600 + v.minute * 60 + v.second for v in s[is_time]
        ]

    # 2) Numeric values (including numeric text); time objects are hidden.
    numeric_source = s.mask(is_time) if is_time.any() else s
    num = pd.to_numeric(numeric_source, errors="coerce").to_numpy(
        dtype="float64", na_value=np.nan
    )
    finite = np.isfinite(num)
    whole = finite & (num == np.floor(num))
    fractional = finite & ~whole

    # 2a) Excel serial: fractional part of a day. The trailing modulo keeps
    #     a value that rounds up to a full day inside 0..86399.
    if fractional.any():
        out[fractional] = np.round((num[fractional] % 1) * 86400) % 86400

    # 2b) Whole numbers are HHMMSS. The range filter keeps int() safe for
    #     huge values; anything out of range stays NaN.
    candidates = whole & (num >= 0) & (num <= 235959)
    if candidates.any():
        converted = [_hhmmss_int_to_seconds(int(v)) for v in num[candidates]]
        # pd.NA cannot be cast to float, so map it to NaN explicitly.
        out[candidates] = [np.nan if c is pd.NA else float(c) for c in converted]

    # 3) Everything else that is not null: strings and datetime objects.
    remaining = ~is_time & ~finite & s.notna().to_numpy()
    if remaining.any():
        out[remaining] = _datetime_like_to_seconds(s[remaining])

    return pd.Series(out, index=s.index, dtype="float")
|
| 197 |
+
|
| 198 |
+
def pad_time(h, m, s):
    """Format hour/minute/second inputs as a zero-padded 'HH:MM:SS' label.

    Each component is shown as at least two digits. A component that is
    missing (None or a blank string) or cannot be read as a number is shown
    as '??' instead of raising, so building the summary message can never
    fail on odd form input. Values are displayed as given; no range clamping
    is applied here.
    """
    def _two_digits(value):
        # Treat None and blank text alike as "not provided".
        if value is None or (isinstance(value, str) and not value.strip()):
            return "??"
        try:
            return f"{int(value):02d}"
        except (TypeError, ValueError, OverflowError):
            pass
        # Second chance for numeric text such as "30.0".
        try:
            return f"{int(float(value)):02d}"
        except (TypeError, ValueError, OverflowError):
            return "??"

    return f"{_two_digits(h)}:{_two_digits(m)}:{_two_digits(s)}"
|
| 202 |
|
| 203 |
with gr.Blocks(title="Excel/CSV 指定欄位擷取器(含時間區段)") as demo:
|
| 204 |
gr.Markdown("### 指定欄位擷取(A,B,K,L,M,V,W,X,Y)→ 重新命名為 data,time,⊿Ptop,⊿Pmid,⊿Pbot,H2%,CO%,CO2%,CH4% ;可依 **時間區段 (hh:mm:ss)** 過濾。")
|
|
|
|
| 235 |
except Exception as e:
|
| 236 |
return gr.update(visible=False), f"處理失敗:{e}", pd.DataFrame()
|
| 237 |
|
| 238 |
+
original_rows = len(out)
|
| 239 |
+
|
| 240 |
+
# 嘗試時間過濾
|
| 241 |
try:
|
| 242 |
start_sec = parse_time_to_seconds(sh_, sm_, ss_)
|
| 243 |
end_sec = parse_time_to_seconds(eh_, em_, es_)
|
| 244 |
except Exception as e:
|
| 245 |
return gr.update(visible=False), f"時間輸入錯誤:{e}", pd.DataFrame()
|
| 246 |
|
| 247 |
+
parsed_ok = None
|
| 248 |
if (start_sec is not None) and (end_sec is not None):
|
| 249 |
if "time" not in out.columns:
|
| 250 |
return gr.update(visible=False), "找不到 'time' 欄,無法做時間過濾。", pd.DataFrame()
|
| 251 |
|
| 252 |
secs = series_time_to_seconds_of_day(out["time"])
|
| 253 |
+
parsed_ok = int(secs.notna().sum())
|
| 254 |
|
|
|
|
| 255 |
valid_mask = secs.notna()
|
| 256 |
+
secs_valid = secs.where(valid_mask, other=-1)
|
| 257 |
|
| 258 |
if start_sec <= end_sec:
|
|
|
|
| 259 |
keep = valid_mask & (secs_valid >= start_sec) & (secs_valid <= end_sec)
|
| 260 |
else:
|
|
|
|
| 261 |
keep = valid_mask & ((secs_valid >= start_sec) | (secs_valid <= end_sec))
|
| 262 |
|
| 263 |
out = out.loc[keep].reset_index(drop=True)
|
|
|
|
| 271 |
return gr.update(visible=False), f"輸出 Excel 失敗:{e}", pd.DataFrame()
|
| 272 |
|
| 273 |
# 訊息摘要
|
| 274 |
+
note_lines = [f"完成!原始列數:**{original_rows}**"]
|
| 275 |
+
if parsed_ok is not None:
|
| 276 |
+
note_lines.append(f"可解析時間列數:**{parsed_ok}**")
|
| 277 |
+
note_lines.append(f"時間區段:**{pad_time(sh_, sm_, ss_)} → {pad_time(eh_, em_, es_)}**")
|
| 278 |
+
note_lines.append(f"輸出列數:**{len(out)}**")
|
| 279 |
+
note_lines.append("下方預覽、右側可下載 Excel。")
|
| 280 |
+
note = "|".join(note_lines)
|
| 281 |
|
| 282 |
return gr.update(value=out_path, visible=True), note, out.head(20)
|
| 283 |
|