Spaces:

Kung-Hsun
/

Data_Extraction_CLG_Exp

Sleeping

App Files Files Community

Kung-Hsun committed on Nov 12, 2025

Commit

dc15dde

verified ·

1 Parent(s): 1b27451

Update app.py

Browse files

Files changed (1) hide show

app.py +87 -26

app.py CHANGED Viewed

@@ -1,8 +1,9 @@
 import gradio as gr
 import pandas as pd
 import io
 import os
-from datetime import datetime
 from typing import Union
 EXCEL_LETTERS = ["A", "B", "K", "L", "M", "V", "W", "X", "Y"]
@@ -105,7 +106,6 @@ def extract_and_rename(df: pd.DataFrame) -> pd.DataFrame:
     return out
 def clamp_int(x, lo, hi):
-    """將輸入轉為 int，並夾在 [lo, hi] 範圍；若為 None/空字串則回傳 None。"""
     if x is None or (isinstance(x, str) and x.strip() == ""):
         return None
     try:
@@ -115,7 +115,6 @@ def clamp_int(x, lo, hi):
     return max(lo, min(hi, xi))
 def parse_time_to_seconds(h, m, s):
-    """將 (h, m, s) 轉成一天內的秒數；若任一為 None，回傳 None（表示不啟用過濾）。"""
     h = clamp_int(h, 0, 23)
     m = clamp_int(m, 0, 59)
     s = clamp_int(s, 0, 59)
@@ -123,24 +122,83 @@ def parse_time_to_seconds(h, m, s):
         return None
     return h * 3600 + m * 60 + s
 def series_time_to_seconds_of_day(series: pd.Series) -> pd.Series:
     """
     將 'time' 欄位轉成 0~86399 的秒數。
     支援：
-      - datetime / 時間字串（使用 to_datetime 解析）
-      - Excel time 小數（0~1 表示一天的小數）
-    解析失敗者回傳 NaN。
     """
-    dt = pd.to_datetime(series, errors="coerce", infer_datetime_format=True)
-    seconds = dt.dt.hour * 3600 + dt.dt.minute * 60 + dt.dt.second
-    # 對於解析失敗且為 0~1 的數值（Excel 時間），轉成秒
-    num = pd.to_numeric(series, errors="coerce")
-    mask_frac = seconds.isna() & num.notna() & (num >= 0) & (num < 1.0)
-    if mask_frac.any():
-        seconds.loc[mask_frac] = (num.loc[mask_frac] * 86400).round().astype(int)
-    return seconds  # 可能含 NaN
 with gr.Blocks(title="Excel/CSV 指定欄位擷取器（含時間區段）") as demo:
     gr.Markdown("### 指定欄位擷取（A,B,K,L,M,V,W,X,Y）→ 重新命名為 data,time,⊿Ptop,⊿Pmid,⊿Pbot,H2%,CO%,CO2%,CH4% ；可依 **時間區段 (hh:mm:ss)** 過濾。")
@@ -177,29 +235,29 @@ with gr.Blocks(title="Excel/CSV 指定欄位擷取器（含時間區段）") as
         except Exception as e:
             return gr.update(visible=False), f"處理失敗：{e}", pd.DataFrame()
-        # 嘗試解析時間區段
         try:
             start_sec = parse_time_to_seconds(sh_, sm_, ss_)
             end_sec   = parse_time_to_seconds(eh_, em_, es_)
         except Exception as e:
             return gr.update(visible=False), f"時間輸入錯誤：{e}", pd.DataFrame()
-        # 若兩端都有填，才進行過濾；否則略過過濾
         if (start_sec is not None) and (end_sec is not None):
             if "time" not in out.columns:
                 return gr.update(visible=False), "找不到 'time' 欄，無法做時間過濾。", pd.DataFrame()
             secs = series_time_to_seconds_of_day(out["time"])
-            # 無法解析時間的列，不納入過濾（視為 False）
             valid_mask = secs.notna()
-            secs_valid = secs.where(valid_mask, other=-1)  # -1 代表無效
             if start_sec <= end_sec:
-                # 一般區段：start ~ end
                 keep = valid_mask & (secs_valid >= start_sec) & (secs_valid <= end_sec)
             else:
-                # 跨午夜：例如 23:30:00 → 00:30:00
                 keep = valid_mask & ((secs_valid >= start_sec) | (secs_valid <= end_sec))
             out = out.loc[keep].reset_index(drop=True)
@@ -213,10 +271,13 @@ with gr.Blocks(title="Excel/CSV 指定欄位擷取器（含時間區段）") as
             return gr.update(visible=False), f"輸出 Excel 失敗：{e}", pd.DataFrame()
         # 訊息摘要
-        note = "完成！"
-        if (start_sec is not None) and (end_sec is not None):
-            note += f" 已套用時間過濾（{sh_}:{sm_}:{ss_} → {eh_}:{em_}:{es_}）。"
-        note += " 下方預覽、右側可下載 Excel。"
         return gr.update(value=out_path, visible=True), note, out.head(20)

 import gradio as gr
 import pandas as pd
+import numpy as np
 import io
 import os
+from datetime import datetime, time, timedelta
 from typing import Union
 EXCEL_LETTERS = ["A", "B", "K", "L", "M", "V", "W", "X", "Y"]
     return out
 def clamp_int(x, lo, hi):
     if x is None or (isinstance(x, str) and x.strip() == ""):
         return None
     try:
     return max(lo, min(hi, xi))
 def parse_time_to_seconds(h, m, s):
     h = clamp_int(h, 0, 23)
     m = clamp_int(m, 0, 59)
     s = clamp_int(s, 0, 59)
         return None
     return h * 3600 + m * 60 + s
def _hhmmss_int_to_seconds(n: int) -> float:
    """Convert an integer written as HHMMSS (e.g. 93005) to seconds since midnight.

    Args:
        n: Non-negative integer whose decimal digits read as HHMMSS.

    Returns:
        Seconds since midnight (0..86399) when ``n`` encodes a valid
        time of day; float NaN otherwise.

    NaN is used rather than ``pd.NA`` so that a Series built from these
    results can be cast with ``.astype("float")``; ``pd.NA`` cannot be
    converted by ``float()`` and makes that cast raise ``TypeError``.
    """
    if n < 0 or n > 235959:
        return float("nan")
    hh, rest = divmod(n, 10000)
    mm, ss = divmod(rest, 100)
    if hh <= 23 and mm <= 59 and ss <= 59:
        return hh * 3600 + mm * 60 + ss
    return float("nan")
 def series_time_to_seconds_of_day(series: pd.Series) -> pd.Series:
     """
     將 'time' 欄位轉成 0~86399 的秒數。
     支援：
+      - pandas datetime64[ns] / datetime64[ns, tz]
+      - timedelta64[ns]
+      - 文字：'YYYY-mm-dd HH:MM:SS' / 'HH:MM:SS(.fff)' / 'AM/PM'
+      - Excel 序列（包含日期+時間，像 45213.5）
+      - 純數字 HHMMSS（93005 -> 09:30:05）
+      - Python datetime.time
+    未能解析者回傳 NaN。
     """
+    s = series.copy()
+    # 1) 若已是 datetime64，直接取時分秒
+    if pd.api.types.is_datetime64_any_dtype(s):
+        sec = (s.dt.hour * 3600 + s.dt.minute * 60 + s.dt.second).astype("float")
+        return sec
+    # 2) 若是 timedelta（少見），取一天內秒數
+    if pd.api.types.is_timedelta64_dtype(s):
+        total_sec = s.dt.total_seconds()
+        return (total_sec % 86400).astype("float")
+    # 3) 嘗試一般字串/物件 → datetime
+    parsed = pd.to_datetime(s, errors="coerce")
+    sec_parsed = (parsed.dt.hour * 3600 + parsed.dt.minute * 60 + parsed.dt.second).astype("float")
+    # 4) Excel 序列時間（含日期部分），任何數值都取小數部分 * 86400
+    num = pd.to_numeric(s, errors="coerce")
+    sec_excel = ((num % 1) * 86400).round().astype("float")  # 45213.5 -> 0.5 天 -> 43200 秒
+    # 僅在 parsed 失敗時使用 excel 轉換
+    result = sec_parsed.where(~sec_parsed.isna(), other=np.nan)
+    result = np.where(np.isnan(result), sec_excel, result)
+    result = pd.Series(result, index=s.index, dtype="float")
+    # 5) 純數字 HHMMSS（ex: 93005）
+    mask_intlike = num.notna() & (num == np.floor(num))
+    sec_hhmmss = pd.Series(np.nan, index=s.index, dtype="float")
+    if mask_intlike.any():
+        ints = num[mask_intlike].astype("int64")
+        sec_hhmmss.loc[mask_intlike] = ints.map(_hhmmss_int_to_seconds).astype("float")
+    # 僅在前兩招皆 NaN 時，採用 HHMMSS 轉換
+    fill_mask = result.isna() & sec_hhmmss.notna()
+    result.loc[fill_mask] = sec_hhmmss.loc[fill_mask]
+    # 6) Python datetime.time 物件
+    if result.isna().any():
+        obj_mask = result.isna()
+        subset = s[obj_mask]
+        def time_obj_to_sec(x):
+            if isinstance(x, time):
+                return x.hour * 3600 + x.minute * 60 + x.second
+            return np.nan
+        result.loc[obj_mask] = subset.map(time_obj_to_sec)
+    # 最終返回（仍可能有 NaN，代表無法解析）
+    return result.astype("float")
def pad_time(h, m, s):
    """Format hour/minute/second inputs as 'HH:MM:SS' for display.

    Each component is shown as a zero-padded two-digit integer. A
    component that is None, a blank string, or not convertible to an
    integer is shown as '??' instead of raising, so building the summary
    message can never fail on odd user input.

    Args:
        h: Hour value (number, numeric string, or None).
        m: Minute value (number, numeric string, or None).
        s: Second value (number, numeric string, or None).

    Returns:
        A string of the form 'HH:MM:SS'.
    """
    def to2(x):
        # Blank strings count as "not provided", the same as None.
        if x is None or (isinstance(x, str) and not x.strip()):
            return "??"
        try:
            return f"{int(x):02d}"
        except (TypeError, ValueError, OverflowError):
            pass
        try:
            # Accept numeric text such as "9.0" that int() rejects.
            return f"{int(float(x)):02d}"
        except (TypeError, ValueError, OverflowError):
            return "??"

    return f"{to2(h)}:{to2(m)}:{to2(s)}"
 with gr.Blocks(title="Excel/CSV 指定欄位擷取器（含時間區段）") as demo:
     gr.Markdown("### 指定欄位擷取（A,B,K,L,M,V,W,X,Y）→ 重新命名為 data,time,⊿Ptop,⊿Pmid,⊿Pbot,H2%,CO%,CO2%,CH4% ；可依 **時間區段 (hh:mm:ss)** 過濾。")
         except Exception as e:
             return gr.update(visible=False), f"處理失敗：{e}", pd.DataFrame()
+        original_rows = len(out)
+        # 嘗試時間過濾
         try:
             start_sec = parse_time_to_seconds(sh_, sm_, ss_)
             end_sec   = parse_time_to_seconds(eh_, em_, es_)
         except Exception as e:
             return gr.update(visible=False), f"時間輸入錯誤：{e}", pd.DataFrame()
+        parsed_ok = None
         if (start_sec is not None) and (end_sec is not None):
             if "time" not in out.columns:
                 return gr.update(visible=False), "找不到 'time' 欄，無法做時間過濾。", pd.DataFrame()
             secs = series_time_to_seconds_of_day(out["time"])
+            parsed_ok = int(secs.notna().sum())
             valid_mask = secs.notna()
+            secs_valid = secs.where(valid_mask, other=-1)
             if start_sec <= end_sec:
                 keep = valid_mask & (secs_valid >= start_sec) & (secs_valid <= end_sec)
             else:
                 keep = valid_mask & ((secs_valid >= start_sec) | (secs_valid <= end_sec))
             out = out.loc[keep].reset_index(drop=True)
             return gr.update(visible=False), f"輸出 Excel 失敗：{e}", pd.DataFrame()
         # 訊息摘要
+        note_lines = [f"完成！原始列數：**{original_rows}**"]
+        if parsed_ok is not None:
+            note_lines.append(f"可解析時間列數：**{parsed_ok}**")
+            note_lines.append(f"時間區段：**{pad_time(sh_, sm_, ss_)} → {pad_time(eh_, em_, es_)}**")
+        note_lines.append(f"輸出列數：**{len(out)}**")
+        note_lines.append("下方預覽、右側可下載 Excel。")
+        note = "｜".join(note_lines)
         return gr.update(value=out_path, visible=True), note, out.head(20)