Spaces:

Kung-Hsun
/

Data_Extraction_CLG_Exp

Sleeping

App Files Files Community

Kung-Hsun committed on Nov 12, 2025

Commit

1b27451

verified ·

1 Parent(s): 653d65b

Update app.py

Browse files

Files changed (1) hide show

app.py +103 -23

app.py CHANGED Viewed

@@ -17,27 +17,17 @@ def letters_to_index_zero_based(letter: str) -> int:
 TARGET_INDICES = [letters_to_index_zero_based(L) for L in EXCEL_LETTERS]
 def get_lower_name(file_input: Union[str, os.PathLike, io.BytesIO, bytes, object]) -> str:
-    """
-    取得副檔名判斷用的檔名（若是路徑取 basename；若是物件則取 .name，否則空字串）
-    """
     if isinstance(file_input, (str, os.PathLike)):
         return os.path.basename(str(file_input)).lower()
-    # Gradio 某些情況會傳回 NamedString（str 子類），上一行已涵蓋
     name_attr = getattr(file_input, "name", None)
     if isinstance(name_attr, (str, os.PathLike)):
         return os.path.basename(str(name_attr)).lower()
     return ""
 def load_dataframe(file_input) -> pd.DataFrame:
-    """
-    同時支援：
-    - 路徑字串（Gradio 預設）
-    - 檔案物件（具 .read()）
-    - Bytes（較少見）
-    """
     lower_name = get_lower_name(file_input)
-    # 1) 若是路徑（含 NamedString），直接交給 pandas
     if isinstance(file_input, (str, os.PathLike)):
         path = str(file_input)
         if lower_name.endswith((".xlsx", ".xls")):
@@ -48,7 +38,6 @@ def load_dataframe(file_input) -> pd.DataFrame:
             except Exception:
                 return pd.read_csv(path)
         else:
-            # 不確定副檔名時，先嘗試 Excel 再 CSV
             try:
                 return pd.read_excel(path, engine="openpyxl")
             except Exception:
@@ -57,7 +46,7 @@ def load_dataframe(file_input) -> pd.DataFrame:
                 except Exception:
                     return pd.read_csv(path)
-    # 2) 若是檔案物件（具 .read）
     if hasattr(file_input, "read"):
         raw = file_input.read()
         bio = io.BytesIO(raw)
@@ -72,7 +61,6 @@ def load_dataframe(file_input) -> pd.DataFrame:
                 bio.seek(0)
                 return pd.read_csv(bio)
         else:
-            # 嘗試 Excel -> CSV
             try:
                 bio.seek(0)
                 return pd.read_excel(bio, engine="openpyxl")
@@ -84,10 +72,9 @@ def load_dataframe(file_input) -> pd.DataFrame:
                     bio.seek(0)
                     return pd.read_csv(bio)
-    # 3) 其他類型（例如 bytes）
     if isinstance(file_input, (bytes, bytearray)):
         bio = io.BytesIO(file_input)
-        # 不知道副檔名時，比照上
         try:
             bio.seek(0)
             return pd.read_excel(bio, engine="openpyxl")
@@ -117,18 +104,70 @@ def extract_and_rename(df: pd.DataFrame) -> pd.DataFrame:
     out.columns = name_map
     return out
-with gr.Blocks(title="Excel/CSV 指定欄位擷取器") as demo:
-    gr.Markdown("### 指定欄位擷取（A,B,K,L,M,V,W,X,Y）→ 重新命名為 data,time,⊿Ptop,⊿Pmid,⊿Pbot,H2%,CO%,CO2%,CH4%")
-    # 指定 type="filepath" 讓輸入穩定為路徑字串（NamedString）
     inp = gr.File(label="上傳 .xlsx 或 .csv 檔案", file_types=[".xlsx", ".csv"], type="filepath")
     run_btn = gr.Button("開始處理", variant="primary")
     file_out = gr.File(label="下載處理後的 Excel", visible=False)
     msg = gr.Markdown()
     preview = gr.Dataframe(label="預覽（前 20 列）", wrap=True)
-    def run_pipeline(file_path_str):
         if not file_path_str:
             return gr.update(visible=False), "請先上傳檔案。", pd.DataFrame()
@@ -138,13 +177,54 @@ with gr.Blocks(title="Excel/CSV 指定欄位擷取器") as demo:
         except Exception as e:
             return gr.update(visible=False), f"處理失敗：{e}", pd.DataFrame()
         ts = datetime.now().strftime("%Y%m%d_%H%M%S")
         out_path = f"/tmp/extracted_columns_{ts}.xlsx"
-        out.to_excel(out_path, index=False, engine="openpyxl")
-        return gr.update(value=out_path, visible=True), "完成！下方預覽、右側可下載 Excel。", out.head(20)
-    run_btn.click(run_pipeline, inputs=[inp], outputs=[file_out, msg, preview])
 if __name__ == "__main__":
     demo.launch()

 TARGET_INDICES = [letters_to_index_zero_based(L) for L in EXCEL_LETTERS]
 def get_lower_name(file_input: Union[str, os.PathLike, io.BytesIO, bytes, object]) -> str:
     """Return the lower-cased base file name of *file_input*, or "" if it has none.

     A ``str`` / ``os.PathLike`` input is used as the path itself; any other
     object is asked for a path-like ``name`` attribute instead.
     """
     path_types = (str, os.PathLike)
     if isinstance(file_input, path_types):
         candidate = file_input
     else:
         candidate = getattr(file_input, "name", None)
     if not isinstance(candidate, path_types):
         return ""
     return os.path.basename(str(candidate)).lower()
 def load_dataframe(file_input) -> pd.DataFrame:
     lower_name = get_lower_name(file_input)
+    # 1) 路徑（含 NamedString）
     if isinstance(file_input, (str, os.PathLike)):
         path = str(file_input)
         if lower_name.endswith((".xlsx", ".xls")):
             except Exception:
                 return pd.read_csv(path)
         else:
             try:
                 return pd.read_excel(path, engine="openpyxl")
             except Exception:
                 except Exception:
                     return pd.read_csv(path)
+    # 2) 檔案物件（具 .read）
     if hasattr(file_input, "read"):
         raw = file_input.read()
         bio = io.BytesIO(raw)
                 bio.seek(0)
                 return pd.read_csv(bio)
         else:
             try:
                 bio.seek(0)
                 return pd.read_excel(bio, engine="openpyxl")
                     bio.seek(0)
                     return pd.read_csv(bio)
+    # 3) bytes
     if isinstance(file_input, (bytes, bytearray)):
         bio = io.BytesIO(file_input)
         try:
             bio.seek(0)
             return pd.read_excel(bio, engine="openpyxl")
     out.columns = name_map
     return out
def clamp_int(x, lo, hi):
    """Convert *x* to an int and clamp it into the inclusive range [lo, hi].

    Args:
        x: A number or numeric string. ``None`` and blank strings mean
            "not provided".
        lo: Lower bound of the allowed range.
        hi: Upper bound of the allowed range.

    Returns:
        ``None`` when *x* is ``None`` or a blank string; otherwise the value
        truncated toward zero and clamped into [lo, hi].

    Raises:
        ValueError: If *x* cannot be interpreted as a finite number.
    """
    if x is None or (isinstance(x, str) and not x.strip()):
        return None
    try:
        # Going through float() accepts inputs such as "7.0" or 7.9;
        # int() then truncates toward zero.
        xi = int(float(x))
    except (TypeError, ValueError, OverflowError) as err:
        # TypeError: non-numeric object; ValueError: bad string or NaN;
        # OverflowError: infinity. Chain the cause so it stays debuggable.
        raise ValueError("時間欄位需為數字（整數）") from err
    return max(lo, min(hi, xi))
def parse_time_to_seconds(h, m, s):
    """Turn an (h, m, s) triple into seconds since midnight.

    Each component is normalised with ``clamp_int`` (hours into 0-23,
    minutes and seconds into 0-59). If any component comes back as ``None``
    the result is ``None``, which callers treat as "no time filter".
    """
    # All three components are converted before the None check, so an
    # invalid value raises even when another field was left blank.
    bounds = ((h, 23), (m, 59), (s, 59))
    parts = [clamp_int(value, 0, upper) for value, upper in bounds]
    if any(part is None for part in parts):
        return None
    hours, minutes, seconds = parts
    return hours * 3600 + minutes * 60 + seconds
def series_time_to_seconds_of_day(series: pd.Series) -> pd.Series:
    """Convert a 'time' column into seconds since midnight (0..86399).

    Supported cell values:
      - datetime64 columns, datetime objects and date/time strings
        (parsed with ``pd.to_datetime``);
      - ``datetime.time`` objects;
      - Excel time fractions: numbers (or numeric strings) in [0, 1) giving
        the fraction of a day.

    Args:
        series: The column to convert.

    Returns:
        A float64 Series on the same index. Cells that cannot be
        interpreted, including numbers outside [0, 1), are NaN.
    """
    # Local import: the module-level name `datetime` in this file appears to
    # be the class (it is used as `datetime.now()`), not the module.
    import datetime as _dt

    # Column is already datetime-typed: read the clock fields directly.
    if pd.api.types.is_datetime64_any_dtype(series):
        clock = series.dt
        return (clock.hour * 3600 + clock.minute * 60 + clock.second).astype("float64")

    def _clock_seconds(value):
        """Seconds since midnight for a datetime.time cell, else NaN."""
        if isinstance(value, _dt.time):
            return value.hour * 3600 + value.minute * 60 + value.second
        return float("nan")

    from_time = series.map(_clock_seconds).astype("float64")
    is_time = from_time.notna()

    # Classify numeric cells BEFORE datetime parsing: pd.to_datetime reads
    # bare numbers as epoch offsets, so 0.5 would "parse" to 1970-01-01
    # 00:00:00 and the Excel-fraction handling would never be reached.
    remaining = series.mask(is_time)
    num = pd.to_numeric(remaining, errors="coerce").astype("float64")
    is_num = num.notna()
    is_frac = is_num & (num >= 0) & (num < 1.0)
    # A fraction just below 1.0 can round up to 86400; keep it inside the day.
    from_frac = (num * 86400).round().clip(upper=86399)

    # Only non-numeric cells go through datetime parsing; failures become NaT.
    parsed = pd.to_datetime(remaining.mask(is_num), errors="coerce")
    seconds = (
        parsed.dt.hour * 3600 + parsed.dt.minute * 60 + parsed.dt.second
    ).astype("float64")

    seconds = seconds.where(~is_frac, from_frac)
    seconds = seconds.where(~is_time, from_time)
    return seconds  # may contain NaN
+with gr.Blocks(title="Excel/CSV 指定欄位擷取器（含時間區段）") as demo:
+    gr.Markdown("### 指定欄位擷取（A,B,K,L,M,V,W,X,Y）→ 重新命名為 data,time,⊿Ptop,⊿Pmid,⊿Pbot,H2%,CO%,CO2%,CH4% ；可依 **時間區段 (hh:mm:ss)** 過濾。")
     inp = gr.File(label="上傳 .xlsx 或 .csv 檔案", file_types=[".xlsx", ".csv"], type="filepath")
+    with gr.Row():
+        gr.Markdown("**開始時間 (hh:mm:ss)** — 三個欄位：")
+    with gr.Row():
+        sh = gr.Number(label="Start HH (0-23)", value=None)
+        sm = gr.Number(label="Start MM (0-59)", value=None)
+        ss = gr.Number(label="Start SS (0-59)", value=None)
+    with gr.Row():
+        gr.Markdown("**結束時間 (hh:mm:ss)** — 三個欄位：")
+    with gr.Row():
+        eh = gr.Number(label="End HH (0-23)", value=None)
+        em = gr.Number(label="End MM (0-59)", value=None)
+        es = gr.Number(label="End SS (0-59)", value=None)
     run_btn = gr.Button("開始處理", variant="primary")
     file_out = gr.File(label="下載處理後的 Excel", visible=False)
     msg = gr.Markdown()
     preview = gr.Dataframe(label="預覽（前 20 列）", wrap=True)
+    def run_pipeline(file_path_str, sh_, sm_, ss_, eh_, em_, es_):
         if not file_path_str:
             return gr.update(visible=False), "請先上傳檔案。", pd.DataFrame()
         except Exception as e:
             return gr.update(visible=False), f"處理失敗：{e}", pd.DataFrame()
+        # 嘗試解析時間區段
+        try:
+            start_sec = parse_time_to_seconds(sh_, sm_, ss_)
+            end_sec   = parse_time_to_seconds(eh_, em_, es_)
+        except Exception as e:
+            return gr.update(visible=False), f"時間輸入錯誤：{e}", pd.DataFrame()
+        # 若兩端都有填，才進行過濾；否則略過過濾
+        if (start_sec is not None) and (end_sec is not None):
+            if "time" not in out.columns:
+                return gr.update(visible=False), "找不到 'time' 欄，無法做時間過濾。", pd.DataFrame()
+            secs = series_time_to_seconds_of_day(out["time"])
+            # 無法解析時間的列，不納入過濾（視為 False）
+            valid_mask = secs.notna()
+            secs_valid = secs.where(valid_mask, other=-1)  # -1 代表無效
+            if start_sec <= end_sec:
+                # 一般區段：start ~ end
+                keep = valid_mask & (secs_valid >= start_sec) & (secs_valid <= end_sec)
+            else:
+                # 跨午夜：例如 23:30:00 → 00:30:00
+                keep = valid_mask & ((secs_valid >= start_sec) | (secs_valid <= end_sec))
+            out = out.loc[keep].reset_index(drop=True)
+        # 寫檔輸出
         ts = datetime.now().strftime("%Y%m%d_%H%M%S")
         out_path = f"/tmp/extracted_columns_{ts}.xlsx"
+        try:
+            out.to_excel(out_path, index=False, engine="openpyxl")
+        except Exception as e:
+            return gr.update(visible=False), f"輸出 Excel 失敗：{e}", pd.DataFrame()
+        # 訊息摘要
+        note = "完成！"
+        if (start_sec is not None) and (end_sec is not None):
+            note += f" 已套用時間過濾（{sh_}:{sm_}:{ss_} → {eh_}:{em_}:{es_}）。"
+        note += " 下方預覽、右側可下載 Excel。"
+        return gr.update(value=out_path, visible=True), note, out.head(20)
+    run_btn.click(
+        run_pipeline,
+        inputs=[inp, sh, sm, ss, eh, em, es],
+        outputs=[file_out, msg, preview]
+    )
 if __name__ == "__main__":
     demo.launch()