Spaces:

Kung-Hsun
/

Data_Extraction_CLG_Exp

Sleeping

App Files Files Community

Kung-Hsun committed on Nov 12, 2025

Commit

f3e1f75

verified ·

1 Parent(s): db1add2

Update app.py

Browse files

Files changed (1) hide show

app.py +81 -32

app.py CHANGED Viewed

@@ -2,8 +2,8 @@ import gradio as gr
 import pandas as pd
 import io
 import os
-import tempfile
 from datetime import datetime
 EXCEL_LETTERS = ["A", "B", "K", "L", "M", "V", "W", "X", "Y"]
 TARGET_NAMES  = ["data", "time", "⊿Ptop", "⊿Pmid", "⊿Pbot", "H2%", "CO%", "CO2%", "CH4%"]
@@ -16,39 +16,91 @@ def letters_to_index_zero_based(letter: str) -> int:
 TARGET_INDICES = [letters_to_index_zero_based(L) for L in EXCEL_LETTERS]
-def load_dataframe(file_obj) -> pd.DataFrame:
-    name = getattr(file_obj, "name", "") or ""
-    lower = name.lower()
-    # 先把檔案內容讀到記憶體，避免多次讀取時指標位置問題
-    file_obj.seek(0)
-    raw = file_obj.read()
-    bio = io.BytesIO(raw)
-    if lower.endswith(".xlsx") or lower.endswith(".xls"):
-        bio.seek(0)
-        return pd.read_excel(bio, engine="openpyxl")
-    elif lower.endswith(".csv"):
-        # 嘗試自動分隔符號；若失敗再退回逗號
-        try:
-            bio.seek(0)
-            return pd.read_csv(bio, sep=None, engine="python")
-        except Exception:
             bio.seek(0)
-            return pd.read_csv(bio)
-    else:
-        # 嘗試 Excel -> CSV
         try:
             bio.seek(0)
             return pd.read_excel(bio, engine="openpyxl")
         except Exception:
-            bio.seek(0)
             try:
                 return pd.read_csv(bio, sep=None, engine="python")
             except Exception:
                 bio.seek(0)
                 return pd.read_csv(bio)
 def extract_and_rename(df: pd.DataFrame) -> pd.DataFrame:
     n_cols = df.shape[1]
     existing_positions = [i for i in TARGET_INDICES if i < n_cols]
@@ -68,34 +120,31 @@ def extract_and_rename(df: pd.DataFrame) -> pd.DataFrame:
 with gr.Blocks(title="Excel/CSV 指定欄位擷取器") as demo:
     gr.Markdown("### 指定欄位擷取（A,B,K,L,M,V,W,X,Y）→ 重新命名為 data,time,⊿Ptop,⊿Pmid,⊿Pbot,H2%,CO%,CO2%,CH4%")
-    inp = gr.File(label="上傳 .xlsx 或 .csv 檔案", file_types=[".xlsx", ".csv"])
     run_btn = gr.Button("開始處理", variant="primary")
     file_out = gr.File(label="下載處理後的 Excel", visible=False)
     msg = gr.Markdown()
-    preview = gr.Dataframe(label="預覽（前 20 列）", wrap=True)
-    def run_pipeline(file_obj):
-        if file_obj is None:
             return gr.update(visible=False), "請先上傳檔案。", pd.DataFrame()
         try:
-            df = load_dataframe(file_obj)
             out = extract_and_rename(df)
         except Exception as e:
             return gr.update(visible=False), f"處理失敗：{e}", pd.DataFrame()
-        # 寫到 /tmp 並用檔名控制下載時的顯示名稱
         ts = datetime.now().strftime("%Y%m%d_%H%M%S")
         out_path = f"/tmp/extracted_columns_{ts}.xlsx"
         out.to_excel(out_path, index=False, engine="openpyxl")
-        # 回傳：讓 File 元件顯示下載連結（用 gr.update，而不是 File.update）
         return gr.update(value=out_path, visible=True), "完成！下方預覽、右側可下載 Excel。", out.head(20)
     run_btn.click(run_pipeline, inputs=[inp], outputs=[file_out, msg, preview])
 if __name__ == "__main__":
-    # 在 Hugging Face Spaces 不需要 share；若本機想避免 SSR 訊息可加 ssr_mode=False
     demo.launch()
-    # demo.launch(ssr_mode=False)  # 若你想關閉啟動訊息中的 SSR ⚡ 提示

 import pandas as pd
 import io
 import os
 from datetime import datetime
+from typing import Union
 # Excel column letters of the source columns this app extracts.
 EXCEL_LETTERS = ["A", "B", "K", "L", "M", "V", "W", "X", "Y"]
 # Output names for the extracted columns; presumably paired position-for-position
 # with EXCEL_LETTERS (A -> data, B -> time, ...) -- confirm in extract_and_rename.
 TARGET_NAMES  = ["data", "time", "⊿Ptop", "⊿Pmid", "⊿Pbot", "H2%", "CO%", "CO2%", "CH4%"]
 # Column positions computed once at import time by letters_to_index_zero_based,
 # which is defined elsewhere in this file (zero-based, judging by its name).
 TARGET_INDICES = [letters_to_index_zero_based(L) for L in EXCEL_LETTERS]
def get_lower_name(file_input: Union[str, os.PathLike, io.BytesIO, bytes, object]) -> str:
    """Return the lower-cased base file name used for extension detection.

    Args:
        file_input: A path (``str`` or ``os.PathLike``), an object exposing a
            ``name`` attribute (for example an open file handle), or any
            other value.

    Returns:
        The lower-cased basename of the path, or of the object's ``name``
        attribute when that attribute is itself a path. An empty string is
        returned when no usable name is available (e.g. raw ``bytes`` or a
        handle whose ``name`` is an integer file descriptor).
    """
    # Paths are checked first; this also covers ``str`` subclasses such as the
    # NamedString that Gradio may hand to callbacks.
    candidate = file_input
    if not isinstance(candidate, (str, os.PathLike)):
        candidate = getattr(file_input, "name", None)
        if not isinstance(candidate, (str, os.PathLike)):
            return ""
    # os.fsdecode() resolves the path through __fspath__ (decoding bytes paths),
    # whereas str() on an arbitrary PathLike may return its repr instead.
    return os.path.basename(os.fsdecode(candidate)).lower()
def _read_csv_with_fallback(open_source) -> pd.DataFrame:
    """Parse CSV with delimiter sniffing, retrying with the default comma parser."""
    try:
        return pd.read_csv(open_source(), sep=None, engine="python")
    except Exception:
        # Deliberate best effort: whatever made sniffing fail, give the default
        # parser one chance. Its own error (if any) propagates to the caller.
        return pd.read_csv(open_source())


def _read_table(open_source, lower_name: str) -> pd.DataFrame:
    """Pick a pandas reader from the file extension, falling back Excel -> CSV.

    ``open_source`` is a zero-argument callable returning a fresh path or
    buffer on every call, so each parsing attempt starts from the beginning.
    """
    if lower_name.endswith(".xlsx"):
        return pd.read_excel(open_source(), engine="openpyxl")
    if lower_name.endswith(".xls"):
        # openpyxl cannot read the legacy .xls format; let pandas select the
        # engine for it (xlrd) instead of forcing openpyxl.
        return pd.read_excel(open_source())
    if lower_name.endswith(".csv"):
        return _read_csv_with_fallback(open_source)
    # Unknown extension: try Excel first, then CSV.
    try:
        return pd.read_excel(open_source(), engine="openpyxl")
    except Exception:
        return _read_csv_with_fallback(open_source)


def load_dataframe(file_input) -> pd.DataFrame:
    """Load an uploaded spreadsheet into a DataFrame.

    Args:
        file_input: One of
            - a path (``str`` or ``os.PathLike``), which is what Gradio
              passes when the File component uses ``type="filepath"``;
            - a file-like object exposing ``read()``;
            - raw ``bytes`` / ``bytearray``.

    Returns:
        The parsed table. The reader is chosen from the file extension
        reported by ``get_lower_name``; when the extension is unknown, Excel
        is attempted first and CSV second.

    Raises:
        ValueError: If ``file_input`` is none of the supported kinds.
        Exception: Whatever the last attempted pandas reader raises when the
            content cannot be parsed.
    """
    lower_name = get_lower_name(file_input)

    # 1) Path (including str subclasses): hand it straight to pandas.
    if isinstance(file_input, (str, os.PathLike)):
        # str() turns str subclasses into a plain str; fsdecode resolves PathLike.
        path = str(os.fsdecode(file_input))
        return _read_table(lambda: path, lower_name)

    # 2) File-like object: buffer the content once so every attempt can re-read it.
    if hasattr(file_input, "read"):
        rewind = getattr(file_input, "seek", None)
        if callable(rewind):
            try:
                # A handle that was already consumed would otherwise yield b"".
                rewind(0)
            except (OSError, ValueError):
                # Non-seekable stream: read from the current position instead.
                pass
        raw = file_input.read()
        if isinstance(raw, str):
            # Text-mode handle: io.BytesIO needs bytes.
            raw = raw.encode("utf-8")
        return _read_table(lambda: io.BytesIO(raw), lower_name)

    # 3) Raw bytes: no name is available, so use the unknown-extension path.
    if isinstance(file_input, (bytes, bytearray)):
        return _read_table(lambda: io.BytesIO(file_input), "")

    raise ValueError("不支援的檔案型態，請上傳 .xlsx 或 .csv 檔。")
 def extract_and_rename(df: pd.DataFrame) -> pd.DataFrame:
     n_cols = df.shape[1]
     existing_positions = [i for i in TARGET_INDICES if i < n_cols]
 with gr.Blocks(title="Excel/CSV 指定欄位擷取器") as demo:
     gr.Markdown("### 指定欄位擷取（A,B,K,L,M,V,W,X,Y）→ 重新命名為 data,time,⊿Ptop,⊿Pmid,⊿Pbot,H2%,CO%,CO2%,CH4%")
+    # 指定 type="filepath" 讓輸入穩定為路徑字串（NamedString）
+    inp = gr.File(label="上傳 .xlsx 或 .csv 檔案", file_types=[".xlsx", ".csv"], type="filepath")
     run_btn = gr.Button("開始處理", variant="primary")
     file_out = gr.File(label="下載處理後的 Excel", visible=False)
     msg = gr.Markdown()
+    preview = gr.Dataframe(label="預覽（前 20 列）", wrap=True, height=300)
+    def run_pipeline(file_path_str):
+        if not file_path_str:
             return gr.update(visible=False), "請先上傳檔案。", pd.DataFrame()
         try:
+            df = load_dataframe(file_path_str)
             out = extract_and_rename(df)
         except Exception as e:
             return gr.update(visible=False), f"處理失敗：{e}", pd.DataFrame()
         ts = datetime.now().strftime("%Y%m%d_%H%M%S")
         out_path = f"/tmp/extracted_columns_{ts}.xlsx"
         out.to_excel(out_path, index=False, engine="openpyxl")
         return gr.update(value=out_path, visible=True), "完成！下方預覽、右側可下載 Excel。", out.head(20)
     run_btn.click(run_pipeline, inputs=[inp], outputs=[file_out, msg, preview])
 if __name__ == "__main__":
     demo.launch()