Spaces:

Kung-Hsun
/

Data_Extraction_CLG_Exp

Sleeping

App Files Files Community

Kung-Hsun committed on Nov 12, 2025

Commit

db1add2

verified ·

1 Parent(s): 513da39

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -66

app.py CHANGED Viewed

@@ -2,14 +2,13 @@ import gradio as gr
 import pandas as pd
 import io
 import os
 from datetime import datetime
-# 需要擷取的 Excel 欄位（用 Excel 字母定位）；CSV 也以欄位「位置」處理
 EXCEL_LETTERS = ["A", "B", "K", "L", "M", "V", "W", "X", "Y"]
 TARGET_NAMES  = ["data", "time", "⊿Ptop", "⊿Pmid", "⊿Pbot", "H2%", "CO%", "CO2%", "CH4%"]
 def letters_to_index_zero_based(letter: str) -> int:
-    """將 Excel 欄位字母轉成 0-based index（A->0, B->1, ..., Z->25, AA->26, ...）"""
     idx = 0
     for ch in letter.upper():
         idx = idx * 26 + (ord(ch) - ord('A') + 1)
@@ -18,40 +17,46 @@ def letters_to_index_zero_based(letter: str) -> int:
 TARGET_INDICES = [letters_to_index_zero_based(L) for L in EXCEL_LETTERS]
 def load_dataframe(file_obj) -> pd.DataFrame:
-    """根據副檔名讀入 DataFrame；xlsx 用 openpyxl、csv 用 pandas 讀入"""
-    name = getattr(file_obj, "name", None) or ""
     lower = name.lower()
     if lower.endswith(".xlsx") or lower.endswith(".xls"):
-        # 讀第一個工作表
-        return pd.read_excel(file_obj, engine="openpyxl")
     elif lower.endswith(".csv"):
-        # 盡量自動偵測編碼與分隔符號（若已知規格，可固化）
-        # 這裡採用最常見的 UTF-8 與逗號
-        return pd.read_csv(file_obj)
     else:
-        # 嘗試以 Excel 讀取；失敗再嘗試 CSV
         try:
-            file_obj.seek(0)
-            return pd.read_excel(file_obj, engine="openpyxl")
         except Exception:
-            file_obj.seek(0)
-            return pd.read_csv(file_obj)
-def extract_and_rename(df: pd.DataFrame):
-    """
-    以「欄位位置」擷取 A,B,K,L,M,V,W,X,Y（即 0,1,10,11,12,21,22,23,24）。
-    無論原始是否有標題，都以位置切片，再以 TARGET_NAMES 依序命名。
-    若原始欄數不足，僅輸出可取得的子集，並對應命名。
-    """
     n_cols = df.shape[1]
     existing_positions = [i for i in TARGET_INDICES if i < n_cols]
     if not existing_positions:
         raise ValueError("上傳的資料欄位數不足，無法擷取指定欄位（A,B,K,L,M,V,W,X,Y）。")
-    # 依存在的欄位位置切片
     out = df.iloc[:, existing_positions].copy()
-    # 對應名稱：以 positions 在 TARGET_INDICES 中的相對順序，對應到 TARGET_NAMES
     name_map = []
     for pos in existing_positions:
         idx_in_targets = TARGET_INDICES.index(pos)
@@ -60,59 +65,37 @@ def extract_and_rename(df: pd.DataFrame):
     out.columns = name_map
     return out
-def process(file_obj):
-    if file_obj is None:
-        return None, "請先上傳檔案。"
-    # 讀檔
-    df = load_dataframe(file_obj)
-    # 擷取與命名
-    try:
-        out = extract_and_rename(df)
-    except Exception as e:
-        return None, f"處理失敗：{e}"
-    # 匯出為 Excel，並回傳供下載
-    buffer = io.BytesIO()
-    out.to_excel(buffer, index=False, engine="openpyxl")
-    buffer.seek(0)
-    # 讓 Gradio 以檔案形式輸出（會自帶下載按鈕）
-    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-    download_name = f"extracted_columns_{timestamp}.xlsx"
-    return (gr.File.update(value=buffer, visible=True, filename=download_name),
-            "完成！下方可預覽前幾列，右側可下載 Excel。")
 with gr.Blocks(title="Excel/CSV 指定欄位擷取器") as demo:
     gr.Markdown("### 指定欄位擷取（A,B,K,L,M,V,W,X,Y）→ 重新命名為 data,time,⊿Ptop,⊿Pmid,⊿Pbot,H2%,CO%,CO2%,CH4%")
-    with gr.Row():
-        inp = gr.File(label="上傳 .xlsx 或 .csv 檔案", file_types=[".xlsx", ".csv"])
-    with gr.Row():
-        run_btn = gr.Button("開始處理", variant="primary")
-    with gr.Row():
-        file_out = gr.File(label="下載處理後的 Excel", visible=False)
-        msg = gr.Markdown()
-    with gr.Row():
-        preview = gr.Dataframe(label="預覽（前 20 列）", wrap=True)
     def run_pipeline(file_obj):
-        file_ret, text = process(file_obj)
-        # 額外提供預覽
-        df = load_dataframe(file_obj)
         try:
             out = extract_and_rename(df)
-            prev = out.head(20)
-        except Exception:
-            prev = pd.DataFrame()
-        return file_ret, text, prev
     run_btn.click(run_pipeline, inputs=[inp], outputs=[file_out, msg, preview])
 if __name__ == "__main__":
     demo.launch()

 import pandas as pd
 import io
 import os
+import tempfile
 from datetime import datetime
 EXCEL_LETTERS = ["A", "B", "K", "L", "M", "V", "W", "X", "Y"]
 TARGET_NAMES  = ["data", "time", "⊿Ptop", "⊿Pmid", "⊿Pbot", "H2%", "CO%", "CO2%", "CH4%"]
 def letters_to_index_zero_based(letter: str) -> int:
     idx = 0
     for ch in letter.upper():
         idx = idx * 26 + (ord(ch) - ord('A') + 1)
 TARGET_INDICES = [letters_to_index_zero_based(L) for L in EXCEL_LETTERS]
 def load_dataframe(file_obj) -> pd.DataFrame:
+    name = getattr(file_obj, "name", "") or ""
     lower = name.lower()
+    # 先把檔案內容讀到記憶體，避免多次讀取時指標位置問題
+    file_obj.seek(0)
+    raw = file_obj.read()
+    bio = io.BytesIO(raw)
     if lower.endswith(".xlsx") or lower.endswith(".xls"):
+        bio.seek(0)
+        return pd.read_excel(bio, engine="openpyxl")
     elif lower.endswith(".csv"):
+        # 嘗試自動分隔符號；若失敗再退回逗號
+        try:
+            bio.seek(0)
+            return pd.read_csv(bio, sep=None, engine="python")
+        except Exception:
+            bio.seek(0)
+            return pd.read_csv(bio)
     else:
+        # 嘗試 Excel -> CSV
         try:
+            bio.seek(0)
+            return pd.read_excel(bio, engine="openpyxl")
         except Exception:
+            bio.seek(0)
+            try:
+                return pd.read_csv(bio, sep=None, engine="python")
+            except Exception:
+                bio.seek(0)
+                return pd.read_csv(bio)
+def extract_and_rename(df: pd.DataFrame) -> pd.DataFrame:
     n_cols = df.shape[1]
     existing_positions = [i for i in TARGET_INDICES if i < n_cols]
     if not existing_positions:
         raise ValueError("上傳的資料欄位數不足，無法擷取指定欄位（A,B,K,L,M,V,W,X,Y）。")
     out = df.iloc[:, existing_positions].copy()
     name_map = []
     for pos in existing_positions:
         idx_in_targets = TARGET_INDICES.index(pos)
     out.columns = name_map
     return out
 with gr.Blocks(title="Excel/CSV 指定欄位擷取器") as demo:
     gr.Markdown("### 指定欄位擷取（A,B,K,L,M,V,W,X,Y）→ 重新命名為 data,time,⊿Ptop,⊿Pmid,⊿Pbot,H2%,CO%,CO2%,CH4%")
+    inp = gr.File(label="上傳 .xlsx 或 .csv 檔案", file_types=[".xlsx", ".csv"])
+    run_btn = gr.Button("開始處理", variant="primary")
+    file_out = gr.File(label="下載處理後的 Excel", visible=False)
+    msg = gr.Markdown()
+    preview = gr.Dataframe(label="預覽（前 20 列）", wrap=True)
     def run_pipeline(file_obj):
+        if file_obj is None:
+            return gr.update(visible=False), "請先上傳檔案。", pd.DataFrame()
         try:
+            df = load_dataframe(file_obj)
             out = extract_and_rename(df)
+        except Exception as e:
+            return gr.update(visible=False), f"處理失敗：{e}", pd.DataFrame()
+        # 寫到 /tmp 並用檔名控制下載時的顯示名稱
+        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
+        out_path = f"/tmp/extracted_columns_{ts}.xlsx"
+        out.to_excel(out_path, index=False, engine="openpyxl")
+        # 回傳：讓 File 元件顯示下載連結（用 gr.update，而不是 File.update）
+        return gr.update(value=out_path, visible=True), "完成！下方預覽、右側可下載 Excel。", out.head(20)
     run_btn.click(run_pipeline, inputs=[inp], outputs=[file_out, msg, preview])
 if __name__ == "__main__":
+    # 在 Hugging Face Spaces 不需要 share；若本機想避免 SSR 訊息可加 ssr_mode=False
     demo.launch()
+    # demo.launch(ssr_mode=False)  # 若你想關閉啟動訊息中的 SSR ⚡ 提示