Spaces:

Kung-Hsun
/

Data_Extraction_CLG_Exp

Sleeping

App Files Files Community

Kung-Hsun committed on Nov 12, 2025

Commit

552e59e

verified ·

1 Parent(s): 959cda2

Create app.py

Browse files

Files changed (1) hide show

app.py +118 -0

app.py ADDED Viewed

	@@ -0,0 +1,118 @@

+import gradio as gr
+import pandas as pd
+import io
+import os
+from datetime import datetime
# Source columns to extract, addressed by Excel column letter. CSV input is
# handled through the same column *positions*.
EXCEL_LETTERS = ["A", "B", "K", "L", "M", "V", "W", "X", "Y"]
# Output headers, paired by order with EXCEL_LETTERS.
TARGET_NAMES = ["data", "time", "⊿Ptop", "⊿Pmid", "⊿Pbot", "H2%", "CO%", "CO2%", "CH4%"]


def letters_to_index_zero_based(letter: str) -> int:
    """Convert an Excel column label to a 0-based column index.

    The label is read as a bijective base-26 number, case-insensitively:
    A -> 0, B -> 1, ..., Z -> 25, AA -> 26, AB -> 27, ...

    Args:
        letter: Column label made only of ASCII letters, e.g. ``"K"`` or ``"aa"``.

    Returns:
        The 0-based position of that column.

    Raises:
        ValueError: If ``letter`` is empty or contains anything other than
            ASCII letters.
    """
    # Without this check "" would yield -1 (i.e. "last column" once handed to
    # iloc) and labels such as "A1" would yield a meaningless number.
    if not (letter.isascii() and letter.isalpha()):
        raise ValueError(f"Invalid Excel column letter: {letter!r}")
    idx = 0
    for ch in letter.upper():
        # Digits run 1..26 (there is no zero digit), hence the +1 here and the
        # single -1 on return.
        idx = idx * 26 + (ord(ch) - ord("A") + 1)
    return idx - 1


# 0-based positions matching EXCEL_LETTERS, computed once at import time.
TARGET_INDICES = [letters_to_index_zero_based(col) for col in EXCEL_LETTERS]
def _rewind(source) -> None:
    """Seek ``source`` back to its start when it is a seekable file object."""
    seek = getattr(source, "seek", None)
    if callable(seek):
        seek(0)


def load_dataframe(file_obj) -> pd.DataFrame:
    """Read an uploaded spreadsheet into a DataFrame, dispatching on extension.

    Args:
        file_obj: Either a filesystem path (``str`` / ``os.PathLike``) or a
            file-like object. For file-like objects the extension is taken
            from the ``name`` attribute when present.
            NOTE(review): which of the two Gradio passes to the callback
            depends on the installed version / ``gr.File(type=...)`` — confirm.

    Returns:
        The parsed table (first worksheet for Excel input).

    Raises:
        Exception: Whatever pandas raises when the content cannot be parsed.
    """
    if isinstance(file_obj, (str, os.PathLike)):
        source = os.fspath(file_obj)
        name = source
    else:
        source = file_obj
        name = getattr(file_obj, "name", None) or ""
    lower = str(name).lower()

    if lower.endswith(".xlsx"):
        # Reads the first worksheet.
        return pd.read_excel(source, engine="openpyxl")
    if lower.endswith(".xls"):
        # openpyxl cannot open legacy .xls workbooks; let pandas choose the
        # engine instead of forcing one that is guaranteed to fail.
        return pd.read_excel(source)
    if lower.endswith(".csv"):
        # pandas defaults: UTF-8 and comma separator.
        return pd.read_csv(source)

    # Unknown extension: try Excel first, then fall back to CSV.
    try:
        _rewind(source)
        return pd.read_excel(source, engine="openpyxl")
    except Exception:
        # Deliberately broad: a non-Excel payload can fail in several ways
        # (bad zip container, missing engine, ...). Errors from the CSV
        # attempt below still propagate to the caller.
        _rewind(source)
        return pd.read_csv(source)
def extract_and_rename(df: pd.DataFrame):
    """Select the target columns by position and give them the target names.

    Columns are picked purely by position (TARGET_INDICES, i.e. Excel columns
    A,B,K,L,M,V,W,X,Y), regardless of any header in the input, and renamed to
    the TARGET_NAMES entry at the same list position. When the input has
    fewer columns than required, only the positions that exist are returned.

    Args:
        df: The table to slice.

    Returns:
        A copy of the selected columns with the new headers.

    Raises:
        ValueError: If none of the target positions exist in ``df``.
    """
    width = df.shape[1]
    kept = [pos for pos in TARGET_INDICES if pos < width]
    if not kept:
        raise ValueError("上傳的資料欄位數不足，無法擷取指定欄位（A,B,K,L,M,V,W,X,Y）。")

    # Slice by the positions that are actually present.
    result = df.iloc[:, kept].copy()
    # A position's rank inside TARGET_INDICES selects its new header.
    result.columns = [TARGET_NAMES[TARGET_INDICES.index(pos)] for pos in kept]
    return result
def process(file_obj):
    """Turn an upload into a downloadable Excel file of the selected columns.

    Args:
        file_obj: The value delivered by the upload component, or ``None``
            when nothing has been uploaded.

    Returns:
        A ``(file_update, message)`` pair. ``file_update`` is ``None`` when
        there is nothing to download (no upload, or reading/extraction
        failed); otherwise it is a ``gr.update`` that points the output file
        component at the generated workbook and makes it visible.
        ``message`` is the user-facing status text.
    """
    import tempfile  # function-scope: only this function needs it

    if file_obj is None:
        return None, "請先上傳檔案。"

    # Read and extract under one guard so an unreadable upload is reported
    # through the status message instead of raising out of the callback.
    try:
        df = load_dataframe(file_obj)
        out = extract_and_rename(df)
    except Exception as e:
        return None, f"處理失敗：{e}"

    # Write to a real file whose basename is the desired download name; the
    # file component is given a path rather than an in-memory buffer.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    download_name = f"extracted_columns_{timestamp}.xlsx"
    out_path = os.path.join(tempfile.mkdtemp(prefix="extracted_"), download_name)
    out.to_excel(out_path, index=False, engine="openpyxl")

    return (gr.update(value=out_path, visible=True),
            "完成！下方可預覽前幾列，右側可下載 Excel。")
# UI layout: upload -> run button -> download + status message -> preview table.
with gr.Blocks(title="Excel/CSV 指定欄位擷取器") as demo:
    gr.Markdown("### 指定欄位擷取（A,B,K,L,M,V,W,X,Y）→ 重新命名為 data,time,⊿Ptop,⊿Pmid,⊿Pbot,H2%,CO%,CO2%,CH4%")
    with gr.Row():
        inp = gr.File(label="上傳 .xlsx 或 .csv 檔案", file_types=[".xlsx", ".csv"])
    with gr.Row():
        run_btn = gr.Button("開始處理", variant="primary")
    with gr.Row():
        # Hidden until a result exists; the update returned by process() shows it.
        file_out = gr.File(label="下載處理後的 Excel", visible=False)
        msg = gr.Markdown()
    with gr.Row():
        preview = gr.Dataframe(label="預覽（前 20 列）", wrap=True, height=300)

    def run_pipeline(file_obj):
        """Button callback: build the download and a preview of the result.

        Args:
            file_obj: Value of the upload component (``None`` if empty).

        Returns:
            ``(file_update, message, preview_df)`` for the three outputs.
            ``preview_df`` holds the first 20 extracted rows, or an empty
            DataFrame when there is nothing to preview.
        """
        file_ret, text = process(file_obj)
        # No upload: process() has already produced the prompt message; do
        # not try to read a file that is not there.
        if file_obj is None:
            return file_ret, text, pd.DataFrame()
        # Preview is best-effort: any read/extract failure just leaves the
        # table empty, the status message carries the explanation.
        try:
            prev = extract_and_rename(load_dataframe(file_obj)).head(20)
        except Exception:
            prev = pd.DataFrame()
        return file_ret, text, prev

    run_btn.click(run_pipeline, inputs=[inp], outputs=[file_out, msg, preview])

if __name__ == "__main__":
    demo.launch()