Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,8 +2,8 @@ import gradio as gr
|
|
| 2 |
import pandas as pd
|
| 3 |
import io
|
| 4 |
import os
|
| 5 |
-
import tempfile
|
| 6 |
from datetime import datetime
|
|
|
|
| 7 |
|
| 8 |
EXCEL_LETTERS = ["A", "B", "K", "L", "M", "V", "W", "X", "Y"]
|
| 9 |
TARGET_NAMES = ["data", "time", "⊿Ptop", "⊿Pmid", "⊿Pbot", "H2%", "CO%", "CO2%", "CH4%"]
|
|
@@ -16,39 +16,91 @@ def letters_to_index_zero_based(letter: str) -> int:
|
|
| 16 |
|
| 17 |
TARGET_INDICES = [letters_to_index_zero_based(L) for L in EXCEL_LETTERS]
|
| 18 |
|
| 19 |
-
def
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
bio.seek(0)
|
| 38 |
-
return pd.
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
try:
|
| 42 |
bio.seek(0)
|
| 43 |
return pd.read_excel(bio, engine="openpyxl")
|
| 44 |
except Exception:
|
| 45 |
-
bio.seek(0)
|
| 46 |
try:
|
|
|
|
| 47 |
return pd.read_csv(bio, sep=None, engine="python")
|
| 48 |
except Exception:
|
| 49 |
bio.seek(0)
|
| 50 |
return pd.read_csv(bio)
|
| 51 |
|
|
|
|
|
|
|
| 52 |
def extract_and_rename(df: pd.DataFrame) -> pd.DataFrame:
|
| 53 |
n_cols = df.shape[1]
|
| 54 |
existing_positions = [i for i in TARGET_INDICES if i < n_cols]
|
|
@@ -68,34 +120,31 @@ def extract_and_rename(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 68 |
with gr.Blocks(title="Excel/CSV 指定欄位擷取器") as demo:
|
| 69 |
gr.Markdown("### 指定欄位擷取(A,B,K,L,M,V,W,X,Y)→ 重新命名為 data,time,⊿Ptop,⊿Pmid,⊿Pbot,H2%,CO%,CO2%,CH4%")
|
| 70 |
|
| 71 |
-
|
|
|
|
| 72 |
run_btn = gr.Button("開始處理", variant="primary")
|
| 73 |
|
| 74 |
file_out = gr.File(label="下載處理後的 Excel", visible=False)
|
| 75 |
msg = gr.Markdown()
|
| 76 |
-
preview = gr.Dataframe(label="預覽(前 20 列)", wrap=True)
|
| 77 |
|
| 78 |
-
def run_pipeline(
|
| 79 |
-
if
|
| 80 |
return gr.update(visible=False), "請先上傳檔案。", pd.DataFrame()
|
| 81 |
|
| 82 |
try:
|
| 83 |
-
df = load_dataframe(
|
| 84 |
out = extract_and_rename(df)
|
| 85 |
except Exception as e:
|
| 86 |
return gr.update(visible=False), f"處理失敗:{e}", pd.DataFrame()
|
| 87 |
|
| 88 |
-
# 寫到 /tmp 並用檔名控制下載時的顯示名稱
|
| 89 |
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 90 |
out_path = f"/tmp/extracted_columns_{ts}.xlsx"
|
| 91 |
out.to_excel(out_path, index=False, engine="openpyxl")
|
| 92 |
|
| 93 |
-
# 回傳:讓 File 元件顯示下載連結(用 gr.update,而不是 File.update)
|
| 94 |
return gr.update(value=out_path, visible=True), "完成!下方預覽、右側可下載 Excel。", out.head(20)
|
| 95 |
|
| 96 |
run_btn.click(run_pipeline, inputs=[inp], outputs=[file_out, msg, preview])
|
| 97 |
|
| 98 |
if __name__ == "__main__":
|
| 99 |
-
# 在 Hugging Face Spaces 不需要 share;若本機想避免 SSR 訊息可加 ssr_mode=False
|
| 100 |
demo.launch()
|
| 101 |
-
# demo.launch(ssr_mode=False) # 若你想關閉啟動訊息中的 SSR ⚡ 提示
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
import io
|
| 4 |
import os
|
|
|
|
| 5 |
from datetime import datetime
|
| 6 |
+
from typing import Union
|
| 7 |
|
| 8 |
EXCEL_LETTERS = ["A", "B", "K", "L", "M", "V", "W", "X", "Y"]
|
| 9 |
TARGET_NAMES = ["data", "time", "⊿Ptop", "⊿Pmid", "⊿Pbot", "H2%", "CO%", "CO2%", "CH4%"]
|
|
|
|
| 16 |
|
| 17 |
TARGET_INDICES = [letters_to_index_zero_based(L) for L in EXCEL_LETTERS]
|
| 18 |
|
| 19 |
+
def get_lower_name(file_input: object) -> str:
    """Return the lower-cased base filename of *file_input* for extension checks.

    Args:
        file_input: A path (``str`` or ``os.PathLike``) or any object exposing
            a ``name`` attribute that is itself a path (e.g. an open file).
            The original author notes that Gradio may pass a ``NamedString``
            (a ``str`` subclass), which the first branch covers.

    Returns:
        The basename in lower case, or ``""`` when no filename can be derived
        (for example raw ``bytes`` or ``None``).
    """
    if isinstance(file_input, (str, os.PathLike)):
        # os.fsdecode honours the __fspath__ protocol; str() on an arbitrary
        # PathLike is not guaranteed to yield the path.
        return os.path.basename(os.fsdecode(file_input)).lower()

    name_attr = getattr(file_input, "name", None)
    if isinstance(name_attr, (str, os.PathLike)):
        return os.path.basename(os.fsdecode(name_attr)).lower()

    return ""
|
| 30 |
+
|
| 31 |
+
def _rewind(source):
    """Seek a buffer back to its start; path strings are returned untouched."""
    if hasattr(source, "seek"):
        source.seek(0)
    return source


def _read_csv_sniffed(source) -> pd.DataFrame:
    """Read CSV with delimiter sniffing, falling back to pandas' defaults."""
    try:
        return pd.read_csv(_rewind(source), sep=None, engine="python")
    except Exception:
        # Deliberate best-effort: if sniffing fails, retry as plain comma CSV.
        return pd.read_csv(_rewind(source))


def _read_tabular(source, lower_name: str) -> pd.DataFrame:
    """Read *source* (path or seekable buffer), dispatching on *lower_name*."""
    if lower_name.endswith((".xlsx", ".xls")):
        # openpyxl cannot read legacy .xls workbooks; let pandas choose the
        # engine for those (per the pandas docs this is xlrd -- it must be
        # installed in the environment).
        engine = None if lower_name.endswith(".xls") else "openpyxl"
        return pd.read_excel(_rewind(source), engine=engine)

    if lower_name.endswith(".csv"):
        return _read_csv_sniffed(source)

    # Unknown extension: try Excel first, then CSV. Any failure of the Excel
    # attempt (bad zip, missing engine, ...) is treated as "not Excel".
    try:
        return pd.read_excel(_rewind(source), engine="openpyxl")
    except Exception:
        return _read_csv_sniffed(source)


def load_dataframe(file_input) -> pd.DataFrame:
    """Load an uploaded spreadsheet or CSV into a DataFrame.

    Supported inputs:
        - a path (``str`` / ``os.PathLike``), which is what the UI passes;
        - a file-like object exposing ``.read()``;
        - raw ``bytes`` / ``bytearray`` content.

    The format is chosen from the file extension when one is available;
    otherwise Excel is attempted first and CSV second.

    Args:
        file_input: The uploaded file in one of the forms above.

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        ValueError: If *file_input* is none of the supported kinds.
        Exception: Whatever pandas raises when the final parse attempt fails.
    """
    # Raw bytes carry no filename, so go straight to content-based detection.
    if isinstance(file_input, (bytes, bytearray)):
        return _read_tabular(io.BytesIO(file_input), "")

    if isinstance(file_input, (str, os.PathLike)):
        return _read_tabular(os.fsdecode(file_input), get_lower_name(file_input))

    if hasattr(file_input, "read"):
        raw = file_input.read()
        if isinstance(raw, str):
            # Text-mode handles return str; BytesIO needs bytes.
            raw = raw.encode("utf-8")
        return _read_tabular(io.BytesIO(raw), get_lower_name(file_input))

    raise ValueError("不支援的檔案型態,請上傳 .xlsx 或 .csv 檔。")
|
| 103 |
+
|
| 104 |
def extract_and_rename(df: pd.DataFrame) -> pd.DataFrame:
|
| 105 |
n_cols = df.shape[1]
|
| 106 |
existing_positions = [i for i in TARGET_INDICES if i < n_cols]
|
|
|
|
| 120 |
# Gradio UI: upload a workbook/CSV, extract the configured columns, then offer
# the result as a downloadable Excel file together with a short preview.
with gr.Blocks(title="Excel/CSV 指定欄位擷取器") as demo:
    gr.Markdown("### 指定欄位擷取(A,B,K,L,M,V,W,X,Y)→ 重新命名為 data,time,⊿Ptop,⊿Pmid,⊿Pbot,H2%,CO%,CO2%,CH4%")

    # type="filepath" keeps the callback input a path string.
    inp = gr.File(label="上傳 .xlsx 或 .csv 檔案", file_types=[".xlsx", ".csv"], type="filepath")
    run_btn = gr.Button("開始處理", variant="primary")

    file_out = gr.File(label="下載處理後的 Excel", visible=False)
    msg = gr.Markdown()
    preview = gr.Dataframe(label="預覽(前 20 列)", wrap=True, height=300)

    def run_pipeline(file_path_str):
        """Process the uploaded file and return updates for the three outputs.

        Args:
            file_path_str: Path of the uploaded file, or a falsy value when
                nothing has been uploaded.

        Returns:
            A 3-tuple ``(file_out update, status message, preview DataFrame)``
            matching the ``outputs`` wired to the button below.
        """
        import tempfile  # local import: only this callback needs it

        if not file_path_str:
            return gr.update(visible=False), "請先上傳檔案。", pd.DataFrame()

        try:
            df = load_dataframe(file_path_str)
            out = extract_and_rename(df)
        except Exception as e:
            # UI boundary: report the failure in the page instead of crashing.
            return gr.update(visible=False), f"處理失敗:{e}", pd.DataFrame()

        # A per-request directory keeps the friendly download name while
        # preventing two requests in the same second from overwriting each
        # other's output file.
        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
        out_dir = tempfile.mkdtemp(prefix="extracted_columns_")
        out_path = os.path.join(out_dir, f"extracted_columns_{ts}.xlsx")
        out.to_excel(out_path, index=False, engine="openpyxl")

        return gr.update(value=out_path, visible=True), "完成!下方預覽、右側可下載 Excel。", out.head(20)

    run_btn.click(run_pipeline, inputs=[inp], outputs=[file_out, msg, preview])

if __name__ == "__main__":
    demo.launch()
|
|
|