Kung-Hsun committed on
Commit
f3e1f75
·
verified ·
1 Parent(s): db1add2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -32
app.py CHANGED
@@ -2,8 +2,8 @@ import gradio as gr
2
  import pandas as pd
3
  import io
4
  import os
5
- import tempfile
6
  from datetime import datetime
 
7
 
8
  EXCEL_LETTERS = ["A", "B", "K", "L", "M", "V", "W", "X", "Y"]
9
  TARGET_NAMES = ["data", "time", "⊿Ptop", "⊿Pmid", "⊿Pbot", "H2%", "CO%", "CO2%", "CH4%"]
@@ -16,39 +16,91 @@ def letters_to_index_zero_based(letter: str) -> int:
16
 
17
  TARGET_INDICES = [letters_to_index_zero_based(L) for L in EXCEL_LETTERS]
18
 
19
- def load_dataframe(file_obj) -> pd.DataFrame:
20
- name = getattr(file_obj, "name", "") or ""
21
- lower = name.lower()
22
-
23
- # 先把檔案內容讀到記憶體,避免多次讀取時指標位置問題
24
- file_obj.seek(0)
25
- raw = file_obj.read()
26
- bio = io.BytesIO(raw)
27
-
28
- if lower.endswith(".xlsx") or lower.endswith(".xls"):
29
- bio.seek(0)
30
- return pd.read_excel(bio, engine="openpyxl")
31
- elif lower.endswith(".csv"):
32
- # 嘗試自動分隔符號;若失敗再退回逗號
33
- try:
34
- bio.seek(0)
35
- return pd.read_csv(bio, sep=None, engine="python")
36
- except Exception:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  bio.seek(0)
38
- return pd.read_csv(bio)
39
- else:
40
- # 嘗試 Excel -> CSV
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  try:
42
  bio.seek(0)
43
  return pd.read_excel(bio, engine="openpyxl")
44
  except Exception:
45
- bio.seek(0)
46
  try:
 
47
  return pd.read_csv(bio, sep=None, engine="python")
48
  except Exception:
49
  bio.seek(0)
50
  return pd.read_csv(bio)
51
 
 
 
52
  def extract_and_rename(df: pd.DataFrame) -> pd.DataFrame:
53
  n_cols = df.shape[1]
54
  existing_positions = [i for i in TARGET_INDICES if i < n_cols]
@@ -68,34 +120,31 @@ def extract_and_rename(df: pd.DataFrame) -> pd.DataFrame:
68
  with gr.Blocks(title="Excel/CSV 指定欄位擷取器") as demo:
69
  gr.Markdown("### 指定欄位擷取(A,B,K,L,M,V,W,X,Y)→ 重新命名為 data,time,⊿Ptop,⊿Pmid,⊿Pbot,H2%,CO%,CO2%,CH4%")
70
 
71
- inp = gr.File(label="上傳 .xlsx 或 .csv 檔案", file_types=[".xlsx", ".csv"])
 
72
  run_btn = gr.Button("開始處理", variant="primary")
73
 
74
  file_out = gr.File(label="下載處理後的 Excel", visible=False)
75
  msg = gr.Markdown()
76
- preview = gr.Dataframe(label="預覽(前 20 列)", wrap=True)
77
 
78
- def run_pipeline(file_obj):
79
- if file_obj is None:
80
  return gr.update(visible=False), "請先上傳檔案。", pd.DataFrame()
81
 
82
  try:
83
- df = load_dataframe(file_obj)
84
  out = extract_and_rename(df)
85
  except Exception as e:
86
  return gr.update(visible=False), f"處理失敗:{e}", pd.DataFrame()
87
 
88
- # 寫到 /tmp 並用檔名控制下載時的顯示名稱
89
  ts = datetime.now().strftime("%Y%m%d_%H%M%S")
90
  out_path = f"/tmp/extracted_columns_{ts}.xlsx"
91
  out.to_excel(out_path, index=False, engine="openpyxl")
92
 
93
- # 回傳:讓 File 元件顯示下載連結(用 gr.update,而不是 File.update)
94
  return gr.update(value=out_path, visible=True), "完成!下方預覽、右側可下載 Excel。", out.head(20)
95
 
96
  run_btn.click(run_pipeline, inputs=[inp], outputs=[file_out, msg, preview])
97
 
98
  if __name__ == "__main__":
99
- # 在 Hugging Face Spaces 不需要 share;若本機想避免 SSR 訊息可加 ssr_mode=False
100
  demo.launch()
101
- # demo.launch(ssr_mode=False) # 若你想關閉啟動訊息中的 SSR ⚡ 提示
 
2
  import pandas as pd
3
  import io
4
  import os
 
5
  from datetime import datetime
6
+ from typing import Union
7
 
8
  EXCEL_LETTERS = ["A", "B", "K", "L", "M", "V", "W", "X", "Y"]
9
  TARGET_NAMES = ["data", "time", "⊿Ptop", "⊿Pmid", "⊿Pbot", "H2%", "CO%", "CO2%", "CH4%"]
 
16
 
17
  TARGET_INDICES = [letters_to_index_zero_based(L) for L in EXCEL_LETTERS]
18
 
19
+ def get_lower_name(file_input: Union[str, os.PathLike, io.BytesIO, bytes, object]) -> str:
20
+ """
21
+ 取得副檔名判斷用的檔名(若是路徑取 basename;若是物件則取 .name,否則空字串)
22
+ """
23
+ if isinstance(file_input, (str, os.PathLike)):
24
+ return os.path.basename(str(file_input)).lower()
25
+ # Gradio 某些情況會傳回 NamedString(str 子類),上一行已涵蓋
26
+ name_attr = getattr(file_input, "name", None)
27
+ if isinstance(name_attr, (str, os.PathLike)):
28
+ return os.path.basename(str(name_attr)).lower()
29
+ return ""
30
+
31
+ def load_dataframe(file_input) -> pd.DataFrame:
32
+ """
33
+ 同時支援:
34
+ - 路徑字串(Gradio 預設)
35
+ - 檔案物件(具 .read())
36
+ - Bytes(較少見)
37
+ """
38
+ lower_name = get_lower_name(file_input)
39
+
40
+ # 1) 若是路徑(含 NamedString),直接交給 pandas
41
+ if isinstance(file_input, (str, os.PathLike)):
42
+ path = str(file_input)
43
+ if lower_name.endswith((".xlsx", ".xls")):
44
+ return pd.read_excel(path, engine="openpyxl")
45
+ elif lower_name.endswith(".csv"):
46
+ try:
47
+ return pd.read_csv(path, sep=None, engine="python")
48
+ except Exception:
49
+ return pd.read_csv(path)
50
+ else:
51
+ # 不確定副檔名時,先嘗試 Excel 再 CSV
52
+ try:
53
+ return pd.read_excel(path, engine="openpyxl")
54
+ except Exception:
55
+ try:
56
+ return pd.read_csv(path, sep=None, engine="python")
57
+ except Exception:
58
+ return pd.read_csv(path)
59
+
60
+ # 2) 若是檔案物件(具 .read)
61
+ if hasattr(file_input, "read"):
62
+ raw = file_input.read()
63
+ bio = io.BytesIO(raw)
64
+ if lower_name.endswith((".xlsx", ".xls")):
65
  bio.seek(0)
66
+ return pd.read_excel(bio, engine="openpyxl")
67
+ elif lower_name.endswith(".csv"):
68
+ try:
69
+ bio.seek(0)
70
+ return pd.read_csv(bio, sep=None, engine="python")
71
+ except Exception:
72
+ bio.seek(0)
73
+ return pd.read_csv(bio)
74
+ else:
75
+ # 嘗試 Excel -> CSV
76
+ try:
77
+ bio.seek(0)
78
+ return pd.read_excel(bio, engine="openpyxl")
79
+ except Exception:
80
+ try:
81
+ bio.seek(0)
82
+ return pd.read_csv(bio, sep=None, engine="python")
83
+ except Exception:
84
+ bio.seek(0)
85
+ return pd.read_csv(bio)
86
+
87
+ # 3) 其他類型(例如 bytes)
88
+ if isinstance(file_input, (bytes, bytearray)):
89
+ bio = io.BytesIO(file_input)
90
+ # 不知道副檔名時,比照上
91
  try:
92
  bio.seek(0)
93
  return pd.read_excel(bio, engine="openpyxl")
94
  except Exception:
 
95
  try:
96
+ bio.seek(0)
97
  return pd.read_csv(bio, sep=None, engine="python")
98
  except Exception:
99
  bio.seek(0)
100
  return pd.read_csv(bio)
101
 
102
+ raise ValueError("不支援的檔案型態,請上傳 .xlsx 或 .csv 檔。")
103
+
104
  def extract_and_rename(df: pd.DataFrame) -> pd.DataFrame:
105
  n_cols = df.shape[1]
106
  existing_positions = [i for i in TARGET_INDICES if i < n_cols]
 
120
  with gr.Blocks(title="Excel/CSV 指定欄位擷取器") as demo:
121
  gr.Markdown("### 指定欄位擷取(A,B,K,L,M,V,W,X,Y)→ 重新命名為 data,time,⊿Ptop,⊿Pmid,⊿Pbot,H2%,CO%,CO2%,CH4%")
122
 
123
+ # 指定 type="filepath" 讓輸入穩定為路徑字串(NamedString)
124
+ inp = gr.File(label="上傳 .xlsx 或 .csv 檔案", file_types=[".xlsx", ".csv"], type="filepath")
125
  run_btn = gr.Button("開始處理", variant="primary")
126
 
127
  file_out = gr.File(label="下載處理後的 Excel", visible=False)
128
  msg = gr.Markdown()
129
+ preview = gr.Dataframe(label="預覽(前 20 列)", wrap=True, height=300)
130
 
131
+ def run_pipeline(file_path_str):
132
+ if not file_path_str:
133
  return gr.update(visible=False), "請先上傳檔案。", pd.DataFrame()
134
 
135
  try:
136
+ df = load_dataframe(file_path_str)
137
  out = extract_and_rename(df)
138
  except Exception as e:
139
  return gr.update(visible=False), f"處理失敗:{e}", pd.DataFrame()
140
 
 
141
  ts = datetime.now().strftime("%Y%m%d_%H%M%S")
142
  out_path = f"/tmp/extracted_columns_{ts}.xlsx"
143
  out.to_excel(out_path, index=False, engine="openpyxl")
144
 
 
145
  return gr.update(value=out_path, visible=True), "完成!下方預覽、右側可下載 Excel。", out.head(20)
146
 
147
  run_btn.click(run_pipeline, inputs=[inp], outputs=[file_out, msg, preview])
148
 
149
  if __name__ == "__main__":
 
150
  demo.launch()