Kung-Hsun committed on
Commit
db1add2
·
verified ·
1 Parent(s): 513da39

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -66
app.py CHANGED
@@ -2,14 +2,13 @@ import gradio as gr
2
  import pandas as pd
3
  import io
4
  import os
 
5
  from datetime import datetime
6
 
7
- # 需要擷取的 Excel 欄位(用 Excel 字母定位);CSV 也以欄位「位置」處理
8
  EXCEL_LETTERS = ["A", "B", "K", "L", "M", "V", "W", "X", "Y"]
9
  TARGET_NAMES = ["data", "time", "⊿Ptop", "⊿Pmid", "⊿Pbot", "H2%", "CO%", "CO2%", "CH4%"]
10
 
11
  def letters_to_index_zero_based(letter: str) -> int:
12
- """將 Excel 欄位字母轉成 0-based index(A->0, B->1, ..., Z->25, AA->26, ...)"""
13
  idx = 0
14
  for ch in letter.upper():
15
  idx = idx * 26 + (ord(ch) - ord('A') + 1)
@@ -18,40 +17,46 @@ def letters_to_index_zero_based(letter: str) -> int:
18
  TARGET_INDICES = [letters_to_index_zero_based(L) for L in EXCEL_LETTERS]
19
 
20
  def load_dataframe(file_obj) -> pd.DataFrame:
21
- """根據副檔名讀入 DataFrame;xlsx openpyxl、csv pandas 讀入"""
22
- name = getattr(file_obj, "name", None) or ""
23
  lower = name.lower()
 
 
 
 
 
 
24
  if lower.endswith(".xlsx") or lower.endswith(".xls"):
25
- # 讀第一個工作表
26
- return pd.read_excel(file_obj, engine="openpyxl")
27
  elif lower.endswith(".csv"):
28
- # 盡量自動偵測編碼與分隔符號已知規格,可固化)
29
- # 這裡採用最常見的 UTF-8 與逗號
30
- return pd.read_csv(file_obj)
 
 
 
 
31
  else:
32
- # 嘗試 Excel 讀取;失敗再嘗試 CSV
33
  try:
34
- file_obj.seek(0)
35
- return pd.read_excel(file_obj, engine="openpyxl")
36
  except Exception:
37
- file_obj.seek(0)
38
- return pd.read_csv(file_obj)
39
-
40
- def extract_and_rename(df: pd.DataFrame):
41
- """
42
- 以「欄位位置」擷取 A,B,K,L,M,V,W,X,Y(即 0,1,10,11,12,21,22,23,24)。
43
- 無論原始是否有標題,都以位置切片,再以 TARGET_NAMES 依序命名。
44
- 若原始欄數不足,僅輸出可取得的子集,並對應命名。
45
- """
46
  n_cols = df.shape[1]
47
  existing_positions = [i for i in TARGET_INDICES if i < n_cols]
48
  if not existing_positions:
49
  raise ValueError("上傳的資料欄位數不足,無法擷取指定欄位(A,B,K,L,M,V,W,X,Y)。")
50
 
51
- # 依存在的欄位位置切片
52
  out = df.iloc[:, existing_positions].copy()
53
 
54
- # 對應名稱:以 positions 在 TARGET_INDICES 中的相對順序,對應到 TARGET_NAMES
55
  name_map = []
56
  for pos in existing_positions:
57
  idx_in_targets = TARGET_INDICES.index(pos)
@@ -60,59 +65,37 @@ def extract_and_rename(df: pd.DataFrame):
60
  out.columns = name_map
61
  return out
62
 
63
- def process(file_obj):
64
- if file_obj is None:
65
- return None, "請先上傳檔案。"
66
-
67
- # 讀檔
68
- df = load_dataframe(file_obj)
69
-
70
- # 擷取與命名
71
- try:
72
- out = extract_and_rename(df)
73
- except Exception as e:
74
- return None, f"處理失敗:{e}"
75
-
76
- # 匯出為 Excel,並回傳供下載
77
- buffer = io.BytesIO()
78
- out.to_excel(buffer, index=False, engine="openpyxl")
79
- buffer.seek(0)
80
-
81
- # 讓 Gradio 以檔案形式輸出(會自帶下載按鈕)
82
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
83
- download_name = f"extracted_columns_{timestamp}.xlsx"
84
-
85
- return (gr.File.update(value=buffer, visible=True, filename=download_name),
86
- "完成!下方可預覽前幾列,右側可下載 Excel。")
87
-
88
  with gr.Blocks(title="Excel/CSV 指定欄位擷取器") as demo:
89
  gr.Markdown("### 指定欄位擷取(A,B,K,L,M,V,W,X,Y)→ 重新命名為 data,time,⊿Ptop,⊿Pmid,⊿Pbot,H2%,CO%,CO2%,CH4%")
90
 
91
- with gr.Row():
92
- inp = gr.File(label="上傳 .xlsx 或 .csv 檔案", file_types=[".xlsx", ".csv"])
93
-
94
- with gr.Row():
95
- run_btn = gr.Button("開始處理", variant="primary")
96
-
97
- with gr.Row():
98
- file_out = gr.File(label="下載處理後的 Excel", visible=False)
99
- msg = gr.Markdown()
100
 
101
- with gr.Row():
102
- preview = gr.Dataframe(label="預覽(前 20 列)", wrap=True)
 
103
 
104
  def run_pipeline(file_obj):
105
- file_ret, text = process(file_obj)
106
- # 額外提供預覽
107
- df = load_dataframe(file_obj)
108
  try:
 
109
  out = extract_and_rename(df)
110
- prev = out.head(20)
111
- except Exception:
112
- prev = pd.DataFrame()
113
- return file_ret, text, prev
 
 
 
 
 
 
114
 
115
  run_btn.click(run_pipeline, inputs=[inp], outputs=[file_out, msg, preview])
116
 
117
  if __name__ == "__main__":
 
118
  demo.launch()
 
 
2
  import pandas as pd
3
  import io
4
  import os
5
+ import tempfile
6
  from datetime import datetime
7
 
 
8
  EXCEL_LETTERS = ["A", "B", "K", "L", "M", "V", "W", "X", "Y"]
9
  TARGET_NAMES = ["data", "time", "⊿Ptop", "⊿Pmid", "⊿Pbot", "H2%", "CO%", "CO2%", "CH4%"]
10
 
11
  def letters_to_index_zero_based(letter: str) -> int:
 
12
  idx = 0
13
  for ch in letter.upper():
14
  idx = idx * 26 + (ord(ch) - ord('A') + 1)
 
17
  TARGET_INDICES = [letters_to_index_zero_based(L) for L in EXCEL_LETTERS]
18
 
19
  def load_dataframe(file_obj) -> pd.DataFrame:
20
+ name = getattr(file_obj, "name", "") or ""
 
21
  lower = name.lower()
22
+
23
+ # 先把檔案內容讀到記憶體,避免多次讀取時指標位置問題
24
+ file_obj.seek(0)
25
+ raw = file_obj.read()
26
+ bio = io.BytesIO(raw)
27
+
28
  if lower.endswith(".xlsx") or lower.endswith(".xls"):
29
+ bio.seek(0)
30
+ return pd.read_excel(bio, engine="openpyxl")
31
  elif lower.endswith(".csv"):
32
+ # 嘗試自動分隔符號失敗再退回逗號
33
+ try:
34
+ bio.seek(0)
35
+ return pd.read_csv(bio, sep=None, engine="python")
36
+ except Exception:
37
+ bio.seek(0)
38
+ return pd.read_csv(bio)
39
  else:
40
+ # 嘗試 Excel -> CSV
41
  try:
42
+ bio.seek(0)
43
+ return pd.read_excel(bio, engine="openpyxl")
44
  except Exception:
45
+ bio.seek(0)
46
+ try:
47
+ return pd.read_csv(bio, sep=None, engine="python")
48
+ except Exception:
49
+ bio.seek(0)
50
+ return pd.read_csv(bio)
51
+
52
+ def extract_and_rename(df: pd.DataFrame) -> pd.DataFrame:
 
53
  n_cols = df.shape[1]
54
  existing_positions = [i for i in TARGET_INDICES if i < n_cols]
55
  if not existing_positions:
56
  raise ValueError("上傳的資料欄位數不足,無法擷取指定欄位(A,B,K,L,M,V,W,X,Y)。")
57
 
 
58
  out = df.iloc[:, existing_positions].copy()
59
 
 
60
  name_map = []
61
  for pos in existing_positions:
62
  idx_in_targets = TARGET_INDICES.index(pos)
 
65
  out.columns = name_map
66
  return out
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  with gr.Blocks(title="Excel/CSV 指定欄位擷取器") as demo:
69
  gr.Markdown("### 指定欄位擷取(A,B,K,L,M,V,W,X,Y)→ 重新命名為 data,time,⊿Ptop,⊿Pmid,⊿Pbot,H2%,CO%,CO2%,CH4%")
70
 
71
+ inp = gr.File(label="上傳 .xlsx 或 .csv 檔案", file_types=[".xlsx", ".csv"])
72
+ run_btn = gr.Button("開始處理", variant="primary")
 
 
 
 
 
 
 
73
 
74
+ file_out = gr.File(label="下載處理後的 Excel", visible=False)
75
+ msg = gr.Markdown()
76
+ preview = gr.Dataframe(label="預覽(前 20 列)", wrap=True)
77
 
78
  def run_pipeline(file_obj):
79
+ if file_obj is None:
80
+ return gr.update(visible=False), "請先上傳檔案。", pd.DataFrame()
81
+
82
  try:
83
+ df = load_dataframe(file_obj)
84
  out = extract_and_rename(df)
85
+ except Exception as e:
86
+ return gr.update(visible=False), f"處理失敗:{e}", pd.DataFrame()
87
+
88
+ # 寫到 /tmp 並用檔名控制下載時的顯示名稱
89
+ ts = datetime.now().strftime("%Y%m%d_%H%M%S")
90
+ out_path = f"/tmp/extracted_columns_{ts}.xlsx"
91
+ out.to_excel(out_path, index=False, engine="openpyxl")
92
+
93
+ # 回傳:讓 File 元件顯示下載連結(用 gr.update,而不是 File.update)
94
+ return gr.update(value=out_path, visible=True), "完成!下方預覽、右側可下載 Excel。", out.head(20)
95
 
96
  run_btn.click(run_pipeline, inputs=[inp], outputs=[file_out, msg, preview])
97
 
98
  if __name__ == "__main__":
99
+ # 在 Hugging Face Spaces 不需要 share;若本機想避免 SSR 訊息可加 ssr_mode=False
100
  demo.launch()
101
+ # demo.launch(ssr_mode=False) # 若你想關閉啟動訊息中的 SSR ⚡ 提示