Kung-Hsun committed on
Commit
552e59e
·
verified ·
1 Parent(s): 959cda2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -0
app.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import io
4
+ import os
5
+ from datetime import datetime
6
+
7
# Excel columns to extract, addressed by column letter. CSV input is handled
# by the same column *positions*, not by header text.
EXCEL_LETTERS = [
    "A", "B",
    "K", "L", "M",
    "V", "W", "X", "Y",
]
# Header given to each extracted column, in the same order as EXCEL_LETTERS.
TARGET_NAMES = [
    "data", "time",
    "⊿Ptop", "⊿Pmid", "⊿Pbot",
    "H2%", "CO%", "CO2%", "CH4%",
]
10
+
11
def letters_to_index_zero_based(letter: str) -> int:
    """Convert an Excel column label to a 0-based column index.

    The label is read as a bijective base-26 number, case-insensitively:
    ``A`` -> 0, ``B`` -> 1, ..., ``Z`` -> 25, ``AA`` -> 26, and so on.

    Args:
        letter: Column label made only of the ASCII letters A-Z / a-z.

    Returns:
        The 0-based position of the column.

    Raises:
        ValueError: If ``letter`` is empty or contains anything other than
            ASCII letters. Without this check an empty label would yield -1,
            which positional indexing silently treats as "last column".
    """
    if not letter or not all(ch.isascii() and ch.isalpha() for ch in letter):
        raise ValueError(f"Invalid Excel column label: {letter!r}")

    idx = 0
    for ch in letter.upper():
        # Each letter is a digit in 1..26; there is no zero digit.
        idx = idx * 26 + (ord(ch) - ord("A") + 1)
    return idx - 1
17
+
18
# 0-based column positions matching EXCEL_LETTERS, computed once at import time.
TARGET_INDICES = list(map(letters_to_index_zero_based, EXCEL_LETTERS))
19
+
20
def load_dataframe(file_obj) -> pd.DataFrame:
    """Read an uploaded spreadsheet into a DataFrame, chosen by file extension.

    Args:
        file_obj: Either a filesystem path (``str`` / ``os.PathLike``) or a
            file-like object exposing a ``name`` attribute. Both forms are
            accepted because the upload widget may hand over either one
            depending on its version/configuration.

    Returns:
        The first worksheet (Excel) or the whole file (CSV) as a DataFrame.

    Raises:
        Exception: Whatever pandas raises when the content cannot be parsed.
    """
    if isinstance(file_obj, (str, os.PathLike)):
        name = os.fspath(file_obj)
    else:
        name = getattr(file_obj, "name", None) or ""
    lower = str(name).lower()

    def _rewind() -> None:
        # Paths have no read cursor; only rewind real file-like objects.
        seek = getattr(file_obj, "seek", None)
        if callable(seek):
            seek(0)

    if lower.endswith(".xlsx"):
        # Reads the first worksheet.
        return pd.read_excel(file_obj, engine="openpyxl")
    if lower.endswith(".xls"):
        # openpyxl cannot read the legacy .xls format; let pandas pick the
        # engine that matches the file.
        return pd.read_excel(file_obj)
    if lower.endswith(".csv"):
        # pandas defaults: UTF-8 text, comma separated.
        return pd.read_csv(file_obj)

    # Unknown extension: try Excel first, then fall back to CSV.
    try:
        _rewind()
        return pd.read_excel(file_obj)
    except Exception:
        # Deliberately broad: the Excel readers raise many unrelated
        # exception types for "this is not a workbook".
        _rewind()
        return pd.read_csv(file_obj)
39
+
40
def extract_and_rename(df: pd.DataFrame):
    """Select the target columns by position and give them the target names.

    Columns are picked purely by position (the entries of TARGET_INDICES),
    whatever headers the input carries, and are then renamed to the
    TARGET_NAMES entry that corresponds to each position. When the input has
    fewer columns than required, only the positions that exist are returned,
    each still paired with its own name.

    Raises:
        ValueError: If none of the target positions exist in ``df``.
    """
    column_count = df.shape[1]

    # Keep every (position, name) pair whose position exists in the input.
    available = [
        (position, label)
        for position, label in zip(TARGET_INDICES, TARGET_NAMES)
        if position < column_count
    ]
    if not available:
        raise ValueError("上傳的資料欄位數不足,無法擷取指定欄位(A,B,K,L,M,V,W,X,Y)。")

    positions = [position for position, _ in available]
    labels = [label for _, label in available]

    # Copy so that renaming does not touch the caller's frame.
    selected = df.iloc[:, positions].copy()
    selected.columns = labels
    return selected
62
+
63
def process(file_obj):
    """Turn an uploaded file into a downloadable Excel file of the target columns.

    Args:
        file_obj: The uploaded file as delivered by the upload widget, or
            ``None`` when nothing has been uploaded.

    Returns:
        A ``(file_update, message)`` tuple. ``file_update`` is ``None`` on
        failure; on success it is a component update that points the download
        widget at the generated ``.xlsx`` and makes it visible. ``message`` is
        the status text to show the user.
    """
    # Local import keeps this block self-contained (tempfile is stdlib).
    import tempfile

    if file_obj is None:
        return None, "請先上傳檔案。"

    # Read and extract. Both steps can fail on malformed uploads, so both are
    # reported to the user instead of surfacing as an unhandled error.
    try:
        df = load_dataframe(file_obj)
        out = extract_and_rename(df)
    except Exception as e:
        return None, f"處理失敗:{e}"

    # The download widget serves files from disk, so write a real file whose
    # name is what the user will see when downloading.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    download_name = f"extracted_columns_{timestamp}.xlsx"
    # NOTE: each call leaves one temp directory behind; cleanup is left to
    # the OS / hosting environment.
    out_path = os.path.join(tempfile.mkdtemp(prefix="extracted_"), download_name)
    out.to_excel(out_path, index=False, engine="openpyxl")

    return (gr.update(value=out_path, visible=True),
            "完成!下方可預覽前幾列,右側可下載 Excel。")
87
+
88
# Build the UI: upload -> button -> (download link + status message) -> preview.
with gr.Blocks(title="Excel/CSV 指定欄位擷取器") as demo:
    gr.Markdown("### 指定欄位擷取(A,B,K,L,M,V,W,X,Y)→ 重新命名為 data,time,⊿Ptop,⊿Pmid,⊿Pbot,H2%,CO%,CO2%,CH4%")

    with gr.Row():
        inp = gr.File(label="上傳 .xlsx 或 .csv 檔案", file_types=[".xlsx", ".csv"])

    with gr.Row():
        run_btn = gr.Button("開始處理", variant="primary")

    with gr.Row():
        # Hidden until a result file exists.
        file_out = gr.File(label="下載處理後的 Excel", visible=False)
        msg = gr.Markdown()

    with gr.Row():
        preview = gr.Dataframe(label="預覽(前 20 列)", wrap=True, height=300)

    def run_pipeline(file_obj):
        """Run the extraction and also return a preview of the first 20 rows.

        Returns:
            ``(file_update, message, preview_frame)`` matching the three
            output components wired to the button below.
        """
        file_ret, text = process(file_obj)

        # Nothing uploaded, or processing failed: there is nothing to
        # preview, and re-reading the input would only raise again.
        if file_obj is None or file_ret is None:
            return file_ret, text, pd.DataFrame()

        # Loading is inside the try so a read error degrades to an empty
        # preview instead of breaking the whole callback.
        try:
            prev = extract_and_rename(load_dataframe(file_obj)).head(20)
        except Exception:
            prev = pd.DataFrame()
        return file_ret, text, prev

    run_btn.click(run_pipeline, inputs=[inp], outputs=[file_out, msg, preview])

if __name__ == "__main__":
    demo.launch()