# Kung-Hsun's picture
# Update app.py
# e0838b4 verified
import gradio as gr
import pandas as pd
import numpy as np
import io
import os
from datetime import datetime, time
from typing import Union, Optional, Tuple, Dict, List
import plotly.graph_objects as go
# ====== Constants and defaults ======
# Excel column letters to extract from the uploaded sheet, in output order.
EXCEL_LETTERS = ["A", "B", "K", "L", "M", "V", "W", "X", "Y"]
# Output column names; TARGET_NAMES[i] is the new name of column EXCEL_LETTERS[i].
TARGET_NAMES = ["data", "time", "⊿Ptop", "⊿Pmid", "⊿Pbot", "H2%", "CO%", "CO2%", "CH4%"]
# Fallback line colours, cycled by series position when no colour is chosen.
DEFAULT_COLORS = [
"#1f77b4","#ff7f0e","#2ca02c","#d62728","#9467bd",
"#8c564b","#e377c2","#7f7f7f","#bcbd22","#17becf"
]
# Dash styles offered in the UI; any other value falls back to "solid" when plotting.
DASH_OPTIONS = ["solid","dot","dash","longdash","dashdot","longdashdot"]
# Named colour palettes selectable in the UI (palette name -> list of hex colours).
PALETTES = {
"Plotly10": DEFAULT_COLORS,
"Tableau10": ["#4E79A7","#F28E2B","#E15759","#76B7B2","#59A14F","#EDC948","#B07AA1","#FF9DA7","#9C755F","#BAB0AC"],
"Set2": ["#66c2a5","#fc8d62","#8da0cb","#e78ac3","#a6d854","#ffd92f","#e5c494","#b3b3b3"],
"Dark2": ["#1b9e77","#d95f02","#7570b3","#e7298a","#66a61e","#e6ab02","#a6761d","#666666"],
"Okabe-Ito (CB-safe)": ["#0072B2","#E69F00","#009E73","#D55E00","#CC79A7","#F0E442","#56B4E9","#000000"]
}
MAX_SERIES = 10 # The colour / dash / width controls cover at most the first 10 series
# ====== 工具函式 ======
def letters_to_index_zero_based(letter: str) -> int:
    """Convert an Excel column label into a zero-based column position.

    The label is read as a bijective base-26 number, so "A" -> 0,
    "Z" -> 25 and "AA" -> 26. Lower-case labels are accepted.
    """
    digits = [ord(symbol) - ord("A") + 1 for symbol in letter.upper()]
    # Least-significant letter is the last one, hence the reversal.
    one_based = sum(digit * 26 ** power for power, digit in enumerate(reversed(digits)))
    return one_based - 1
# Zero-based column positions corresponding to EXCEL_LETTERS, computed once at import.
TARGET_INDICES = [letters_to_index_zero_based(L) for L in EXCEL_LETTERS]
def get_lower_name(file_input: Union[str, os.PathLike, io.BytesIO, bytes, object]) -> str:
    """Return the lower-cased base file name of *file_input*, or "" if unknown.

    A path (``str`` / ``os.PathLike``) is used directly; any other object is
    asked for a ``name`` attribute, which is only honoured when it is itself
    a path-like value.
    """
    path_types = (str, os.PathLike)
    if isinstance(file_input, path_types):
        candidate = file_input
    else:
        candidate = getattr(file_input, "name", None)
    if not isinstance(candidate, path_types):
        return ""
    return os.path.basename(str(candidate)).lower()
def _rewind(source):
    """Seek a stream back to its start; paths are returned untouched."""
    if hasattr(source, "seek"):
        source.seek(0)
    return source


def _read_csv_lenient(source) -> pd.DataFrame:
    """Read CSV with delimiter sniffing, falling back to pandas' default parser."""
    try:
        return pd.read_csv(_rewind(source), sep=None, engine="python")
    except Exception:
        # Best-effort fallback kept from the original behaviour.
        return pd.read_csv(_rewind(source))


def _read_by_name(source, lower_name: str) -> pd.DataFrame:
    """Pick a reader from the file extension; unknown names try Excel then CSV."""
    if lower_name.endswith(".xlsx"):
        return pd.read_excel(_rewind(source), engine="openpyxl")
    if lower_name.endswith(".xls"):
        # openpyxl cannot open legacy .xls workbooks; let pandas choose the
        # engine that matches the format instead of forcing the wrong one.
        return pd.read_excel(_rewind(source), engine=None)
    if lower_name.endswith(".csv"):
        return _read_csv_lenient(source)
    try:
        return pd.read_excel(_rewind(source), engine="openpyxl")
    except Exception:
        return _read_csv_lenient(source)


def load_dataframe(file_input) -> pd.DataFrame:
    """Load an uploaded spreadsheet or CSV into a DataFrame.

    Args:
        file_input: A filesystem path (``str`` / ``os.PathLike``), an object
            with a ``read()`` method, or raw ``bytes`` / ``bytearray``.

    Returns:
        The parsed table.

    Raises:
        ValueError: If *file_input* is none of the supported kinds.
        Exception: Whatever the pandas reader raises when the content cannot
            be parsed by the last reader attempted.
    """
    lower_name = get_lower_name(file_input)

    if isinstance(file_input, (str, os.PathLike)):
        return _read_by_name(str(file_input), lower_name)

    if hasattr(file_input, "read"):
        raw = file_input.read()
        if isinstance(raw, str):
            # A text-mode stream yields str, which BytesIO would reject.
            raw = raw.encode("utf-8")
        return _read_by_name(io.BytesIO(raw), lower_name)

    if isinstance(file_input, (bytes, bytearray)):
        # Raw bytes carry no name, so this always takes the Excel-then-CSV path.
        return _read_by_name(io.BytesIO(file_input), "")

    raise ValueError("不支援的檔案型態,請上傳 .xlsx 或 .csv 檔。")
def extract_and_rename(df: pd.DataFrame) -> pd.DataFrame:
    """Select the target columns by position and give them their output names.

    Target positions that lie beyond the width of *df* are skipped; the
    remaining columns are copied and renamed with the matching entry of
    ``TARGET_NAMES``.

    Raises:
        ValueError: If none of the target positions exists in *df*.
    """
    column_count = df.shape[1]
    available = [
        (position, new_name)
        for position, new_name in zip(TARGET_INDICES, TARGET_NAMES)
        if position < column_count
    ]
    if not available:
        raise ValueError("上傳的資料欄位數不足,無法擷取指定欄位(A,B,K,L,M,V,W,X,Y)。")
    selected = df.iloc[:, [position for position, _ in available]].copy()
    selected.columns = [new_name for _, new_name in available]
    return selected
def clamp_int(x, lo, hi):
    """Truncate *x* to an integer and clamp it into ``[lo, hi]``.

    Returns ``None`` when *x* is ``None`` or a blank string.

    Raises:
        ValueError: If *x* cannot be converted to a number.
    """
    blank_text = isinstance(x, str) and not x.strip()
    if x is None or blank_text:
        return None
    try:
        whole = int(float(x))
    except Exception:
        raise ValueError("時間欄位需為數字(整數)")
    capped = hi if hi <= whole else whole
    return capped if capped > lo else lo
def parse_time_to_seconds(h, m, s):
    """Combine hour / minute / second inputs into seconds since midnight.

    Each field is clamped to its valid range (0-23, 0-59, 0-59). Returns
    ``None`` as soon as any of the three fields is missing or blank.
    """
    hours = clamp_int(h, 0, 23)
    minutes = clamp_int(m, 0, 59)
    seconds = clamp_int(s, 0, 59)
    if any(field is None for field in (hours, minutes, seconds)):
        return None
    return hours * 3600 + minutes * 60 + seconds
def _hhmmss_int_to_seconds(n: int):
    """Decode an integer written as HHMMSS into seconds since midnight.

    Returns ``pd.NA`` when *n* is outside 0..235959 or when any of the
    hour / minute / second fields is out of range.
    """
    if not 0 <= n <= 235959:
        return pd.NA
    remainder, ss = divmod(n, 100)
    upper, mm = divmod(remainder, 100)
    hh = upper % 100
    # n is non-negative here, so only the upper bounds need checking.
    if hh > 23 or mm > 59 or ss > 59:
        return pd.NA
    return hh * 3600 + mm * 60 + ss
def series_time_to_seconds_of_day(series: pd.Series) -> pd.Series:
    """Convert a column of time-like values to seconds since midnight.

    Each value is interpreted by the first rule that matches:

    * datetime64 column -> hour/minute/second of each timestamp;
    * timedelta64 column -> total seconds modulo one day;
    * numeric with a fractional part -> Excel day fraction (0.5 -> 12:00:00);
    * whole number in 0..235959 -> digits read as HHMMSS (123456 -> 12:34:56),
      NaN when the minute or second field exceeds 59;
    * ``datetime.time`` object -> its hour/minute/second;
    * anything else -> parsed with ``pd.to_datetime(errors="coerce")``.

    Numeric values are deliberately kept away from ``pd.to_datetime``: it
    would read them as nanoseconds since the epoch, turning every number
    into 0 seconds and making the numeric rules unreachable.

    NOTE(review): a whole number is ambiguous (HHMMSS vs. an Excel serial
    date at midnight); HHMMSS is assumed here - confirm against real data.

    Args:
        series: Column to convert.

    Returns:
        Float series on the same index; NaN where a value cannot be read.
    """
    s = series.copy()
    if pd.api.types.is_datetime64_any_dtype(s):
        return (s.dt.hour * 3600 + s.dt.minute * 60 + s.dt.second).astype("float")
    if pd.api.types.is_timedelta64_dtype(s):
        return (s.dt.total_seconds() % 86400).astype("float")

    # Work on plain arrays so a duplicated index cannot break assignment.
    seconds = np.full(len(s), np.nan, dtype="float64")
    num = pd.to_numeric(s, errors="coerce").astype("float").to_numpy()
    finite = np.isfinite(num)
    whole = finite & (num == np.floor(num))
    fractional = finite & ~whole

    # Excel stores time of day as a fraction of 24 h; the final modulo keeps
    # a value that rounds up to 86400 inside the day.
    seconds[fractional] = np.round((num[fractional] % 1) * 86400) % 86400

    # Range-check before the int64 cast so huge floats cannot overflow.
    packed_mask = whole & (num >= 0) & (num <= 235959)
    if packed_mask.any():
        packed = num[packed_mask].astype("int64")
        hh = packed // 10000
        mm = (packed // 100) % 100
        ss = packed % 100
        decoded = (hh * 3600 + mm * 60 + ss).astype("float64")
        decoded[(mm > 59) | (ss > 59)] = np.nan
        seconds[packed_mask] = decoded

    pending = ~finite
    if pending.any() and not pd.api.types.is_numeric_dtype(s):
        raw = s.to_numpy(dtype=object)
        is_clock = np.array([isinstance(v, time) for v in raw], dtype=bool) & pending
        if is_clock.any():
            seconds[is_clock] = [
                v.hour * 3600 + v.minute * 60 + v.second for v in raw[is_clock]
            ]
        textual = pending & ~is_clock
        if textual.any():
            parsed = pd.to_datetime(pd.Series(raw[textual]), errors="coerce")
            seconds[textual] = (
                parsed.dt.hour * 3600 + parsed.dt.minute * 60 + parsed.dt.second
            ).to_numpy(dtype="float64")

    return pd.Series(seconds, index=s.index, dtype="float")
def pad_time(h, m, s):
    """Format three time fields as ``HH:MM:SS``, showing ``??`` for a missing one."""
    fields = ["??" if part is None else f"{int(part):02d}" for part in (h, m, s)]
    return ":".join(fields)
def parse_hhmmss_or_number(s: Optional[str]) -> Optional[float]:
    """Parse ``mm:ss``, ``hh:mm:ss`` or a plain number into a numeric value.

    Colon-separated input is converted to a number of seconds; anything else
    is handed to ``float``. Returns ``None`` for ``None``, blank text, or
    text that cannot be parsed.
    """
    if s is None:
        return None
    if isinstance(s, (int, float)):
        return float(s)
    text = str(s).strip()
    if not text:
        return None
    if ":" in text:
        fields = text.split(":")
        if len(fields) in (2, 3):
            try:
                numbers = [int(field) for field in fields]
            except Exception:
                return None
            # Horner evaluation in base 60: ((hh * 60) + mm) * 60 + ss.
            total = 0
            for number in numbers:
                total = total * 60 + number
            return total
    try:
        return float(text)
    except Exception:
        return None
def prepare_x_series(df: pd.DataFrame, x_col: str) -> Tuple[pd.Series, bool]:
    """Build the X values for plotting and report whether they are times.

    The column named ``"time"`` and any object-dtype column are converted to
    seconds of day and then to timestamps counted from the Unix epoch; every
    other column is coerced to numbers.

    Returns:
        ``(values, is_time)`` where *is_time* is True for the timestamp case.
    """
    column = df[x_col]
    treat_as_time = (x_col == "time") or (column.dtype == object)
    if not treat_as_time:
        return pd.to_numeric(column, errors="coerce"), False
    seconds = series_time_to_seconds_of_day(column)
    stamps = pd.to_datetime(seconds, unit="s", origin="unix", errors="coerce")
    return stamps, True
def styles_to_map(y_cols: List[str],
                  colors: List[Optional[str]],
                  dashes: List[Optional[str]],
                  widths: List[Optional[float]]) -> Dict[str, Dict]:
    """Build the per-series line style used for plotting.

    The result maps each series name to ``{"color", "width", "dash"}``,
    taking the i-th colour (ColorPicker), dash (Dropdown) and width (Number).
    Missing or empty colours fall back to ``DEFAULT_COLORS`` by position,
    unknown dashes to ``"solid"``, and missing / blank / unparsable widths
    to ``2.0``.
    """
    style_by_series: Dict[str, Dict] = {}
    for position, series_name in enumerate(y_cols):
        chosen_color = colors[position] if position < len(colors) else None
        if not chosen_color:
            chosen_color = DEFAULT_COLORS[position % len(DEFAULT_COLORS)]

        chosen_dash = "solid"
        if position < len(dashes) and dashes[position] in DASH_OPTIONS:
            chosen_dash = dashes[position]

        chosen_width = 2.0
        try:
            if position < len(widths):
                raw_width = widths[position]
                if raw_width is not None and str(raw_width).strip() != "":
                    chosen_width = float(raw_width)
        except Exception:
            chosen_width = 2.0

        style_by_series[series_name] = {
            "color": chosen_color,
            "width": chosen_width,
            "dash": chosen_dash,
        }
    return style_by_series
def make_line_figure(df: pd.DataFrame, x_col: str, y_cols: list,
                     fig_w: int, fig_h: int,
                     auto_x: bool, x_min: Optional[str], x_max: Optional[str], x_dtick: Optional[str],
                     auto_y: bool, y_min: Optional[str], y_max: Optional[str], y_dtick: Optional[str],
                     style_map: Dict[str, Dict]):
    """Build an interactive Plotly line chart with one trace per Y column.

    Args:
        df: Data to plot.
        x_col: Column used for the X axis; time-like columns are drawn on a
            date axis formatted as ``%H:%M:%S``.
        y_cols: Columns to draw. A column with fewer than two valid points is
            skipped.
        fig_w, fig_h: Figure size in pixels; a falsy value leaves it unset.
        auto_x, auto_y: When False, the matching min/max pair sets the axis
            range, provided both bounds parse.
        x_min, x_max, y_min, y_max: Range bounds (``hh:mm[:ss]`` or a number).
        x_dtick, y_dtick: Tick spacing; applied when it parses to a positive
            value. NOTE(review): applied regardless of the auto-range flags -
            confirm this matches the intended UI behaviour.
        style_map: ``{series: {"color", "width", "dash"}}``; series missing
            from it get a default style.

    Returns:
        The configured ``plotly.graph_objects.Figure``.

    Raises:
        ValueError: On empty data, missing selections or columns, an X axis
            with fewer than two valid points, or when no Y column could be
            drawn at all.
    """
    if df is None or len(df) == 0:
        raise ValueError("沒有可繪圖的資料。")
    if not x_col or not y_cols:
        raise ValueError("請選擇 X 與至少一個 Y 欄位。")
    for c in [x_col, *y_cols]:
        if c not in df.columns:
            raise ValueError(f"找不到欄位:{c}")

    x_series, is_time = prepare_x_series(df, x_col)
    if x_series.notna().sum() < 2:
        raise ValueError("X 軸無法解析為有效序列或點數不足。")

    style_map = style_map or {}

    def _manual_range(auto, low_text, high_text):
        """Return (low, high) when a manual range is requested and complete."""
        if auto:
            return None
        low = parse_hhmmss_or_number(low_text)
        high = parse_hhmmss_or_number(high_text)
        if low is None or high is None:
            return None
        return low, high

    def _positive_step(step_text):
        """Return the tick spacing when it parses to a positive number."""
        step = parse_hhmmss_or_number(step_text)
        return step if step and step > 0 else None

    fig = go.Figure()
    drawn = 0
    for idx, y_col in enumerate(y_cols):
        y = pd.to_numeric(df[y_col], errors="coerce")
        mask = x_series.notna() & y.notna()
        if mask.sum() < 2:
            continue
        st = style_map.get(
            y_col,
            {"color": DEFAULT_COLORS[idx % len(DEFAULT_COLORS)], "width": 2.0, "dash": "solid"},
        )
        fig.add_trace(
            go.Scatter(
                x=x_series[mask],
                y=y[mask],
                mode="lines",
                name=y_col,
                line=dict(color=st["color"], width=st["width"], dash=st["dash"]),
            )
        )
        drawn += 1
    if drawn == 0:
        # An empty chart reported as success hides the real problem.
        raise ValueError("所選 Y 欄位的有效數值不足,無法繪圖。")

    fig.update_layout(
        width=int(fig_w) if fig_w else None,
        height=int(fig_h) if fig_h else None,
        margin=dict(l=60, r=20, t=40, b=60),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        hovermode="x unified",
    )

    # X axis
    xaxis = {}
    x_range = _manual_range(auto_x, x_min, x_max)
    x_step = _positive_step(x_dtick)
    if is_time:
        xaxis["type"] = "date"
        xaxis["tickformat"] = "%H:%M:%S"
        if x_range is not None:
            xaxis["range"] = [pd.to_datetime(v, unit="s", origin="unix") for v in x_range]
        if x_step is not None:
            # Date axes take dtick in milliseconds.
            xaxis["dtick"] = x_step * 1000.0
    else:
        if x_range is not None:
            xaxis["range"] = list(x_range)
        if x_step is not None:
            xaxis["dtick"] = x_step
    fig.update_xaxes(**xaxis)

    # Y axis
    yaxis = {}
    y_range = _manual_range(auto_y, y_min, y_max)
    if y_range is not None:
        yaxis["range"] = list(y_range)
    y_step = _positive_step(y_dtick)
    if y_step is not None:
        yaxis["dtick"] = y_step
    fig.update_yaxes(**yaxis)

    fig.update_yaxes(showgrid=True)
    fig.update_xaxes(showgrid=True)
    return fig
# ====== 介面 ======
with gr.Blocks(title="Excel/CSV 指定欄位擷取器 (Biomass gasification-單槽系統使用)") as demo:
    gr.Markdown("### 自動擷取欄位(A,B,K,L,M,V,W,X,Y)轉換為 data, time, ⊿Ptop, ⊿Pmid, ⊿Pbot, H2%, CO%, CO2%, CH4%。")

    # Holds the processed DataFrame between the "process" and "plot" steps.
    df_state = gr.State(value=None)
    inp = gr.File(
        label="上傳 .xlsx 或 .csv 檔案",
        file_types=[".xlsx", ".csv"],
        type="filepath",
    )

    # ---- Optional time window: start ----
    with gr.Row():
        gr.Markdown("**開始時間 (hh:mm:ss)**")
    with gr.Row():
        sh = gr.Number(label="Start HH (0-23)", value="")
        sm = gr.Number(label="Start MM (0-59)", value="")
        ss = gr.Number(label="Start SS (0-59)", value="")

    # ---- Optional time window: end ----
    with gr.Row():
        gr.Markdown("**結束時間 (hh:mm:ss)**")
    with gr.Row():
        eh = gr.Number(label="End HH (0-23)", value="")
        em = gr.Number(label="End MM (0-59)", value="")
        es = gr.Number(label="End SS (0-59)", value="")

    run_btn = gr.Button("開始處理", variant="primary")
    file_out = gr.File(label="下載處理後的 Excel", visible=False)
    msg = gr.Markdown()
    preview = gr.Dataframe(label="預覽(前 20 列)", wrap=True)

    gr.Markdown("### 互動線圖設定")
    with gr.Row():
        x_sel = gr.Dropdown(label="X 軸欄位", choices=[], value=None)
        y_sel = gr.Dropdown(label="Y 軸欄位(可複選)", choices=[], value=None, multiselect=True)

    with gr.Accordion("外觀與座標調整", open=False):
        with gr.Row():
            fig_w = gr.Number(label="圖寬 (px)", value=900)
            fig_h = gr.Number(label="圖高 (px)", value=500)
        with gr.Row():
            auto_x = gr.Checkbox(label="X 自動範圍", value=True)
            x_min = gr.Textbox(label="X 最小(time: hh:mm[:ss] 或 數值)", value="")
            x_max = gr.Textbox(label="X 最大(time: hh:mm[:ss] 或 數值)", value="")
            x_dtick = gr.Textbox(label="X 刻度間距 dtick(time: hh:mm[:ss];數值:數字)", value="")
        with gr.Row():
            auto_y = gr.Checkbox(label="Y 自動範圍", value=True)
            y_min = gr.Textbox(label="Y 最小(數值)", value="")
            y_max = gr.Textbox(label="Y 最大(數值)", value="")
            y_dtick = gr.Textbox(label="Y 刻度間距 dtick(數值)", value="")

    # ---- Style controls (one ColorPicker + dash Dropdown + width Number per line) ----
    # NOTE(review): the flattened source does not show whether this section
    # was nested inside the accordion above; it is placed outside here.
    gr.Markdown("### 樣式(前 10 條線):顏色 / 線型 / 線寬")
    with gr.Row():
        palette_dd = gr.Dropdown(
            label="色盤",
            choices=list(PALETTES.keys()),
            value="Okabe-Ito (CB-safe)",
        )
        apply_palette_btn = gr.Button("套用色盤到 Y(依序)")

    # Two rows of five groups; every group starts hidden until a Y series uses it.
    color_pickers, dash_dds, width_nums = [], [], []
    for row in range(2):
        with gr.Row():
            for i in range(5):
                idx = row * 5 + i
                with gr.Column(scale=1):
                    color_pickers.append(
                        gr.ColorPicker(label=f"系列 {idx + 1} 顏色", value="#000000", visible=False)
                    )
                    dash_dds.append(
                        gr.Dropdown(label=f"系列 {idx + 1} 線型", choices=DASH_OPTIONS, value="solid", visible=False)
                    )
                    width_nums.append(
                        gr.Number(label=f"系列 {idx + 1} 線寬", value=2.0, visible=False)
                    )

    plot_btn = gr.Button("繪製線圖(互動)")
    plot_out = gr.Plot(label="互動線圖")
    plot_msg = gr.Markdown()
# --------- 回呼邏輯 ---------
def run_pipeline(file_path_str, sh_, sm_, ss_, eh_, em_, es_):
    """Load the upload, extract the target columns, filter by time, export.

    Args:
        file_path_str: Path of the uploaded file (falsy when nothing uploaded).
        sh_, sm_, ss_: Start hour / minute / second of the time window.
        eh_, em_, es_: End hour / minute / second of the time window.

    The time filter is applied only when all six fields are filled in. A
    start later than the end is treated as a window that wraps past midnight.

    Returns:
        A tuple matching the bound outputs: download file update, status
        message, preview frame, full frame for the state, X and Y dropdown
        updates, then ``MAX_SERIES`` updates each for the colour, dash and
        width controls.
    """
    import tempfile  # local import keeps this block self-contained

    def _failure(message):
        """Build the complete output tuple for an aborted run."""
        return (
            gr.update(visible=False), message, pd.DataFrame(),
            None, gr.update(choices=[], value=None), gr.update(choices=[], value=None),
            # colours, dashes and widths are all hidden on failure
            *[gr.update(visible=False) for _ in range(MAX_SERIES * 3)],
        )

    if not file_path_str:
        return _failure("請先上傳檔案。")

    try:
        df = load_dataframe(file_path_str)
        out = extract_and_rename(df)
    except Exception as e:
        return _failure(f"處理失敗:{e}")
    original_rows = len(out)

    try:
        start_sec = parse_time_to_seconds(sh_, sm_, ss_)
        end_sec = parse_time_to_seconds(eh_, em_, es_)
    except Exception as e:
        return _failure(f"時間輸入錯誤:{e}")

    parsed_ok = None
    if (start_sec is not None) and (end_sec is not None):
        if "time" not in out.columns:
            return _failure("找不到 'time' 欄,無法做時間過濾。")
        secs = series_time_to_seconds_of_day(out["time"])
        parsed_ok = int(secs.notna().sum())
        valid_mask = secs.notna()
        # -1 never falls inside a window, and valid_mask excludes those rows anyway.
        secs_valid = secs.where(valid_mask, other=-1)
        if start_sec <= end_sec:
            keep = valid_mask & (secs_valid >= start_sec) & (secs_valid <= end_sec)
        else:
            keep = valid_mask & ((secs_valid >= start_sec) | (secs_valid <= end_sec))
        out = out.loc[keep].reset_index(drop=True)

    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    try:
        # mkstemp gives a unique name in the platform temp directory, so two
        # runs in the same second cannot overwrite each other's file.
        fd, out_path = tempfile.mkstemp(prefix=f"extracted_columns_{ts}_", suffix=".xlsx")
        os.close(fd)
        out.to_excel(out_path, index=False, engine="openpyxl")
    except Exception as e:
        return _failure(f"輸出 Excel 失敗:{e}")

    cols = out.columns.tolist()
    default_x = "time" if "time" in cols else (cols[0] if cols else None)
    default_y = [c for c in ["H2%", "CO%", "CO2%", "CH4%"] if c in cols] or ([cols[1]] if len(cols) > 1 else cols)

    note_lines = [f"完成!原始列數:**{original_rows}**",
                  f"輸出列數:**{len(out)}**"]
    if parsed_ok is not None:
        note_lines.insert(1, f"可解析時間列數:**{parsed_ok}**")
        # Separator added: start and end used to be printed run together.
        note_lines.insert(2, f"時間區段:**{pad_time(sh_, sm_, ss_)} – {pad_time(eh_, em_, es_)}**")
    note_lines.append("下方預覽、右側可下載;選欄位與樣式後繪圖。")
    note = "|".join(note_lines)

    # Show one control group per default Y series and hide the rest.
    color_updates, dash_updates, width_updates = [], [], []
    for i in range(MAX_SERIES):
        if i < len(default_y):
            series = default_y[i]
            color_updates.append(gr.update(visible=True, value=DEFAULT_COLORS[i % len(DEFAULT_COLORS)], label=f"{series} 顏色"))
            dash_updates.append(gr.update(visible=True, choices=DASH_OPTIONS, value="solid", label=f"{series} 線型"))
            width_updates.append(gr.update(visible=True, value=2.0, label=f"{series} 線寬"))
        else:
            color_updates.append(gr.update(visible=False))
            dash_updates.append(gr.update(visible=False))
            width_updates.append(gr.update(visible=False))

    return (
        gr.update(value=out_path, visible=True),
        note,
        out.head(20),
        out,  # df_state
        gr.update(choices=cols, value=default_x),
        gr.update(choices=cols, value=default_y),
        *color_updates,
        *dash_updates,
        *width_updates,
    )
def on_y_change(y_cols):
    """Sync the style controls with the current Y selection.

    Shows one colour / dash / width group per selected series (up to
    ``MAX_SERIES``), relabels it with the series name, resets it to the
    default style, and hides the remaining groups.

    Returns:
        ``MAX_SERIES`` colour updates, then dash updates, then width updates.
    """
    y_cols = y_cols or []
    color_updates, dash_updates, width_updates = [], [], []
    for i in range(MAX_SERIES):
        if i < len(y_cols):
            series = y_cols[i]
            color_updates.append(gr.update(visible=True, value=DEFAULT_COLORS[i % len(DEFAULT_COLORS)], label=f"{series} 顏色"))
            dash_updates.append(gr.update(visible=True, choices=DASH_OPTIONS, value="solid", label=f"{series} 線型"))
            # "線寬" matches the label used by the layout and by run_pipeline.
            width_updates.append(gr.update(visible=True, value=2.0, label=f"{series} 線寬"))
        else:
            color_updates.append(gr.update(visible=False))
            dash_updates.append(gr.update(visible=False))
            width_updates.append(gr.update(visible=False))
    return (*color_updates, *dash_updates, *width_updates)
def apply_palette(y_cols, palette_name):
    """Assign palette colours to the selected Y series, in order.

    Colours cycle when there are more series than palette entries; an
    unknown palette name falls back to ``DEFAULT_COLORS``. Dash and width
    values are left as they are - only visibility and labels are refreshed.

    Returns:
        ``MAX_SERIES`` colour updates, then dash updates, then width updates.
    """
    y_cols = y_cols or []
    pal = PALETTES.get(palette_name, DEFAULT_COLORS)
    color_updates, dash_updates, width_updates = [], [], []
    for i in range(MAX_SERIES):
        if i < len(y_cols):
            series = y_cols[i]
            color_updates.append(gr.update(visible=True, value=pal[i % len(pal)], label=f"{series} 顏色"))
            # No `value=` here, so the user's dash / width choices survive.
            dash_updates.append(gr.update(visible=True, choices=DASH_OPTIONS, label=f"{series} 線型"))
            # "線寬" matches the label used by the layout and by run_pipeline.
            width_updates.append(gr.update(visible=True, label=f"{series} 線寬"))
        else:
            color_updates.append(gr.update(visible=False))
            dash_updates.append(gr.update(visible=False))
            width_updates.append(gr.update(visible=False))
    return (*color_updates, *dash_updates, *width_updates)
def plot_handler(df, x_col, y_cols, fig_w_, fig_h_,
                 auto_x_, x_min_, x_max_, x_dtick_,
                 auto_y_, y_min_, y_max_, y_dtick_,
                 *style_values):
    """Draw the chart from the stored data and the current UI settings.

    ``style_values`` carries the style controls in order: ``MAX_SERIES``
    colours, then ``MAX_SERIES`` dashes, then ``MAX_SERIES`` widths.

    Returns:
        ``(figure, message)``; the figure is ``None`` when no data is
        available or plotting fails.
    """
    if df is None:
        return None, "尚未有可用資料,請先完成上方處理。"

    picked_colors, picked_dashes, picked_widths = (
        list(style_values[group * MAX_SERIES:(group + 1) * MAX_SERIES])
        for group in range(3)
    )
    try:
        selected = y_cols or []
        style_lookup = styles_to_map(selected, picked_colors, picked_dashes, picked_widths)
        figure = make_line_figure(
            df, x_col, selected,
            fig_w=int(fig_w_ or 900), fig_h=int(fig_h_ or 500),
            auto_x=bool(auto_x_), x_min=x_min_, x_max=x_max_, x_dtick=x_dtick_,
            auto_y=bool(auto_y_), y_min=y_min_, y_max=y_max_, y_dtick=y_dtick_,
            style_map=style_lookup,
        )
    except Exception as e:
        return None, f"繪圖失敗:{e}"
    return figure, f"完成繪圖:Y 數量 {len(selected)}。"
# Event wiring. Listeners must be registered inside the Blocks context, so
# the context of `demo` is entered for the registrations below.
with demo:
    base_outputs = [file_out, msg, preview, df_state, x_sel, y_sel]
    style_outputs = color_pickers + dash_dds + width_nums

    # Process the upload and refresh every dependent component.
    run_btn.click(
        run_pipeline,
        inputs=[inp, sh, sm, ss, eh, em, es],
        outputs=base_outputs + style_outputs,
    )

    # Keep control visibility and labels in sync with the Y selection.
    y_sel.change(
        on_y_change,
        inputs=[y_sel],
        outputs=style_outputs,
    )

    # Apply the chosen palette.
    apply_palette_btn.click(
        apply_palette,
        inputs=[y_sel, palette_dd],
        outputs=style_outputs,
    )

    # Plot, passing the value of every style control.
    plot_inputs = [
        df_state, x_sel, y_sel, fig_w, fig_h,
        auto_x, x_min, x_max, x_dtick,
        auto_y, y_min, y_max, y_dtick,
    ]
    plot_btn.click(
        plot_handler,
        inputs=plot_inputs + style_outputs,
        outputs=[plot_out, plot_msg],
    )
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()