Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,8 +3,9 @@ import pandas as pd
|
|
| 3 |
import numpy as np
|
| 4 |
import io
|
| 5 |
import os
|
| 6 |
-
from datetime import datetime, time
|
| 7 |
from typing import Union
|
|
|
|
| 8 |
|
| 9 |
EXCEL_LETTERS = ["A", "B", "K", "L", "M", "V", "W", "X", "Y"]
|
| 10 |
TARGET_NAMES = ["data", "time", "⊿Ptop", "⊿Pmid", "⊿Pbot", "H2%", "CO%", "CO2%", "CH4%"]
|
|
@@ -27,8 +28,6 @@ def get_lower_name(file_input: Union[str, os.PathLike, io.BytesIO, bytes, object
|
|
| 27 |
|
| 28 |
def load_dataframe(file_input) -> pd.DataFrame:
|
| 29 |
lower_name = get_lower_name(file_input)
|
| 30 |
-
|
| 31 |
-
# 1) 路徑(含 NamedString)
|
| 32 |
if isinstance(file_input, (str, os.PathLike)):
|
| 33 |
path = str(file_input)
|
| 34 |
if lower_name.endswith((".xlsx", ".xls")):
|
|
@@ -47,45 +46,34 @@ def load_dataframe(file_input) -> pd.DataFrame:
|
|
| 47 |
except Exception:
|
| 48 |
return pd.read_csv(path)
|
| 49 |
|
| 50 |
-
# 2) 檔案物件(具 .read)
|
| 51 |
if hasattr(file_input, "read"):
|
| 52 |
raw = file_input.read()
|
| 53 |
bio = io.BytesIO(raw)
|
| 54 |
if lower_name.endswith((".xlsx", ".xls")):
|
| 55 |
-
bio.seek(0)
|
| 56 |
-
return pd.read_excel(bio, engine="openpyxl")
|
| 57 |
elif lower_name.endswith(".csv"):
|
| 58 |
try:
|
| 59 |
-
bio.seek(0)
|
| 60 |
-
return pd.read_csv(bio, sep=None, engine="python")
|
| 61 |
except Exception:
|
| 62 |
-
bio.seek(0)
|
| 63 |
-
return pd.read_csv(bio)
|
| 64 |
else:
|
| 65 |
try:
|
| 66 |
-
bio.seek(0)
|
| 67 |
-
return pd.read_excel(bio, engine="openpyxl")
|
| 68 |
except Exception:
|
| 69 |
try:
|
| 70 |
-
bio.seek(0)
|
| 71 |
-
return pd.read_csv(bio, sep=None, engine="python")
|
| 72 |
except Exception:
|
| 73 |
-
bio.seek(0)
|
| 74 |
-
return pd.read_csv(bio)
|
| 75 |
|
| 76 |
-
# 3) bytes
|
| 77 |
if isinstance(file_input, (bytes, bytearray)):
|
| 78 |
bio = io.BytesIO(file_input)
|
| 79 |
try:
|
| 80 |
-
bio.seek(0)
|
| 81 |
-
return pd.read_excel(bio, engine="openpyxl")
|
| 82 |
except Exception:
|
| 83 |
try:
|
| 84 |
-
bio.seek(0)
|
| 85 |
-
return pd.read_csv(bio, sep=None, engine="python")
|
| 86 |
except Exception:
|
| 87 |
-
bio.seek(0)
|
| 88 |
-
return pd.read_csv(bio)
|
| 89 |
|
| 90 |
raise ValueError("不支援的檔案型態,請上傳 .xlsx 或 .csv 檔。")
|
| 91 |
|
|
@@ -94,14 +82,11 @@ def extract_and_rename(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 94 |
existing_positions = [i for i in TARGET_INDICES if i < n_cols]
|
| 95 |
if not existing_positions:
|
| 96 |
raise ValueError("上傳的資料欄位數不足,無法擷取指定欄位(A,B,K,L,M,V,W,X,Y)。")
|
| 97 |
-
|
| 98 |
out = df.iloc[:, existing_positions].copy()
|
| 99 |
-
|
| 100 |
name_map = []
|
| 101 |
for pos in existing_positions:
|
| 102 |
idx_in_targets = TARGET_INDICES.index(pos)
|
| 103 |
name_map.append(TARGET_NAMES[idx_in_targets])
|
| 104 |
-
|
| 105 |
out.columns = name_map
|
| 106 |
return out
|
| 107 |
|
|
@@ -123,97 +108,126 @@ def parse_time_to_seconds(h, m, s):
|
|
| 123 |
return h * 3600 + m * 60 + s
|
| 124 |
|
| 125 |
def _hhmmss_int_to_seconds(n: int):
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
ss = n % 100
|
| 130 |
-
n //= 100
|
| 131 |
-
mm = n % 100
|
| 132 |
-
n //= 100
|
| 133 |
hh = n % 100
|
| 134 |
if 0 <= hh <= 23 and 0 <= mm <= 59 and 0 <= ss <= 59:
|
| 135 |
-
return hh
|
| 136 |
return pd.NA
|
| 137 |
|
| 138 |
def series_time_to_seconds_of_day(series: pd.Series) -> pd.Series:
|
| 139 |
-
"""
|
| 140 |
-
將 'time' 欄位轉成 0~86399 的秒數。
|
| 141 |
-
支援:
|
| 142 |
-
- pandas datetime64[ns] / datetime64[ns, tz]
|
| 143 |
-
- timedelta64[ns]
|
| 144 |
-
- 文字:'YYYY-mm-dd HH:MM:SS' / 'HH:MM:SS(.fff)' / 'AM/PM'
|
| 145 |
-
- Excel 序列(包含日期+時間,像 45213.5)
|
| 146 |
-
- 純數字 HHMMSS(93005 -> 09:30:05)
|
| 147 |
-
- Python datetime.time
|
| 148 |
-
未能解析者回傳 NaN。
|
| 149 |
-
"""
|
| 150 |
s = series.copy()
|
| 151 |
|
| 152 |
-
# 1) 若已是 datetime64,直接取時分秒
|
| 153 |
if pd.api.types.is_datetime64_any_dtype(s):
|
| 154 |
-
|
| 155 |
-
return sec
|
| 156 |
|
| 157 |
-
# 2) 若是 timedelta(少見),取一天內秒數
|
| 158 |
if pd.api.types.is_timedelta64_dtype(s):
|
| 159 |
total_sec = s.dt.total_seconds()
|
| 160 |
return (total_sec % 86400).astype("float")
|
| 161 |
|
| 162 |
-
# 3) 嘗試一般字串/物件 → datetime
|
| 163 |
parsed = pd.to_datetime(s, errors="coerce")
|
| 164 |
-
sec_parsed = (parsed.dt.hour
|
| 165 |
|
| 166 |
-
# 4) Excel 序列時間(含日期部分),任何數值都取小數部分 * 86400
|
| 167 |
num = pd.to_numeric(s, errors="coerce")
|
| 168 |
-
sec_excel = ((num % 1) * 86400).round().astype("float")
|
| 169 |
-
|
| 170 |
result = sec_parsed.where(~sec_parsed.isna(), other=np.nan)
|
| 171 |
result = np.where(np.isnan(result), sec_excel, result)
|
| 172 |
result = pd.Series(result, index=s.index, dtype="float")
|
| 173 |
|
| 174 |
-
# 5) 純數字 HHMMSS(ex: 93005)
|
| 175 |
mask_intlike = num.notna() & (num == np.floor(num))
|
| 176 |
sec_hhmmss = pd.Series(np.nan, index=s.index, dtype="float")
|
| 177 |
if mask_intlike.any():
|
| 178 |
ints = num[mask_intlike].astype("int64")
|
| 179 |
sec_hhmmss.loc[mask_intlike] = ints.map(_hhmmss_int_to_seconds).astype("float")
|
| 180 |
-
|
| 181 |
-
# 僅在前兩招皆 NaN 時,採用 HHMMSS 轉換
|
| 182 |
fill_mask = result.isna() & sec_hhmmss.notna()
|
| 183 |
result.loc[fill_mask] = sec_hhmmss.loc[fill_mask]
|
| 184 |
|
| 185 |
-
# 6) Python datetime.time 物件
|
| 186 |
if result.isna().any():
|
| 187 |
obj_mask = result.isna()
|
| 188 |
subset = s[obj_mask]
|
| 189 |
def time_obj_to_sec(x):
|
| 190 |
if isinstance(x, time):
|
| 191 |
-
return x.hour
|
| 192 |
return np.nan
|
| 193 |
result.loc[obj_mask] = subset.map(time_obj_to_sec)
|
| 194 |
|
| 195 |
-
# 最終返回(仍可能有 NaN,代表無法解析)
|
| 196 |
return result.astype("float")
|
| 197 |
|
| 198 |
def pad_time(h, m, s):
|
| 199 |
-
def to2(x):
|
| 200 |
-
return "??" if x is None else f"{int(x):02d}"
|
| 201 |
return f"{to2(h)}:{to2(m)}:{to2(s)}"
|
| 202 |
|
| 203 |
-
|
| 204 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
|
| 206 |
inp = gr.File(label="上傳 .xlsx 或 .csv 檔案", file_types=[".xlsx", ".csv"], type="filepath")
|
| 207 |
|
| 208 |
with gr.Row():
|
| 209 |
-
gr.Markdown("**開始時間 (hh:mm:ss)**
|
| 210 |
with gr.Row():
|
| 211 |
sh = gr.Number(label="Start HH (0-23)", value=None)
|
| 212 |
sm = gr.Number(label="Start MM (0-59)", value=None)
|
| 213 |
ss = gr.Number(label="Start SS (0-59)", value=None)
|
| 214 |
|
| 215 |
with gr.Row():
|
| 216 |
-
gr.Markdown("**結束時間 (hh:mm:ss)**
|
| 217 |
with gr.Row():
|
| 218 |
eh = gr.Number(label="End HH (0-23)", value=None)
|
| 219 |
em = gr.Number(label="End MM (0-59)", value=None)
|
|
@@ -225,66 +239,96 @@ with gr.Blocks(title="Excel/CSV 指定欄位擷取器(含時間區段)") as
|
|
| 225 |
msg = gr.Markdown()
|
| 226 |
preview = gr.Dataframe(label="預覽(前 20 列)", wrap=True)
|
| 227 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
def run_pipeline(file_path_str, sh_, sm_, ss_, eh_, em_, es_):
|
| 229 |
if not file_path_str:
|
| 230 |
-
return gr.update(visible=False), "請先上傳檔案。", pd.DataFrame()
|
| 231 |
|
| 232 |
try:
|
| 233 |
df = load_dataframe(file_path_str)
|
| 234 |
out = extract_and_rename(df)
|
| 235 |
except Exception as e:
|
| 236 |
-
return gr.update(visible=False), f"處理失敗:{e}", pd.DataFrame()
|
| 237 |
|
| 238 |
original_rows = len(out)
|
| 239 |
|
| 240 |
-
# 嘗試時間過濾
|
| 241 |
try:
|
| 242 |
start_sec = parse_time_to_seconds(sh_, sm_, ss_)
|
| 243 |
end_sec = parse_time_to_seconds(eh_, em_, es_)
|
| 244 |
except Exception as e:
|
| 245 |
-
return gr.update(visible=False), f"時間輸入錯誤:{e}", pd.DataFrame()
|
| 246 |
|
| 247 |
parsed_ok = None
|
| 248 |
if (start_sec is not None) and (end_sec is not None):
|
| 249 |
if "time" not in out.columns:
|
| 250 |
-
return gr.update(visible=False), "找不到 'time' 欄,無法做時間過濾。", pd.DataFrame()
|
| 251 |
-
|
| 252 |
secs = series_time_to_seconds_of_day(out["time"])
|
| 253 |
parsed_ok = int(secs.notna().sum())
|
| 254 |
-
|
| 255 |
valid_mask = secs.notna()
|
| 256 |
secs_valid = secs.where(valid_mask, other=-1)
|
| 257 |
-
|
| 258 |
if start_sec <= end_sec:
|
| 259 |
keep = valid_mask & (secs_valid >= start_sec) & (secs_valid <= end_sec)
|
| 260 |
else:
|
| 261 |
keep = valid_mask & ((secs_valid >= start_sec) | (secs_valid <= end_sec))
|
| 262 |
-
|
| 263 |
out = out.loc[keep].reset_index(drop=True)
|
| 264 |
|
| 265 |
-
# 寫檔輸出
|
| 266 |
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 267 |
out_path = f"/tmp/extracted_columns_{ts}.xlsx"
|
| 268 |
try:
|
| 269 |
out.to_excel(out_path, index=False, engine="openpyxl")
|
| 270 |
except Exception as e:
|
| 271 |
-
return gr.update(visible=False), f"輸出 Excel 失敗:{e}", pd.DataFrame()
|
| 272 |
|
| 273 |
-
#
|
| 274 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
if parsed_ok is not None:
|
| 276 |
-
note_lines.
|
| 277 |
-
note_lines.
|
| 278 |
-
note_lines.append(
|
| 279 |
-
note_lines.append("下方預覽、右側可下載 Excel。")
|
| 280 |
note = "|".join(note_lines)
|
| 281 |
|
| 282 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
|
| 284 |
run_btn.click(
|
| 285 |
run_pipeline,
|
| 286 |
inputs=[inp, sh, sm, ss, eh, em, es],
|
| 287 |
-
outputs=[file_out, msg, preview]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
)
|
| 289 |
|
| 290 |
if __name__ == "__main__":
|
|
|
|
| 3 |
import numpy as np
|
| 4 |
import io
|
| 5 |
import os
|
| 6 |
+
from datetime import datetime, time
|
| 7 |
from typing import Union
|
| 8 |
+
import matplotlib.pyplot as plt
|
| 9 |
|
| 10 |
EXCEL_LETTERS = ["A", "B", "K", "L", "M", "V", "W", "X", "Y"]
|
| 11 |
TARGET_NAMES = ["data", "time", "⊿Ptop", "⊿Pmid", "⊿Pbot", "H2%", "CO%", "CO2%", "CH4%"]
|
|
|
|
| 28 |
|
| 29 |
def load_dataframe(file_input) -> pd.DataFrame:
|
| 30 |
lower_name = get_lower_name(file_input)
|
|
|
|
|
|
|
| 31 |
if isinstance(file_input, (str, os.PathLike)):
|
| 32 |
path = str(file_input)
|
| 33 |
if lower_name.endswith((".xlsx", ".xls")):
|
|
|
|
| 46 |
except Exception:
|
| 47 |
return pd.read_csv(path)
|
| 48 |
|
|
|
|
| 49 |
if hasattr(file_input, "read"):
|
| 50 |
raw = file_input.read()
|
| 51 |
bio = io.BytesIO(raw)
|
| 52 |
if lower_name.endswith((".xlsx", ".xls")):
|
| 53 |
+
bio.seek(0); return pd.read_excel(bio, engine="openpyxl")
|
|
|
|
| 54 |
elif lower_name.endswith(".csv"):
|
| 55 |
try:
|
| 56 |
+
bio.seek(0); return pd.read_csv(bio, sep=None, engine="python")
|
|
|
|
| 57 |
except Exception:
|
| 58 |
+
bio.seek(0); return pd.read_csv(bio)
|
|
|
|
| 59 |
else:
|
| 60 |
try:
|
| 61 |
+
bio.seek(0); return pd.read_excel(bio, engine="openpyxl")
|
|
|
|
| 62 |
except Exception:
|
| 63 |
try:
|
| 64 |
+
bio.seek(0); return pd.read_csv(bio, sep=None, engine="python")
|
|
|
|
| 65 |
except Exception:
|
| 66 |
+
bio.seek(0); return pd.read_csv(bio)
|
|
|
|
| 67 |
|
|
|
|
| 68 |
if isinstance(file_input, (bytes, bytearray)):
|
| 69 |
bio = io.BytesIO(file_input)
|
| 70 |
try:
|
| 71 |
+
bio.seek(0); return pd.read_excel(bio, engine="openpyxl")
|
|
|
|
| 72 |
except Exception:
|
| 73 |
try:
|
| 74 |
+
bio.seek(0); return pd.read_csv(bio, sep=None, engine="python")
|
|
|
|
| 75 |
except Exception:
|
| 76 |
+
bio.seek(0); return pd.read_csv(bio)
|
|
|
|
| 77 |
|
| 78 |
raise ValueError("不支援的檔案型態,請上傳 .xlsx 或 .csv 檔。")
|
| 79 |
|
|
|
|
| 82 |
existing_positions = [i for i in TARGET_INDICES if i < n_cols]
|
| 83 |
if not existing_positions:
|
| 84 |
raise ValueError("上傳的資料欄位數不足,無法擷取指定欄位(A,B,K,L,M,V,W,X,Y)。")
|
|
|
|
| 85 |
out = df.iloc[:, existing_positions].copy()
|
|
|
|
| 86 |
name_map = []
|
| 87 |
for pos in existing_positions:
|
| 88 |
idx_in_targets = TARGET_INDICES.index(pos)
|
| 89 |
name_map.append(TARGET_NAMES[idx_in_targets])
|
|
|
|
| 90 |
out.columns = name_map
|
| 91 |
return out
|
| 92 |
|
|
|
|
| 108 |
return h * 3600 + m * 60 + s
|
| 109 |
|
| 110 |
def _hhmmss_int_to_seconds(n: int):
    """Decode an integer written as HHMMSS into seconds since midnight.

    For example ``93005`` is read as 09:30:05 and yields ``34205``.

    Args:
        n: Integer whose decimal digits are hours, minutes and seconds.

    Returns:
        ``hours * 3600 + minutes * 60 + seconds`` when the value lies in
        ``0..235959`` and every field is a valid clock component;
        ``pd.NA`` otherwise.
    """
    if n < 0 or n > 235959:
        return pd.NA

    # Peel the two-digit fields off from the right: SS first, then MM,
    # leaving HH as the remaining quotient.
    remaining, seconds = divmod(n, 100)
    hours, minutes = divmod(remaining, 100)

    fields_valid = hours <= 23 and minutes <= 59 and seconds <= 59
    return hours * 3600 + minutes * 60 + seconds if fields_valid else pd.NA
|
| 118 |
|
| 119 |
def series_time_to_seconds_of_day(series: pd.Series) -> pd.Series:
    """Convert a column of time-like values to seconds since midnight.

    Each value is interpreted by the first rule that applies:

    1. ``datetime64`` column (naive or tz-aware): its hour/minute/second.
    2. ``timedelta64`` column: total seconds modulo one day.
    3. ``datetime.time`` object: its hour/minute/second.
    4. Number, or string holding a number:
       * a non-negative whole number whose digits form a valid HHMMSS
         clock reading (``93005`` -> 09:30:05) is decoded as such;
       * any other number is treated as an Excel serial date and its
         fractional part is scaled to seconds (``45213.5`` -> 43200).
    5. Anything else is passed to ``pd.to_datetime`` and the time of day
       of the parsed timestamp is used.

    Numbers are handled before ``pd.to_datetime`` on purpose: pandas reads
    a bare number as an offset from the Unix epoch, which would turn every
    numeric value into (roughly) midnight and hide rules 4a/4b.

    Note that a whole number is inherently ambiguous (HHMMSS vs. an Excel
    date without a time part); HHMMSS wins whenever the digits are a valid
    clock reading.

    Args:
        series: Column to convert. It is not modified.

    Returns:
        A float Series on the same index with values in ``0..86399``;
        entries that cannot be interpreted are ``NaN``.
    """
    # 1) Whole column is already datetime-typed.
    if pd.api.types.is_datetime64_any_dtype(series):
        clock = series.dt
        return (clock.hour * 3600 + clock.minute * 60 + clock.second).astype("float")

    # 2) Whole column is a duration: wrap into a single day.
    if pd.api.types.is_timedelta64_dtype(series):
        return (series.dt.total_seconds() % 86400).astype("float")

    # Work positionally on a NumPy buffer so duplicate index labels cannot
    # break label-aligned assignment.
    size = len(series)
    seconds = np.full(size, np.nan, dtype="float64")

    # 3) datetime.time objects.
    is_time = np.fromiter(
        (isinstance(value, time) for value in series), dtype=bool, count=size
    )
    if is_time.any():
        seconds[is_time] = [
            value.hour * 3600 + value.minute * 60 + value.second
            for value in series[is_time]
        ]

    # 4) Numbers and numeric strings.
    numeric = np.full(size, np.nan, dtype="float64")
    candidates = ~is_time
    if candidates.any():
        numeric[candidates] = pd.to_numeric(
            series[candidates], errors="coerce"
        ).to_numpy(dtype="float64", na_value=np.nan)
    is_num = ~np.isnan(numeric)
    if is_num.any():
        values = numeric[is_num]
        # Infinite inputs yield NaN in divmod/modulo; silence the warning
        # and let them fall out as NaN.
        with np.errstate(invalid="ignore"):
            hours, remainder = np.divmod(values, 10000)
            minutes, secs = np.divmod(remainder, 100)
            is_hhmmss = (
                (values == np.floor(values))
                & (values >= 0)
                & (hours <= 23)
                & (minutes <= 59)
                & (secs <= 59)
            )
            # round() absorbs float error in the serial fraction; the final
            # modulo keeps a value that rounds up to 86400 inside the day.
            excel = np.round((values % 1) * 86400) % 86400
        seconds[is_num] = np.where(
            is_hhmmss, hours * 3600 + minutes * 60 + secs, excel
        )

    # 5) Everything else: let pandas parse it as a timestamp.
    leftover = ~(is_time | is_num)
    if leftover.any():
        parsed = pd.to_datetime(series[leftover], errors="coerce")
        if pd.api.types.is_datetime64_any_dtype(parsed):
            clock = parsed.dt
            seconds[leftover] = (
                clock.hour * 3600 + clock.minute * 60 + clock.second
            ).to_numpy(dtype="float64")
        else:
            # pandas can hand back an object column (e.g. mixed UTC
            # offsets); fall back to reading each parsed value on its own.
            seconds[leftover] = [
                stamp.hour * 3600 + stamp.minute * 60 + stamp.second
                if hasattr(stamp, "hour") and not pd.isna(stamp)
                else np.nan
                for stamp in parsed
            ]

    return pd.Series(seconds, index=series.index, dtype="float")
|
| 157 |
|
| 158 |
def pad_time(h, m, s):
    """Render hour, minute and second parts as an ``HH:MM:SS`` label.

    Args:
        h: Hour part, or ``None`` when not supplied.
        m: Minute part, or ``None`` when not supplied.
        s: Second part, or ``None`` when not supplied.

    Returns:
        The three parts joined by ``:``. Each part is truncated to an int
        and zero-padded to at least two digits; a ``None`` part is shown
        as ``??``.
    """
    fields = []
    for part in (h, m, s):
        if part is None:
            fields.append("??")
        else:
            fields.append(f"{int(part):02d}")
    return ":".join(fields)
|
| 161 |
|
| 162 |
+
def make_scatter_with_trend(df: pd.DataFrame, x_col: str, y_cols: list):
    """Draw scatter plots of one or more Y columns against X, each with a
    least-squares straight trend line, on a single shared axis.

    Args:
        df: Data to plot.
        x_col: Name of the X column. The ``"time"`` column, or any
            object-dtype column, is converted with
            ``series_time_to_seconds_of_day``; other columns go through
            ``pd.to_numeric``.
        y_cols: Names of the Y columns. A single name given as a plain
            string is accepted and treated as a one-element list.

    Returns:
        ``(fig, lines)``: the matplotlib figure and the number of trend
        lines that were drawn. Y columns with fewer than two usable points
        are skipped, so ``lines`` may be smaller than ``len(y_cols)``.

    Raises:
        ValueError: If there is no data, no column was selected, a column
            is missing from ``df``, or X has fewer than two usable values.
    """
    if df is None or len(df) == 0:
        raise ValueError("沒有可繪圖的資料。")
    if not x_col or not y_cols:
        raise ValueError("請選擇 X 與至少一個 Y 欄位。")
    if isinstance(y_cols, str):
        # A bare string would otherwise be iterated character by character.
        y_cols = [y_cols]
    for c in [x_col, *y_cols]:
        if c not in df.columns:
            raise ValueError(f"找不到欄位:{c}")

    # Bring X onto a numeric scale.
    x = df[x_col]
    if x_col == "time" or x.dtype == object:
        x_num = series_time_to_seconds_of_day(x)
    else:
        x_num = pd.to_numeric(x, errors="coerce")
    if x_num.notna().sum() < 2:
        raise ValueError("X 軸無法解析為數值或有效點數不足。")

    fig, ax = plt.subplots(figsize=(7, 4.5))  # one figure, one axis
    lines = 0
    try:
        for y_col in y_cols:
            y = pd.to_numeric(df[y_col], errors="coerce")
            mask = x_num.notna() & y.notna()
            if mask.sum() < 2:
                continue

            xs = x_num[mask].to_numpy()
            ys = y[mask].to_numpy()

            ax.scatter(xs, ys, label=str(y_col), alpha=0.8)

            # Linear trend line (ordinary least squares). A failed fit only
            # drops the line; the scatter points stay on the plot.
            try:
                slope, intercept = np.polyfit(xs, ys, 1)
            except (np.linalg.LinAlgError, ValueError):
                continue
            x_line = np.linspace(xs.min(), xs.max(), 200)
            ax.plot(x_line, slope * x_line + intercept, linewidth=2)
            lines += 1

        ax.set_xlabel(x_col)
        ax.set_ylabel(y_cols[0] if len(y_cols) == 1 else "Selected Y")
        ax.grid(True, alpha=0.3)
        if ax.get_legend_handles_labels()[0]:
            # Only build a legend when at least one series was drawn.
            ax.legend()
        fig.tight_layout()
    finally:
        # plt.subplots() registers the figure in pyplot's global state; in
        # a long-running app every call would otherwise keep one more
        # figure alive. The Figure object itself stays usable afterwards.
        plt.close(fig)
    return fig, lines
|
| 214 |
+
|
| 215 |
+
with gr.Blocks(title="Excel/CSV 指定欄位擷取器(含時間過濾+繪圖)") as demo:
|
| 216 |
+
gr.Markdown("### 指定欄位擷取(A,B,K,L,M,V,W,X,Y)→ 重新命名為 data,time,⊿Ptop,⊿Pmid,⊿Pbot,H2%,CO%,CO2%,CH4% ;支援 **時間區段 (hh:mm:ss)** 過濾與 **散佈圖+直線** 繪製(Y 可複選)。")
|
| 217 |
+
|
| 218 |
+
df_state = gr.State(value=None) # 保存處理後的 DataFrame
|
| 219 |
|
| 220 |
inp = gr.File(label="上傳 .xlsx 或 .csv 檔案", file_types=[".xlsx", ".csv"], type="filepath")
|
| 221 |
|
| 222 |
with gr.Row():
|
| 223 |
+
gr.Markdown("**開始時間 (hh:mm:ss)**")
|
| 224 |
with gr.Row():
|
| 225 |
sh = gr.Number(label="Start HH (0-23)", value=None)
|
| 226 |
sm = gr.Number(label="Start MM (0-59)", value=None)
|
| 227 |
ss = gr.Number(label="Start SS (0-59)", value=None)
|
| 228 |
|
| 229 |
with gr.Row():
|
| 230 |
+
gr.Markdown("**結束時間 (hh:mm:ss)**")
|
| 231 |
with gr.Row():
|
| 232 |
eh = gr.Number(label="End HH (0-23)", value=None)
|
| 233 |
em = gr.Number(label="End MM (0-59)", value=None)
|
|
|
|
| 239 |
msg = gr.Markdown()
|
| 240 |
preview = gr.Dataframe(label="預覽(前 20 列)", wrap=True)
|
| 241 |
|
| 242 |
+
gr.Markdown("### 繪圖設定")
|
| 243 |
+
with gr.Row():
|
| 244 |
+
x_sel = gr.Dropdown(label="X 軸欄位", choices=[], value=None)
|
| 245 |
+
y_sel = gr.Dropdown(label="Y 軸欄位(可複選)", choices=[], value=None, multiselect=True)
|
| 246 |
+
|
| 247 |
+
plot_btn = gr.Button("繪圖(散佈+直線)")
|
| 248 |
+
plot_out = gr.Plot(label="散佈圖(含線性趨勢線)")
|
| 249 |
+
plot_msg = gr.Markdown()
|
| 250 |
+
|
| 251 |
def run_pipeline(file_path_str, sh_, sm_, ss_, eh_, em_, es_):
    """Load the uploaded file, extract the target columns, optionally
    filter rows by a time-of-day window, and write the result to Excel.

    Args:
        file_path_str: Path of the uploaded file; falsy when nothing was
            uploaded.
        sh_, sm_, ss_: Start hour, minute and second from the UI.
        eh_, em_, es_: End hour, minute and second from the UI.

    Returns:
        A 6-tuple matching the click handler's outputs: download-file
        update, status message, preview of the first 20 rows, the full
        result frame (kept in state for plotting), and updates for the
        X and Y column selectors. On any failure the file output is
        hidden, the message explains the problem, the preview is empty,
        the state is ``None`` and both selectors are cleared.
    """
    import tempfile

    def failure(message):
        # One place for the "nothing to show" result of every error path.
        return (
            gr.update(visible=False),
            message,
            pd.DataFrame(),
            None,
            gr.update(choices=[], value=None),
            gr.update(choices=[], value=None),
        )

    if not file_path_str:
        return failure("請先上傳檔案。")

    try:
        df = load_dataframe(file_path_str)
        out = extract_and_rename(df)
    except Exception as e:
        return failure(f"處理失敗:{e}")

    original_rows = len(out)

    try:
        start_sec = parse_time_to_seconds(sh_, sm_, ss_)
        end_sec = parse_time_to_seconds(eh_, em_, es_)
    except Exception as e:
        return failure(f"時間輸入錯誤:{e}")

    # Filtering only happens when both ends of the window were given.
    parsed_ok = None
    if (start_sec is not None) and (end_sec is not None):
        if "time" not in out.columns:
            return failure("找不到 'time' 欄,無法做時間過濾。")

        secs = series_time_to_seconds_of_day(out["time"])
        valid_mask = secs.notna()
        parsed_ok = int(valid_mask.sum())
        # Unparseable rows get a sentinel and are excluded via valid_mask.
        secs_valid = secs.where(valid_mask, other=-1)

        if start_sec <= end_sec:
            keep = valid_mask & (secs_valid >= start_sec) & (secs_valid <= end_sec)
        else:
            # Start after end: the window wraps around midnight.
            keep = valid_mask & ((secs_valid >= start_sec) | (secs_valid <= end_sec))

        out = out.loc[keep].reset_index(drop=True)

    # Write to a uniquely named temp file: a timestamp alone has one-second
    # resolution, so simultaneous requests could overwrite each other.
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    try:
        fd, out_path = tempfile.mkstemp(
            prefix=f"extracted_columns_{ts}_", suffix=".xlsx"
        )
        os.close(fd)
        out.to_excel(out_path, index=False, engine="openpyxl")
    except Exception as e:
        return failure(f"輸出 Excel 失敗:{e}")

    # Offer the result's columns in the plot selectors, with defaults.
    cols = out.columns.tolist()
    default_x = "time" if "time" in cols else (cols[0] if cols else None)
    default_y = [c for c in ["H2%", "CO%", "CO2%", "CH4%"] if c in cols] or (
        [cols[1]] if len(cols) > 1 else cols
    )

    note_lines = [f"完成!原始列數:**{original_rows}**"]
    if parsed_ok is not None:
        note_lines.append(f"可解析時間列數:**{parsed_ok}**")
        note_lines.append(
            f"時間區段:**{pad_time(sh_, sm_, ss_)} → {pad_time(eh_, em_, es_)}**"
        )
    note_lines.append(f"輸出列數:**{len(out)}**")
    note_lines.append("下方預覽、右側可下載 Excel;並可於下方選擇欄位繪圖。")
    note = "|".join(note_lines)

    return (
        gr.update(value=out_path, visible=True),
        note,
        out.head(20),
        out,  # stored in State for the plot handler
        gr.update(choices=cols, value=default_x),
        gr.update(choices=cols, value=default_y),
    )
|
| 311 |
+
|
| 312 |
+
def plot_handler(df, x_col, y_cols):
    """Click handler for the plot button.

    Args:
        df: Processed data frame held in state, or ``None`` when the
            pipeline has not produced one yet.
        x_col: Selected X column name.
        y_cols: Selected Y column names; a falsy value is treated as an
            empty selection.

    Returns:
        ``(figure, message)``. ``figure`` is ``None`` when there is no data
        yet or when plotting raised; ``message`` then explains why.
    """
    if df is None:
        return None, "尚未有可用資料,請先完成上方處理。"

    try:
        chosen = y_cols or []
        figure, trend_count = make_scatter_with_trend(df, x_col, chosen)
        summary = f"完成繪圖:共 {len(chosen)} 個 Y;已繪製 {trend_count} 條線性趨勢線。"
    except Exception as err:
        return None, f"繪圖失敗:{err}"
    return figure, summary
|
| 321 |
|
| 322 |
run_btn.click(
|
| 323 |
run_pipeline,
|
| 324 |
inputs=[inp, sh, sm, ss, eh, em, es],
|
| 325 |
+
outputs=[file_out, msg, preview, df_state, x_sel, y_sel]
|
| 326 |
+
)
|
| 327 |
+
|
| 328 |
+
plot_btn.click(
|
| 329 |
+
plot_handler,
|
| 330 |
+
inputs=[df_state, x_sel, y_sel],
|
| 331 |
+
outputs=[plot_out, plot_msg]
|
| 332 |
)
|
| 333 |
|
| 334 |
if __name__ == "__main__":
|