# app.py — DoE design + AI modeling Gradio app
# (removed stray web-page scrape header: author avatar caption, "Update app.py", commit hash "1a3a3af verified")
# Requirements (pip)
# gradio>=4.17.0
# pandas>=2.0.0
# numpy>=1.24.0
# matplotlib
# scikit-learn>=1.2.0
# pyDOE2
# Pillow
# xgboost>=2.0.0
# lightgbm>=4.0.0
# seaborn
# scipy>=1.10.0
# plotly>=5.16.0
# scikit-optimize>=0.9.0
# optuna (不需要)
# tpot>=0.12 (目前沒用到)
# shap
# tabulate
# import blocks
# [Gradio 基本UI]
import gradio as gr
# [數據處理/科學計算]
import numpy as np
import pandas as pd
# [DoE設計/取樣法]
from pyDOE2 import lhs, bbdesign, ccdesign
from scipy.stats.qmc import Sobol, Halton
# [AI/ML建模]
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
import xgboost as xgb
import lightgbm as lgb
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
# [模型訓練/評估/特徵選擇]
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.multioutput import MultiOutputRegressor
from sklearn.cross_decomposition import PLSRegression
from sklearn.preprocessing import PolynomialFeatures
# [可視化/統計分析]
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from scipy.stats import skew, kurtosis, shapiro
from scipy.interpolate import griddata
import shap
# [雜項工具/暫存檔]
import tempfile
import warnings
warnings.filterwarnings("ignore")
import io
# [貝葉斯/自動化優化]
from skopt import BayesSearchCV
from skopt.space import Real, Integer, Categorical
import optuna # ← 若要支援optuna,這裡一定要加
import optuna.visualization.matplotlib
from scipy.optimize import minimize
# [互動頁面樣式]
# Custom CSS injected into the Gradio Blocks UI (cards, buttons, fonts, tabs,
# recommendation card, footer). The string below is passed to Gradio verbatim,
# so its content — including the CSS comments — is runtime data.
custom_css = """
/* 主卡片 */
/* 用於主要內容區塊(如自動摘要卡片、資訊框)美化,圓角+陰影 */
.gr-card {
background: #f7fafd;
border-radius: 18px;
box-shadow: 0 2px 8px #0002;
}
/* 按鈕樣式 */
/* 主要操作按鈕樣式:漸層、圓角、陰影、字體放大,滑鼠懸停有亮度與色調變化 */
.main-btn {
font-size: 1.14em;
padding: 10px 28px 11px 28px;
border-radius: 999px;
margin: 14px 0 8px 0;
background: linear-gradient(90deg, #5b8cff, #76e7ff 70%);
color: #fff;
font-weight: 600;
box-shadow: 0 2px 12px #3976d855;
border: none;
transition: .2s;
}
.main-btn:hover {
filter: brightness(1.08);
box-shadow: 0 2px 18px #5b8cff33;
background: linear-gradient(90deg, #2259c9 55%, #4fa7d9);
}
/* 全平台字體 */
/* 全域字型套用 Noto Sans JP(優先日系風格),備用 Segoe UI 和 Arial,維持專案一致性 */
.gradio-container {
font-family: 'Noto Sans JP', 'Segoe UI', Arial, sans-serif;
}
/* 區塊標題 */
/* 區塊或章節標題顯眼化,藍色、字重加粗 */
.section-title {
font-size: 1.26em;
font-weight: 700;
color: #3976d8;
margin-bottom: 6px;
}
/* Tab & Row 區塊間距 */
/* Tab、列區塊加上下間距,避免UI太擁擠 */
.tab-pane, .gr-row {
padding: 8px 0;
}
/* 推薦小卡 */
/* 固定右下角推薦訊息小卡,用於快速提醒或建議,帶有柔和背景、陰影 */
#recommend-card {
position: fixed; right: 28px; bottom: 28px; max-width: 360px;
background: #f8fbffde; border-radius: 13px; box-shadow: 0 2px 14px #5ab2fa29;
border-left: 5px solid #88a3e6; padding: 12px 18px 10px 14px; font-size: 1.07em;
z-index: 9999; color: #285078;
}
/* 分隔線 */
/* 水平線設計:加厚、色彩淡藍灰,分隔內容用 */
hr {
border: 0;
border-bottom: 2.5px solid #e4e8f0;
margin: 28px 0 22px 0;
}
/* Accordion 動畫 */
/* 摺疊區塊 summary 動畫與配色,展開時加上陰影強調 */
.accordion > summary {
transition: .25s;
background: #f4f8ff;
}
.accordion[open] > summary {
background: #cfeaff;
}
.accordion[open] {
box-shadow: 0 8px 24px #1ca7ec25;
}
/* Tab高亮 */
/* 當前選中 Tab 標籤高亮顯示,底色、字色、粗體 */
div[role="tab"][aria-selected="true"] {
background: #e3f1ff !important;
font-weight: bold;
color: #2675ff;
}
/* 頁腳 */
/* Footer字體顏色與大小(淡灰、較小字) */
#footer {
color: #888;
font-size: 0.98em;
}
"""
# [UI 樣式工具/自動摘要卡片]
# = 產生HTML卡片說明 =
def make_card(title, tips):
    """Render a titled HTML card whose tips become <li> entries."""
    items = "".join(f"<li>{tip}</li>" for tip in tips)
    return f"<div class='gr-card'><b>{title}</b><ul style='margin:0 0 0 12px'>{items}</ul></div>"
# = 根據圖形類型產生分圖說明卡片 =
def get_viz_desc(name):
    """Return an HTML description card for the given plot type.

    Unknown names yield an empty card. The table content is user-facing text
    and must not be altered.
    """
    # Static lookup: plot type -> (card title, bullet tips)
    idx = {
        "DoE Main": ("DoE設計分布圖", [
            "檢查設計點分布是否均勻覆蓋整個空間",
            "若集中/離散,代表參數區間或設計法可優化",
            "高維會以降維(PCA)檢視主變異結構"
        ]),
        "Heatmap": ("Heatmap(相關係數)", [
            "檢查所有數值變數的正負相關",
            "紅色:強正相關;藍色:強負相關",
            "相關係數>0.7為高度相關,< -0.7為強負相關"
        ]),
        "Pairplot": ("Pairplot(成對散點)", [
            "展示任兩變數間的散點型態",
            "斜線型=高相關,圓形=低相關",
            "可發現集群、離群或特定結構"
        ]),
        "Histogram": ("Histogram(直方圖)", [
            "單變數分布形態檢查",
            "偏態、長尾、極端值需注意",
            "單峰/多峰、常態/非對稱可判斷資料型態"
        ]),
        "Scatter Matrix": ("Scatter Matrix(全變數關聯)", [
            "類似Pairplot但一次顯示所有成對分布",
            "對角線顯示每欄分布直方圖",
            "可發現明顯群集、離群"
        ]),
        "PCA": ("PCA(主成分分布)", [
            "多維特徵壓縮到2D",
            "檢查主變異來源、潛在群集",
            "可輔助檢查是否有明顯離群"
        ]),
        "AI Predict": ("AI預測對比圖", [
            "預測y與實際y對比,點貼近對角線代表高精度",
            "偏離對角線代表模型誤差大,建議優化特徵或模型"
        ]),
        "Bayesian Optimization": ("貝葉斯優化", [
            "自動搜尋最佳參數組合,減少無效試驗",
            "可用於AI模型超參數、或實驗設計優化",
            "優化歷程圖:看最佳值逐步收斂"
        ])
    }
    # Fallback ("", []) produces an empty card rather than raising KeyError
    return make_card(*idx.get(name, ("", [])))
# = 根據視覺化類型自動產生總結 =
def auto_conclude_viz(df, vtype):
    """Generate an auto-conclusion HTML card for a visualization type.

    Args:
        df: DataFrame behind the plot; may contain non-numeric columns.
        vtype: one of "Heatmap", "Histogram", "PCA", "Pairplot",
            "Scatter Matrix"; anything else yields a generic message.

    Returns:
        HTML string produced by make_card.
    """
    out = []
    if vtype == "Heatmap":
        # numeric_only=True: pandas>=2.0 raises on non-numeric columns otherwise
        cor = df.corr(numeric_only=True)
        # keep the strict upper triangle so each pair is examined once
        highcorr = cor.where(np.triu(np.ones(cor.shape), 1).astype(bool)).stack()
        hc = highcorr[abs(highcorr) > 0.7]
        if not hc.empty:
            for idx, v in hc.items():
                out.append(f"「{idx[0]}」與「{idx[1]}」高度相關 (corr={v:.2f})")
        else:
            out.append("無明顯高度相關特徵")
    elif vtype == "Histogram":
        for col in df.select_dtypes(include=np.number).columns:
            sk = skew(df[col].dropna())
            if abs(sk) > 2:
                out.append(f"「{col}」極端偏態({sk:.2f})")
        if not out:
            out.append("欄位分布大致對稱")
    elif vtype == "PCA":
        # Restrict to complete numeric rows: PCA cannot handle NaN or strings
        num = df.select_dtypes(include=np.number).dropna()
        pca = PCA(n_components=2).fit(num.values)
        expvar = pca.explained_variance_ratio_.sum()
        out.append(f"前2主成分共解釋 {expvar*100:.1f}% 變異")
    elif vtype == "Pairplot":
        out.append("請留意有無線性排列(高相關)或明顯群集/異常點")
    elif vtype == "Scatter Matrix":
        out.append("集群或離群點可從圖中直接辨識")
    else:
        out.append("資料檢查完成")
    return make_card("AI自動結論", out)
# = 產生AI回歸自動總結卡片 =
def auto_conclude_ai(y_test, y_pred, name):
    """Build an auto-conclusion HTML card summarizing regression quality."""
    r2 = r2_score(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    verdict = "模型表現佳" if r2 > 0.8 else "可進一步優化特徵/資料量"
    lines = [
        f"模型:{name}",
        f"測試集 RMSE={rmse:.3g}",
        f"R²={r2:.2f}",
        verdict,
    ]
    return make_card("AI自動結論", lines)
# = 貝葉斯優化RMSE曲線自動解讀摘要 =
def auto_conclude_bayes_curve(rmse_curve, model_name=None):
    """Classify the shape of a Bayesian-optimization RMSE curve and advise.

    Fix: removed the redundant local `import numpy as np` that shadowed the
    module-level import.

    Args:
        rmse_curve: non-empty sequence of per-iteration CV RMSE values.
        model_name: optional label prepended to the summary.

    Returns:
        Markdown string with the lowest RMSE, the trend label, and advice.
    """
    rmse_curve = np.array(rmse_curve)
    minv = np.min(rmse_curve)
    lastv = rmse_curve[-1]
    diff = np.ptp(rmse_curve)  # peak-to-peak spread of the curve
    std = np.std(rmse_curve)
    # Classify the trend; order matters — the first matching rule wins.
    trend = "平穩"
    if np.allclose(rmse_curve, rmse_curve[0], atol=0.2*std):
        trend = "幾乎無變化"
    elif rmse_curve[0] > minv and lastv > rmse_curve[0] and lastv > minv + std:
        trend = "尾端上升"
    elif np.argmin(rmse_curve) < len(rmse_curve)//2:
        trend = "快速下降收斂"
    elif std > 0.2 * minv and diff > 0.3 * minv:
        trend = "波動起伏"
    if trend == "快速下降收斂":
        comment = "RMSE 隨迭代明顯下降,代表最佳化收斂,已找到較佳參數組合。"
    elif trend == "幾乎無變化":
        comment = "RMSE 變動極小,代表模型/資料難以藉由超參數優化提升。"
    elif trend == "尾端上升":
        comment = "最後幾點 RMSE 明顯上升,建議忽略尾端結果,以最低點作最佳選擇。"
    elif trend == "波動起伏":
        comment = "RMSE 震盪明顯,代表模型不穩定或參數空間設過寬,建議縮小搜尋區間。"
    else:
        comment = "RMSE 變動趨勢平穩,可依最低點選定最佳參數。"
    model_str = f"【{model_name}】" if model_name else ""
    return f"{model_str}最低RMSE:**{minv:.3f}**\n- 收斂型態:**{trend}**\n- 建議:{comment}\n"
# [資料品質檢查/AutoML推薦]
# = 自動偵測資料品質與欄位異常 =
def auto_data_quality_check(datafile):
    """Scan an uploaded CSV for missing values, constant columns and heavy skew.

    Returns a short hint string when nothing was uploaded, otherwise an HTML
    list of detected issues (or an all-clear message).
    """
    if datafile is None:
        return "> 尚未上傳資料"
    path = datafile.name if hasattr(datafile, "name") else datafile
    df = pd.read_csv(path)
    tips = []
    for col in df.columns:
        series = df[col]
        if series.isnull().sum() > 0:
            tips.append(f"「{col}」有缺值,建議補值或刪除")
        if series.nunique() == 1:
            tips.append(f"「{col}」為常數欄,建議刪除")
        if pd.api.types.is_numeric_dtype(series):
            sk = series.dropna().skew() if hasattr(series, "skew") else 0
            if abs(sk) > 2:
                tips.append(f"「{col}」嚴重偏態(skew={sk:.2f}),建議標準化")
    if not tips:
        tips = ["資料品質良好,無明顯異常。"]
    items = "".join(f"<li>{t}</li>" for t in tips)
    return "<b>資料品質偵測:</b><ul style='margin:0 0 0 12px'>" + items + "</ul>"
# = 簡易AutoML,推薦最佳模型並產生程式碼 =
def automl_leaderboard(datafile):
    """(Simplified) AutoML stand-in: pick a 'best' model and emit sample code.

    A real implementation would benchmark TPOT / auto-sklearn or several
    models; here a candidate is drawn at random as a placeholder.
    """
    candidates = ["XGBoost", "Random Forest", "LightGBM", "SVR"]
    best = np.random.choice(candidates)
    code = f"""# 範例BestModel
from xgboost import XGBRegressor
model = XGBRegressor(n_estimators=120, random_state=0)
model.fit(X_train, y_train)"""
    return f"最佳模型推薦:<b>{best}</b>", code
# [DoE設計/推薦點生成]
# = 檢查新點是否與已存在點重複(向量距離法)=
def is_close_to_existing(xrow, existing_X, tol=1e-4):
    """True if xrow is within tol (element-wise) of any point in existing_X."""
    pts = np.asarray(existing_X)
    if pts.size == 0:
        return False  # nothing to compare against
    delta = np.abs(pts - np.array(xrow))
    # A single flat point: every coordinate must be inside tolerance
    if delta.ndim == 1:
        return np.all(delta < tol)
    # 2-D: match if any row is close in all coordinates
    return np.any(np.all(delta < tol, axis=1))
# = 多模型AI推薦下一批DoE設計點(探索/最大化等模式)=
def suggest_next_doe_points_batch(
        datafile,
        model_types=None,
        mode="最大化", n_points=3,
        exclude_existing=True,
        random_seed=42,
        max_attempts_factor=30,
        return_df=False
):
    """Recommend the next batch of DoE points with a multi-model ensemble.

    Trains one regressor per entry in `model_types` on the uploaded CSV (all
    columns but the last are features; the last is the target) and repeatedly
    optimizes an acquisition objective chosen by `mode`: "最大化" / "最小化"
    target the ensemble mean, "不確定性" targets the ensemble std
    (model disagreement); unknown modes fall back to maximization.

    Args:
        datafile: uploaded file object or CSV path; None yields a hint/empty df.
        model_types: ensemble member names; defaults to RF/XGB/LGBM/SVR.
            (Fix: was a mutable list default — replaced with a None sentinel.)
        mode: acquisition strategy string.
        n_points: number of candidate points to return.
        exclude_existing: skip candidates duplicating known or found points.
        random_seed: seeds the models and the random restarts.
        max_attempts_factor: optimizer restarts allowed per requested point.
        return_df: if True return a DataFrame (for CSV download) instead of HTML.

    Returns:
        HTML string, or a DataFrame when `return_df` is True.
    """
    import numpy as np
    from scipy.optimize import minimize
    import pandas as pd
    if model_types is None:
        model_types = ["Random Forest", "XGBoost", "LightGBM", "SVR"]
    if datafile is None:
        if return_df:
            return pd.DataFrame()
        return "> 尚未上傳資料"
    df = pd.read_csv(datafile.name if hasattr(datafile, "name") else datafile)
    X, y = df.iloc[:, :-1], df.iloc[:, -1]
    colnames = X.columns
    # Train the ensemble members
    models = []
    for t in model_types:
        if t == "Random Forest":
            models.append(RandomForestRegressor(n_estimators=120, random_state=random_seed))
        elif t == "XGBoost":
            models.append(XGBRegressor(n_estimators=120, random_state=random_seed, verbosity=0))
        elif t == "LightGBM":
            models.append(LGBMRegressor(n_estimators=120, random_state=random_seed))
        elif t == "SVR":
            models.append(make_pipeline(StandardScaler(), SVR()))
    for m in models:
        m.fit(X, y)
    bounds = [(X[c].min(), X[c].max()) for c in colnames]
    def ensemble_pred(xrow):
        # Mean prediction and cross-model std (disagreement) at one point
        preds = [m.predict(np.array(xrow).reshape(1, -1))[0] for m in models]
        return np.mean(preds), np.std(preds)
    if mode == "最大化":
        def obj(x): return -ensemble_pred(x)[0]
    elif mode == "最小化":
        def obj(x): return ensemble_pred(x)[0]
    elif mode == "不確定性":
        def obj(x): return -ensemble_pred(x)[1]
    else:
        def obj(x): return -ensemble_pred(x)[0]
    found_points, preds_mean, preds_std = [], [], []
    attempts = 0
    max_attempts = n_points * max_attempts_factor
    np.random.seed(random_seed)
    while len(found_points) < n_points and attempts < max_attempts:
        # Random restart inside the data's bounding box
        x0 = np.random.uniform([b[0] for b in bounds], [b[1] for b in bounds])
        res = minimize(obj, x0, bounds=bounds)
        best_x = res.x
        # Drop candidates that duplicate known or already-found points
        exist = False
        if exclude_existing:
            if is_close_to_existing(best_x, X.values) or is_close_to_existing(best_x, np.array(found_points)):
                exist = True
        if exist:
            attempts += 1
            continue
        found_points.append(best_x)
        mean_pred, std_pred = ensemble_pred(best_x)
        preds_mean.append(mean_pred)
        preds_std.append(std_pred)
        attempts += 1
    if not found_points:
        if return_df:
            return pd.DataFrame()
        return "> 無法自動產生新點(參數範圍或步階過細、模型表現過於平坦)"
    # DataFrame form (for CSV download)
    if return_df:
        df_points = pd.DataFrame(found_points, columns=colnames)
        df_points["y"] = ""  # left blank for the user to fill in
        df_points["模型平均預測"] = preds_mean
        df_points["不確定性(std)"] = preds_std
        df_points["推薦策略"] = mode
        return df_points
    # Markdown/HTML form — pick the "champion" candidate per strategy
    if mode == "最大化":
        best_idx = int(np.argmax(preds_mean))
    elif mode == "最小化":
        best_idx = int(np.argmin(preds_mean))
    elif mode == "不確定性":
        best_idx = int(np.argmax(preds_std))
    else:
        best_idx = 0
    out = "<b>推薦次一輪DoE設計點(Top N):</b><br>"
    for i, (best_x, mu, std) in enumerate(zip(found_points, preds_mean, preds_std), 1):
        flag = " 🏆 <b>【最推薦】</b>" if (i-1) == best_idx else ""
        out += f"<b>候選{i}{flag}:</b>"
        out += "<ul style='margin-top:0;margin-bottom:6px;'>"
        for c, v in zip(colnames, best_x):
            out += f"<li>{c} = {v:.3f}</li>"
        out += f"<li>平均預測產率:<b>{mu:.3f}</b></li><li>不確定性(std):{std:.3f}</li>"
        if mode == "不確定性":
            out += "<li><i>(此點模型間意見分歧最大)</i></li>"
        out += "</ul>"
    return out
# = 智能混合策略推薦DoE新點(最大化、最小化、不確定性、隨機)=
def suggest_mixed_doe_points(
        datafile,
        model_types=None,
        n_total=4,
        exclude_existing=True,
        random_seed=2025,
        return_df=False
):
    """Recommend DoE points with a mixed strategy.

    Collects one maximization point, one minimization point, one
    maximum-uncertainty (exploration) point, then fills the remaining slots
    with random exploration until `n_total` points are found.

    Args:
        datafile: uploaded file object or CSV path; None yields a hint/empty df.
        model_types: ensemble member names; defaults to RF/XGB/LGBM/SVR.
            (Fix: was a mutable list default — replaced with a None sentinel.)
        n_total: total number of recommended points.
        exclude_existing: skip candidates duplicating known or found points.
        random_seed: seeds the models and the random restarts.
        return_df: if True return a DataFrame (for CSV download) instead of HTML.

    Returns:
        HTML string, or a DataFrame when `return_df` is True.
    """
    import numpy as np
    from scipy.optimize import minimize
    import pandas as pd
    if model_types is None:
        model_types = ["Random Forest", "XGBoost", "LightGBM", "SVR"]
    if datafile is None:
        if return_df:
            return pd.DataFrame()
        return "> 尚未上傳資料"
    df = pd.read_csv(datafile.name if hasattr(datafile, "name") else datafile)
    X, y = df.iloc[:, :-1], df.iloc[:, -1]
    colnames = X.columns
    # Train the ensemble members
    models = []
    for t in model_types:
        if t == "Random Forest":
            models.append(RandomForestRegressor(n_estimators=120, random_state=random_seed))
        elif t == "XGBoost":
            models.append(XGBRegressor(n_estimators=120, random_state=random_seed, verbosity=0))
        elif t == "LightGBM":
            models.append(LGBMRegressor(n_estimators=120, random_state=random_seed))
        elif t == "SVR":
            models.append(make_pipeline(StandardScaler(), SVR()))
    for m in models:
        m.fit(X, y)
    bounds = [(X[c].min(), X[c].max()) for c in colnames]
    def ensemble_pred(xrow):
        # Mean prediction and cross-model std (disagreement) at one point
        preds = [m.predict(np.array(xrow).reshape(1, -1))[0] for m in models]
        return np.mean(preds), np.std(preds)
    def obj_max(x): return -ensemble_pred(x)[0]
    def obj_min(x): return ensemble_pred(x)[0]
    def obj_uncert(x): return -ensemble_pred(x)[1]
    np.random.seed(random_seed)
    found_points, point_types, mu_list, std_list = [], [], [], []
    attempts = 0
    max_attempts = n_total * 30
    # 1. Exploit: maximize the predicted response
    res1 = minimize(obj_max, np.random.uniform([b[0] for b in bounds], [b[1] for b in bounds]), bounds=bounds)
    x1 = res1.x
    if not (exclude_existing and is_close_to_existing(x1, X.values)):
        found_points.append(x1)
        point_types.append("最大化(exploit)")
        mu, std = ensemble_pred(x1)
        mu_list.append(mu)
        std_list.append(std)
    # 2. Exploit: minimize the predicted response
    res2 = minimize(obj_min, np.random.uniform([b[0] for b in bounds], [b[1] for b in bounds]), bounds=bounds)
    x2 = res2.x
    if not (exclude_existing and (is_close_to_existing(x2, X.values) or is_close_to_existing(x2, np.array(found_points)))):
        found_points.append(x2)
        point_types.append("最小化(exploit)")
        mu, std = ensemble_pred(x2)
        mu_list.append(mu)
        std_list.append(std)
    # 3. Explore: maximize model disagreement
    res3 = minimize(obj_uncert, np.random.uniform([b[0] for b in bounds], [b[1] for b in bounds]), bounds=bounds)
    x3 = res3.x
    if not (exclude_existing and (is_close_to_existing(x3, X.values) or is_close_to_existing(x3, np.array(found_points)))):
        found_points.append(x3)
        point_types.append("最大不確定性(exploration)")
        mu, std = ensemble_pred(x3)
        mu_list.append(mu)
        std_list.append(std)
    # 4. Fill remaining slots with random exploration
    while len(found_points) < n_total and attempts < max_attempts:
        x0 = np.random.uniform([b[0] for b in bounds], [b[1] for b in bounds])
        if exclude_existing and (is_close_to_existing(x0, X.values) or is_close_to_existing(x0, np.array(found_points))):
            attempts += 1
            continue
        found_points.append(x0)
        point_types.append("隨機探索")
        mu, std = ensemble_pred(x0)
        mu_list.append(mu)
        std_list.append(std)
        attempts += 1
    # DataFrame form (for CSV download)
    if return_df:
        df_points = pd.DataFrame(found_points, columns=colnames)
        df_points["y"] = ""  # left blank for the user to fill in
        return df_points
    # Markdown/HTML form
    out = "<b>智能推薦多重新DoE設計點(混合策略)</b><br>"
    for i, (best_x, mu, std, label) in enumerate(zip(found_points, mu_list, std_list, point_types), 1):
        flag = " 🏆 <b>【最推薦】</b>" if label.startswith("最大化") else ""
        out += f"<b>候選{i}{flag}:</b><i>{label}</i>"
        out += "<ul style='margin-top:0;margin-bottom:6px;'>"
        for c, v in zip(colnames, best_x):
            out += f"<li>{c} = {v:.3f}</li>"
        out += f"<li>平均預測產率:<b>{mu:.3f}</b></li><li>不確定性(std):{std:.3f}</li>"
        if label.startswith("最大不確定性"):
            out += "<li><i>(模型分歧最大,探索新區域)</i></li>"
        out += "</ul>"
    return out
# = 組合各推薦策略生成新點,合併成單一下載檔 =
def make_recommended_points(file, models, modes, n, exclude):
    """Run the selected recommendation strategies and bundle them into one CSV.

    Fixes: the guard branch left its NamedTemporaryFile handle open (fd leak,
    double-open on Windows), and the merged CSV was written without the
    utf-8-sig BOM the suffix/encoding clearly intended (Excel compatibility).

    Args:
        file: uploaded CSV (features + target in last column), or None.
        models: ensemble member names forwarded to the suggesters.
        modes: subset of {"探索型推薦", "混合策略推薦"}.
        n: number of points per strategy.
        exclude: whether to exclude near-duplicate points.

    Returns:
        (HTML summary string, path to the combined CSV file).
    """
    import pandas as pd
    import tempfile
    outs = []
    df_list = []
    n = int(n)
    if file is None or not modes or not models:
        # Guard path: still hand back an (empty) CSV so the download widget works
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode="w", encoding="utf-8")
        tmp.close()  # close before pandas re-opens the path (fd leak fix)
        pd.DataFrame().to_csv(tmp.name, index=False)
        return "請確認已上傳資料、選模式與模型", tmp.name
    for mode in modes:
        if mode == "探索型推薦":
            out = suggest_next_doe_points_batch(file, models, "最大化", n, exclude, return_df=False)
            df = suggest_next_doe_points_batch(file, models, "最大化", n, exclude, return_df=True)
            outs.append(f"<b>【探索型推薦】</b><br>{out}")
            if isinstance(df, pd.DataFrame) and not df.empty:
                df_list.append(df)
        elif mode == "混合策略推薦":
            out = suggest_mixed_doe_points(file, models, n, exclude, return_df=False)
            df = suggest_mixed_doe_points(file, models, n, exclude, return_df=True)
            outs.append(f"<b>【混合策略推薦】</b><br>{out}")
            if isinstance(df, pd.DataFrame) and not df.empty:
                df_list.append(df)
    if df_list:
        all_df = pd.concat(df_list, ignore_index=True).drop_duplicates()
    else:
        all_df = pd.DataFrame()
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode="w", encoding="utf-8-sig")
    tmp.close()  # avoid writing through two handles to the same path
    all_df.to_csv(tmp.name, index=False, encoding="utf-8-sig")
    return "<br><hr>".join(outs), tmp.name  # two outputs: markdown + csv path
# = 下載推薦DoE點組合(CSV)=
def download_recommended_points(file, models, mode, n, exclude):
    """Build the recommended-points table for `mode` and save it as a temp CSV.

    Returns the CSV path, or None when no points could be generated.
    """
    n_points = int(n)
    if mode == "混合策略推薦":
        df_points = suggest_mixed_doe_points(file, models, n_points, exclude, return_df=True)
    else:
        # Default: exploration-style recommendation
        df_points = suggest_next_doe_points_batch(file, models, mode, n_points, exclude, return_df=True)
    if df_points is None or len(df_points) == 0:
        return None
    # Persist to a temp file for the Gradio download widget
    with tempfile.NamedTemporaryFile(suffix=".csv", mode="w", delete=False, encoding="utf-8-sig") as f:
        df_points.to_csv(f.name, index=False)
    return f.name
# [資料合併/CSV工具]
# = 兩份CSV資料自動合併、去重、優先保留已填y者 =
def merge_csvs(base_csv, new_csv):
    """Merge two CSVs, dedupe on the non-y columns, prefer rows with y filled.

    Returns the merged CSV's temp-file path, or None if either input is missing.
    """
    import pandas as pd
    import tempfile
    if base_csv is None or new_csv is None:
        return None
    path1 = base_csv.name if hasattr(base_csv, "name") else base_csv
    path2 = new_csv.name if hasattr(new_csv, "name") else new_csv
    df1 = pd.read_csv(path1)
    df2 = pd.read_csv(path2)
    # Rows whose target y is filled take priority; unfilled points only add,
    # never overwrite (NaN y sorts last, so filled rows win drop_duplicates).
    key_cols = [c for c in df1.columns if c != "y"]
    merged = pd.concat([df1, df2], ignore_index=True)
    merged = (
        merged.sort_values(by=["y"], ascending=[False])
        .drop_duplicates(subset=key_cols, keep="first")
        .reset_index(drop=True)
    )
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode="w", encoding="utf-8-sig")
    merged.to_csv(tmp.name, index=False)
    tmp.flush()
    tmp.close()
    return tmp.name
# [標準DoE設計/分布比較]
# = 檢查輸入參數列是否有效 =
def is_valid_row(row):
    """Validate a parameter-table row: [name, low, high, step] with numeric bounds."""
    if not isinstance(row, (list, tuple)) or len(row) < 4:
        return False
    try:
        if not str(row[0]).strip():
            return False  # parameter name must be non-blank
        # low / high / step must all parse as numbers
        for cell in row[1:4]:
            float(cell)
        return True
    except Exception:
        return False
# = 產生指定類型的標準DoE設計點(LHS/Sobol/Halton/Uniform)=
def gen_design(design_type, n_params, n_samples, param_lows, param_highs, param_steps, seed):
    """Generate a space-filling design and map it onto the real parameter ranges.

    design_type is one of "LHS", "Sobol", "Halton", "Uniform"; anything else
    raises ValueError. A blank or zero seed means "unseeded".
    """
    # Parse the seed: None / blank string / 0 all disable seeding
    my_seed = None
    if seed is not None and str(seed).strip() != "" and int(seed) != 0:
        my_seed = int(seed)
    if design_type == "LHS":
        if my_seed is not None:
            np.random.seed(my_seed)  # pyDOE2's lhs draws from numpy's global RNG
        design = lhs(n_params, samples=n_samples, criterion='maximin')
    elif design_type == "Sobol":
        design = Sobol(d=n_params, scramble=True, seed=my_seed).random(n_samples)
    elif design_type == "Halton":
        design = Halton(d=n_params, scramble=True, seed=my_seed).random(n_samples)
    elif design_type == "Uniform":
        if my_seed is not None:
            np.random.seed(my_seed)
        design = np.random.rand(n_samples, n_params)
    else:
        raise ValueError("Unknown SFD type!")
    real_samples = np.zeros_like(design)
    for j, (low, high, step) in enumerate(zip(param_lows, param_highs, param_steps)):
        col = design[:, j] * (high - low) + low  # scale unit cube to [low, high]
        if step > 0:
            # Snap onto the step grid anchored at `low`
            col = np.round((col - low) / step) * step + low
        else:
            # step<=0: round to as many decimals as the step literal shows
            decimals = str(step)[::-1].find('.')
            col = np.round(col, decimals)
        real_samples[:, j] = np.clip(col, low, high)
    return pd.DataFrame(real_samples)
# = 2D參數分布圖(Plotly)=
def plot_scatter_2d(df, title):
    """2-D parameter scatter (Plotly) using the first two columns."""
    x_col, y_col = df.columns[0], df.columns[1]
    return px.scatter(df, x=x_col, y=y_col, title=title)
# = 3D參數分布圖(Plotly)=
def plot_scatter_3d(df, title):
    """3-D parameter scatter (Plotly) using the first three columns."""
    x_col, y_col, z_col = df.columns[0], df.columns[1], df.columns[2]
    return px.scatter_3d(df, x=x_col, y=y_col, z=z_col, title=title)
# = 多維參數成對散點圖(Plotly)=
def plot_pairplot(df, title):
    """Pairwise scatter matrix (Plotly) over all columns."""
    fig = px.scatter_matrix(df, title=title)
    return fig
# = PCA主成分降維分布圖(Plotly)=
def plot_pca(df, title):
    """Project all columns onto the first two principal components and plot."""
    components = PCA(n_components=2).fit_transform(df.values)
    frame = pd.DataFrame(components, columns=['PCA1', 'PCA2'])
    return px.scatter(frame, x='PCA1', y='PCA2', title=title + " (PCA降維)")
# = 依參數設定產生所有主流程設計法(四種)並比較分布 =
def compare_all_designs(param_table, n_samples, seed):
    """Generate all four space-filling designs (LHS/Sobol/Halton/Uniform) and,
    per design, return the DataFrame, a distribution figure, a CSV path, a
    description card and an auto conclusion — 20 outputs total.

    NOTE(review): the error branch also returns exactly 20 values to keep the
    Gradio output wiring consistent — preserve that count when editing.
    """
    all_types = ["LHS", "Sobol", "Halton", "Uniform"]
    outs = []
    if isinstance(param_table, pd.DataFrame):
        param_table = param_table.values.tolist()
    # Parse the parameter table; malformed rows are silently skipped
    param_names, param_lows, param_highs, param_steps = [], [], [], []
    for row in param_table:
        if not is_valid_row(row):
            continue
        try:
            param_names.append(str(row[0]).strip())
            param_lows.append(float(row[1]))
            param_highs.append(float(row[2]))
            param_steps.append(float(row[3]))
        except Exception:
            continue
    n_params = len(param_names)
    if n_params == 0:
        return pd.DataFrame({"提醒": ["請正確輸入至少一列參數"]}), None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None
    for des in all_types:
        df = gen_design(des, n_params, int(n_samples), param_lows, param_highs, param_steps, seed)
        df.columns = param_names
        # Pick the main distribution plot by dimensionality:
        # 2D/3D scatter, pairplot for 4-8 params, PCA projection beyond that
        if n_params == 2:
            mainfig = plot_scatter_2d(df, des + " 分布圖")
        elif n_params == 3:
            mainfig = plot_scatter_3d(df, des + " 分布圖")
        elif n_params >= 4 and n_params <= 8:
            mainfig = plot_pairplot(df, des + " Pairplot")
        else:
            mainfig = plot_pca(df, des + " PCA")
        with tempfile.NamedTemporaryFile(delete=False, suffix=f'_{des}_design.csv', mode='w', encoding='utf-8-sig') as tmpfile:
            df.to_csv(tmpfile, index=False)
        outs.extend([df, mainfig, tmpfile.name, get_viz_desc("DoE Main"), auto_conclude_viz(df, "DoE Main")])
    return outs
# [進階DoE: Box-Behnken/CCD設計]
# = 將標準化DoE設計矩陣轉換為實際參數 =
def doe_normal_to_actual(doe_matrix, param_info):
    """Map a coded DoE matrix (−1..+1 levels) onto actual parameter values.

    Fix: removed the unused `param_names` accumulator.

    Args:
        doe_matrix: 2-D array-like of coded levels, one column per parameter.
        param_info: iterable of (name, min, max, ...) per parameter; entries
            beyond the first three are ignored.

    Returns:
        DataFrame of real-valued settings, rounded to 6 decimals.
    """
    df = pd.DataFrame(doe_matrix)
    df_out = pd.DataFrame()
    for i, info in enumerate(param_info):
        name, pmin, pmax, *_ = info
        pmin = float(pmin)
        pmax = float(pmax)
        vals = df.iloc[:, i].values
        mid = (pmin + pmax) / 2          # range center (coded level 0)
        half_range = (pmax - pmin) / 2   # distance from center to either bound
        df_out[name] = mid + vals * half_range
    return df_out.round(6)
# = 產生Box-Behnken或CCD設計法的標準化/實際參數表 =
def advanced_doe_with_mapping(param_table, design_type):
    """Build a Box-Behnken or CCD design: coded table, real table, two CSV paths.

    Fix: tempfile.mkstemp returned open file descriptors that were never
    closed (fd leak); replaced with NamedTemporaryFile closed before pandas
    writes to the path.

    Args:
        param_table: DataFrame or list of [name, low, high, step] rows.
        design_type: "Box-Behnken" or "CCD".

    Returns:
        (coded DataFrame, real DataFrame, coded CSV path, real CSV path);
        on invalid input the first element is a one-cell warning DataFrame
        and both paths are None.
    """
    param_list = []
    values = param_table.values.tolist() if isinstance(param_table, pd.DataFrame) else param_table
    for row in values:
        try:
            if not str(row[0]).strip():
                continue  # skip rows without a parameter name
            param_list.append([row[0], float(row[1]), float(row[2]), float(row[3])])
        except Exception:
            continue  # silently drop malformed rows (matches permissive UI input)
    n_param = len(param_list)
    if n_param < 2:
        return (pd.DataFrame({"提醒": ["請至少輸入2個參數"]}),
                pd.DataFrame(),
                None, None)
    # Coded (−1..+1) design matrix
    if design_type == "Box-Behnken":
        mat = bbdesign(n_param, center=1)
    elif design_type == "CCD":
        mat = ccdesign(n_param, center=(1, 1), face='ccc')
    else:
        return (pd.DataFrame({"提醒": ["不支援的設計法"]}),
                pd.DataFrame(),
                None, None)
    colnames = [f"X{i+1}" for i in range(n_param)]
    df_std = pd.DataFrame(mat, columns=colnames)
    # Map coded levels onto the actual parameter ranges
    df_real = doe_normal_to_actual(mat, param_list)
    # Temp CSVs for download; close handles before pandas re-opens the paths
    std_tmp = tempfile.NamedTemporaryFile(delete=False, suffix="_std.csv")
    std_tmp.close()
    df_std.to_csv(std_tmp.name, index=False)
    real_tmp = tempfile.NamedTemporaryFile(delete=False, suffix="_real.csv")
    real_tmp.close()
    df_real.to_csv(real_tmp.name, index=False)
    return df_std, df_real, std_tmp.name, real_tmp.name
# [AI建模/特徵重要性/SHAP]
# = 獲取模型特徵重要性圖與摘要 =
def get_feature_importance(model, feature_names):
    """Return (bar figure, summary text) for models exposing feature_importances_.

    Models without the attribute yield (None, explanatory message).
    """
    if not hasattr(model, "feature_importances_"):
        return None, "此模型無 feature_importances_"
    importances = model.feature_importances_
    order = np.argsort(importances)[::-1]  # most important first
    fig = px.bar(
        x=[feature_names[i] for i in order],
        y=importances[order],
        orientation='v', title="特徵重要性(Feature Importance)",
        labels={"x":"特徵","y":"重要性"}
    )
    top3 = ", ".join(feature_names[i] for i in order[:3])
    summary = f"最重要特徵前三名:{top3}"
    return fig, summary
# = 產生SHAP特徵解釋圖像檔 =
def get_shap_summary(model, X, feature_names):
    """Render a SHAP summary plot for a tree model and return the PNG path."""
    shap_values = shap.TreeExplainer(model).shap_values(X)
    plt.figure(figsize=(7, 4))
    shap.summary_plot(shap_values, X, feature_names=feature_names, show=False)
    out_png = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
    plt.tight_layout()
    plt.savefig(out_png, format="png", bbox_inches='tight')
    plt.close()
    return out_png.name
# = 訓練多模型、產生預測與特徵重要性/SHAP圖,返回各指標 =
def train_and_predict_with_importance(datafile, algos, test_ratio, show_importance=True, show_shap=False):
    """Train the selected regressors, compare test-set predictions, and
    optionally produce feature-importance / SHAP artifacts.

    Args:
        datafile: uploaded CSV (all columns but the last are features).
        algos: list of algorithm display names to train; unknown names skipped.
        test_ratio: fraction held out for the test split.
        show_importance: build one importance bar chart (first capable model).
        show_shap: build one SHAP summary image (first capable model).

    Returns:
        (scatter figure, results DataFrame, viz description card,
         joined auto-conclusion HTML, feature-importance figure or None,
         SHAP image path or None). A guard tuple of the same arity is
         returned when input is missing.
    """
    if datafile is None or algos is None or len(algos) == 0:
        return None, pd.DataFrame({"提醒": ["請上傳DoE資料並選擇演算法"]}), "", "", None, ""
    df = pd.read_csv(datafile.name if hasattr(datafile, "name") else datafile)
    X = df.iloc[:, :-1].values
    y = df.iloc[:, -1].values
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_ratio, random_state=42)
    feature_names = list(df.columns[:-1])
    results = []
    y_pred_dict = {}
    outlines = []
    feature_fig = None
    feature_summary = ""
    shap_img = None
    for algo in algos:
        try:
            # StandardScaler pipelines keep scale-sensitive models stable on
            # small datasets.
            if algo == "Random Forest":
                model = RandomForestRegressor(n_estimators=150, random_state=0)
            elif algo == "XGBoost":
                model = xgb.XGBRegressor(n_estimators=120, random_state=0, verbosity=0)
            elif algo == "LightGBM":
                model = lgb.LGBMRegressor(n_estimators=120, random_state=0)
            elif algo == "SVR":
                model = make_pipeline(StandardScaler(), SVR())
            elif algo == "Linear Regression":
                model = make_pipeline(StandardScaler(), LinearRegression())
            elif algo == "Lasso":
                model = make_pipeline(StandardScaler(), Lasso(alpha=0.0001, max_iter=5000))
            elif algo == "Ridge":
                model = make_pipeline(StandardScaler(), Ridge())
            elif algo == "ElasticNet":
                model = make_pipeline(StandardScaler(), ElasticNet(alpha=0.0001))
            else:
                continue
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            # Guard against a scalar prediction
            if np.isscalar(y_pred):
                y_pred = np.full_like(y_test, y_pred)
            rmse = np.sqrt(mean_squared_error(y_test, y_pred))
            r2 = r2_score(y_test, y_pred)
            results.append({
                "模型": algo,
                "測試RMSE": rmse,
                "測試R²": r2,
                "訓練數": len(X_train),
                "測試數": len(X_test),
            })
            y_pred_dict[algo] = y_pred
            outlines.append(auto_conclude_ai(y_test, y_pred, algo))
            # Only non-pipeline tree models expose feature_importances_ here;
            # the first capable model fills the single importance/SHAP slot.
            if show_importance and feature_fig is None and hasattr(getattr(model, "named_steps", model), "feature_importances_"):
                base_model = model.named_steps.get(list(model.named_steps)[-1], model) if hasattr(model, "named_steps") else model
                feature_fig, feature_summary = get_feature_importance(base_model, feature_names)
            if show_shap and shap_img is None and hasattr(getattr(model, "named_steps", model), "feature_importances_"):
                base_model = model.named_steps.get(list(model.named_steps)[-1], model) if hasattr(model, "named_steps") else model
                shap_img = get_shap_summary(base_model, X_test, feature_names)
        except Exception as e:
            print(f"模型 {algo} 失敗: {e}")
            continue
    res_df = pd.DataFrame(results)
    # Fixed per-model colors/markers so the scatter legend stays stable
    model_colors = {
        "Random Forest": "#7c82f6", "XGBoost": "#ff686b", "LightGBM": "#54b984", "SVR": "#7e4a99",
        "Linear Regression": "#229aff", "Lasso": "#f8d90f", "Ridge": "#9edafe", "ElasticNet": "#f9a15b"
    }
    model_markers = {
        "Random Forest": "circle", "XGBoost": "diamond", "LightGBM": "triangle-up", "SVR": "square",
        "Linear Regression": "star", "Lasso": "cross", "Ridge": "hexagon", "ElasticNet": "x"
    }
    fig = px.scatter()
    for algo, y_pred in y_pred_dict.items():
        # Plot only valid prediction vectors
        if y_pred is not None and len(y_pred) == len(y_test) and not np.isnan(y_pred).all():
            fig.add_scatter(
                x=y_test, y=y_pred, mode='markers', name=algo,
                marker=dict(
                    size=13, color=model_colors.get(algo, "#888"),
                    symbol=model_markers.get(algo, "circle"),
                    line=dict(width=1.5, color="#222")
                ),
                showlegend=True
            )
    # Ideal y=x reference line
    minv, maxv = np.min(y_test), np.max(y_test)
    fig.add_scatter(
        x=[minv, maxv], y=[minv, maxv], mode='lines', name='Ideal',
        line=dict(dash='dash', color='black'), showlegend=True
    )
    fig.update_layout(
        title="Test Set Prediction(預測/實際)",
        xaxis_title="True Output", yaxis_title="Predicted",
        legend=dict(font=dict(size=17)),
        margin=dict(l=40, r=20, t=60, b=40)
    )
    fig.update_yaxes(scaleanchor="x", scaleratio=1)
    return fig, res_df, get_viz_desc("AI Predict"), "<br>".join(outlines), feature_fig, shap_img
# [多圖資料視覺化/自動建議卡片]
# = 產生多種資料視覺化圖與摘要(最多五種)=
def multi_viz(file, plot_types):
    """Build up to five visualizations with description and conclusion cards.

    Fixes: removed the debug print and the `assert` length check (asserts are
    stripped under -O); heatmap now uses corr(numeric_only=True), which
    pandas>=2.0 requires for frames with non-numeric columns.

    Returns:
        A flat 15-element list [fig1, desc1, concl1, ..., fig5, desc5, concl5];
        unused slots are padded with (None, "", "").
    """
    figs, descs, sums = [], [], []
    # Guard: nothing uploaded / nothing selected -> 15 empty outputs
    if file is None or not plot_types:
        return [None, "", ""] * 5
    df = pd.read_csv(file.name if hasattr(file, "name") else file)
    plot_types = plot_types[:5]  # at most five plots
    # Each plot type is attempted independently; a failure leaves an empty slot
    for t in plot_types:
        fig, desc, summ = None, "", ""
        try:
            if t == "Heatmap":
                fig = px.imshow(df.corr(numeric_only=True), text_auto=True, title="相關係數Heatmap")
                desc = get_viz_desc("Heatmap")
                summ = auto_conclude_viz(df, "Heatmap")
            elif t == "Pairplot":
                fig = px.scatter_matrix(df, title="資料 Pairplot")
                desc = get_viz_desc("Pairplot")
                summ = auto_conclude_viz(df, "Pairplot")
            elif t == "Histogram":
                fig = px.histogram(df, nbins=10, title="資料 Histogram")
                desc = get_viz_desc("Histogram")
                summ = auto_conclude_viz(df, "Histogram")
            elif t == "Scatter Matrix":
                fig = px.scatter_matrix(df, title="Scatter Matrix")
                desc = get_viz_desc("Scatter Matrix")
                summ = auto_conclude_viz(df, "Scatter Matrix")
            elif t == "PCA":
                X = df.values
                pca = PCA(n_components=2)
                X_pca = pca.fit_transform(X)
                df_pca = pd.DataFrame(X_pca, columns=['PCA1', 'PCA2'])
                fig = px.scatter(df_pca, x='PCA1', y='PCA2', title="PCA降維")
                desc = get_viz_desc("PCA")
                summ = auto_conclude_viz(df, "PCA")
        except Exception as e:
            print(f"[multi_viz error]: {t}", e)
            fig, desc, summ = None, "", ""
        figs.append(fig)
        descs.append(desc)
        sums.append(summ)
    # Pad to exactly five triples
    while len(figs) < 5:
        figs.append(None)
        descs.append("")
        sums.append("")
    # Interleave into [fig, desc, concl, ...]; length is 15 by construction
    outs = []
    for i in range(5):
        outs.extend([figs[i], descs[i], sums[i]])
    return outs
# = 根據資料欄位自動產生系統建議 =
def auto_recommendation(file):
    """Generate up to five data-quality suggestions (HTML) for an uploaded CSV.

    Fixes: corr(numeric_only=True) so non-numeric columns no longer raise
    under pandas>=2.0, and correlated pairs are walked over the upper
    triangle only so each pair is reported once instead of twice.
    """
    df = pd.read_csv(file.name if hasattr(file, "name") else file)
    tips = []
    for col in df.columns:
        if df[col].isnull().sum() > 0:
            tips.append(f"「{col}」有缺值,建議補值或刪除")
        if df[col].nunique() == 1:
            tips.append(f"「{col}」為常數欄,建議刪除")
        if pd.api.types.is_numeric_dtype(df[col]):
            sk = skew(df[col].dropna())
            if abs(sk) > 2:
                tips.append(f"「{col}」嚴重偏態(skew={sk:.2f}),建議標準化")
    cor = df.corr(numeric_only=True).abs()
    cols = list(cor.columns)
    # Upper triangle only: report each correlated pair once
    for i, c1 in enumerate(cols):
        for c2 in cols[i + 1:]:
            if cor.loc[c1, c2] > 0.8:
                tips.append(f"「{c1}」與「{c2}」高度相關,建議後續特徵選擇")
    if not tips:
        tips = ["資料品質良好,無明顯異常。"]
    else:
        tips = tips[:5]  # cap the card at five suggestions
    return "<b>系統建議:</b><ul style='margin:0 0 0 12px'>" + "".join([f"<li>{t}</li>" for t in tips]) + "</ul>"
# = 多圖視覺化+自動建議卡片綁定 =
def multi_viz_and_recommend(file, plot_types):
    """Bundle multi_viz's 15 outputs with the auto-recommendation card.

    Fix: removed the leftover debug prints (type/length tracing).

    Returns:
        A 16-tuple: the 15 interleaved visualization outputs followed by the
        recommendation HTML.
    """
    vis = multi_viz(file, plot_types)
    recomm = auto_recommendation(file)
    return (*vis, recomm)
# --- Multi-plot visualization UI wiring ---
with gr.Blocks() as demo:
    # Data upload + plot-type picker
    upfile2 = gr.File(label="上傳檔案")
    plot_select = gr.CheckboxGroup(
        choices=["Heatmap", "Pairplot", "Histogram", "Scatter Matrix", "PCA"],
        value=["Heatmap", "Pairplot", "Histogram", "PCA"],
        label="選擇圖像類型"
    )
    # 15 output slots: 5 plots then 10 markdowns.
    # NOTE(review): multi_viz returns interleaved [fig, desc, concl] triples,
    # but this list is grouped (5 Plots followed by 10 Markdowns) — the
    # positional mapping looks mismatched; verify against the running UI.
    vis_outs = [gr.Plot() for _ in range(5)] + [gr.Markdown() for _ in range(10)]
    recomm_card = gr.Markdown()  # 16th output: auto-recommendation card
    vizbtn = gr.Button("產生多圖分析", elem_classes=["main-btn"])
    # Button wires (file, plot types) -> 15 viz outputs + 1 recommendation
    vizbtn.click(
        lambda f, t: (*multi_viz(f, t), auto_recommendation(f)),
        inputs=[upfile2, plot_select],
        outputs=vis_outs + [recomm_card]
    )
# [貝葉斯優化/超參數搜尋]
# Per-model hyper-parameter search spaces for BayesSearchCV:
# name -> (estimator instance, skopt search-space dict).
# log-uniform priors sample multiplicative parameters evenly across magnitudes.
model_spaces = {
    "XGBoost": (
        XGBRegressor(verbosity=0, random_state=42),
        {
            "max_depth": Integer(2, 10),
            "n_estimators": Integer(50, 300),
            "learning_rate": Real(0.01, 0.2, prior="log-uniform")
        }
    ),
    "Random Forest": (
        RandomForestRegressor(random_state=42),
        {
            "max_depth": Integer(2, 15),
            "n_estimators": Integer(50, 300)
        }
    ),
    "LightGBM": (
        LGBMRegressor(random_state=42),
        {
            "max_depth": Integer(2, 15),
            "n_estimators": Integer(50, 300),
            "learning_rate": Real(0.01, 0.2, prior="log-uniform")
        }
    ),
    "SVR": (
        SVR(),
        {
            "C": Real(0.01, 100, prior="log-uniform"),
            "gamma": Real(0.001, 1.0, prior="log-uniform")
        }
    )
}
# = 格式化最佳參數組合摘要(Markdown)=
def format_best_params(best_params, best_score):
    """Format the best hyper-parameter set and its CV RMSE as an HTML snippet."""
    items = "".join(f"<li><b>{k}</b>: {v}</li>" for k, v in best_params.items())
    return (
        "<b>最佳參數組合:</b><ul style='margin:0 0 0 16px;'>"
        + items
        + f"</ul><b>最佳CV RMSE:</b> {best_score:.4f}"
    )
# = Single-model Bayesian hyper-parameter tuning: convergence curve + auto summary =
def run_bayes_optimization(datafile, model_name, n_iter=20):
    """Tune one model's hyper-parameters with BayesSearchCV on an uploaded CSV.

    The CSV's last column is treated as the target; all others are features.

    Args:
        datafile: Gradio file object (or path string) for the CSV.
        model_name: key into the module-level model_spaces mapping.
        n_iter: number of Bayesian optimization iterations.

    Returns:
        (best-params HTML, convergence matplotlib Figure, figure caption,
         auto-generated conclusion markdown), or an error tuple on bad input.
    """
    if datafile is None:
        return "請上傳CSV資料", None, "", ""
    df = pd.read_csv(datafile.name if hasattr(datafile, "name") else datafile)
    # Convention: every column except the last is a feature; last column is y.
    X, y = df.iloc[:, :-1], df.iloc[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    model, search_space = model_spaces.get(model_name, (None, None))
    if model is None:
        return "不支援的模型", None, "", ""
    if not search_space:
        # No tunable hyper-parameters: fit once and report the test R².
        model.fit(X_train, y_train)
        score = model.score(X_test, y_test)
        return f"{model_name}為單純模型,無可優化超參數。\n測試集R2={score:.3f}", None, "", ""
    opt = BayesSearchCV(
        model,
        search_spaces=search_space,
        n_iter=n_iter,
        scoring='neg_root_mean_squared_error',
        cv=3,
        n_jobs=-1,
        random_state=42,
        verbose=0
    )
    opt.fit(X_train, y_train)
    best_params = opt.best_params_
    best_score = -opt.best_score_  # scoring is negated RMSE; flip the sign back
    results = opt.cv_results_["mean_test_score"]
    rmse_curve = -1 * results  # convert back to positive RMSE values
    fig, ax = plt.subplots(figsize=(6,3))
    ax.plot(rmse_curve, marker='o')
    ax.set_title("優化歷程 (CV RMSE)")
    ax.set_xlabel("Iteration")
    ax.set_ylabel("CV RMSE")
    ax.grid(True)
    plt.tight_layout()
    # Close to avoid duplicate rendering; Gradio displays the returned fig object.
    plt.close(fig)
    # Auto-interpret the convergence trend into a markdown conclusion.
    auto_summary = auto_conclude_bayes_curve(rmse_curve, model_name)
    return (
        format_best_params(best_params, best_score),  # best hyper-parameter summary (markdown)
        fig,                                          # RMSE convergence figure
        get_viz_desc("Bayesian Optimization"),        # figure caption
        auto_summary                                  # auto-generated conclusion
    )
# = Multi-model simultaneous Bayesian optimization: one convergence curve per model =
def run_multi_bayes_optimization(datafile, model_types, n_iter=20):
    """Run BayesSearchCV for several models and build combined/per-model outputs.

    Returns a fixed-length flat list — [combined plotly figure] followed by
    (figure, summary, best-params) triples in MODEL_ORDER, with placeholders
    for unselected models — so it always matches the Gradio outputs wiring.
    """
    # Model order expected by the UI output slots.
    MODEL_ORDER = ["Random Forest", "XGBoost", "LightGBM", "SVR"]
    N_PER_MODEL = 3  # [fig, summary, best_param]
    if datafile is None or not model_types:
        # No data or no models selected: return placeholders for every slot.
        return [None] + [None, "", ""] * len(MODEL_ORDER)
    df = pd.read_csv(datafile.name if hasattr(datafile, "name") else datafile)
    X, y = df.iloc[:, :-1], df.iloc[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    rmse_curves = {}
    summaries = {}
    best_params_dict = {}
    for mtype in model_types:
        model, search_space = model_spaces.get(mtype, (None, None))
        if model is None or not search_space:
            rmse_curves[mtype] = [np.nan]
            summaries[mtype] = f"{mtype} 無法執行最佳化"
            best_params_dict[mtype] = f"{mtype} 不支援"
            continue
        opt = BayesSearchCV(
            model,
            search_spaces=search_space,
            n_iter=n_iter,
            scoring='neg_root_mean_squared_error',
            cv=3,
            n_jobs=-1,
            random_state=42,
            verbose=0
        )
        opt.fit(X_train, y_train)
        # Scoring is negated RMSE; flip the sign back to a positive curve.
        rmse_curve = -1 * opt.cv_results_["mean_test_score"]
        rmse_curves[mtype] = rmse_curve
        summaries[mtype] = auto_conclude_bayes_curve(rmse_curve, mtype)
        best_params_dict[mtype] = format_best_params(opt.best_params_, -opt.best_score_)
    # Combined interactive figure overlaying every selected model's curve.
    fig = go.Figure()
    for mtype in model_types:
        curve = rmse_curves.get(mtype, [])
        fig.add_trace(go.Scatter(
            x=list(range(len(curve))),
            y=curve,
            mode='lines+markers',
            name=mtype
        ))
    fig.update_layout(
        title="所有模型Bayes優化歷程 (CV RMSE)",
        xaxis_title="Iteration",
        yaxis_title="CV RMSE",
        hovermode="x unified"
    )
    outs = [fig]
    # Keep the outputs list at a fixed length regardless of which models were chosen.
    for mtype in MODEL_ORDER:
        if mtype in model_types:
            # Selected model: emit its per-model figure plus summaries.
            curve = rmse_curves.get(mtype, [])
            fig_tab = go.Figure(go.Scatter(
                x=list(range(len(curve))),
                y=curve,
                mode='lines+markers',
                name=mtype
            ))
            summary = summaries.get(mtype, "")
            best_param = best_params_dict.get(mtype, "")
            outs.extend([fig_tab, summary, best_param])
        else:
            outs.extend([None, "", ""])
    return outs
# [AI search for optimal conditions]
# = Auto-detect column types (continuous/discrete) to build a search space =
def suggest_optimization_space(df, discrete_cols=None):
    """Build a skopt search space from a DoE dataframe.

    All columns except the last (assumed to be the response) become dimensions:
    columns listed in ``discrete_cols`` — or with fewer than 8 distinct values —
    become Categorical; the rest become Real bounded by the observed min/max.

    Args:
        df: DataFrame whose last column is the response variable.
        discrete_cols: optional iterable of column names to force-treat as
            categorical. (Was a mutable default ``[]`` — fixed to ``None``.)

    Returns:
        dict mapping feature column name -> skopt dimension object.
    """
    if discrete_cols is None:
        discrete_cols = []
    space = {}
    for col in df.columns[:-1]:
        if col in discrete_cols:
            vals = sorted(df[col].unique())
            space[col] = Categorical(vals)
        else:
            vmin, vmax = df[col].min(), df[col].max()
            # Few distinct levels: treat as categorical even if numeric.
            if len(df[col].unique()) < 8:
                space[col] = Categorical(sorted(df[col].unique()))
            else:
                space[col] = Real(vmin, vmax)
    return space
# = Search a single model's predicted best/worst condition combination =
def find_best_feature(model, X, maximize=True):
    """Search the per-feature box [min, max] for the model's predicted extremum.

    Uses scipy.optimize.minimize starting from the column means, bounded by the
    observed per-feature min/max, negating predictions when maximizing.

    Args:
        model: fitted regressor exposing .predict on a (1, n_features) array.
        X: DataFrame of training features; defines bounds and the start point.
        maximize: True to search for the predicted maximum, False for minimum.

    Returns:
        (best_x, best_pred): optimal feature vector and the model's prediction.
    """
    # Per-feature box constraints from the observed data ranges.
    bounds = [(X.iloc[:, i].min(), X.iloc[:, i].max()) for i in range(X.shape[1])]

    def obj(x):
        # Debug prints removed: the objective runs many times per optimization.
        y_pred = model.predict(np.array(x).reshape(1, -1))[0]
        return -y_pred if maximize else y_pred

    x0 = X.mean().values
    res = minimize(obj, x0, bounds=bounds)
    best_x = res.x
    best_pred = -res.fun if maximize else res.fun
    return best_x, best_pred
# = Multi-model search for optimal parameter combinations (maximize/minimize) =
def optimize_conditions(datafile, model_types, direction, is_discrete=False, n_iter=32):
    """Fit each selected model on the DoE data, then search its predicted extremum.

    Args:
        datafile: uploaded CSV (last column is the response).
        model_types: list of model-name strings; unknown names are skipped.
        direction: optimization direction; any string containing「大」maximizes.
        is_discrete: unused placeholder kept for interface compatibility.
        n_iter: unused placeholder kept for interface compatibility.

    Returns:
        (results_df, html_summary, desc, extra): best conditions per model, an
        HTML summary, plus two reserved strings (kept for the UI wiring).
    """
    if datafile is None or not model_types:
        return pd.DataFrame({"提醒": ["請上傳DoE數據與選擇模型"]}), "", "", ""
    df = pd.read_csv(datafile.name if hasattr(datafile, "name") else datafile)
    X, y = df.iloc[:, :-1], df.iloc[:, -1]
    # A「大」in the direction label means maximize; anything else minimizes.
    # (Previously computed twice — once as direction == "最大化" and then
    # overwritten inside the loop — consolidated to the effective rule.)
    maximize = "大" in str(direction)
    # Model factories keyed by UI name; scale-sensitive models get standardization.
    factories = {
        "Random Forest": lambda: RandomForestRegressor(n_estimators=160, random_state=42),
        "XGBoost": lambda: XGBRegressor(n_estimators=100, random_state=42, verbosity=0),
        "LightGBM": lambda: LGBMRegressor(n_estimators=100, random_state=42),
        "SVR": lambda: make_pipeline(StandardScaler(), SVR()),
        "Linear Regression": lambda: make_pipeline(StandardScaler(), LinearRegression()),
        "Lasso": lambda: make_pipeline(StandardScaler(), Lasso(alpha=0.0001, max_iter=5000)),
        "Ridge": lambda: make_pipeline(StandardScaler(), Ridge()),
        "ElasticNet": lambda: make_pipeline(StandardScaler(), ElasticNet(alpha=0.0001)),
    }
    summary = []
    all_results = []
    for mtype in model_types:
        factory = factories.get(mtype)
        if factory is None:
            continue  # silently skip unsupported model names (original behavior)
        model = factory()
        model.fit(X, y)
        best_x, best_pred = find_best_feature(model, X, maximize=maximize)
        feature_dict = {k: float(v) for k, v in zip(X.columns, best_x)}
        feature_dict["模型"] = mtype
        feature_dict["預測產率"] = float(best_pred)
        all_results.append(feature_dict)
        summary.append(
            f"【{mtype}】最佳反應條件:{feature_dict}<br>預測產率/目標值:<b>{best_pred:.3g}</b><br>"
        )
    df_out = pd.DataFrame(all_results)
    txt = "<br>".join(summary)
    desc = ""  # reserved: e.g. "based on multi-model theoretical extrema"
    sum_ = ""  # reserved: model characteristics / notes
    return df_out, txt, desc, sum_
# [AI model before/after regression comparison]
# = Single y: compare multiple models' R2/RMSE and feature-importance shifts =
def compare_models_before_after(old_csv, new_csv, model_linear, model_nonlinear, target):
    """Compare model performance on the original vs merged DoE datasets.

    Args:
        old_csv, new_csv: uploaded CSVs sharing the same columns.
        model_linear, model_nonlinear: lists of model-name strings (merged).
        target: name of the response column, present in both files.

    Returns:
        (status message, metrics DataFrame, feature-importance figure,
         y-distribution figure); message is "" on success.
    """
    try:
        # Merge the user's linear and non-linear model selections.
        model_types = (model_linear or []) + (model_nonlinear or [])
        if old_csv is None or new_csv is None or not model_types:
            return "⚠️ 請上傳原始/合併DoE並選擇模型", pd.DataFrame(), None, None
        old_df = pd.read_csv(old_csv.name if hasattr(old_csv, "name") else old_csv)
        new_df = pd.read_csv(new_csv.name if hasattr(new_csv, "name") else new_csv)
        if target not in old_df.columns or target not in new_df.columns:
            return f"⚠️ 缺少 '{target}' 欄位", pd.DataFrame(), None, None
        X_old, y_old = old_df.drop(columns=[target]), old_df[target]
        X_new, y_new = new_df.drop(columns=[target]), new_df[target]
        # Cap CV folds at the smaller dataset's row count.
        cv_num = min(5, len(y_old), len(y_new))
        if cv_num < 2:
            return "⚠️ 資料筆數太少,無法交叉驗證(至少需2列)", pd.DataFrame(), None, None
        rows = []
        importances = {}
        for mtype in model_types:
            # Two independent instances: model1 fits the old data, model2 the new.
            if mtype == "Random Forest":
                model1 = RandomForestRegressor(n_estimators=120, random_state=42)
                model2 = RandomForestRegressor(n_estimators=120, random_state=42)
            elif mtype == "XGBoost":
                model1 = XGBRegressor(n_estimators=100, random_state=42, verbosity=0)
                model2 = XGBRegressor(n_estimators=100, random_state=42, verbosity=0)
            elif mtype == "LightGBM":
                model1 = LGBMRegressor(n_estimators=100, random_state=42)
                model2 = LGBMRegressor(n_estimators=100, random_state=42)
            elif mtype == "SVR":
                model1 = make_pipeline(StandardScaler(), SVR())
                model2 = make_pipeline(StandardScaler(), SVR())
            elif mtype == "Linear Regression":
                model1 = make_pipeline(StandardScaler(), LinearRegression())
                model2 = make_pipeline(StandardScaler(), LinearRegression())
            elif mtype == "Lasso":
                model1 = make_pipeline(StandardScaler(), Lasso(alpha=0.0001, max_iter=5000))
                model2 = make_pipeline(StandardScaler(), Lasso(alpha=0.0001, max_iter=5000))
            elif mtype == "Ridge":
                model1 = make_pipeline(StandardScaler(), Ridge())
                model2 = make_pipeline(StandardScaler(), Ridge())
            elif mtype == "ElasticNet":
                model1 = make_pipeline(StandardScaler(), ElasticNet(alpha=0.0001))
                model2 = make_pipeline(StandardScaler(), ElasticNet(alpha=0.0001))
            else:
                continue
            try:
                # R² (CV)
                r2_old = cross_val_score(model1, X_old, y_old, cv=cv_num, scoring="r2").mean()
                r2_new = cross_val_score(model2, X_new, y_new, cv=cv_num, scoring="r2").mean()
                # RMSE (fit on all, compute on all) — training-set RMSE, not CV.
                model1.fit(X_old, y_old)
                model2.fit(X_new, y_new)
                pred_old = model1.predict(X_old)
                pred_new = model2.predict(X_new)
                rmse_old = np.sqrt(np.mean((pred_old - y_old) ** 2))
                rmse_new = np.sqrt(np.mean((pred_new - y_new) ** 2))
                # Feature importance: tree models, pipeline-wrapped models,
                # then coefficient magnitudes; zero vector as a last resort.
                if hasattr(model1, "feature_importances_"):
                    imp_old = model1.feature_importances_
                    imp_new = model2.feature_importances_
                elif hasattr(model1, "named_steps") and "randomforestregressor" in model1.named_steps:
                    imp_old = model1.named_steps["randomforestregressor"].feature_importances_
                    imp_new = model2.named_steps["randomforestregressor"].feature_importances_
                elif hasattr(model1, "coef_"):
                    imp_old = np.abs(model1.coef_)
                    imp_new = np.abs(model2.coef_)
                elif hasattr(model1, "named_steps") and "linearregression" in model1.named_steps:
                    imp_old = np.abs(model1.named_steps["linearregression"].coef_)
                    imp_new = np.abs(model2.named_steps["linearregression"].coef_)
                else:
                    imp_old = np.zeros(X_old.shape[1])
                    imp_new = np.zeros(X_new.shape[1])
                rows.append({
                    "模型": mtype,
                    "原始R2": r2_old,
                    "合併新點R2": r2_new,
                    "原始RMSE": rmse_old,
                    "合併新點RMSE": rmse_new
                })
                # Only store importances for models that computed successfully.
                importances[mtype+"_old"] = imp_old
                importances[mtype+"_new"] = imp_new
            except Exception as model_e:
                # Record the failure in the table so the UI still shows the row.
                rows.append({
                    "模型": mtype,
                    "原始R2": f"Error: {model_e}",
                    "合併新點R2": f"Error: {model_e}",
                    "原始RMSE": f"Error: {model_e}",
                    "合併新點RMSE": f"Error: {model_e}"
                })
                # Skip storing importances for failed models.
                print(f"{mtype} failed: {model_e}")
                continue
        table = pd.DataFrame(rows)
        # ========== Feature Importance Plot (only non-zero importances) ==========
        fig_fi = go.Figure()
        features = list(X_old.columns)
        colors = [
            "#4E79A7", "#F28E2B", "#76B7B2", "#E15759",
            "#59A14F", "#EDC948", "#B07AA1", "#FF9DA7"
        ]
        plot_count = 0  # advances the color index per plotted trace
        for mtype in model_types:
            k_old = f"{mtype}_old"
            k_new = f"{mtype}_new"
            # Only plot traces whose importances are non-trivial.
            if k_old in importances and np.sum(importances[k_old]) > 0:
                fig_fi.add_trace(go.Bar(
                    x=features, y=importances[k_old],
                    name=f"{mtype} - Before", marker_color=colors[plot_count % len(colors)], opacity=0.75
                ))
                plot_count += 1
            if k_new in importances and np.sum(importances[k_new]) > 0:
                fig_fi.add_trace(go.Bar(
                    x=features, y=importances[k_new],
                    name=f"{mtype} - After", marker_color=colors[plot_count % len(colors)], opacity=0.4
                ))
                plot_count += 1
        fig_fi.update_layout(
            barmode="group",
            title="Feature Importance Comparison",
            xaxis_title="Feature",
            yaxis_title="Importance",
            legend_title="Model",
            font=dict(size=13)
        )
        # ========== y Distribution Plot ==========
        fig_y = go.Figure()
        fig_y.add_trace(go.Histogram(
            x=old_df[target], name="Before", opacity=0.7, nbinsx=16
        ))
        fig_y.add_trace(go.Histogram(
            x=new_df[target], name="After", opacity=0.7, nbinsx=16
        ))
        fig_y.update_layout(
            barmode='overlay',
            title="y Distribution Comparison",
            xaxis_title=target,
            yaxis_title="Count",
            legend_title="Dataset",
            font=dict(size=13)
        )
        return "", table, fig_fi, fig_y
    except Exception as e:
        import traceback
        tb = traceback.format_exc()
        print("=== DEBUG ERROR ===")
        print(tb)
        return f"❌ 系統發生錯誤:{str(e)}", pd.DataFrame(), None, None
# = Auto-generate a performance summary from the R² change =
def generate_r2_summary(table):
    """Summarize per-model R² change between original and merged datasets.

    Classifies each model as degraded / improved / unchanged using a ±0.1
    threshold on the R² delta. Fixes: (1) deltas of exactly ±0.1 previously
    fell through every branch and produced no line — they now count as
    "no change"; (2) the two R² values were concatenated with no separator
    (an arrow apparently lost) — restored as "→".

    Args:
        table: DataFrame with columns 模型, 原始R2, 合併新點R2 (values may be
            error strings, which are reported as failures).

    Returns:
        Markdown string with one line per model.
    """
    lines = []
    for idx, row in table.iterrows():
        model = row['模型']
        try:
            r2_before = float(row['原始R2'])
            r2_after = float(row['合併新點R2'])
            if r2_after < r2_before - 0.1:
                lines.append(f"**{model}:表現顯著下降(R² {r2_before:.2f} → {r2_after:.2f})**")
            elif r2_after > r2_before + 0.1:
                lines.append(f"{model}:模型表現提升(R² {r2_before:.2f} → {r2_after:.2f})")
            else:
                # Includes |delta| == 0.1 exactly, which the old code dropped.
                lines.append(f"{model}:R²無明顯變化(R² {r2_before:.2f} → {r2_after:.2f})")
        except Exception:
            lines.append(f"{model}:計算失敗或資料不足。")
    if not lines:
        lines = ["無有效模型結果。"]
    return "### AI模型R²比較摘要\n" + "\n".join(lines)
# = Combine the before/after regression comparison with an auto summary =
def compare_models_before_after_with_summary(old_csv, new_csv, model_types, target="y"):
    """Run the before/after model comparison and append an R² summary.

    Bug fixes: compare_models_before_after takes separate linear/non-linear
    model lists plus a target (5 args) — the old call passed only 4; and it
    summarized result[0] (the status message) instead of result[1] (the table).

    Returns:
        (message, table, fig_importance, fig_ydist, summary_markdown)
    """
    result = compare_models_before_after(old_csv, new_csv, model_types, [], target)
    table = result[1]  # result = (message, table, fig_fi, fig_y)
    summary = generate_r2_summary(table)
    return (*result, summary)
# = Multi-y targets: compare model performance and feature-importance shifts before/after =
def compare_models_multi_y_before_after(old_csv, new_csv, model_types, targets):
    """Repeat the before/after model comparison for each selected target column.

    Args:
        old_csv, new_csv: uploaded CSVs sharing the same columns.
        model_types: list of model-name strings.
        targets: one target column name or a list of them.

    Returns:
        (result_tables, feature_figs, ydist_figs, summary_markdown) where the
        first three are dicts keyed by target name, for dynamic Tab display.
    """
    # Guard against missing inputs.
    if old_csv is None or new_csv is None or not model_types or not targets:
        return {}, {}, {}, "請確認已上傳檔案、選擇模型與目標欄位"
    old_df = pd.read_csv(old_csv.name if hasattr(old_csv, "name") else old_csv)
    new_df = pd.read_csv(new_csv.name if hasattr(new_csv, "name") else new_csv)
    if isinstance(targets, str): targets = [targets]
    result_tables, feature_figs, ydist_figs = {}, {}, {}
    summary_lines = []
    for target in targets:
        if target not in old_df.columns or target not in new_df.columns:
            summary_lines.append(f"❌ 欄位 {target} 在資料中不存在,略過。")
            continue
        X_old, y_old = old_df.drop(columns=[target]), old_df[target]
        X_new, y_new = new_df.drop(columns=[target]), new_df[target]
        # Cap CV folds at the smaller dataset's row count.
        cv_num = min(5, len(y_old), len(y_new))
        if cv_num < 2:
            summary_lines.append(f"⚠️ {target} 資料筆數不足無法交叉驗證。")
            continue
        rows, importances = [], {}
        for mtype in model_types:
            # Build two independent instances: one for the old data, one for the new.
            if mtype == "Random Forest":
                model1 = RandomForestRegressor(n_estimators=120, random_state=42)
                model2 = RandomForestRegressor(n_estimators=120, random_state=42)
            elif mtype == "XGBoost":
                model1 = XGBRegressor(n_estimators=100, random_state=42, verbosity=0)
                model2 = XGBRegressor(n_estimators=100, random_state=42, verbosity=0)
            elif mtype == "LightGBM":
                model1 = LGBMRegressor(n_estimators=100, random_state=42)
                model2 = LGBMRegressor(n_estimators=100, random_state=42)
            elif mtype == "SVR":
                model1 = make_pipeline(StandardScaler(), SVR())
                model2 = make_pipeline(StandardScaler(), SVR())
            elif mtype == "Linear Regression":
                model1 = make_pipeline(StandardScaler(), LinearRegression())
                model2 = make_pipeline(StandardScaler(), LinearRegression())
            elif mtype == "Lasso":
                model1 = make_pipeline(StandardScaler(), Lasso(alpha=0.0001, max_iter=5000))
                model2 = make_pipeline(StandardScaler(), Lasso(alpha=0.0001, max_iter=5000))
            elif mtype == "Ridge":
                model1 = make_pipeline(StandardScaler(), Ridge())
                model2 = make_pipeline(StandardScaler(), Ridge())
            elif mtype == "ElasticNet":
                model1 = make_pipeline(StandardScaler(), ElasticNet(alpha=0.0001))
                model2 = make_pipeline(StandardScaler(), ElasticNet(alpha=0.0001))
            else:
                continue
            try:
                # CV R² on each dataset; RMSE is computed on the full fit (not CV).
                r2_old = cross_val_score(model1, X_old, y_old, cv=cv_num, scoring="r2").mean()
                r2_new = cross_val_score(model2, X_new, y_new, cv=cv_num, scoring="r2").mean()
                model1.fit(X_old, y_old)
                model2.fit(X_new, y_new)
                pred_old = model1.predict(X_old)
                pred_new = model2.predict(X_new)
                rmse_old = np.sqrt(np.mean((pred_old - y_old) ** 2))
                rmse_new = np.sqrt(np.mean((pred_new - y_new) ** 2))
                # Feature importance: tree attribute, pipeline step, or |coef|;
                # zero vector as a last resort.
                if hasattr(model1, "feature_importances_"):
                    imp_old = model1.feature_importances_
                    imp_new = model2.feature_importances_
                elif hasattr(model1, "named_steps") and "randomforestregressor" in model1.named_steps:
                    imp_old = model1.named_steps["randomforestregressor"].feature_importances_
                    imp_new = model2.named_steps["randomforestregressor"].feature_importances_
                elif hasattr(model1, "coef_"):
                    imp_old = np.abs(model1.coef_)
                    imp_new = np.abs(model2.coef_)
                elif hasattr(model1, "named_steps") and "linearregression" in model1.named_steps:
                    imp_old = np.abs(model1.named_steps["linearregression"].coef_)
                    imp_new = np.abs(model2.named_steps["linearregression"].coef_)
                else:
                    imp_old = np.zeros(X_old.shape[1])
                    imp_new = np.zeros(X_new.shape[1])
                rows.append({
                    "模型": mtype,
                    "原始R2": r2_old,
                    "合併新點R2": r2_new,
                    "原始RMSE": rmse_old,
                    "合併新點RMSE": rmse_new
                })
                importances[mtype+"_old"] = imp_old
                importances[mtype+"_new"] = imp_new
            except Exception as model_e:
                # Record the failure as an error row so the target still renders.
                rows.append({
                    "模型": mtype,
                    "原始R2": f"Error: {model_e}",
                    "合併新點R2": f"Error: {model_e}",
                    "原始RMSE": f"Error: {model_e}",
                    "合併新點RMSE": f"Error: {model_e}"
                })
        table = pd.DataFrame(rows)
        result_tables[target] = table
        # Feature-importance comparison figure for this target.
        features = list(X_old.columns)
        fig_fi = go.Figure()
        colors = [
            "#4E79A7", "#F28E2B", "#76B7B2", "#E15759",
            "#59A14F", "#EDC948", "#B07AA1", "#FF9DA7"
        ]
        plot_count = 0
        for mtype in model_types:
            k_old = f"{mtype}_old"
            k_new = f"{mtype}_new"
            if k_old in importances and np.sum(importances[k_old]) > 0:
                fig_fi.add_trace(go.Bar(
                    x=features, y=importances[k_old],
                    name=f"{mtype} - Before", marker_color=colors[plot_count % len(colors)], opacity=0.75
                ))
                plot_count += 1
            if k_new in importances and np.sum(importances[k_new]) > 0:
                fig_fi.add_trace(go.Bar(
                    x=features, y=importances[k_new],
                    name=f"{mtype} - After", marker_color=colors[plot_count % len(colors)], opacity=0.4
                ))
                plot_count += 1
        fig_fi.update_layout(
            barmode="group",
            title=f"{target} Feature Importance Comparison",
            xaxis_title="Feature",
            yaxis_title="Importance",
            legend_title="Model",
            font=dict(size=13)
        )
        feature_figs[target] = fig_fi
        # y-distribution overlay figure for this target.
        fig_y = go.Figure()
        fig_y.add_trace(go.Histogram(
            x=old_df[target], name="Before", opacity=0.7, nbinsx=16
        ))
        fig_y.add_trace(go.Histogram(
            x=new_df[target], name="After", opacity=0.7, nbinsx=16
        ))
        fig_y.update_layout(
            barmode='overlay',
            title=f"{target} y Distribution Comparison",
            xaxis_title=target,
            yaxis_title="Count",
            legend_title="Dataset",
            font=dict(size=13)
        )
        ydist_figs[target] = fig_y
        # Compact per-model summary lines using the ±0.1 R² threshold.
        # NOTE(review): the R² pair prints with no separator — likely a lost "→".
        for _, row in table.iterrows():
            try:
                r2_before = float(row['原始R2'])
                r2_after = float(row['合併新點R2'])
                model = row['模型']
                delta = r2_after - r2_before
                if r2_after < r2_before - 0.1:
                    summary_lines.append(f"<b>{target} - {model}</b>:顯著下降(R² {r2_before:.2f}{r2_after:.2f})")
                elif r2_after > r2_before + 0.1:
                    summary_lines.append(f"<b>{target} - {model}</b>:提升(R² {r2_before:.2f}{r2_after:.2f})")
                elif abs(delta) < 0.1:
                    summary_lines.append(f"<b>{target} - {model}</b>:無明顯變化(R² {r2_before:.2f}{r2_after:.2f})")
            except Exception:
                summary_lines.append(f"<b>{target} - {row['模型']}</b>:計算失敗/資料不足")
    # Return dicts for dynamic Tab display plus the combined summary markdown.
    return result_tables, feature_figs, ydist_figs, "### AI多 y 回歸比較摘要\n" + "<br>".join(summary_lines)
# = Auto-detect likely y columns in the dataset by keyword =
def detect_y_columns(csv_file, keyword_str):
    """Update a CheckboxGroup with candidate target (y) columns.

    NOTE(review): another detect_y_columns is defined later in this module and
    shadows this one at runtime — confirm which version the UI should bind.

    Args:
        csv_file: uploaded CSV (file object or path).
        keyword_str: comma-separated keywords matched case-insensitively.

    Returns:
        gr.update with at most 4 candidate columns, the first 2 pre-selected.
    """
    if csv_file is None:
        return gr.update(choices=[], value=[])
    try:
        df = pd.read_csv(csv_file.name if hasattr(csv_file, "name") else csv_file)
        keywords = [k.strip().lower() for k in keyword_str.split(",") if k.strip()]
        cols = []
        for c in df.columns:
            # Keyword matches take priority.
            if any(k in str(c).lower() for k in keywords):
                cols.append(c)
        # Fallback when no keyword hit: all numeric columns.
        if not cols:
            cols = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
        # Cap at the last 4 to keep the UI manageable.
        if len(cols) > 4:
            cols = cols[-4:]
        return gr.update(choices=cols, value=cols[:2])
    except Exception:
        return gr.update(choices=[], value=[])
# [3D/2D distribution, response surface, and contour visualization]
# = 3D scatter plot + predicted surface (random-forest fit) =
def plot_3d_scatter_surface(
    file,
    x_col, y_col, z_col, target_col,
    surface_fit=False,  # whether to draw the predicted surface
    n_grid=40           # surface grid resolution per axis
):
    """Draw a 3D scatter of three features colored by the target, plus an
    optional random-forest-predicted "surface" rendered as a dense point cloud.

    Returns:
        (scatter figure, surface figure); empty figures on missing/bad input.
    """
    # Default empty figures returned on any failure path.
    empty_fig = go.Figure()
    empty_fig.update_layout(template="plotly_white")
    if file is None or not x_col or not y_col or not z_col or not target_col:
        return empty_fig, empty_fig
    try:
        df = pd.read_csv(file.name if hasattr(file, "name") else file)
        # Guard: all selected columns must exist.
        if not all([c in df.columns for c in [x_col, y_col, z_col, target_col]]):
            return empty_fig, empty_fig
        # 3D scatter plot colored by the target column.
        fig_scatter = px.scatter_3d(
            df, x=x_col, y=y_col, z=z_col, color=target_col,
            title=f"3D Scatter: {x_col}, {y_col}, {z_col} / Color={target_col}",
            opacity=0.85
        )
        # Optional predicted-surface figure.
        fig_surface = empty_fig
        if surface_fit:
            # Use only the three selected feature columns.
            X = df[[x_col, y_col, z_col]].values
            y = df[target_col].values
            # Fit a random forest as the surrogate model.
            model = RandomForestRegressor(n_estimators=80, random_state=0)
            model.fit(X, y)
            # Build an n_grid³ evaluation grid over the observed ranges.
            x_lin = np.linspace(df[x_col].min(), df[x_col].max(), n_grid)
            y_lin = np.linspace(df[y_col].min(), df[y_col].max(), n_grid)
            z_lin = np.linspace(df[z_col].min(), df[z_col].max(), n_grid)
            xx, yy, zz = np.meshgrid(x_lin, y_lin, z_lin)
            grid_points = np.c_[xx.ravel(), yy.ravel(), zz.ravel()]
            yy_pred = model.predict(grid_points)
            # Render predictions as a translucent scatter3d point cloud.
            fig_surface = go.Figure(data=[
                go.Scatter3d(
                    x=grid_points[:,0], y=grid_points[:,1], z=grid_points[:,2],
                    mode='markers',
                    marker=dict(size=2.2, color=yy_pred, colorscale='Viridis', opacity=0.35),
                    name="預測表面"
                )
            ])
            fig_surface.update_layout(
                title=f"3D Predicted Surface: {target_col} vs {x_col}, {y_col}, {z_col}",
                scene=dict(
                    xaxis_title=x_col, yaxis_title=y_col, zaxis_title=z_col
                )
            )
        return fig_scatter, fig_surface
    except Exception as e:
        # Could log e for debugging; the UI only receives empty figures.
        return empty_fig, empty_fig
# = Auto-parse columns and refresh the 3D visualization dropdowns =
def update_dropdowns(file):
    """Derive dropdown (choices, default) pairs for the 3D viz from a CSV.

    Bug fix: the old debug print called os.path.exists without importing os,
    raising NameError and forcing every upload into the error fallback.

    Args:
        file: uploaded CSV (file object with .name, or path string), or None.

    Returns:
        Four (choices, default) tuples for the x, y, z and target dropdowns.
    """
    fallback = (["(請選擇)"], "(請選擇)")
    if file is None:
        return fallback, fallback, fallback, fallback
    try:
        path = file.name if hasattr(file, "name") else file
        df = pd.read_csv(path)
        # Prefer numeric columns; fall back to all columns if none are numeric.
        cols = list(df.select_dtypes(include="number").columns)
        if not cols:
            cols = list(df.columns)
        x_def = cols[0] if len(cols) > 0 else "(請選擇)"
        y_def = cols[1] if len(cols) > 1 else x_def
        z_def = cols[2] if len(cols) > 2 else x_def
        t_def = cols[-1] if len(cols) > 0 else x_def  # target defaults to the last column
        return (cols, x_def), (cols, y_def), (cols, z_def), (cols, t_def)
    except Exception as e:
        print("讀檔失敗:", e)
        return fallback, fallback, fallback, fallback
# = Two-variable response surface / contour plot (3D Surface/Contour) =
def plot_surface_and_contour(file, x_col, y_col, z_col, n_grid=40):
    """Interpolate z over an (x, y) grid and draw a 3D surface plus a contour map.

    Args:
        file: uploaded CSV (file object or path).
        x_col, y_col: feature columns spanning the grid.
        z_col: response column interpolated onto the grid (cubic griddata).
        n_grid: grid resolution per axis.

    Returns:
        (surface figure, contour figure); empty figures on missing/bad input.
    """
    empty_fig = go.Figure()
    empty_fig.update_layout(template="plotly_white")
    empty_fig2 = go.Figure()
    empty_fig2.update_layout(template="plotly_white")
    if file is None or not x_col or not y_col or not z_col:
        return empty_fig, empty_fig2
    try:
        df = pd.read_csv(file.name if hasattr(file, "name") else file)
        if not all([c in df.columns for c in [x_col, y_col, z_col]]):
            return empty_fig, empty_fig2
        x, y, z = df[x_col].values, df[y_col].values, df[z_col].values
        # Build the interpolation grid over the observed (x, y) ranges.
        xi = np.linspace(x.min(), x.max(), n_grid)
        yi = np.linspace(y.min(), y.max(), n_grid)
        xi, yi = np.meshgrid(xi, yi)
        # Cubic interpolation; points outside the convex hull become NaN.
        zi = griddata((x, y), z, (xi, yi), method="cubic")
        # 3D surface figure.
        fig_surface = go.Figure(data=[
            go.Surface(x=xi, y=yi, z=zi, colorscale="Viridis", opacity=0.93, showscale=True)
        ])
        fig_surface.update_layout(
            title=f"3D 曲面圖:{z_col} vs {x_col}, {y_col}",
            scene=dict(
                xaxis_title=x_col,
                yaxis_title=y_col,
                zaxis_title=z_col
            ),
            margin=dict(l=0, r=0, b=0, t=40)
        )
        # Contour figure over the same grid.
        fig_contour = go.Figure(data=[
            go.Contour(
                x=xi[0], y=yi[:,0], z=zi,
                colorscale="Viridis",
                contours=dict(showlabels=True),
                colorbar=dict(title=z_col)
            )
        ])
        fig_contour.update_layout(
            title=f"等高線圖:{z_col} vs {x_col}, {y_col}",
            xaxis_title=x_col,
            yaxis_title=y_col,
            margin=dict(l=0, r=0, b=0, t=40)
        )
        return fig_surface, fig_contour
    except Exception as e:
        print(f"3D surface/contour plot error: {e}")
        return empty_fig, empty_fig2
# [Multi-y batch regression / interaction analysis]
# = Build a multi-y-capable model object from its UI name =
def get_model(name):
    """Map a UI model name to a multi-output-capable regressor instance.

    PLS supports multiple targets natively; every other estimator is wrapped
    in MultiOutputRegressor so each y column gets its own fitted model.

    Raises:
        ValueError: if *name* is not a recognized model.
    """
    builders = {
        "Random Forest": lambda: MultiOutputRegressor(RandomForestRegressor(n_estimators=100, random_state=0)),
        "XGBoost": lambda: MultiOutputRegressor(XGBRegressor(n_estimators=100, random_state=0)),
        "PLS Regression": lambda: PLSRegression(n_components=2),
        "Ridge": lambda: MultiOutputRegressor(Ridge()),
        "Lasso": lambda: MultiOutputRegressor(Lasso()),
        "ElasticNet": lambda: MultiOutputRegressor(ElasticNet()),
        "Linear Regression": lambda: MultiOutputRegressor(LinearRegression()),
        "SVR": lambda: MultiOutputRegressor(SVR()),
    }
    builder = builders.get(name)
    if builder is None:
        raise ValueError(f"Unknown model: {name}")
    return builder()
# = Auto-detect multiple y columns from the data by keyword =
def detect_y_columns(file, keywords_str):
    """Pre-select likely y columns by case-insensitive keyword match.

    NOTE(review): this definition shadows an earlier detect_y_columns in this
    module. Bug fix: an empty keyword string previously compiled an empty
    pattern that matched every column — now it pre-selects none.

    Args:
        file: uploaded CSV (file object or path), or None.
        keywords_str: comma-separated keywords.

    Returns:
        gr.update offering every column as a choice, with matches pre-selected.
    """
    import re
    if file is None:
        return gr.update(choices=[], value=[])
    df = pd.read_csv(file.name if hasattr(file, 'name') else file)
    keywords = [kw.strip() for kw in keywords_str.split(",") if kw.strip()]
    if not keywords:
        return gr.update(choices=list(df.columns), value=[])
    patt = re.compile("|".join([re.escape(k) for k in keywords]), re.IGNORECASE)
    y_candidates = [c for c in df.columns if patt.search(str(c))]
    return gr.update(choices=list(df.columns), value=y_candidates)
# = Multi-y / interaction-term / multi-model batch regression driver =
def run_multi_y(before_file, after_file, linear, nonlinear, ylist, add_inter, add_y_inter, degree):
    """Fit every selected model on (optionally interaction-expanded) features
    against one or more y columns, and build per-(model, y) tab outputs.

    Note: `after_file` is accepted but not read here — only `before_file` is
    loaded (presumably by design for this tab; confirm against the UI wiring).

    Returns:
        A flat tuple shaped for the Gradio outputs: the combined summary table,
        then 8 tab titles, 8 per-tab tables, 8 importance figures, and 8
        true-vs-pred distribution figures (padded with ""/None).
    """
    df = pd.read_csv(before_file.name if hasattr(before_file, 'name') else before_file)
    if not ylist or not (linear or nonlinear):
        return "請選擇目標y欄位與模型", *[""]*4, *[None]*12
    X = df.drop(columns=ylist)
    Y = df[ylist]
    X = X.select_dtypes(include=[np.number])
    # 1. Feature interaction terms (pairwise-only polynomial expansion).
    if add_inter and int(degree) > 1:
        poly = PolynomialFeatures(degree=int(degree), interaction_only=True, include_bias=False)
        X_inter = pd.DataFrame(poly.fit_transform(X), columns=poly.get_feature_names_out(X.columns))
    else:
        X_inter = X.copy()
    # 2. y-to-y interaction columns (products of each y pair).
    if add_y_inter and len(ylist) > 1:
        for i in range(len(ylist)):
            for j in range(i+1, len(ylist)):
                Y[f"{ylist[i]}*{ylist[j]}"] = Y[ylist[i]] * Y[ylist[j]]
    X_train, X_test, Y_train, Y_test = train_test_split(X_inter, Y, test_size=0.2, random_state=42)
    # 3. Multi-model analysis.
    model_names = (linear or []) + (nonlinear or [])
    results, tab_results = [], []
    for m in model_names:
        model = get_model(m)
        model.fit(X_train, Y_train)
        pred = model.predict(X_test)
        # NOTE(review): both branches are identical — the isinstance check is
        # redundant and could collapse to a single DataFrame construction.
        if isinstance(pred, np.ndarray):
            pred = pd.DataFrame(pred, columns=Y.columns)
        else:
            pred = pd.DataFrame(pred, columns=Y.columns)
        scores = {y: r2_score(Y_test[y], pred[y]) for y in Y.columns}
        rmses = {y: np.sqrt(mean_squared_error(Y_test[y], pred[y])) for y in Y.columns}
        model_summary = pd.DataFrame({
            "Model": [m]*len(Y.columns),
            "y": list(Y.columns),
            "R2": [scores[y] for y in Y.columns],
            "RMSE": [rmses[y] for y in Y.columns]
        })
        results.append(model_summary)
        for i, y in enumerate(Y.columns[:4]):
            # ==== Robust feature-importance extraction ====
            # MultiOutputRegressor exposes one estimator per target.
            if hasattr(model, "estimators_"):
                est = model.estimators_[i]
            else:
                est = model
            # Tree-based models.
            if hasattr(est, "feature_importances_"):
                importances = est.feature_importances_
            # Linear models.
            elif hasattr(est, "coef_"):
                # coef_ may be 2-D (one row per target) — mind the shape.
                coef = est.coef_
                if coef.ndim > 1:
                    importances = np.abs(coef[i])
                else:
                    importances = np.abs(coef)
            # PLS (Partial Least Squares): first-component weights.
            elif hasattr(est, "x_weights_"):
                importances = np.abs(est.x_weights_[:, 0])
            # Fallback for anything else.
            else:
                importances = np.zeros(X_inter.shape[1])
            feat_names = X_inter.columns
            # ====== Plotting =======
            fig_feat = go.Figure([go.Bar(x=feat_names, y=importances)])
            fig_feat.update_layout(title=f"{m} {y} Feature Importances", height=440)
            fig_dist = go.Figure()
            fig_dist.add_trace(go.Histogram(x=Y_test[y], name='True', opacity=0.7))
            fig_dist.add_trace(go.Histogram(x=pred[y], name='Pred', opacity=0.7))
            fig_dist.update_layout(barmode='overlay', title=f"{m} {y} True vs Pred Dist", height=440)
            tab_results.append((f"【{m}】y: {y}", model_summary, fig_feat, fig_dist))
    # Assemble the fixed-width output lists (8 tab slots, padded when unused).
    out_titles, out_tables, out_feats, out_ydists = [], [], [], []
    N=8
    for i in range(N):
        if i < len(tab_results):
            tab = tab_results[i]
            out_titles.append(tab[0])
            out_tables.append(tab[1])
            out_feats.append(tab[2])
            out_ydists.append(tab[3])
        else:
            out_titles.append("")
            out_tables.append(None)
            out_feats.append(None)
            out_ydists.append(None)
    summary = pd.concat(results, ignore_index=True) if results else ""
    return summary, *out_titles, *out_tables, *out_feats, *out_ydists
# [自動function管理核心]
import re
def extract_tab_ui_and_function(pyfile="app.py"):
    """Scan *pyfile* and report, per gr.Tab block, its UI components and any
    bound functions/lambdas/callbacks, as a Markdown string.

    NOTE(review): this is byte-identical to
    extract_tab_functions_with_lambda_and_callback below — the two should be
    consolidated into one function.
    """
    try:
        with open(pyfile, encoding="utf-8") as f:
            code = f.read()
    except Exception as e:
        return f"❌ 讀取 {pyfile} 失敗:{e}"
    # (1) Collect all def names (currently unused — kept for parity).
    defs = set(re.findall(r"def\s+([a-zA-Z_][\w\d_]*)\s*\(", code))
    # (2) Capture each gr.Tab / gr.TabItem block up to the next Tab or EOF.
    tab_pattern = re.compile(
        r'with gr\.Tab(?:Item)?\(\s*[\'"](.+?)[\'"]\s*\):([\s\S]*?)(?=with gr\.Tab|with gr\.TabItem|\Z)', re.MULTILINE)
    # (3) UI component constructors (gr.Markdown, gr.Dataframe, gr.File, gr.Button...).
    ui_pattern = re.compile(r'(gr\.[A-Za-z_]+)\s*\(')
    # (4) Interaction events (obj.click/change/submit/select bound to fn or lambda).
    event_pattern = re.compile(
        r'([a-zA-Z_][\w\d_]*)\.(click|change|submit|select)\(\s*([a-zA-Z_][\w\d_]*|lambda)(.*?)(\)|$)', re.DOTALL)
    output = "# 🧩 各Tab UI元件與function/lambda/callback mapping\n\n"
    for m in tab_pattern.finditer(code):
        tab_name, tab_code = m.group(1), m.group(2)
        output += f"## {tab_name}\n"
        # 1. Gather all UI component names used in this tab.
        ui_list = ui_pattern.findall(tab_code)
        # Count occurrences per component type.
        ui_count = {}
        for u in ui_list:
            ui_count[u] = ui_count.get(u, 0) + 1
        if ui_list:
            output += "### 本Tab使用UI元件:\n"
            for u in sorted(set(ui_list)):
                output += f"- `{u}` x {ui_count[u]}\n"
        else:
            output += "- (本Tab沒有任何UI元件)\n"
        # 2. Scan this tab's interaction callbacks.
        func_map = []
        for ev in event_pattern.findall(tab_code):
            obj, trigger, fn, args, _ = ev
            if fn == "lambda":
                func_map.append(f"{obj}.{trigger} → lambda(匿名)")
            else:
                func_map.append(f"{obj}.{trigger}{fn}()")
            # Keyword callback arguments (_callback/_js/_preprocess/_postprocess).
            cb_matches = re.findall(r'(_callback|_js|_preprocess|_postprocess)\s*=\s*([a-zA-Z_][\w\d_]*|lambda[^\),]*)', args)
            for cb_type, cb_fn in cb_matches:
                if cb_fn.strip().startswith("lambda"):
                    func_map.append(f"{obj}.{cb_type} → lambda")
                else:
                    func_map.append(f"{obj}.{cb_type}{cb_fn.strip()}()")
        if func_map:
            output += "\n### 有callback的元件/方法:\n"
            for item in func_map:
                output += f"- {item}\n"
        else:
            output += "\n- 本Tab所有UI皆為純靜態,無綁定function\n"
        output += "\n"
    return output
def extract_tab_functions_with_lambda_and_callback(pyfile="app.py"):
    """Report each gr.Tab's UI components together with its bound
    functions/lambdas/callbacks, as a Markdown string.

    NOTE(review): byte-identical duplicate of extract_tab_ui_and_function
    above — consolidate to a single implementation.
    """
    try:
        with open(pyfile, encoding="utf-8") as f:
            code = f.read()
    except Exception as e:
        return f"❌ 讀取 {pyfile} 失敗:{e}"
    # (1) Collect all def names (currently unused — kept for parity).
    defs = set(re.findall(r"def\s+([a-zA-Z_][\w\d_]*)\s*\(", code))
    # (2) Capture each gr.Tab / gr.TabItem block up to the next Tab or EOF.
    tab_pattern = re.compile(
        r'with gr\.Tab(?:Item)?\(\s*[\'"](.+?)[\'"]\s*\):([\s\S]*?)(?=with gr\.Tab|with gr\.TabItem|\Z)', re.MULTILINE)
    # (3) UI component constructors (gr.Markdown, gr.Dataframe, gr.File, gr.Button...).
    ui_pattern = re.compile(r'(gr\.[A-Za-z_]+)\s*\(')
    # (4) Interaction events (obj.click/change/submit/select bound to fn or lambda).
    event_pattern = re.compile(
        r'([a-zA-Z_][\w\d_]*)\.(click|change|submit|select)\(\s*([a-zA-Z_][\w\d_]*|lambda)(.*?)(\)|$)', re.DOTALL)
    output = "# 🧩 各Tab UI元件與function/lambda/callback mapping\n\n"
    for m in tab_pattern.finditer(code):
        tab_name, tab_code = m.group(1), m.group(2)
        output += f"## {tab_name}\n"
        # 1. Gather all UI component names used in this tab.
        ui_list = ui_pattern.findall(tab_code)
        # Count occurrences per component type.
        ui_count = {}
        for u in ui_list:
            ui_count[u] = ui_count.get(u, 0) + 1
        if ui_list:
            output += "### 本Tab使用UI元件:\n"
            for u in sorted(set(ui_list)):
                output += f"- `{u}` x {ui_count[u]}\n"
        else:
            output += "- (本Tab沒有任何UI元件)\n"
        # 2. Scan this tab's interaction callbacks.
        func_map = []
        for ev in event_pattern.findall(tab_code):
            obj, trigger, fn, args, _ = ev
            if fn == "lambda":
                func_map.append(f"{obj}.{trigger} → lambda(匿名)")
            else:
                func_map.append(f"{obj}.{trigger}{fn}()")
            # Keyword callback arguments (_callback/_js/_preprocess/_postprocess).
            cb_matches = re.findall(r'(_callback|_js|_preprocess|_postprocess)\s*=\s*([a-zA-Z_][\w\d_]*|lambda[^\),]*)', args)
            for cb_type, cb_fn in cb_matches:
                if cb_fn.strip().startswith("lambda"):
                    func_map.append(f"{obj}.{cb_type} → lambda")
                else:
                    func_map.append(f"{obj}.{cb_type}{cb_fn.strip()}()")
        if func_map:
            output += "\n### 有callback的元件/方法:\n"
            for item in func_map:
                output += f"- {item}\n"
        else:
            output += "\n- 本Tab所有UI皆為純靜態,無綁定function\n"
        output += "\n"
    return output
def extract_all_functions(pyfile="app.py"):
    """List every top-level function definition in *pyfile* as Markdown.

    Function bodies longer than 10 lines are truncated with a "... (略)" marker.
    Returns an error string if the file cannot be read or no defs are found.
    """
    import re
    try:
        with open(pyfile, encoding="utf-8") as f:
            code = f.read()
    except Exception as e:
        return f"❌ 讀取 {pyfile} 失敗:{e}"
    # A top-level def plus every following indented line.
    func_pattern = re.compile(
        r"^(def [a-zA-Z_][\w\d_]*\(.*?\):(?:\n(?: |\t).*)*)",
        re.MULTILINE)
    matches = func_pattern.findall(code)
    if not matches:
        return "❗ 沒有抓到任何 function (def)!"
    parts = ["## 📃 所有 function 定義\n"]
    for func_src in matches:
        name_match = re.match(r"def ([a-zA-Z_][\w\d_]*)", func_src)
        title = name_match.group(1) if name_match else "?"
        parts.append(f"---\n### `{title}()`\n")
        body_lines = func_src.split("\n")
        if len(body_lines) > 10:
            parts.append("```python\n" + "\n".join(body_lines[:10]) + "\n... (略)\n```\n")
        else:
            parts.append("```python\n" + func_src + "\n```\n")
    return "".join(parts)
# ======================== Gradio multi-tab main UI ========================
# Flat top-level script: builds the whole Blocks layout, wires every event
# handler, then launches the app.  The callback functions referenced here
# (compare_all_designs, advanced_doe_with_mapping, run_multi_y, ...) are
# defined earlier in this file.
# NOTE(review): several widget names (algo_linear, algo_nonlinear, summary,
# run_btn, fig) are re-bound in later tabs; this is safe only because each
# .click()/.change() is registered before the name is reassigned — confirm
# before reordering any tab.
with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("## <span style='color:#2675ff;font-weight:bold'> AI化實驗設計與數據分析平台 </span>")
    with gr.Tabs():
        # 1) Classic DoE design distributions (LHS / Sobol / Halton / Uniform)
        with gr.Tab("1️⃣ 標準DoE "):
            gr.Markdown("""
### 🧪 標準DoE設計分布
- 支援自動產生參數空間內的多種經典DoE設計法(LHS, Sobol, Halton, Uniform)
- 可視覺化設計點分布、產生對應的設計參數表
- 支援下載CSV檔、表格內容可複製
**如何使用:**
- 1️⃣ 填寫參數名稱、範圍及步進
- 2️⃣ 設定要產生的組數與亂數種子(可選)
- 3️⃣ 點選「產生設計+分布圖」
- 4️⃣ 下方各分頁可檢視不同設計法的結果、分布圖與自動摘要
**注意事項:**
- 所有參數名稱需唯一、不得重複
- 組數愈大,運算與繪圖所需時間會增加
- 請檢查參數範圍、步進格式是否正確
""")
            with gr.Row():
                with gr.Column(scale=1, min_width=240):
                    with gr.Accordion("參數設定", open=True):
                        param_table = gr.Dataframe(
                            headers=["名稱", "最小值", "最大值", "間隔(step)"],
                            datatype=["str", "number", "number", "number"],
                            row_count=(3, "dynamic"),
                            col_count=(4, "fixed"),
                            value=[["A", 10, 20, 2], ["B", 100, 200, 25], ["C", 1, 2, 0.5]],
                            label="參數設定"
                        )
                        n_samples = gr.Number(label="組數", value=8, precision=0)
                        seed = gr.Number(label="亂數種子(留空或0為隨機)", value=42, precision=0)
                        btn = gr.Button("🪄 產生設計與分布圖", elem_classes=["main-btn"])
                with gr.Column(scale=2):
                    with gr.Accordion("分布結果/圖表/摘要", open=True):
                        tabs = []
                        # One sub-tab per design method.  The five components per
                        # sub-tab are appended in a fixed order that must match
                        # the output order of compare_all_designs.
                        for name in ["LHS", "Sobol", "Halton", "Uniform"]:
                            with gr.Tab(name):
                                df = gr.Dataframe(label=f"{name} 設計點表格")
                                fig = gr.Plot(label=f"{name} 設計分布")
                                csv = gr.File(label="下載CSV📥")
                                desc = gr.Markdown()
                                summary = gr.Markdown()
                                tabs.extend([df, fig, csv, desc, summary])
            btn.click(compare_all_designs, inputs=[param_table, n_samples, seed], outputs=tabs)
        # 2) Advanced DoE: Box-Behnken / Central Composite (CCD) designs
        with gr.Tab("2️⃣ 進階DoE(Box-Behnken/CCD)"):
            gr.Markdown("""
### 🧪 進階DoE (Box-Behnken/CCD)
- 支援 Box-Behnken 與 中心組合設計 (CCD) 兩種進階DoE設計法
- 同步產生標準化設計矩陣與對應實際參數表
- 提供一鍵下載CSV,方便後續AI建模
**如何使用:**
- 1️⃣ 設定各參數的最小、最大值及間隔
- 2️⃣ 選擇所需的設計法(Box-Behnken或CCD)
- 3️⃣ 點「產生進階DoE設計」即自動產生全部設計點
**注意事項:**
- 參數欄位請完整填寫,勿留空
- 各參數區間需合理,否則設計點數量可能異常
- 若需大規模設計點,運算會稍久,請耐心等待
""")
            with gr.Row():
                with gr.Column(scale=1, min_width=240):
                    with gr.Accordion("參數設定", open=True):
                        param_table2 = gr.Dataframe(
                            headers=["名稱", "最小值", "最大值", "間隔"],
                            datatype=["str", "number", "number", "number"],
                            row_count=(3, "dynamic"),
                            col_count=(4, "fixed"),
                            value=[["溫度", 80, 120, 10], ["壓力", 1, 5, 1], ["pH", 6, 8, 1]],
                            label="參數設定"
                        )
                        design_type = gr.Radio(["Box-Behnken", "CCD"], value="Box-Behnken", label="設計法")
                        run_btn = gr.Button("🪄 產生進階DoE設計", elem_classes=["main-btn"])
                with gr.Column(scale=2):
                    with gr.Accordion("設計矩陣/參數表", open=True):
                        out_std = gr.Dataframe(label="標準化設計矩陣")
                        download_std = gr.File(label="下載標準矩陣CSV📥")
                        out_real = gr.Dataframe(label="實際參數表")
                        download_real = gr.File(label="下載參數表CSV📥")
            run_btn.click(
                advanced_doe_with_mapping,
                inputs=[param_table2, design_type],
                outputs=[out_std, out_real, download_std, download_real]
            )
        # 3) AI modeling / feature importance / SHAP explanation
        with gr.Tab("3️⃣ AI建模/特徵重要性/SHAP"):
            gr.Markdown("""
### 🧠 AI建模/特徵重要性/SHAP
- 支援多種線性、非線性AI回歸模型,自動化訓練與模型評估
- 一鍵產生預測結果、模型效能指標、特徵重要性圖、SHAP全圖解釋
- 輕鬆檢視哪些參數對y預測最關鍵
**如何使用:**
- 1️⃣ 上傳DoE結果CSV,選擇目標y欄位
- 2️⃣ 勾選需比較的AI模型(可多選)
- 3️⃣ 可選擇是否顯示SHAP解釋圖
- 4️⃣ 點「一鍵訓練+特徵重要性」,即可檢視全部結果
**注意事項:**
- 資料需為數值型且無遺漏值
- 目標y欄位不可有重複
- 資料量太小時,部分模型可能無法有效學習
""")
            with gr.Row():
                with gr.Column(scale=1, min_width=320):
                    with gr.Accordion("上傳/選模型", open=True):
                        datafile = gr.File(label="上傳DoE結果CSV📤", file_types=[".csv"])
                        test_ratio = gr.Slider(label="測試集比例", value=0.3, minimum=0.1, maximum=0.5, step=0.05)
                        algo_linear = gr.CheckboxGroup(
                            ["Linear Regression", "Lasso", "Ridge", "ElasticNet"],
                            value=[], label="線性回歸"
                        )
                        algo_nonlinear = gr.CheckboxGroup(
                            ["Random Forest", "XGBoost", "LightGBM", "SVR"],
                            value=["Random Forest"], label="非線性回歸"
                        )
                        show_shap = gr.Checkbox(label="進階SHAP解釋", value=False)
                        btn_ai = gr.Button("🚀 一鍵訓練", elem_classes=["main-btn"])
                with gr.Column(scale=2):
                    with gr.Accordion("預測/重要性圖", open=True):
                        predfig = gr.Plot(label="📊 預測/實際對比圖")
                        met_df = gr.Dataframe(label="模型效能指標", datatype="auto")
                        summary = gr.Markdown(visible=True)
                        feat_summary = gr.Markdown(visible=True)
                        feat_fig = gr.Plot(label="特徵重要性")
                        shap_img = gr.Image(label="SHAP解釋圖")
            # The lambda merges the two checkbox groups into one model list;
            # `or []` guards against a group being None when nothing is checked.
            btn_ai.click(
                lambda file, lin, nonlin, ratio, shap_flag:
                    train_and_predict_with_importance(
                        file, (lin or []) + (nonlin or []), ratio, True, shap_flag
                    ),
                inputs=[datafile, algo_linear, algo_nonlinear, test_ratio, show_shap],
                outputs=[predfig, met_df, summary, feat_summary, feat_fig, shap_img]
            )
        # 4) Multi-plot data visualisation + 2D/3D/contour analysis
        with gr.Tab("4️⃣ 數值資料視覺化處理"):
            gr.Markdown("""
### 📊 多圖資料視覺化 + 2D/3D/等高線分析
- 多種常用資料視覺化工具(熱圖、pairplot、直方圖、PCA等)
- 支援三維散點、曲面、2D等高線等專業圖形
- 可自選圖形、快速比較變數分布
**如何使用:**
- 1️⃣ 上傳資料CSV,選擇要產生的視覺化圖種類
- 2️⃣ 點「產生多圖分析」可一次顯示多種圖表
- 3️⃣ 三維分布:指定x、y、z軸變數(或目標欄位),生成3D散點/曲面圖
- 4️⃣ 2D/3D反應面:輸入要分析的變數組合,產生等高線/曲面圖
**注意事項:**
- 欄位名稱需為英文/數字,不支援特殊字元
- 缺值過多資料會自動忽略
- 若圖形異常請檢查欄位型態及範圍
""")
            # --- (1) Multi-plot section
            with gr.Row():
                with gr.Column(scale=1, min_width=230):
                    upfile2 = gr.File(label="上傳資料CSV📤")
                    plot_select = gr.CheckboxGroup(
                        ["Heatmap", "Pairplot", "Histogram", "Scatter Matrix", "PCA"],
                        value=["Heatmap", "Pairplot", "Histogram", "PCA"], label="視覺化圖"
                    )
                    vizbtn = gr.Button("📊 產生多圖分析", elem_classes=["main-btn"])
                with gr.Column(scale=3):
                    # 5 plot slots, each with two Markdown companions; order
                    # must match the tuple returned by multi_viz.
                    vis_outs = []
                    for i in range(5):
                        vis_outs.extend([gr.Plot(label=f"圖像{i+1}"), gr.Markdown(), gr.Markdown()])
                    recomm_card = gr.Markdown(visible=True, value="", elem_id="recommend-card")
            vizbtn.click(
                lambda f, t: (*multi_viz(f, t), auto_recommendation(f)),
                inputs=[upfile2, plot_select],
                outputs=vis_outs + [recomm_card]
            )
            # --- (2) 3D variable distribution / surface
            gr.Markdown("#### 🧬 三維分析:3D變數分布/曲面圖")
            with gr.Row():
                with gr.Column(scale=1, min_width=260):
                    columns_md = gr.Markdown(label="資料欄位", value="請先上傳資料CSV,欄位將自動顯示")
                    x_col = gr.Textbox(label="X軸欄位", placeholder="如 A", interactive=True)
                    y_col = gr.Textbox(label="Y軸欄位", placeholder="如 B", interactive=True)
                    z_col = gr.Textbox(label="Z軸欄位", placeholder="如 C", interactive=True)
                    target_col = gr.Textbox(label="目標Y/顏色", placeholder="如 y", interactive=True)
                    surface_flag = gr.Checkbox(label="顯示三維曲面", value=False)
                    plot_btn = gr.Button("🧊 生成3D散點/曲面圖", elem_classes=["main-btn"])
                with gr.Column(scale=3):
                    fig_scatter_out = gr.Plot(label="3D散點圖")
                    fig_surface_out = gr.Plot(label="3D預測曲面圖")
            # NOTE(review): this hint lambda hardcodes the column list
            # "A, B, C, y" instead of reading the uploaded CSV's real headers —
            # looks like a placeholder; confirm and replace with a real reader.
            upfile2.change(
                lambda file: f"資料欄位:A, B, C, y" if file else "請先上傳資料",
                inputs=[upfile2],
                outputs=[columns_md]
            )
            plot_btn.click(
                plot_3d_scatter_surface,
                inputs=[upfile2, x_col, y_col, z_col, target_col, surface_flag],
                outputs=[fig_scatter_out, fig_surface_out]
            )
            # --- (3) Two-variable 3D response surface / contour plot
            gr.Markdown("#### 🧬 二變數 3D 反應面/等高線圖")
            with gr.Row():
                with gr.Column(scale=1, min_width=260):
                    columns_md2 = gr.Markdown(label="資料欄位", value="請先上傳資料CSV,欄位將自動顯示")
                    x_col2 = gr.Textbox(label="X軸欄位", placeholder="如 A", interactive=True)
                    y_col2 = gr.Textbox(label="Y軸欄位", placeholder="如 B", interactive=True)
                    z_col2 = gr.Textbox(label="目標Z(反應/產率/預測)", placeholder="如 y", interactive=True)
                    surface2_btn = gr.Button("🧊 生成3D曲面+等高線圖", elem_classes=["main-btn"])
                with gr.Column(scale=3):
                    fig_surface2 = gr.Plot(label="3D曲面圖")
                    fig_contour2 = gr.Plot(label="等高線圖")
            # NOTE(review): same hardcoded column hint as above ("A, B, y").
            upfile2.change(
                lambda file: f"資料欄位:A, B, y" if file else "請先上傳資料",
                inputs=[upfile2],
                outputs=[columns_md2]
            )
            surface2_btn.click(
                plot_surface_and_contour,
                inputs=[upfile2, x_col2, y_col2, z_col2],
                outputs=[fig_surface2, fig_contour2]
            )
        # 5) Hyperparameter / Bayesian optimisation
        with gr.Tab("5️⃣ 超參數/貝葉斯優化"):
            gr.Markdown("""
### 🏆 超參數/貝葉斯優化
- 自動執行各類AI模型的超參數最佳化(貝葉斯法)
- 即時繪出優化歷程,提供最佳參數組合與效能摘要
**如何使用:**
- 1️⃣ 上傳DoE實驗結果CSV
- 2️⃣ 勾選要優化的模型(可複選),設定最大迭代次數
- 3️⃣ 點「執行Bayes超參數優化」,自動開始優化並顯示所有歷程
**注意事項:**
- 請確認資料充足且欄位型態正確
- 較複雜模型/高維空間下,優化需較多時間
- 迭代次數過少時,最佳值可能不穩定
""")
            with gr.Row():
                with gr.Column(scale=1, min_width=230):
                    with gr.Accordion("上傳/模型選擇", open=True):
                        upfile3 = gr.File(label="📤 上傳DoE結果CSV", file_types=[".csv"])
                        model_sel = gr.CheckboxGroup(
                            ["Random Forest", "XGBoost", "LightGBM", "SVR"], value=["XGBoost"], label="模型選擇(可複選)"
                        )
                        n_iter = gr.Number(label="最大迭代次數", value=16, precision=0)
                        bayes_btn = gr.Button("🚀 執行Bayes超參數優化", elem_classes=["main-btn"])
                with gr.Column(scale=2):
                    with gr.Accordion("優化歷程/結果", open=True):
                        multi_fig = gr.Plot(label="所有模型Bayes優化歷程 (CV RMSE)")
                        # One sub-tab per model type; 3 components per tab, in the
                        # order expected by run_multi_bayes_optimization's outputs.
                        tab_figs = []
                        for mtype in ["Random Forest", "XGBoost", "LightGBM", "SVR"]:
                            with gr.Tab(mtype):
                                fig = gr.Plot(label=f"{mtype} 優化歷程")
                                summary = gr.Markdown()
                                best_param = gr.Markdown()
                                tab_figs.extend([fig, summary, best_param])
            bayes_btn.click(
                run_multi_bayes_optimization,
                inputs=[upfile3, model_sel, n_iter],
                outputs=[multi_fig] + tab_figs
            )
        # 6) Smart recommendation / mixed strategy / merge-backfill
        with gr.Tab("6️⃣ 智能推薦/混合策略/合併回填"):
            gr.Markdown("""
### 🌟 智能推薦/混合策略/合併回填
- AI自動推薦新實驗點、混合策略智能選點
- 一鍵搜尋最佳組合、合併新舊DoE資料
**如何使用:**
- 1️⃣ 上傳現有DoE資料,選擇推薦模式與模型
- 2️⃣ 指定推薦點數與是否排除重複
- 3️⃣ 點「產生推薦點組合」可直接下載推薦點
- 4️⃣ 新舊資料合併:上傳原始與新實驗CSV,自動合併去重
**注意事項:**
- 請確認欄位名稱一致、資料格式正確
- 合併時將以欄位名為主,自動排除重複點
- 推薦模式可同時多選,增加實驗多樣性
""")
            # --- (1) Multi-model optimum-condition search
            with gr.Accordion("多模型最佳化搜尋", open=True):
                with gr.Row():
                    with gr.Column(scale=1, min_width=280):
                        opt_file = gr.File(label="📤 上傳DoE資料(CSV)", file_types=[".csv"])
                        opt_model_sel = gr.CheckboxGroup(
                            ["Random Forest", "XGBoost", "LightGBM", "SVR"],
                            value=["Random Forest", "XGBoost"], label="最佳化用模型"
                        )
                        direction = gr.Radio(["最大化", "最小化"], value="最大化", label="目標")
                        is_discrete = gr.Checkbox(label="全部參數視為離散", value=False)
                        n_iter2 = gr.Number(label="搜尋迭代次數", value=28, precision=0)
                        btn_opt = gr.Button("🏆 搜尋AI預測最佳條件", elem_classes=["main-btn"])
                    with gr.Column(scale=2):
                        opt_df = gr.Dataframe(label="最佳參數組合", datatype="auto")
                        opt_txt = gr.Markdown()
                        opt_desc = gr.Markdown()
                        opt_sum = gr.Markdown()
                btn_opt.click(
                    optimize_conditions,
                    inputs=[opt_file, opt_model_sel, direction, is_discrete, n_iter2],
                    outputs=[opt_df, opt_txt, opt_desc, opt_sum]
                )
            # --- (2) New-point recommendation
            with gr.Accordion("新點推薦", open=False):
                with gr.Row():
                    with gr.Column(scale=1, min_width=280):
                        rec_file = gr.File(label="📤 請上傳DoE資料(CSV)", file_types=[".csv"])
                        recommend_mode = gr.CheckboxGroup(
                            ["探索型推薦", "混合策略推薦"],
                            value=["探索型推薦"], label="推薦模式(可複選)"
                        )
                        recommend_models = gr.CheckboxGroup(
                            ["Random Forest", "XGBoost", "LightGBM", "SVR"],
                            value=["Random Forest", "XGBoost"], label="模型選擇"
                        )
                        recommend_n = gr.Number(label="推薦點數", value=4, precision=0)
                        recommend_exclude = gr.Checkbox(label="排除現有點", value=True)
                        recommend_btn = gr.Button("🎯 產生推薦點組合", elem_classes=["main-btn"])
                    with gr.Column(scale=2):
                        recommend_out = gr.Markdown(label="推薦結果", value="")
                        recommend_download_file = gr.File(label="📥 下載推薦點(回填y用)", interactive=False)
                recommend_btn.click(
                    make_recommended_points,
                    inputs=[rec_file, recommend_models, recommend_mode, recommend_n, recommend_exclude],
                    outputs=[recommend_out, recommend_download_file]
                )
            # --- (3) Merge backfilled data with the original DoE set
            with gr.Accordion("合併回填資料", open=False):
                with gr.Row():
                    with gr.Column(scale=1, min_width=320):
                        base_csv = gr.File(label="原始DoE資料(CSV)")
                        new_csv = gr.File(label="新實驗資料(推薦點CSV)")
                        merge_btn = gr.Button("🧩 自動合併/去重", elem_classes=["main-btn"])
                        merge_out = gr.File(label="📥 下載合併後資料")
                merge_btn.click(
                    merge_csvs,
                    inputs=[base_csv, new_csv],
                    outputs=merge_out
                )
        # 7) AI regression analysis (multi-target batch, interaction terms,
        #    multiple models, multiple y columns)
        with gr.Tab("7️⃣ AI模型回歸分析"):
            gr.Markdown("""
### 🧠 AI模型回歸分析(多目標/多模型/交互作用)
- 批次執行多種AI模型,支援多y、多特徵交互作用
- 各y可獨立檢視效能指標、重要性對比與預測分布
- 自動比較前後回填資料對AI效能之提升/變化
**如何使用:**
- 1️⃣ 上傳原始DoE資料(CSV)及合併新點(CSV,可選)
- 2️⃣ 選擇目標y欄位,可自動偵測或自行調整
- 3️⃣ 勾選所需AI模型、設定特徵交互作用階數
- 4️⃣ 點「批次回歸分析」,下方各分頁顯示每y結果
**注意事項:**
- 交互作用階數設定愈高,特徵數量愈多,模型訓練愈慢
- 目標欄位過多,僅顯示前8個y的詳細結果
- 回填資料須與原始資料欄位一致
""")
            before_file = gr.File(label="原始DoE資料(CSV)")
            after_file = gr.File(label="🧩 合併新點DoE(CSV)")
            # NOTE(review): algo_linear / algo_nonlinear deliberately shadow the
            # Tab-3 widgets of the same names; Tab 3's click was bound earlier,
            # so its references still point at the original components.
            algo_linear = gr.CheckboxGroup(
                ["Linear Regression", "Lasso", "Ridge", "ElasticNet"],
                value=[], label="線性回歸"
            )
            algo_nonlinear = gr.CheckboxGroup(
                ["Random Forest", "XGBoost", "PLS Regression", "SVR"],
                value=["Random Forest"], label="非線性回歸"
            )
            # Feature-interaction degree control
            degree_select = gr.Dropdown([1, 2, 3], value=1, label="特徵交互作用階數 (degree)")
            add_inter = gr.Checkbox(label="特徵間交互作用 (x1*x2)", value=True)
            add_y_inter = gr.Checkbox(label="y間交互作用 (y1*y2)", value=False)
            y_keywords = gr.Textbox(label="目標欄位關鍵字 (逗號分隔)", value="y,目標,output,target")
            y_columns = gr.CheckboxGroup(label="目標y欄位 (可複選)", choices=[], value=[])
            # Re-detect candidate y columns whenever the file or keywords change.
            before_file.change(
                detect_y_columns,
                inputs=[before_file, y_keywords],
                outputs=y_columns
            )
            y_keywords.change(
                detect_y_columns,
                inputs=[before_file, y_keywords],
                outputs=y_columns
            )
            run_btn = gr.Button("🚀 批次回歸分析", elem_classes=["main-btn"])
            summary_md = gr.Dataframe(label="所有模型-y效能總表")
            # Fixed pool of 8 result sub-tabs; run_multi_y fills at most 8 y's.
            y_titles, y_tables, y_feats, y_ydists = [], [], [], []
            with gr.Tabs() as tabs_container:
                for idx in range(8):
                    with gr.TabItem(f"Tab{idx+1}"):
                        y_title = gr.Markdown(value="")
                        y_table = gr.Dataframe(label="模型效能比較表")
                        with gr.Row():
                            y_feat = gr.Plot(label="特徵重要性對比圖")
                            y_ydist = gr.Plot(label="y 分布對比圖")
                        y_titles.append(y_title)
                        y_tables.append(y_table)
                        y_feats.append(y_feat)
                        y_ydists.append(y_ydist)
            run_btn.click(
                run_multi_y,
                inputs=[before_file, after_file, algo_linear, algo_nonlinear, y_columns, add_inter, add_y_inter, degree_select],
                outputs=[summary_md, *y_titles, *y_tables, *y_feats, *y_ydists]
            )
        # Platform description / index (static content only)
        with gr.Tab("⚙️平台說明與索引"):
            gr.Markdown("""
## 🧭 功能說明 & 導航指南
本平台整合「自動實驗設計(DoE)」、「AI建模」、「資料視覺化」、「超參數優化」、「智能推薦」等模組,專為化學/材料/製程等工程應用打造,協助您**從設計點產生、數據分析到模型推薦,全流程自動化**!
---
### 🧰 主要功能分頁
- **1️⃣ 標準DoE設計分布**
- 產生經典設計法(LHS、Sobol等)的多維參數設計點,便於建立模型訓練用基礎資料。
- 直觀展示每種設計點分布、支援結果下載。
- **2️⃣ 進階DoE(Box-Behnken/CCD)**
- 支援正交型、中心組合等進階設計法,方便進行曲面反應分析(RSM)。
- 產生標準化設計矩陣、對應實際參數表。
- **3️⃣ AI建模/特徵重要性/SHAP**
- 一鍵啟動多模型AI訓練、交叉驗證、特徵重要性排序、SHAP解釋。
- 適用於尋找關鍵變數與預測能力評估。
- **4️⃣ 多圖資料視覺化 + 2D/3D/等高線**
- 提供各類視覺化工具(熱圖、pairplot、PCA、3D曲面/等高線)協助多角度理解數據分布。
- 支援高維資料降維與多種圖表疊合分析。
- **5️⃣ 超參數/貝葉斯優化**
- 針對各種AI回歸模型自動進行超參數優化(如Random Forest、XGBoost等),即時檢視優化歷程與最佳參數。
- **6️⃣ 智能推薦/混合策略/合併回填**
- 結合AI預測與探索性搜尋,自動推薦新實驗條件,並支援資料自動合併與去重。
- 適合推進次輪實驗設計及自動補齊數據。
- **7️⃣ AI模型回歸分析(多目標/多模型/交互作用)**
- 支援多y欄位、多模型批次建模、特徵交互作用分析,詳細呈現各y的訓練/預測效能。
---
### 📝 操作建議與常見注意事項
- **所有欄位均可直接點擊或複製表格內容,並可一鍵下載分析結果**
- **CSV資料須為純數值型(中文欄位會自動支援,但建議用英文/數字命名)**
- **回歸與建模功能建議資料組數大於10,避免過度擬合或模型效能不穩定**
- **每個Tab下方皆有詳細分頁說明、注意事項,建議操作前先閱讀上方說明**
---
### 🏠 應用情境
- 多參數製程最佳化
- 原型實驗規劃(探索型/補點/混合設計)
- 關鍵因子敏感度分析
- 自動推薦新實驗組合
- AI輔助反應機制推論與模型精度提升
---
**本平台持續優化,歡迎多加利用!**
""")
        # Function-management tab: self-introspection of this file.
        with gr.Tab("🧩 Function管理"):
            gr.Markdown("#### 自動偵測各分頁 UI 綁定 function / lambda / callback")
            # Runs once at UI-build time against "app.py" — assumes the app is
            # launched from a directory containing that file; TODO confirm.
            mapping = extract_tab_functions_with_lambda_and_callback("app.py")
            gr.Markdown(value=mapping)
            gr.Markdown("#### 本程式所有 function 定義 (摘要)")
            gr.Markdown(value=extract_all_functions("app.py"))
    gr.Markdown("<div id='footer'> 本平台由T100團隊設計,歡迎交流建議 │ 2025_</div>")
demo.launch()