Spaces:
Sleeping
Sleeping
| # Requirements (pip) | |
| # gradio>=4.17.0 | |
| # pandas>=2.0.0 | |
| # numpy>=1.24.0 | |
| # matplotlib | |
| # scikit-learn>=1.2.0 | |
| # pyDOE2 | |
| # Pillow | |
| # xgboost>=2.0.0 | |
| # lightgbm>=4.0.0 | |
| # seaborn | |
| # scipy>=1.10.0 | |
| # plotly>=5.16.0 | |
| # scikit-optimize>=0.9.0 | |
| # optuna (imported below at the bottom of the import block — actually required) | |
| # tpot>=0.12 (目前沒用到) | |
| # shap | |
| # tabulate | |
| # import blocks | |
| # [Gradio 基本UI] | |
| import gradio as gr | |
| # [數據處理/科學計算] | |
| import numpy as np | |
| import pandas as pd | |
| # [DoE設計/取樣法] | |
| from pyDOE2 import lhs, bbdesign, ccdesign | |
| from scipy.stats.qmc import Sobol, Halton | |
| # [AI/ML建模] | |
| from sklearn.ensemble import RandomForestRegressor | |
| from sklearn.svm import SVR | |
| from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet | |
| import xgboost as xgb | |
| import lightgbm as lgb | |
| from xgboost import XGBRegressor | |
| from lightgbm import LGBMRegressor | |
| # [模型訓練/評估/特徵選擇] | |
| from sklearn.decomposition import PCA | |
| from sklearn.model_selection import train_test_split, cross_val_score | |
| from sklearn.metrics import mean_squared_error, r2_score | |
| from sklearn.pipeline import make_pipeline | |
| from sklearn.preprocessing import StandardScaler | |
| from sklearn.multioutput import MultiOutputRegressor | |
| from sklearn.cross_decomposition import PLSRegression | |
| from sklearn.preprocessing import PolynomialFeatures | |
| # [可視化/統計分析] | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| import matplotlib.pyplot as plt | |
| from scipy.stats import skew, kurtosis, shapiro | |
| from scipy.interpolate import griddata | |
| import shap | |
| # [雜項工具/暫存檔] | |
| import tempfile | |
| import warnings | |
| warnings.filterwarnings("ignore") | |
| import io | |
| # [貝葉斯/自動化優化] | |
| from skopt import BayesSearchCV | |
| from skopt.space import Real, Integer, Categorical | |
| import optuna # ← 若要支援optuna,這裡一定要加 | |
| import optuna.visualization.matplotlib | |
| from scipy.optimize import minimize | |
| # [互動頁面樣式] | |
# Custom CSS injected into the Gradio app (via gr.Blocks(css=custom_css)).
# Styles the summary cards, primary buttons, tab highlighting, the fixed
# recommendation popup card, separators, accordions and the footer.
custom_css = """
/* 主卡片 */
/* 用於主要內容區塊(如自動摘要卡片、資訊框)美化,圓角+陰影 */
.gr-card {
background: #f7fafd;
border-radius: 18px;
box-shadow: 0 2px 8px #0002;
}
/* 按鈕樣式 */
/* 主要操作按鈕樣式:漸層、圓角、陰影、字體放大,滑鼠懸停有亮度與色調變化 */
.main-btn {
font-size: 1.14em;
padding: 10px 28px 11px 28px;
border-radius: 999px;
margin: 14px 0 8px 0;
background: linear-gradient(90deg, #5b8cff, #76e7ff 70%);
color: #fff;
font-weight: 600;
box-shadow: 0 2px 12px #3976d855;
border: none;
transition: .2s;
}
.main-btn:hover {
filter: brightness(1.08);
box-shadow: 0 2px 18px #5b8cff33;
background: linear-gradient(90deg, #2259c9 55%, #4fa7d9);
}
/* 全平台字體 */
/* 全域字型套用 Noto Sans JP(優先日系風格),備用 Segoe UI 和 Arial,維持專案一致性 */
.gradio-container {
font-family: 'Noto Sans JP', 'Segoe UI', Arial, sans-serif;
}
/* 區塊標題 */
/* 區塊或章節標題顯眼化,藍色、字重加粗 */
.section-title {
font-size: 1.26em;
font-weight: 700;
color: #3976d8;
margin-bottom: 6px;
}
/* Tab & Row 區塊間距 */
/* Tab、列區塊加上下間距,避免UI太擁擠 */
.tab-pane, .gr-row {
padding: 8px 0;
}
/* 推薦小卡 */
/* 固定右下角推薦訊息小卡,用於快速提醒或建議,帶有柔和背景、陰影 */
#recommend-card {
position: fixed; right: 28px; bottom: 28px; max-width: 360px;
background: #f8fbffde; border-radius: 13px; box-shadow: 0 2px 14px #5ab2fa29;
border-left: 5px solid #88a3e6; padding: 12px 18px 10px 14px; font-size: 1.07em;
z-index: 9999; color: #285078;
}
/* 分隔線 */
/* 水平線設計:加厚、色彩淡藍灰,分隔內容用 */
hr {
border: 0;
border-bottom: 2.5px solid #e4e8f0;
margin: 28px 0 22px 0;
}
/* Accordion 動畫 */
/* 摺疊區塊 summary 動畫與配色,展開時加上陰影強調 */
.accordion > summary {
transition: .25s;
background: #f4f8ff;
}
.accordion[open] > summary {
background: #cfeaff;
}
.accordion[open] {
box-shadow: 0 8px 24px #1ca7ec25;
}
/* Tab高亮 */
/* 當前選中 Tab 標籤高亮顯示,底色、字色、粗體 */
div[role="tab"][aria-selected="true"] {
background: #e3f1ff !important;
font-weight: bold;
color: #2675ff;
}
/* 頁腳 */
/* Footer字體顏色與大小(淡灰、較小字) */
#footer {
color: #888;
font-size: 0.98em;
}
"""
| # [UI 樣式工具/自動摘要卡片] | |
| # = 產生HTML卡片說明 = | |
def make_card(title, tips):
    """Render a bold title plus a bullet list of tips as an HTML card snippet."""
    items = "".join(f"<li>{str(tip)}</li>" for tip in tips)
    return f"<div class='gr-card'><b>{title}</b><ul style='margin:0 0 0 12px'>{items}</ul></div>"
| # = 根據圖形類型產生分圖說明卡片 = | |
def get_viz_desc(name):
    """Look up the explanation card (title + reading tips) for a plot type.

    Unknown names fall back to an empty card.
    """
    catalog = {
        "DoE Main": ("DoE設計分布圖", [
            "檢查設計點分布是否均勻覆蓋整個空間",
            "若集中/離散,代表參數區間或設計法可優化",
            "高維會以降維(PCA)檢視主變異結構"
        ]),
        "Heatmap": ("Heatmap(相關係數)", [
            "檢查所有數值變數的正負相關",
            "紅色:強正相關;藍色:強負相關",
            "相關係數>0.7為高度相關,< -0.7為強負相關"
        ]),
        "Pairplot": ("Pairplot(成對散點)", [
            "展示任兩變數間的散點型態",
            "斜線型=高相關,圓形=低相關",
            "可發現集群、離群或特定結構"
        ]),
        "Histogram": ("Histogram(直方圖)", [
            "單變數分布形態檢查",
            "偏態、長尾、極端值需注意",
            "單峰/多峰、常態/非對稱可判斷資料型態"
        ]),
        "Scatter Matrix": ("Scatter Matrix(全變數關聯)", [
            "類似Pairplot但一次顯示所有成對分布",
            "對角線顯示每欄分布直方圖",
            "可發現明顯群集、離群"
        ]),
        "PCA": ("PCA(主成分分布)", [
            "多維特徵壓縮到2D",
            "檢查主變異來源、潛在群集",
            "可輔助檢查是否有明顯離群"
        ]),
        "AI Predict": ("AI預測對比圖", [
            "預測y與實際y對比,點貼近對角線代表高精度",
            "偏離對角線代表模型誤差大,建議優化特徵或模型"
        ]),
        "Bayesian Optimization": ("貝葉斯優化", [
            "自動搜尋最佳參數組合,減少無效試驗",
            "可用於AI模型超參數、或實驗設計優化",
            "優化歷程圖:看最佳值逐步收斂"
        ])
    }
    title, tips = catalog.get(name, ("", []))
    return make_card(title, tips)
| # = 根據視覺化類型自動產生總結 = | |
def auto_conclude_viz(df, vtype):
    """Build an auto-generated 'AI conclusion' card for the chosen plot type.

    Heatmap: lists strongly correlated pairs (|corr| > 0.7).
    Histogram: flags heavily skewed numeric columns (|skew| > 2).
    PCA: reports variance explained by the first two components.
    Other types get a fixed reading hint.
    """
    findings = []
    if vtype == "Heatmap":
        corr = df.corr()
        upper_mask = np.triu(np.ones(corr.shape), 1).astype(bool)
        pairs = corr.where(upper_mask).stack()
        strong = pairs[abs(pairs) > 0.7]
        if strong.empty:
            findings.append("無明顯高度相關特徵")
        else:
            for (left, right), val in strong.items():
                findings.append(f"「{left}」與「{right}」高度相關 (corr={val:.2f})")
    elif vtype == "Histogram":
        for col in df.select_dtypes(include=np.number).columns:
            col_skew = skew(df[col].dropna())
            if abs(col_skew) > 2:
                findings.append(f"「{col}」極端偏態({col_skew:.2f})")
        if not findings:
            findings.append("欄位分布大致對稱")
    elif vtype == "PCA":
        explained = PCA(n_components=2).fit(df.values).explained_variance_ratio_.sum()
        findings.append(f"前2主成分共解釋 {explained*100:.1f}% 變異")
    elif vtype == "Pairplot":
        findings.append("請留意有無線性排列(高相關)或明顯群集/異常點")
    elif vtype == "Scatter Matrix":
        findings.append("集群或離群點可從圖中直接辨識")
    else:
        findings.append("資料檢查完成")
    return make_card("AI自動結論", findings)
| # = 產生AI回歸自動總結卡片 = | |
def auto_conclude_ai(y_test, y_pred, name):
    """Summarize one regression model's test-set performance as an HTML card."""
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    verdict = "模型表現佳" if r2 > 0.8 else "可進一步優化特徵/資料量"
    lines = [
        f"模型:{name}",
        f"測試集 RMSE={rmse:.3g}",
        f"R²={r2:.2f}",
        verdict,
    ]
    return make_card("AI自動結論", lines)
| # = 貝葉斯優化RMSE曲線自動解讀摘要 = | |
def auto_conclude_bayes_curve(rmse_curve, model_name=None):
    """Interpret a Bayesian-optimization RMSE history and return a Markdown summary.

    Args:
        rmse_curve: sequence of per-iteration RMSE values (must be non-empty).
        model_name: optional model label prepended to the summary as 【name】.

    Returns:
        Markdown string with the lowest RMSE, a coarse trend label and advice.
    """
    # numpy is imported at module level; the old function-local re-import was redundant.
    rmse_curve = np.asarray(rmse_curve)
    minv = np.min(rmse_curve)
    lastv = rmse_curve[-1]
    diff = np.ptp(rmse_curve)
    std = np.std(rmse_curve)
    # Classify the curve shape; rules are checked in order, first match wins.
    trend = "平穩"
    if np.allclose(rmse_curve, rmse_curve[0], atol=0.2 * std):
        trend = "幾乎無變化"
    elif rmse_curve[0] > minv and lastv > rmse_curve[0] and lastv > minv + std:
        trend = "尾端上升"
    elif np.argmin(rmse_curve) < len(rmse_curve) // 2:
        trend = "快速下降收斂"
    elif std > 0.2 * minv and diff > 0.3 * minv:
        trend = "波動起伏"
    if trend == "快速下降收斂":
        comment = "RMSE 隨迭代明顯下降,代表最佳化收斂,已找到較佳參數組合。"
    elif trend == "幾乎無變化":
        comment = "RMSE 變動極小,代表模型/資料難以藉由超參數優化提升。"
    elif trend == "尾端上升":
        comment = "最後幾點 RMSE 明顯上升,建議忽略尾端結果,以最低點作最佳選擇。"
    elif trend == "波動起伏":
        comment = "RMSE 震盪明顯,代表模型不穩定或參數空間設過寬,建議縮小搜尋區間。"
    else:
        comment = "RMSE 變動趨勢平穩,可依最低點選定最佳參數。"
    model_str = f"【{model_name}】" if model_name else ""
    return f"{model_str}最低RMSE:**{minv:.3f}**\n- 收斂型態:**{trend}**\n- 建議:{comment}\n"
| # [資料品質檢查/AutoML推薦] | |
| # = 自動偵測資料品質與欄位異常 = | |
def auto_data_quality_check(datafile):
    """Scan an uploaded CSV for data-quality problems and report them as HTML.

    Per column it flags: missing values, constant columns, and (numeric
    columns only) severe skew (|skew| > 2).

    Args:
        datafile: uploaded file object (``.name`` attribute) or a path/buffer
            readable by ``pd.read_csv``; ``None`` means nothing uploaded yet.

    Returns:
        HTML ``<ul>`` report, or a short Markdown message when no file is given.
    """
    if datafile is None:
        return "> 尚未上傳資料"
    df = pd.read_csv(datafile.name if hasattr(datafile, "name") else datafile)
    tips = []
    for col in df.columns:
        if df[col].isnull().sum() > 0:
            tips.append(f"「{col}」有缺值,建議補值或刪除")
        if df[col].nunique() == 1:
            tips.append(f"「{col}」為常數欄,建議刪除")
        if pd.api.types.is_numeric_dtype(df[col]):
            # A pandas Series always exposes .skew(); the old hasattr guard was dead code.
            sk = df[col].dropna().skew()
            if abs(sk) > 2:
                tips.append(f"「{col}」嚴重偏態(skew={sk:.2f}),建議標準化")
    if not tips:
        tips = ["資料品質良好,無明顯異常。"]
    return "<b>資料品質偵測:</b><ul style='margin:0 0 0 12px'>" + "".join(f"<li>{t}</li>" for t in tips) + "</ul>"
| # = 簡易AutoML,推薦最佳模型並產生程式碼 = | |
def automl_leaderboard(datafile):
    """Placeholder AutoML: randomly nominate a 'best' model and return sample code.

    NOTE: this is a simplified stub — a real implementation could compare
    multiple fitted models (TPOT / auto-sklearn etc.); `datafile` is unused.
    """
    candidates = ["XGBoost", "Random Forest", "LightGBM", "SVR"]
    best = np.random.choice(candidates)
    code = f"""# 範例BestModel
from xgboost import XGBRegressor
model = XGBRegressor(n_estimators=120, random_state=0)
model.fit(X_train, y_train)"""
    return f"最佳模型推薦:<b>{best}</b>", code
| # [DoE設計/推薦點生成] | |
| # = 檢查新點是否與已存在點重複(向量距離法)= | |
def is_close_to_existing(xrow, existing_X, tol=1e-4):
    """Return True when `xrow` lies within `tol` (per coordinate) of any existing point.

    `existing_X` may be empty, a single 1-D point, or a 2-D array of points.
    """
    pts = np.asarray(existing_X)
    if pts.size == 0:
        return False
    delta = np.abs(pts - np.array(xrow))
    # A 1-D delta means a single stored point; otherwise test each row.
    if delta.ndim == 1:
        return np.all(delta < tol)
    return np.any(np.all(delta < tol, axis=1))
| # = 多模型AI推薦下一批DoE設計點(探索/最大化等模式)= | |
def suggest_next_doe_points_batch(
    datafile,
    model_types=("Random Forest", "XGBoost", "LightGBM", "SVR"),
    mode="最大化", n_points=3,
    exclude_existing=True,
    random_seed=42,
    max_attempts_factor=30,
    return_df=False
):
    """Recommend the next batch of DoE points via an ensemble of surrogate models.

    All but the last CSV column are treated as factors (X); the last is the
    response (y).  Candidate points are found by repeatedly optimizing an
    ensemble objective from random starting points inside the observed bounds.

    Args:
        datafile: uploaded CSV (file object or path); None -> placeholder output.
        model_types: surrogate model names to train (fixed immutable default —
            the old mutable list default was an anti-pattern).
        mode: "最大化" / "最小化" / "不確定性" objective; unknown modes maximize.
        n_points: number of candidate points requested.
        exclude_existing: skip candidates nearly identical to existing/found points.
        random_seed: seeds both the models and the random restarts.
        max_attempts_factor: cap = n_points * factor optimization restarts.
        return_df: True -> DataFrame (for CSV download); False -> HTML summary.
    """
    if datafile is None:
        if return_df:
            return pd.DataFrame()
        return "> 尚未上傳資料"
    df = pd.read_csv(datafile.name if hasattr(datafile, "name") else datafile)
    X, y = df.iloc[:, :-1], df.iloc[:, -1]
    colnames = X.columns
    # Train one surrogate per requested model type.
    models = []
    for t in model_types:
        if t == "Random Forest":
            models.append(RandomForestRegressor(n_estimators=120, random_state=random_seed))
        elif t == "XGBoost":
            models.append(XGBRegressor(n_estimators=120, random_state=random_seed, verbosity=0))
        elif t == "LightGBM":
            models.append(LGBMRegressor(n_estimators=120, random_state=random_seed))
        elif t == "SVR":
            models.append(make_pipeline(StandardScaler(), SVR()))
    for m in models:
        m.fit(X, y)
    # Search box = observed min/max of each factor.
    bounds = [(X[c].min(), X[c].max()) for c in colnames]

    def ensemble_pred(xrow):
        # Mean across surrogates; their spread serves as an uncertainty proxy.
        preds = [m.predict(np.array(xrow).reshape(1, -1))[0] for m in models]
        return np.mean(preds), np.std(preds)

    if mode == "最大化":
        def obj(x): return -ensemble_pred(x)[0]
    elif mode == "最小化":
        def obj(x): return ensemble_pred(x)[0]
    elif mode == "不確定性":
        def obj(x): return -ensemble_pred(x)[1]
    else:
        def obj(x): return -ensemble_pred(x)[0]
    found_points, preds_mean, preds_std = [], [], []
    attempts = 0
    max_attempts = n_points * max_attempts_factor
    np.random.seed(random_seed)
    while len(found_points) < n_points and attempts < max_attempts:
        x0 = np.random.uniform([b[0] for b in bounds], [b[1] for b in bounds])
        res = minimize(obj, x0, bounds=bounds)
        best_x = res.x
        # Skip near-duplicates of existing data or already-found candidates.
        exist = False
        if exclude_existing:
            if is_close_to_existing(best_x, X.values) or is_close_to_existing(best_x, np.array(found_points)):
                exist = True
        if exist:
            attempts += 1
            continue
        found_points.append(best_x)
        mean_pred, std_pred = ensemble_pred(best_x)
        preds_mean.append(mean_pred)
        preds_std.append(std_pred)
        attempts += 1
    if not found_points:
        if return_df:
            return pd.DataFrame()
        return "> 無法自動產生新點(參數範圍或步階過細、模型表現過於平坦)"
    # == DataFrame format (for CSV download) ==
    if return_df:
        df_points = pd.DataFrame(found_points, columns=colnames)
        df_points["y"] = ""  # left blank for the user to fill in after experiments
        df_points["模型平均預測"] = preds_mean
        df_points["不確定性(std)"] = preds_std
        df_points["推薦策略"] = mode
        return df_points
    # == HTML/Markdown text output ==
    if mode == "最大化":
        best_idx = int(np.argmax(preds_mean))
    elif mode == "最小化":
        best_idx = int(np.argmin(preds_mean))
    elif mode == "不確定性":
        best_idx = int(np.argmax(preds_std))
    else:
        best_idx = 0
    out = "<b>推薦次一輪DoE設計點(Top N):</b><br>"
    for i, (best_x, mu, std) in enumerate(zip(found_points, preds_mean, preds_std), 1):
        flag = " 🏆 <b>【最推薦】</b>" if (i - 1) == best_idx else ""
        out += f"<b>候選{i}{flag}:</b>"
        out += "<ul style='margin-top:0;margin-bottom:6px;'>"
        for c, v in zip(colnames, best_x):
            out += f"<li>{c} = {v:.3f}</li>"
        out += f"<li>平均預測產率:<b>{mu:.3f}</b></li><li>不確定性(std):{std:.3f}</li>"
        if mode == "不確定性":
            out += "<li><i>(此點模型間意見分歧最大)</i></li>"
        out += "</ul>"
    return out
| # = 智能混合策略推薦DoE新點(最大化、最小化、不確定性、隨機)= | |
def suggest_mixed_doe_points(
    datafile,
    model_types=("Random Forest", "XGBoost", "LightGBM", "SVR"),
    n_total=4,  # total number of recommended points
    exclude_existing=True,
    random_seed=2025,
    return_df=False
):
    """Recommend a mixed batch of DoE points: maximize, minimize, most-uncertain, then random fill.

    Strategy: one exploit-maximize point, one exploit-minimize point, one
    exploration point of maximal ensemble disagreement, and random in-bounds
    points until `n_total` candidates are collected.

    Args:
        datafile: uploaded CSV (file object or path); None -> placeholder output.
        model_types: surrogate model names (immutable default — the old
            mutable list default was an anti-pattern).
        n_total: total points to recommend.
        exclude_existing: skip candidates nearly identical to existing/found points.
        random_seed: seeds models and random draws.
        return_df: True -> DataFrame for CSV download; False -> HTML summary.
    """
    if datafile is None:
        if return_df:
            return pd.DataFrame()
        return "> 尚未上傳資料"
    df = pd.read_csv(datafile.name if hasattr(datafile, "name") else datafile)
    X, y = df.iloc[:, :-1], df.iloc[:, -1]
    colnames = X.columns
    # Train the surrogate ensemble.
    models = []
    for t in model_types:
        if t == "Random Forest":
            models.append(RandomForestRegressor(n_estimators=120, random_state=random_seed))
        elif t == "XGBoost":
            models.append(XGBRegressor(n_estimators=120, random_state=random_seed, verbosity=0))
        elif t == "LightGBM":
            models.append(LGBMRegressor(n_estimators=120, random_state=random_seed))
        elif t == "SVR":
            models.append(make_pipeline(StandardScaler(), SVR()))
    for m in models:
        m.fit(X, y)
    bounds = [(X[c].min(), X[c].max()) for c in colnames]

    def ensemble_pred(xrow):
        preds = [m.predict(np.array(xrow).reshape(1, -1))[0] for m in models]
        return np.mean(preds), np.std(preds)

    def obj_max(x): return -ensemble_pred(x)[0]
    def obj_min(x): return ensemble_pred(x)[0]
    def obj_uncert(x): return -ensemble_pred(x)[1]
    np.random.seed(random_seed)
    found_points, point_types, mu_list, std_list = [], [], [], []
    attempts = 0
    max_attempts = n_total * 30
    # 1. exploit: maximize predicted response
    res1 = minimize(obj_max, np.random.uniform([b[0] for b in bounds], [b[1] for b in bounds]), bounds=bounds)
    x1 = res1.x
    if not (exclude_existing and is_close_to_existing(x1, X.values)):
        found_points.append(x1)
        point_types.append("最大化(exploit)")
        mu, std = ensemble_pred(x1)
        mu_list.append(mu)
        std_list.append(std)
    # 2. exploit: minimize predicted response
    res2 = minimize(obj_min, np.random.uniform([b[0] for b in bounds], [b[1] for b in bounds]), bounds=bounds)
    x2 = res2.x
    if not (exclude_existing and (is_close_to_existing(x2, X.values) or is_close_to_existing(x2, np.array(found_points)))):
        found_points.append(x2)
        point_types.append("最小化(exploit)")
        mu, std = ensemble_pred(x2)
        mu_list.append(mu)
        std_list.append(std)
    # 3. exploration: maximize ensemble disagreement
    res3 = minimize(obj_uncert, np.random.uniform([b[0] for b in bounds], [b[1] for b in bounds]), bounds=bounds)
    x3 = res3.x
    if not (exclude_existing and (is_close_to_existing(x3, X.values) or is_close_to_existing(x3, np.array(found_points)))):
        found_points.append(x3)
        point_types.append("最大不確定性(exploration)")
        mu, std = ensemble_pred(x3)
        mu_list.append(mu)
        std_list.append(std)
    # 4. random exploration to fill up to n_total
    while len(found_points) < n_total and attempts < max_attempts:
        x0 = np.random.uniform([b[0] for b in bounds], [b[1] for b in bounds])
        if exclude_existing and (is_close_to_existing(x0, X.values) or is_close_to_existing(x0, np.array(found_points))):
            attempts += 1
            continue
        found_points.append(x0)
        point_types.append("隨機探索")
        mu, std = ensemble_pred(x0)
        mu_list.append(mu)
        std_list.append(std)
        attempts += 1
    # === DataFrame for CSV download ===
    if return_df:
        df_points = pd.DataFrame(found_points, columns=colnames)
        df_points["y"] = ""  # user fills in measured responses later
        return df_points
    # === HTML/Markdown display ===
    out = "<b>智能推薦多重新DoE設計點(混合策略)</b><br>"
    for i, (best_x, mu, std, label) in enumerate(zip(found_points, mu_list, std_list, point_types), 1):
        flag = " 🏆 <b>【最推薦】</b>" if label.startswith("最大化") else ""
        out += f"<b>候選{i}{flag}:</b><i>{label}</i>"
        out += "<ul style='margin-top:0;margin-bottom:6px;'>"
        for c, v in zip(colnames, best_x):
            out += f"<li>{c} = {v:.3f}</li>"
        out += f"<li>平均預測產率:<b>{mu:.3f}</b></li><li>不確定性(std):{std:.3f}</li>"
        if label.startswith("最大不確定性"):
            out += "<li><i>(模型分歧最大,探索新區域)</i></li>"
        out += "</ul>"
    return out
| # = 組合各推薦策略生成新點,合併成單一下載檔 = | |
def make_recommended_points(file, models, modes, n, exclude):
    """Run the selected recommendation strategies and bundle results into one CSV.

    Args:
        file: uploaded data CSV (or None).
        models: surrogate model names to use.
        modes: list of strategy names ("探索型推薦" / "混合策略推薦").
        n: points per strategy (coerced to int).
        exclude: whether to exclude near-duplicate points.

    Returns:
        (HTML summary joined per strategy, path of a temp CSV with all points).
    """
    outs = []
    df_list = []
    n = int(n)
    if file is None or not modes or not models:
        # Still hand back an (empty) CSV so the download output stays valid.
        # Write through the handle and close it (the old code leaked the handle
        # and used plain utf-8, inconsistent with the utf-8-sig used below).
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode="w", encoding="utf-8-sig")
        pd.DataFrame().to_csv(tmp, index=False)
        tmp.close()
        return "請確認已上傳資料、選模式與模型", tmp.name
    for mode in modes:
        if mode == "探索型推薦":
            out = suggest_next_doe_points_batch(file, models, "最大化", n, exclude, return_df=False)
            df = suggest_next_doe_points_batch(file, models, "最大化", n, exclude, return_df=True)
            outs.append(f"<b>【探索型推薦】</b><br>{out}")
            if isinstance(df, pd.DataFrame) and not df.empty:
                df_list.append(df)
        elif mode == "混合策略推薦":
            out = suggest_mixed_doe_points(file, models, n, exclude, return_df=False)
            df = suggest_mixed_doe_points(file, models, n, exclude, return_df=True)
            outs.append(f"<b>【混合策略推薦】</b><br>{out}")
            if isinstance(df, pd.DataFrame) and not df.empty:
                df_list.append(df)
    if df_list:
        all_df = pd.concat(df_list, ignore_index=True).drop_duplicates()
    else:
        all_df = pd.DataFrame()
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode="w", encoding="utf-8-sig")
    all_df.to_csv(tmp, index=False)
    tmp.close()
    return "<br><hr>".join(outs), tmp.name  # two outputs: html, csv path
| # = 下載推薦DoE點組合(CSV)= | |
def download_recommended_points(file, models, mode, n, exclude):
    """Generate recommended DoE points for one strategy and return a CSV path.

    "混合策略推薦" uses the mixed-strategy recommender; any other mode is
    forwarded to the exploration-style batch recommender.  Returns None when
    no points could be generated.
    """
    if mode == "混合策略推薦":
        df_points = suggest_mixed_doe_points(file, models, int(n), exclude, return_df=True)
    else:
        # default: exploration-style recommendation
        df_points = suggest_next_doe_points_batch(file, models, mode, int(n), exclude, return_df=True)
    if df_points is None or len(df_points) == 0:
        return None
    # Write through the open handle: the old code wrote via f.name while the
    # handle was still open, which fails on platforms with exclusive file locks.
    with tempfile.NamedTemporaryFile(suffix=".csv", mode="w", delete=False, encoding="utf-8-sig") as f:
        df_points.to_csv(f, index=False)
    return f.name
| # [資料合併/CSV工具] | |
| # = 兩份CSV資料自動合併、去重、優先保留已填y者 = | |
def merge_csvs(base_csv, new_csv):
    """Merge two DoE CSVs, de-duplicating design points.

    Rows whose target column ``y`` is already filled in win over blank-y rows:
    a descending sort puts NaN last, so ``drop_duplicates(keep="first")`` keeps
    the measured row for each unique combination of non-y columns.

    Args:
        base_csv / new_csv: uploaded file objects (``.name``) or paths/buffers.

    Returns:
        Path of the merged temp CSV, or None when either input is missing.
    """
    if base_csv is None or new_csv is None:
        return None
    df1 = pd.read_csv(base_csv.name if hasattr(base_csv, "name") else base_csv)
    df2 = pd.read_csv(new_csv.name if hasattr(new_csv, "name") else new_csv)
    # Every non-y column identifies a design point.
    key_cols = [c for c in df1.columns if c != "y"]
    merged = pd.concat([df1, df2], ignore_index=True)
    # Filled-y rows sort first (NaN last), so duplicates keep the measured row.
    merged = merged.sort_values(by=["y"], ascending=[False]).drop_duplicates(subset=key_cols, keep="first")
    merged = merged.reset_index(drop=True)
    # Write through the open handle (old code wrote via tmp.name while open),
    # then close it; the redundant function-local imports were removed.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode="w", encoding="utf-8-sig")
    merged.to_csv(tmp, index=False)
    tmp.close()
    return tmp.name
| # [標準DoE設計/分布比較] | |
| # = 檢查輸入參數列是否有效 = | |
def is_valid_row(row):
    """Validate one parameter-table row of shape (name, low, high, step).

    A row is valid when it is a list/tuple of at least 4 cells, the name is
    non-empty after stripping, and cells 1-3 parse as floats.
    """
    if not isinstance(row, (list, tuple)) or len(row) < 4:
        return False
    try:
        if not str(row[0]).strip():
            return False
        for cell in row[1:4]:
            float(cell)
        return True
    except Exception:
        return False
| # = 產生指定類型的標準DoE設計點(LHS/Sobol/Halton/Uniform)= | |
def gen_design(design_type, n_params, n_samples, param_lows, param_highs, param_steps, seed):
    """Generate a space-filling design (LHS/Sobol/Halton/Uniform) mapped to real ranges.

    Unit-cube samples are scaled to [low, high] per parameter; a positive step
    snaps values onto that grid, otherwise values are rounded to the decimal
    count implied by the textual form of `step`.  Results are clipped to bounds.
    """
    # A blank or zero seed means "unseeded".
    seed_val = None
    if seed is not None and str(seed).strip() != "" and int(seed) != 0:
        seed_val = int(seed)
    if design_type == "LHS":
        if seed_val is not None:
            np.random.seed(seed_val)
        unit = lhs(n_params, samples=n_samples, criterion='maximin')
    elif design_type == "Sobol":
        unit = Sobol(d=n_params, scramble=True, seed=seed_val).random(n_samples)
    elif design_type == "Halton":
        unit = Halton(d=n_params, scramble=True, seed=seed_val).random(n_samples)
    elif design_type == "Uniform":
        if seed_val is not None:
            np.random.seed(seed_val)
        unit = np.random.rand(n_samples, n_params)
    else:
        raise ValueError("Unknown SFD type!")
    scaled = np.zeros_like(unit)
    for j, (low, high, step) in enumerate(zip(param_lows, param_highs, param_steps)):
        col = unit[:, j] * (high - low) + low
        if step > 0:
            # snap onto the step grid anchored at `low`
            col = np.round((col - low) / step) * step + low
        else:
            decimals = str(step)[::-1].find('.')
            col = np.round(col, decimals)
        scaled[:, j] = np.clip(col, low, high)
    return pd.DataFrame(scaled)
| # = 2D參數分布圖(Plotly)= | |
def plot_scatter_2d(df, title):
    """2-D Plotly scatter of the first two columns."""
    x_col, y_col = df.columns[0], df.columns[1]
    return px.scatter(df, x=x_col, y=y_col, title=title)
| # = 3D參數分布圖(Plotly)= | |
def plot_scatter_3d(df, title):
    """3-D Plotly scatter of the first three columns."""
    cols = df.columns
    return px.scatter_3d(df, x=cols[0], y=cols[1], z=cols[2], title=title)
| # = 多維參數成對散點圖(Plotly)= | |
def plot_pairplot(df, title):
    """Pairwise scatter-matrix (Plotly) of all columns."""
    fig = px.scatter_matrix(df, title=title)
    return fig
| # = PCA主成分降維分布圖(Plotly)= | |
def plot_pca(df, title):
    """Project the data onto 2 principal components and scatter them (Plotly)."""
    components = PCA(n_components=2).fit_transform(df.values)
    frame = pd.DataFrame(components, columns=['PCA1', 'PCA2'])
    return px.scatter(frame, x='PCA1', y='PCA2', title=title + " (PCA降維)")
| # = 依參數設定產生所有主流程設計法(四種)並比較分布 = | |
def compare_all_designs(param_table, n_samples, seed):
    """Generate all four SFD designs from the parameter table and compare them.

    For each of LHS/Sobol/Halton/Uniform, appends five outputs: the design
    DataFrame, a distribution figure (2D/3D/pairplot/PCA depending on the
    number of parameters), a temp CSV path, the description card and the
    auto-conclusion card.  Returns a 20-slot tuple of placeholders on bad input.
    """
    design_types = ["LHS", "Sobol", "Halton", "Uniform"]
    results = []
    rows = param_table.values.tolist() if isinstance(param_table, pd.DataFrame) else param_table
    names, lows, highs, steps = [], [], [], []
    for row in rows:
        if not is_valid_row(row):
            continue
        try:
            names.append(str(row[0]).strip())
            lows.append(float(row[1]))
            highs.append(float(row[2]))
            steps.append(float(row[3]))
        except Exception:
            continue
    if not names:
        return tuple([pd.DataFrame({"提醒": ["請正確輸入至少一列參數"]})] + [None] * 19)
    dims = len(names)
    for design_name in design_types:
        frame = gen_design(design_name, dims, int(n_samples), lows, highs, steps, seed)
        frame.columns = names
        # Pick the visualization that suits the dimensionality.
        if dims == 2:
            fig = plot_scatter_2d(frame, design_name + " 分布圖")
        elif dims == 3:
            fig = plot_scatter_3d(frame, design_name + " 分布圖")
        elif 4 <= dims <= 8:
            fig = plot_pairplot(frame, design_name + " Pairplot")
        else:
            fig = plot_pca(frame, design_name + " PCA")
        with tempfile.NamedTemporaryFile(delete=False, suffix=f'_{design_name}_design.csv', mode='w', encoding='utf-8-sig') as handle:
            frame.to_csv(handle, index=False)
        results.extend([frame, fig, handle.name, get_viz_desc("DoE Main"), auto_conclude_viz(frame, "DoE Main")])
    return results
| # [進階DoE: Box-Behnken/CCD設計] | |
| # = 將標準化DoE設計矩陣轉換為實際參數 = | |
def doe_normal_to_actual(doe_matrix, param_info):
    """Map a coded DoE matrix (−1…+1 levels) onto actual parameter ranges.

    Each column i is scaled as center + value * half_range using the
    (name, min, max, ...) entries of `param_info`.  Values are rounded to 6
    decimal places.
    """
    coded = pd.DataFrame(doe_matrix)
    actual = pd.DataFrame()
    for i, info in enumerate(param_info):
        name = info[0]
        pmin, pmax = float(info[1]), float(info[2])
        center = (pmin + pmax) / 2
        half_span = (pmax - pmin) / 2
        actual[name] = center + coded.iloc[:, i].values * half_span
    return actual.round(6)
| # = 產生Box-Behnken或CCD設計法的標準化/實際參數表 = | |
def advanced_doe_with_mapping(param_table, design_type):
    """Build a Box-Behnken or CCD design; return coded + actual tables and CSV paths.

    Args:
        param_table: DataFrame or list of rows (name, low, high, step).
        design_type: "Box-Behnken" or "CCD".

    Returns:
        (coded-design DataFrame, actual-parameter DataFrame,
         coded CSV path, actual CSV path) — or warning DataFrames and None
        paths when the input is invalid.
    """
    import os  # needed to close the raw descriptors from tempfile.mkstemp
    param_list = []
    values = param_table.values.tolist() if isinstance(param_table, pd.DataFrame) else param_table
    for row in values:
        try:
            if not str(row[0]).strip():
                continue
            param_list.append([row[0], float(row[1]), float(row[2]), float(row[3])])
        except Exception:
            continue
    n_param = len(param_list)
    if n_param < 2:
        return (pd.DataFrame({"提醒": ["請至少輸入2個參數"]}),
                pd.DataFrame(),
                None, None)
    # Build the coded design matrix.
    if design_type == "Box-Behnken":
        mat = bbdesign(n_param, center=1)
    elif design_type == "CCD":
        mat = ccdesign(n_param, center=(1, 1), face='ccc')
    else:
        return (pd.DataFrame({"提醒": ["不支援的設計法"]}),
                pd.DataFrame(),
                None, None)
    # Coded (standardized) table.
    colnames = [f"X{i+1}" for i in range(n_param)]
    df_std = pd.DataFrame(mat, columns=colnames)
    # Actual-parameter table.
    df_real = doe_normal_to_actual(mat, param_list)
    # Temp CSVs: mkstemp returns an OPEN file descriptor — close it so it
    # doesn't leak (the old code never closed std_fd/real_fd).
    std_fd, std_path = tempfile.mkstemp(suffix="_std.csv")
    os.close(std_fd)
    df_std.to_csv(std_path, index=False)
    real_fd, real_path = tempfile.mkstemp(suffix="_real.csv")
    os.close(real_fd)
    df_real.to_csv(real_path, index=False)
    return df_std, df_real, std_path, real_path
| # [AI建模/特徵重要性/SHAP] | |
| # = 獲取模型特徵重要性圖與摘要 = | |
def get_feature_importance(model, feature_names):
    """Build a bar chart + one-line summary from a model's feature_importances_.

    Returns (None, message) for models without that attribute.
    """
    if not hasattr(model, "feature_importances_"):
        return None, "此模型無 feature_importances_"
    scores = model.feature_importances_
    order = np.argsort(scores)[::-1]  # most important first
    fig = px.bar(
        x=[feature_names[i] for i in order],
        y=scores[order],
        orientation='v', title="特徵重要性(Feature Importance)",
        labels={"x": "特徵", "y": "重要性"}
    )
    leaders = ", ".join([feature_names[i] for i in order[:3]])
    summary = f"最重要特徵前三名:{leaders}"
    return fig, summary
| # = 產生SHAP特徵解釋圖像檔 = | |
def get_shap_summary(model, X, feature_names):
    """Render a SHAP summary plot for a tree-based model and return the PNG path.

    Args:
        model: fitted tree model accepted by shap.TreeExplainer.
        X: feature matrix to explain.
        feature_names: column labels for the plot.
    """
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X)
    plt.figure(figsize=(7, 4))
    shap.summary_plot(shap_values, X, feature_names=feature_names, show=False)
    buf = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
    plt.tight_layout()
    plt.savefig(buf, format="png", bbox_inches='tight')
    plt.close()
    # Close the handle so it doesn't leak; delete=False keeps the file on disk.
    buf.close()
    return buf.name
| # = 訓練多模型、產生預測與特徵重要性/SHAP圖,返回各指標 = | |
def train_and_predict_with_importance(datafile, algos, test_ratio, show_importance=True, show_shap=False):
    """Train the selected regressors on an uploaded CSV and evaluate on a test split.

    The last CSV column is treated as the target; all others are features.

    Args:
        datafile: uploaded file object (``.name``) or path readable by pd.read_csv.
        algos: list of algorithm display names; unknown names are skipped.
        test_ratio: fraction of rows held out for the test split.
        show_importance: also build a feature-importance figure for the FIRST
            model that exposes ``feature_importances_``.
        show_shap: also render a SHAP summary image for such a model.

    Returns:
        (prediction scatter fig, metrics DataFrame, viz description card,
         joined per-model conclusion cards, feature-importance fig or None,
         SHAP image path or None)
    """
    # Guard: nothing to do without data and at least one algorithm.
    if datafile is None or algos is None or len(algos) == 0:
        return None, pd.DataFrame({"提醒": ["請上傳DoE資料並選擇演算法"]}), "", "", None, ""
    df = pd.read_csv(datafile.name if hasattr(datafile, "name") else datafile)
    X = df.iloc[:, :-1].values
    y = df.iloc[:, -1].values
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_ratio, random_state=42)
    feature_names = list(df.columns[:-1])
    results = []
    y_pred_dict = {}
    outlines = []
    feature_fig = None
    # NOTE(review): feature_summary is assigned below but never returned — confirm intended.
    feature_summary = ""
    shap_img = None
    for algo in algos:
        try:
            # Standardizing pipelines keep scale-sensitive models stable on small datasets.
            if algo == "Random Forest":
                model = RandomForestRegressor(n_estimators=150, random_state=0)
            elif algo == "XGBoost":
                model = xgb.XGBRegressor(n_estimators=120, random_state=0, verbosity=0)
            elif algo == "LightGBM":
                model = lgb.LGBMRegressor(n_estimators=120, random_state=0)
            elif algo == "SVR":
                model = make_pipeline(StandardScaler(), SVR())
            elif algo == "Linear Regression":
                model = make_pipeline(StandardScaler(), LinearRegression())
            elif algo == "Lasso":
                model = make_pipeline(StandardScaler(), Lasso(alpha=0.0001, max_iter=5000))
            elif algo == "Ridge":
                model = make_pipeline(StandardScaler(), Ridge())
            elif algo == "ElasticNet":
                model = make_pipeline(StandardScaler(), ElasticNet(alpha=0.0001))
            else:
                continue
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            # Guard against a scalar prediction so the metric code can assume arrays.
            if np.isscalar(y_pred):
                y_pred = np.full_like(y_test, y_pred)
            rmse = np.sqrt(mean_squared_error(y_test, y_pred))
            r2 = r2_score(y_test, y_pred)
            results.append({
                "模型": algo,
                "測試RMSE": rmse,
                "測試R²": r2,
                "訓練數": len(X_train),
                "測試數": len(X_test),
            })
            y_pred_dict[algo] = y_pred
            outlines.append(auto_conclude_ai(y_test, y_pred, algo))
            # Only the first model exposing feature_importances_ feeds the
            # importance figure / SHAP image (pipelines' Bunch lacks the attr).
            if show_importance and feature_fig is None and hasattr(getattr(model, "named_steps", model), "feature_importances_"):
                base_model = model.named_steps.get(list(model.named_steps)[-1], model) if hasattr(model, "named_steps") else model
                feature_fig, feature_summary = get_feature_importance(base_model, feature_names)
            if show_shap and shap_img is None and hasattr(getattr(model, "named_steps", model), "feature_importances_"):
                base_model = model.named_steps.get(list(model.named_steps)[-1], model) if hasattr(model, "named_steps") else model
                shap_img = get_shap_summary(base_model, X_test, feature_names)
        except Exception as e:
            print(f"模型 {algo} 失敗: {e}")
            continue
    res_df = pd.DataFrame(results)
    # Fixed per-model colors/markers keep repeated runs visually consistent.
    model_colors = {
        "Random Forest": "#7c82f6", "XGBoost": "#ff686b", "LightGBM": "#54b984", "SVR": "#7e4a99",
        "Linear Regression": "#229aff", "Lasso": "#f8d90f", "Ridge": "#9edafe", "ElasticNet": "#f9a15b"
    }
    model_markers = {
        "Random Forest": "circle", "XGBoost": "diamond", "LightGBM": "triangle-up", "SVR": "square",
        "Linear Regression": "star", "Lasso": "cross", "Ridge": "hexagon", "ElasticNet": "x"
    }
    fig = px.scatter()
    for algo, y_pred in y_pred_dict.items():
        # Plot only predictions that are valid and align with the test set.
        if y_pred is not None and len(y_pred) == len(y_test) and not np.isnan(y_pred).all():
            fig.add_scatter(
                x=y_test, y=y_pred, mode='markers', name=algo,
                marker=dict(
                    size=13, color=model_colors.get(algo, "#888"),
                    symbol=model_markers.get(algo, "circle"),
                    line=dict(width=1.5, color="#222")
                ),
                showlegend=True
            )
    minv, maxv = np.min(y_test), np.max(y_test)
    # Dashed identity line: points on it are perfect predictions.
    fig.add_scatter(
        x=[minv, maxv], y=[minv, maxv], mode='lines', name='Ideal',
        line=dict(dash='dash', color='black'), showlegend=True
    )
    fig.update_layout(
        title="Test Set Prediction(預測/實際)",
        xaxis_title="True Output", yaxis_title="Predicted",
        legend=dict(font=dict(size=17)),
        margin=dict(l=40, r=20, t=60, b=40)
    )
    fig.update_yaxes(scaleanchor="x", scaleratio=1)
    return fig, res_df, get_viz_desc("AI Predict"), "<br>".join(outlines), feature_fig, shap_img
| # [多圖資料視覺化/自動建議卡片] | |
| # = 產生多種資料視覺化圖與摘要(最多五種)= | |
def multi_viz(file, plot_types):
    """Build up to five (figure, description, summary) triples for the UI.

    Always returns a flat list of exactly 15 values, interleaved as
    [fig1, desc1, sum1, fig2, desc2, sum2, ...]; unused slots are padded
    with (None, "", ""). Each plot type is rendered in its own try block
    so one failure does not blank the others.
    """
    figs, descs, sums = [], [], []
    # Guard: no file or no plot selection -> 15 empty outputs
    if file is None or not plot_types:
        return [None, "", ""] * 5
    df = pd.read_csv(file.name if hasattr(file, "name") else file)
    # Correlation and PCA only make sense on numeric columns; pandas>=2
    # raises on DataFrame.corr() over non-numeric data, so pre-select them.
    num_df = df.select_dtypes(include=[np.number])
    plot_types = plot_types[:5]  # render at most five plot kinds
    for t in plot_types:
        fig, desc, summ = None, "", ""
        try:
            if t == "Heatmap":
                fig = px.imshow(num_df.corr(), text_auto=True, title="相關係數Heatmap")
                desc = get_viz_desc("Heatmap")
                summ = auto_conclude_viz(df, "Heatmap")
            elif t == "Pairplot":
                fig = px.scatter_matrix(df, title="資料 Pairplot")
                desc = get_viz_desc("Pairplot")
                summ = auto_conclude_viz(df, "Pairplot")
            elif t == "Histogram":
                fig = px.histogram(df, nbins=10, title="資料 Histogram")
                desc = get_viz_desc("Histogram")
                summ = auto_conclude_viz(df, "Histogram")
            elif t == "Scatter Matrix":
                fig = px.scatter_matrix(df, title="Scatter Matrix")
                desc = get_viz_desc("Scatter Matrix")
                summ = auto_conclude_viz(df, "Scatter Matrix")
            elif t == "PCA":
                # PCA on numeric columns only (raw df.values fails on strings)
                X = num_df.values
                pca = PCA(n_components=2)
                X_pca = pca.fit_transform(X)
                df_pca = pd.DataFrame(X_pca, columns=['PCA1', 'PCA2'])
                fig = px.scatter(df_pca, x='PCA1', y='PCA2', title="PCA降維")
                desc = get_viz_desc("PCA")
                summ = auto_conclude_viz(df, "PCA")
        except Exception as e:
            print(f"[multi_viz error]: {t}", e)
            fig, desc, summ = None, "", ""
        figs.append(fig)
        descs.append(desc)
        sums.append(summ)
    # Pad to exactly five triples
    while len(figs) < 5:
        figs.append(None)
        descs.append("")
        sums.append("")
    # Interleave the three lists into [fig, desc, sum, ...] (always 15 items;
    # the previous debug print and `assert` validation were removed — assert
    # is stripped under -O and the construction above guarantees the length)
    outs = []
    for i in range(5):
        outs.extend([figs[i], descs[i], sums[i]])
    return outs
| # = 根據資料欄位自動產生系統建議 = | |
def auto_recommendation(file):
    """Scan an uploaded CSV and return up to five data-quality tips as HTML.

    Per-column checks: missing values, constant columns, strong skew
    (numeric columns only); then flags highly correlated column pairs.
    Returns an HTML `<ul>` snippet; a single "all good" tip when nothing
    was flagged, and "" when no file was provided.
    """
    # Guard: callers may fire this before any file is uploaded
    if file is None:
        return ""
    df = pd.read_csv(file.name if hasattr(file, "name") else file)
    tips = []
    for col in df.columns:
        if df[col].isnull().sum() > 0:
            tips.append(f"「{col}」有缺值,建議補值或刪除")
        if df[col].nunique() == 1:
            tips.append(f"「{col}」為常數欄,建議刪除")
        if pd.api.types.is_numeric_dtype(df[col]):
            sk = skew(df[col].dropna())
            if abs(sk) > 2:  # |skew| > 2 is treated as severely skewed
                tips.append(f"「{col}」嚴重偏態(skew={sk:.2f}),建議標準化")
    # numeric_only=True: pandas>=2 raises on corr() over non-numeric columns
    cor = df.corr(numeric_only=True).abs()
    cols = list(cor.columns)
    # Walk only the upper triangle so each correlated pair is reported once
    # (previously both (c1,c2) and (c2,c1) produced duplicate tips)
    for i, c1 in enumerate(cols):
        for c2 in cols[i + 1:]:
            if cor.loc[c1, c2] > 0.8:
                tips.append(f"「{c1}」與「{c2}」高度相關,建議後續特徵選擇")
    if not tips:
        tips = ["資料品質良好,無明顯異常。"]
    else:
        tips = tips[:5]  # keep the card short
    return "<b>系統建議:</b><ul style='margin:0 0 0 12px'>" + "".join([f"<li>{t}</li>" for t in tips]) + "</ul>"
| # = 多圖視覺化+自動建議卡片綁定 = | |
def multi_viz_and_recommend(file, plot_types):
    """Combine multi_viz's 15 interleaved outputs with the recommendation
    card, yielding the 16-tuple the UI binding expects.

    (Leftover debug prints of the intermediate types/lengths were removed.)
    """
    vis = multi_viz(file, plot_types)          # always exactly 15 values
    recomm = auto_recommendation(file)         # HTML card, value #16
    return (*vis, recomm)
with gr.Blocks() as demo:
    upfile2 = gr.File(label="上傳檔案")
    plot_select = gr.CheckboxGroup(
        choices=["Heatmap", "Pairplot", "Histogram", "Scatter Matrix", "PCA"],
        value=["Heatmap", "Pairplot", "Histogram", "PCA"],
        label="選擇圖像類型"
    )
    # multi_viz returns values interleaved as [fig, desc, sum] * 5, so the
    # output components must be declared in that same order. The previous
    # layout (5 Plots followed by 10 Markdowns) routed description strings
    # into Plot components and figures into Markdowns.
    vis_outs = []
    for _ in range(5):
        vis_outs.append(gr.Plot())       # figure slot
        vis_outs.append(gr.Markdown())   # description slot
        vis_outs.append(gr.Markdown())   # summary slot
    recomm_card = gr.Markdown()
    vizbtn = gr.Button("產生多圖分析", elem_classes=["main-btn"])
    vizbtn.click(
        lambda f, t: (*multi_viz(f, t), auto_recommendation(f)),
        inputs=[upfile2, plot_select],
        outputs=vis_outs + [recomm_card]
    )
| # [貝葉斯優化/超參數搜尋] | |
# Mapping: model display name -> (estimator template, skopt search space).
# Consumed by run_bayes_optimization / run_multi_bayes_optimization via
# model_spaces.get(name, (None, None)); unknown names fall through to the
# (None, None) default there.
model_spaces = {
    "XGBoost": (
        XGBRegressor(verbosity=0, random_state=42),
        {
            # log-uniform prior keeps the learning-rate search dense near
            # small values, where boosting is most sensitive
            "max_depth": Integer(2, 10),
            "n_estimators": Integer(50, 300),
            "learning_rate": Real(0.01, 0.2, prior="log-uniform")
        }
    ),
    "Random Forest": (
        RandomForestRegressor(random_state=42),
        {
            "max_depth": Integer(2, 15),
            "n_estimators": Integer(50, 300)
        }
    ),
    "LightGBM": (
        LGBMRegressor(random_state=42),
        {
            "max_depth": Integer(2, 15),
            "n_estimators": Integer(50, 300),
            "learning_rate": Real(0.01, 0.2, prior="log-uniform")
        }
    ),
    "SVR": (
        SVR(),
        {
            # C and gamma both span orders of magnitude -> log-uniform
            "C": Real(0.01, 100, prior="log-uniform"),
            "gamma": Real(0.001, 1.0, prior="log-uniform")
        }
    )
}
| # = 格式化最佳參數組合摘要(Markdown)= | |
def format_best_params(best_params, best_score):
    """Render the best hyper-parameter set plus its CV RMSE as an HTML snippet."""
    items = "".join(f"<li><b>{k}</b>: {v}</li>" for k, v in best_params.items())
    return (
        "<b>最佳參數組合:</b><ul style='margin:0 0 0 16px;'>"
        + items
        + f"</ul><b>最佳CV RMSE:</b> {best_score:.4f}"
    )
| # = 單模型貝葉斯優化超參數、產生收斂曲線與自動摘要 = | |
def run_bayes_optimization(datafile, model_name, n_iter=20):
    """Tune one model's hyper-parameters with BayesSearchCV.

    The CSV's last column is treated as the target. Returns a 4-tuple:
    (best-params HTML, RMSE convergence figure, figure caption,
    auto-generated conclusion) — string messages with empty slots on error.
    """
    if datafile is None:
        return "請上傳CSV資料", None, "", ""
    source = datafile.name if hasattr(datafile, "name") else datafile
    frame = pd.read_csv(source)
    features, target = frame.iloc[:, :-1], frame.iloc[:, -1]
    X_tr, X_te, y_tr, y_te = train_test_split(features, target, test_size=0.3, random_state=42)
    estimator, space = model_spaces.get(model_name, (None, None))
    if estimator is None:
        return "不支援的模型", None, "", ""
    if not space:
        # Nothing to tune: fit once and report the hold-out R².
        estimator.fit(X_tr, y_tr)
        score = estimator.score(X_te, y_te)
        return f"{model_name}為單純模型,無可優化超參數。\n測試集R2={score:.3f}", None, "", ""
    searcher = BayesSearchCV(
        estimator,
        search_spaces=space,
        n_iter=n_iter,
        scoring='neg_root_mean_squared_error',
        cv=3,
        n_jobs=-1,
        random_state=42,
        verbose=0
    )
    searcher.fit(X_tr, y_tr)
    # Scores are negated RMSE — flip the sign back to a positive curve.
    rmse_history = -1 * searcher.cv_results_["mean_test_score"]
    fig, ax = plt.subplots(figsize=(6, 3))
    ax.plot(rmse_history, marker='o')
    ax.set_title("優化歷程 (CV RMSE)")
    ax.set_xlabel("Iteration")
    ax.set_ylabel("CV RMSE")
    ax.grid(True)
    plt.tight_layout()
    plt.close(fig)  # release the figure from pyplot; Gradio renders the object
    return (
        format_best_params(searcher.best_params_, -searcher.best_score_),  # best-params HTML
        fig,                                                               # convergence plot
        get_viz_desc("Bayesian Optimization"),                             # caption
        auto_conclude_bayes_curve(rmse_history, model_name)                # auto conclusion
    )
| # = 多模型同時Bayes優化,產生多條收斂曲線 = | |
def run_multi_bayes_optimization(datafile, model_types, n_iter=20):
    """Run BayesSearchCV for each selected model and return a FIXED-LENGTH
    output list for the UI: [combined_fig] followed by, for every model in
    MODEL_ORDER (selected or not), [per-model fig, summary, best-params] —
    always 1 + 4*3 = 13 values so the Gradio outputs binding never shifts.
    """
    # The UI tabs expect results in exactly this model order
    MODEL_ORDER = ["Random Forest", "XGBoost", "LightGBM", "SVR"]
    N_PER_MODEL = 3  # [fig, summary, best_param]
    if datafile is None or not model_types:
        # No data or no model selected: return all-empty placeholders
        return [None] + [None, "", ""] * len(MODEL_ORDER)
    df = pd.read_csv(datafile.name if hasattr(datafile, "name") else datafile)
    # Convention: last column is the target
    X, y = df.iloc[:, :-1], df.iloc[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    rmse_curves = {}
    summaries = {}
    best_params_dict = {}
    for mtype in model_types:
        model, search_space = model_spaces.get(mtype, (None, None))
        if model is None or not search_space:
            # Unsupported model: record placeholder curve/summary instead
            rmse_curves[mtype] = [np.nan]
            summaries[mtype] = f"{mtype} 無法執行最佳化"
            best_params_dict[mtype] = f"{mtype} 不支援"
            continue
        opt = BayesSearchCV(
            model,
            search_spaces=search_space,
            n_iter=n_iter,
            scoring='neg_root_mean_squared_error',
            cv=3,
            n_jobs=-1,
            random_state=42,
            verbose=0
        )
        opt.fit(X_train, y_train)
        # Scores are negated RMSE; flip sign back to a positive RMSE curve
        rmse_curve = -1 * opt.cv_results_["mean_test_score"]
        rmse_curves[mtype] = rmse_curve
        summaries[mtype] = auto_conclude_bayes_curve(rmse_curve, mtype)
        best_params_dict[mtype] = format_best_params(opt.best_params_, -opt.best_score_)
    # Combined interactive figure: one convergence trace per selected model
    fig = go.Figure()
    for mtype in model_types:
        curve = rmse_curves.get(mtype, [])
        fig.add_trace(go.Scatter(
            x=list(range(len(curve))),
            y=curve,
            mode='lines+markers',
            name=mtype
        ))
    fig.update_layout(
        title="所有模型Bayes優化歷程 (CV RMSE)",
        xaxis_title="Iteration",
        yaxis_title="CV RMSE",
        hovermode="x unified"
    )
    outs = [fig]
    # Emit a slot for EVERY model in MODEL_ORDER so the output length is fixed
    for mtype in MODEL_ORDER:
        if mtype in model_types:
            # Selected: per-model figure + auto summary + best-params HTML
            curve = rmse_curves.get(mtype, [])
            fig_tab = go.Figure(go.Scatter(
                x=list(range(len(curve))),
                y=curve,
                mode='lines+markers',
                name=mtype
            ))
            summary = summaries.get(mtype, "")
            best_param = best_params_dict.get(mtype, "")
            outs.extend([fig_tab, summary, best_param])
        else:
            # Not selected: empty placeholders keep positions aligned
            outs.extend([None, "", ""])
    return outs
| # [AI搜尋最佳化條件] | |
| # = 自動判斷欄位型態(連續/離散)以建立搜尋空間 = | |
def suggest_optimization_space(df, discrete_cols=None):
    """Build a skopt search space from a DataFrame (last column = target).

    Columns named in discrete_cols become Categorical over their observed
    values; other columns with fewer than 8 distinct values also become
    Categorical, and the rest become Real(min, max) ranges.

    Parameters
    ----------
    df : DataFrame whose final column is the response (excluded from space).
    discrete_cols : optional list of column names to force as Categorical.
    """
    # None sentinel instead of a mutable default list (shared-state pitfall);
    # passing an explicit list still works unchanged.
    if discrete_cols is None:
        discrete_cols = []
    space = {}
    for col in df.columns[:-1]:
        if col in discrete_cols:
            vals = sorted(df[col].unique())
            space[col] = Categorical(vals)
        else:
            vmin, vmax = df[col].min(), df[col].max()
            if len(df[col].unique()) < 8:
                # few distinct levels: treat as discrete even if numeric
                space[col] = Categorical(sorted(df[col].unique()))
            else:
                space[col] = Real(vmin, vmax)
    return space
| # = 搜尋單模型之預測最佳/最差條件組合 = | |
def find_best_feature(model, X, maximize=True):
    """Search feature space, bounded by each feature's observed min/max, for
    the input that maximizes (or minimizes) a fitted model's prediction.

    Parameters
    ----------
    model : fitted estimator exposing .predict on a (1, n_features) array.
    X : DataFrame of the training features (used for bounds and start point).
    maximize : True to maximize the prediction, False to minimize.

    Returns
    -------
    (best_x, best_pred): optimizer solution vector and its predicted value.
    """
    # scipy.optimize.minimize is not in the module's visible import block;
    # import locally so the function cannot fail with NameError at call time.
    from scipy.optimize import minimize
    # Per-feature (min, max) bounds from the observed data
    bounds = [(X.iloc[:, i].min(), X.iloc[:, i].max()) for i in range(X.shape[1])]

    def obj(x):
        # minimize() only minimizes, so negate the prediction to maximize.
        y_pred = model.predict(np.array(x).reshape(1, -1))[0]
        return -y_pred if maximize else y_pred

    x0 = X.mean().values  # start from the centroid of the observed data
    res = minimize(obj, x0, bounds=bounds)
    best_pred = -res.fun if maximize else res.fun
    return res.x, best_pred
| # = 多模型搜尋最佳化參數組合(最大化/最小化)= | |
def optimize_conditions(datafile, model_types, direction, is_discrete=False, n_iter=32):
    """Fit each selected model on the full dataset, then search each model's
    predicted optimum (max or min) within the observed feature bounds.

    Parameters
    ----------
    datafile : uploaded CSV; last column is taken as the response.
    model_types : list of model display names to run.
    direction : label containing "大" for maximize, anything else minimizes.
    is_discrete, n_iter : reserved; currently unused by this implementation.

    Returns
    -------
    (results_df, html_summary, desc, extra) — four outputs for the UI.
    """
    if datafile is None or not model_types:
        return pd.DataFrame({"提醒": ["請上傳DoE數據與選擇模型"]}), "", "", ""
    df = pd.read_csv(datafile.name if hasattr(datafile, "name") else datafile)
    X, y = df.iloc[:, :-1], df.iloc[:, -1]
    # Direction is loop-invariant: any "大" in the label means maximize.
    # (Previously an initial equality check against "最大化" was dead code,
    # immediately overwritten by this substring test inside the loop.)
    maximize = "大" in str(direction)
    summary = []
    all_results = []
    for mtype in model_types:
        if mtype == "Random Forest":
            model = RandomForestRegressor(n_estimators=160, random_state=42)
        elif mtype == "XGBoost":
            model = XGBRegressor(n_estimators=100, random_state=42, verbosity=0)
        elif mtype == "LightGBM":
            model = LGBMRegressor(n_estimators=100, random_state=42)
        elif mtype == "SVR":
            model = make_pipeline(StandardScaler(), SVR())
        elif mtype == "Linear Regression":
            model = make_pipeline(StandardScaler(), LinearRegression())
        elif mtype == "Lasso":
            model = make_pipeline(StandardScaler(), Lasso(alpha=0.0001, max_iter=5000))
        elif mtype == "Ridge":
            model = make_pipeline(StandardScaler(), Ridge())
        elif mtype == "ElasticNet":
            model = make_pipeline(StandardScaler(), ElasticNet(alpha=0.0001))
        else:
            continue  # silently skip unknown model names
        model.fit(X, y)
        best_x, best_pred = find_best_feature(model, X, maximize=maximize)
        feature_dict = {k: float(v) for k, v in zip(X.columns, best_x)}
        feature_dict["模型"] = mtype
        feature_dict["預測產率"] = float(best_pred)
        all_results.append(feature_dict)
        summary.append(
            f"【{mtype}】最佳反應條件:{feature_dict}<br>預測產率/目標值:<b>{best_pred:.3g}</b><br>"
        )
    df_out = pd.DataFrame(all_results)
    txt = "<br>".join(summary)
    desc = ""  # reserved: method description for the UI
    sum_ = ""  # reserved: extra model notes for the UI
    return df_out, txt, desc, sum_
| # [AI模型訓練前後回歸比較] | |
| # = 單一y,對比多模型在前後資料集的R2/RMSE與特徵重要性變化 = | |
def compare_models_before_after(old_csv, new_csv, model_linear, model_nonlinear, target):
    """Compare regression performance of the selected models on the original
    DoE dataset vs. the merged (old + new points) dataset for one target.

    Parameters
    ----------
    old_csv / new_csv : uploaded file objects (or paths) for the two CSVs.
    model_linear / model_nonlinear : lists of model display names; merged.
    target : response column name; must exist in both CSVs.

    Returns
    -------
    (message, table, fig_fi, fig_y): message is "" on success or a warning;
    table has per-model CV R² and full-fit RMSE; fig_fi compares feature
    importances; fig_y overlays the target distributions.
    """
    try:
        # Merge the user's linear and non-linear model selections
        model_types = (model_linear or []) + (model_nonlinear or [])
        if old_csv is None or new_csv is None or not model_types:
            return "⚠️ 請上傳原始/合併DoE並選擇模型", pd.DataFrame(), None, None
        old_df = pd.read_csv(old_csv.name if hasattr(old_csv, "name") else old_csv)
        new_df = pd.read_csv(new_csv.name if hasattr(new_csv, "name") else new_csv)
        if target not in old_df.columns or target not in new_df.columns:
            return f"⚠️ 缺少 '{target}' 欄位", pd.DataFrame(), None, None
        X_old, y_old = old_df.drop(columns=[target]), old_df[target]
        X_new, y_new = new_df.drop(columns=[target]), new_df[target]
        # Cap CV folds by the smaller dataset so every fold is non-empty
        cv_num = min(5, len(y_old), len(y_new))
        if cv_num < 2:
            return "⚠️ 資料筆數太少,無法交叉驗證(至少需2列)", pd.DataFrame(), None, None
        rows = []
        importances = {}
        for mtype in model_types:
            # Two fresh estimators per type: model1 -> old data, model2 -> merged
            if mtype == "Random Forest":
                model1 = RandomForestRegressor(n_estimators=120, random_state=42)
                model2 = RandomForestRegressor(n_estimators=120, random_state=42)
            elif mtype == "XGBoost":
                model1 = XGBRegressor(n_estimators=100, random_state=42, verbosity=0)
                model2 = XGBRegressor(n_estimators=100, random_state=42, verbosity=0)
            elif mtype == "LightGBM":
                model1 = LGBMRegressor(n_estimators=100, random_state=42)
                model2 = LGBMRegressor(n_estimators=100, random_state=42)
            elif mtype == "SVR":
                model1 = make_pipeline(StandardScaler(), SVR())
                model2 = make_pipeline(StandardScaler(), SVR())
            elif mtype == "Linear Regression":
                model1 = make_pipeline(StandardScaler(), LinearRegression())
                model2 = make_pipeline(StandardScaler(), LinearRegression())
            elif mtype == "Lasso":
                model1 = make_pipeline(StandardScaler(), Lasso(alpha=0.0001, max_iter=5000))
                model2 = make_pipeline(StandardScaler(), Lasso(alpha=0.0001, max_iter=5000))
            elif mtype == "Ridge":
                model1 = make_pipeline(StandardScaler(), Ridge())
                model2 = make_pipeline(StandardScaler(), Ridge())
            elif mtype == "ElasticNet":
                model1 = make_pipeline(StandardScaler(), ElasticNet(alpha=0.0001))
                model2 = make_pipeline(StandardScaler(), ElasticNet(alpha=0.0001))
            else:
                continue  # unknown model name: skip silently
            try:
                # R² (CV)
                r2_old = cross_val_score(model1, X_old, y_old, cv=cv_num, scoring="r2").mean()
                r2_new = cross_val_score(model2, X_new, y_new, cv=cv_num, scoring="r2").mean()
                # RMSE (fit on all, compute on all) — training error, not hold-out
                model1.fit(X_old, y_old)
                model2.fit(X_new, y_new)
                pred_old = model1.predict(X_old)
                pred_new = model2.predict(X_new)
                rmse_old = np.sqrt(np.mean((pred_old - y_old) ** 2))
                rmse_new = np.sqrt(np.mean((pred_new - y_new) ** 2))
                # feature importance
                # NOTE(review): the pipeline fallbacks below only look for
                # "randomforestregressor" / "linearregression" step names, so
                # pipeline-wrapped SVR/Lasso/Ridge/ElasticNet fall through to
                # zeros and are later hidden by the sum>0 plot filter — confirm
                # whether that is intended.
                if hasattr(model1, "feature_importances_"):
                    imp_old = model1.feature_importances_
                    imp_new = model2.feature_importances_
                elif hasattr(model1, "named_steps") and "randomforestregressor" in model1.named_steps:
                    imp_old = model1.named_steps["randomforestregressor"].feature_importances_
                    imp_new = model2.named_steps["randomforestregressor"].feature_importances_
                elif hasattr(model1, "coef_"):
                    imp_old = np.abs(model1.coef_)
                    imp_new = np.abs(model2.coef_)
                elif hasattr(model1, "named_steps") and "linearregression" in model1.named_steps:
                    imp_old = np.abs(model1.named_steps["linearregression"].coef_)
                    imp_new = np.abs(model2.named_steps["linearregression"].coef_)
                else:
                    imp_old = np.zeros(X_old.shape[1])
                    imp_new = np.zeros(X_new.shape[1])
                rows.append({
                    "模型": mtype,
                    "原始R2": r2_old,
                    "合併新點R2": r2_new,
                    "原始RMSE": rmse_old,
                    "合併新點RMSE": rmse_new
                })
                # Only store importances for models that trained successfully
                importances[mtype+"_old"] = imp_old
                importances[mtype+"_new"] = imp_new
            except Exception as model_e:
                # Record the failure in every metric cell of the table row
                rows.append({
                    "模型": mtype,
                    "原始R2": f"Error: {model_e}",
                    "合併新點R2": f"Error: {model_e}",
                    "原始RMSE": f"Error: {model_e}",
                    "合併新點RMSE": f"Error: {model_e}"
                })
                # Do not record importances for the failed model
                print(f"{mtype} failed: {model_e}")
                continue
        table = pd.DataFrame(rows)
        # ========== Feature Importance Plot (only meaningful importances) ==========
        fig_fi = go.Figure()
        features = list(X_old.columns)
        colors = [
            "#4E79A7", "#F28E2B", "#76B7B2", "#E15759",
            "#59A14F", "#EDC948", "#B07AA1", "#FF9DA7"
        ]
        plot_count = 0  # advances through the palette per plotted trace
        for mtype in model_types:
            k_old = f"{mtype}_old"
            k_new = f"{mtype}_new"
            # Only plot traces whose importances are non-trivial (> 0 total)
            if k_old in importances and np.sum(importances[k_old]) > 0:
                fig_fi.add_trace(go.Bar(
                    x=features, y=importances[k_old],
                    name=f"{mtype} - Before", marker_color=colors[plot_count % len(colors)], opacity=0.75
                ))
                plot_count += 1
            if k_new in importances and np.sum(importances[k_new]) > 0:
                fig_fi.add_trace(go.Bar(
                    x=features, y=importances[k_new],
                    name=f"{mtype} - After", marker_color=colors[plot_count % len(colors)], opacity=0.4
                ))
                plot_count += 1
        fig_fi.update_layout(
            barmode="group",
            title="Feature Importance Comparison",
            xaxis_title="Feature",
            yaxis_title="Importance",
            legend_title="Model",
            font=dict(size=13)
        )
        # ========== y Distribution Plot ==========
        fig_y = go.Figure()
        fig_y.add_trace(go.Histogram(
            x=old_df[target], name="Before", opacity=0.7, nbinsx=16
        ))
        fig_y.add_trace(go.Histogram(
            x=new_df[target], name="After", opacity=0.7, nbinsx=16
        ))
        fig_y.update_layout(
            barmode='overlay',
            title="y Distribution Comparison",
            xaxis_title=target,
            yaxis_title="Count",
            legend_title="Dataset",
            font=dict(size=13)
        )
        return "", table, fig_fi, fig_y
    except Exception as e:
        import traceback
        tb = traceback.format_exc()
        print("=== DEBUG ERROR ===")
        print(tb)
        return f"❌ 系統發生錯誤:{str(e)}", pd.DataFrame(), None, None
| # = 根據R2變化自動產生效能總結摘要 = | |
def generate_r2_summary(table):
    """Summarize per-model R² change (before vs. after merging new points).

    Classifies each model as significantly worse (drop > 0.1), improved
    (gain > 0.1), or unchanged, and returns a Markdown summary string.
    """
    lines = []
    for _, row in table.iterrows():
        model = row['模型']
        try:
            r2_before = float(row['原始R2'])
            r2_after = float(row['合併新點R2'])
            if r2_after < r2_before - 0.1:
                lines.append(f"**{model}:表現顯著下降(R² {r2_before:.2f} → {r2_after:.2f})**")
            elif r2_after > r2_before + 0.1:
                lines.append(f"{model}:模型表現提升(R² {r2_before:.2f} → {r2_after:.2f})")
            else:
                # Catch-all branch: the previous `elif abs(delta) < 0.1`
                # silently dropped models whose delta was exactly ±0.1.
                lines.append(f"{model}:R²無明顯變化(R² {r2_before:.2f} → {r2_after:.2f})")
        except Exception:
            # Non-numeric cells (e.g. "Error: ..." strings) land here
            lines.append(f"{model}:計算失敗或資料不足。")
    if not lines:
        lines = ["無有效模型結果。"]
    return "### AI模型R²比較摘要\n" + "\n".join(lines)
| # = 整合前後回歸比較結果與自動摘要 = | |
def compare_models_before_after_with_summary(old_csv, new_csv, model_types, target="y"):
    """Run the before/after model comparison and append an auto R² summary.

    compare_models_before_after takes SEPARATE linear/non-linear model lists
    and returns (message, table, fig_fi, fig_y). The previous version passed
    only four positional args (a TypeError) and read the table from
    result[0] — the message string — instead of result[1].
    """
    # Pass the combined selection as the "linear" list and an empty
    # "non-linear" list; the callee simply concatenates the two.
    result = compare_models_before_after(old_csv, new_csv, model_types, [], target)
    table = result[1]  # (message, table, fig_fi, fig_y)
    summary = generate_r2_summary(table)
    return (*result, summary)
| # = 多y目標下,對比多模型在前後資料集的效能與特徵重要性變化 = | |
def compare_models_multi_y_before_after(old_csv, new_csv, model_types, targets):
    """Multi-target variant of the before/after comparison: for each target
    column, compare every selected model's CV R² / full-fit RMSE and feature
    importances on the original vs. merged dataset.

    Returns
    -------
    (result_tables, feature_figs, ydist_figs, summary): three dicts keyed by
    target name (metrics table, importance figure, y-distribution figure)
    plus an HTML summary string — shaped for dynamic per-target tabs.
    """
    # Guard clauses: everything must be provided
    if old_csv is None or new_csv is None or not model_types or not targets:
        return {}, {}, {}, "請確認已上傳檔案、選擇模型與目標欄位"
    old_df = pd.read_csv(old_csv.name if hasattr(old_csv, "name") else old_csv)
    new_df = pd.read_csv(new_csv.name if hasattr(new_csv, "name") else new_csv)
    # Accept a single target name as well as a list
    if isinstance(targets, str): targets = [targets]
    result_tables, feature_figs, ydist_figs = {}, {}, {}
    summary_lines = []
    for target in targets:
        if target not in old_df.columns or target not in new_df.columns:
            summary_lines.append(f"❌ 欄位 {target} 在資料中不存在,略過。")
            continue
        X_old, y_old = old_df.drop(columns=[target]), old_df[target]
        X_new, y_new = new_df.drop(columns=[target]), new_df[target]
        # Cap CV folds by the smaller dataset so every fold is non-empty
        cv_num = min(5, len(y_old), len(y_new))
        if cv_num < 2:
            summary_lines.append(f"⚠️ {target} 資料筆數不足無法交叉驗證。")
            continue
        rows, importances = [], {}
        for mtype in model_types:
            # Two fresh estimators per type: model1 -> old, model2 -> merged
            if mtype == "Random Forest":
                model1 = RandomForestRegressor(n_estimators=120, random_state=42)
                model2 = RandomForestRegressor(n_estimators=120, random_state=42)
            elif mtype == "XGBoost":
                model1 = XGBRegressor(n_estimators=100, random_state=42, verbosity=0)
                model2 = XGBRegressor(n_estimators=100, random_state=42, verbosity=0)
            elif mtype == "LightGBM":
                model1 = LGBMRegressor(n_estimators=100, random_state=42)
                model2 = LGBMRegressor(n_estimators=100, random_state=42)
            elif mtype == "SVR":
                model1 = make_pipeline(StandardScaler(), SVR())
                model2 = make_pipeline(StandardScaler(), SVR())
            elif mtype == "Linear Regression":
                model1 = make_pipeline(StandardScaler(), LinearRegression())
                model2 = make_pipeline(StandardScaler(), LinearRegression())
            elif mtype == "Lasso":
                model1 = make_pipeline(StandardScaler(), Lasso(alpha=0.0001, max_iter=5000))
                model2 = make_pipeline(StandardScaler(), Lasso(alpha=0.0001, max_iter=5000))
            elif mtype == "Ridge":
                model1 = make_pipeline(StandardScaler(), Ridge())
                model2 = make_pipeline(StandardScaler(), Ridge())
            elif mtype == "ElasticNet":
                model1 = make_pipeline(StandardScaler(), ElasticNet(alpha=0.0001))
                model2 = make_pipeline(StandardScaler(), ElasticNet(alpha=0.0001))
            else:
                continue  # unknown model name: skip silently
            try:
                # CV R² on each dataset
                r2_old = cross_val_score(model1, X_old, y_old, cv=cv_num, scoring="r2").mean()
                r2_new = cross_val_score(model2, X_new, y_new, cv=cv_num, scoring="r2").mean()
                # RMSE fit/evaluated on the full dataset (training error)
                model1.fit(X_old, y_old)
                model2.fit(X_new, y_new)
                pred_old = model1.predict(X_old)
                pred_new = model2.predict(X_new)
                rmse_old = np.sqrt(np.mean((pred_old - y_old) ** 2))
                rmse_new = np.sqrt(np.mean((pred_new - y_new) ** 2))
                # feature importance
                # NOTE(review): same limitation as the single-y variant — only
                # RF/linear-regression pipeline step names are recognized, so
                # other pipeline models fall back to zeros and are hidden by
                # the sum>0 plot filter.
                if hasattr(model1, "feature_importances_"):
                    imp_old = model1.feature_importances_
                    imp_new = model2.feature_importances_
                elif hasattr(model1, "named_steps") and "randomforestregressor" in model1.named_steps:
                    imp_old = model1.named_steps["randomforestregressor"].feature_importances_
                    imp_new = model2.named_steps["randomforestregressor"].feature_importances_
                elif hasattr(model1, "coef_"):
                    imp_old = np.abs(model1.coef_)
                    imp_new = np.abs(model2.coef_)
                elif hasattr(model1, "named_steps") and "linearregression" in model1.named_steps:
                    imp_old = np.abs(model1.named_steps["linearregression"].coef_)
                    imp_new = np.abs(model2.named_steps["linearregression"].coef_)
                else:
                    imp_old = np.zeros(X_old.shape[1])
                    imp_new = np.zeros(X_new.shape[1])
                rows.append({
                    "模型": mtype,
                    "原始R2": r2_old,
                    "合併新點R2": r2_new,
                    "原始RMSE": rmse_old,
                    "合併新點RMSE": rmse_new
                })
                importances[mtype+"_old"] = imp_old
                importances[mtype+"_new"] = imp_new
            except Exception as model_e:
                # Record the failure in every metric cell of the table row
                rows.append({
                    "模型": mtype,
                    "原始R2": f"Error: {model_e}",
                    "合併新點R2": f"Error: {model_e}",
                    "原始RMSE": f"Error: {model_e}",
                    "合併新點RMSE": f"Error: {model_e}"
                })
        table = pd.DataFrame(rows)
        result_tables[target] = table
        # Feature-importance comparison figure for this target
        features = list(X_old.columns)
        fig_fi = go.Figure()
        colors = [
            "#4E79A7", "#F28E2B", "#76B7B2", "#E15759",
            "#59A14F", "#EDC948", "#B07AA1", "#FF9DA7"
        ]
        plot_count = 0  # advances through the palette per plotted trace
        for mtype in model_types:
            k_old = f"{mtype}_old"
            k_new = f"{mtype}_new"
            # Only plot traces with non-trivial importances (> 0 total)
            if k_old in importances and np.sum(importances[k_old]) > 0:
                fig_fi.add_trace(go.Bar(
                    x=features, y=importances[k_old],
                    name=f"{mtype} - Before", marker_color=colors[plot_count % len(colors)], opacity=0.75
                ))
                plot_count += 1
            if k_new in importances and np.sum(importances[k_new]) > 0:
                fig_fi.add_trace(go.Bar(
                    x=features, y=importances[k_new],
                    name=f"{mtype} - After", marker_color=colors[plot_count % len(colors)], opacity=0.4
                ))
                plot_count += 1
        fig_fi.update_layout(
            barmode="group",
            title=f"{target} Feature Importance Comparison",
            xaxis_title="Feature",
            yaxis_title="Importance",
            legend_title="Model",
            font=dict(size=13)
        )
        feature_figs[target] = fig_fi
        # Overlaid y-distribution histogram (before vs. after)
        fig_y = go.Figure()
        fig_y.add_trace(go.Histogram(
            x=old_df[target], name="Before", opacity=0.7, nbinsx=16
        ))
        fig_y.add_trace(go.Histogram(
            x=new_df[target], name="After", opacity=0.7, nbinsx=16
        ))
        fig_y.update_layout(
            barmode='overlay',
            title=f"{target} y Distribution Comparison",
            xaxis_title=target,
            yaxis_title="Count",
            legend_title="Dataset",
            font=dict(size=13)
        )
        ydist_figs[target] = fig_y
        # Compact per-model summary lines for this target
        for _, row in table.iterrows():
            try:
                r2_before = float(row['原始R2'])
                r2_after = float(row['合併新點R2'])
                model = row['模型']
                delta = r2_after - r2_before
                if r2_after < r2_before - 0.1:
                    summary_lines.append(f"<b>{target} - {model}</b>:顯著下降(R² {r2_before:.2f} → {r2_after:.2f})")
                elif r2_after > r2_before + 0.1:
                    summary_lines.append(f"<b>{target} - {model}</b>:提升(R² {r2_before:.2f} → {r2_after:.2f})")
                elif abs(delta) < 0.1:
                    summary_lines.append(f"<b>{target} - {model}</b>:無明顯變化(R² {r2_before:.2f} → {r2_after:.2f})")
            except Exception:
                summary_lines.append(f"<b>{target} - {row['模型']}</b>:計算失敗/資料不足")
    # Return dicts so the UI can build one tab per target dynamically
    return result_tables, feature_figs, ydist_figs, "### AI多 y 回歸比較摘要\n" + "<br>".join(summary_lines)
| # = 自動根據關鍵字偵測數據集y欄位 = | |
def detect_y_columns(csv_file, keyword_str):
    """Suggest response (y) columns for a CheckboxGroup by keyword match.

    NOTE(review): a second function with this same name is defined later in
    this module (the regex-based multi-y variant); at import time that later
    definition shadows this one, making this version effectively dead code —
    confirm which behavior is intended.
    """
    if csv_file is None:
        return gr.update(choices=[], value=[])
    try:
        df = pd.read_csv(csv_file.name if hasattr(csv_file, "name") else csv_file)
        keywords = [k.strip().lower() for k in keyword_str.split(",") if k.strip()]
        cols = []
        for c in df.columns:
            # Keyword matches take priority
            if any(k in str(c).lower() for k in keywords):
                cols.append(c)
        # Fallback when nothing matched: all numeric columns
        if not cols:
            cols = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
        # Keep at most the last four candidates to avoid flooding the UI
        if len(cols) > 4:
            cols = cols[-4:]
        # Offer all candidates; preselect the first two
        return gr.update(choices=cols, value=cols[:2])
    except Exception:
        return gr.update(choices=[], value=[])
| # [3D/2D分布、反應面、等高線視覺化] | |
| # = 3D散點圖 + 預測曲面圖(隨機森林擬合)= | |
def plot_3d_scatter_surface(
    file,
    x_col, y_col, z_col, target_col,
    surface_fit=False,  # whether to also render the fitted-surface figure
    n_grid=40  # grid resolution per axis for the surface point cloud
):
    """Render a 3D scatter of three feature columns colored by the target,
    plus (optionally) a random-forest-predicted point cloud over a grid.

    Returns (scatter_fig, surface_fig); empty white figures on any problem.
    """
    # Default empty figure returned on missing input or failure
    empty_fig = go.Figure()
    empty_fig.update_layout(template="plotly_white")
    if file is None or not x_col or not y_col or not z_col or not target_col:
        return empty_fig, empty_fig
    try:
        df = pd.read_csv(file.name if hasattr(file, "name") else file)
        # All four columns must exist
        if not all([c in df.columns for c in [x_col, y_col, z_col, target_col]]):
            return empty_fig, empty_fig
        # 3D scatter of raw observations
        fig_scatter = px.scatter_3d(
            df, x=x_col, y=y_col, z=z_col, color=target_col,
            title=f"3D Scatter: {x_col}, {y_col}, {z_col} / Color={target_col}",
            opacity=0.85
        )
        fig_surface = empty_fig
        if surface_fit:
            # Fit a random forest on just the three selected features
            X = df[[x_col, y_col, z_col]].values
            y = df[target_col].values
            model = RandomForestRegressor(n_estimators=80, random_state=0)
            model.fit(X, y)
            # Dense grid over the observed ranges (n_grid^3 prediction points)
            x_lin = np.linspace(df[x_col].min(), df[x_col].max(), n_grid)
            y_lin = np.linspace(df[y_col].min(), df[y_col].max(), n_grid)
            z_lin = np.linspace(df[z_col].min(), df[z_col].max(), n_grid)
            xx, yy, zz = np.meshgrid(x_lin, y_lin, z_lin)
            grid_points = np.c_[xx.ravel(), yy.ravel(), zz.ravel()]
            yy_pred = model.predict(grid_points)
            # Render predictions as a translucent Scatter3d point cloud
            fig_surface = go.Figure(data=[
                go.Scatter3d(
                    x=grid_points[:, 0], y=grid_points[:, 1], z=grid_points[:, 2],
                    mode='markers',
                    marker=dict(size=2.2, color=yy_pred, colorscale='Viridis', opacity=0.35),
                    name="預測表面"
                )
            ])
            fig_surface.update_layout(
                title=f"3D Predicted Surface: {target_col} vs {x_col}, {y_col}, {z_col}",
                scene=dict(
                    xaxis_title=x_col, yaxis_title=y_col, zaxis_title=z_col
                )
            )
        return fig_scatter, fig_surface
    except Exception as e:
        # Previously the exception was swallowed silently, making plot
        # failures impossible to diagnose; log before returning empty figures.
        print(f"[plot_3d_scatter_surface error]: {e}")
        return empty_fig, empty_fig
| # = 自動解析欄位,更新3D視覺化的欄位選單 = | |
def update_dropdowns(file):
    """Populate the four 3D-visualization dropdowns (x, y, z, target) from
    the numeric columns of an uploaded CSV.

    Returns four (choices, default) pairs: x/y/z default to the first three
    numeric columns, the target to the last. The previous version called
    os.path.exists without importing os — a NameError on every upload — and
    printed debug noise; both removed.
    """
    fallback = (["(請選擇)"], "(請選擇)")
    if file is None:
        return fallback, fallback, fallback, fallback
    try:
        path = file.name if hasattr(file, "name") else file
        df = pd.read_csv(path)
        cols = list(df.select_dtypes(include="number").columns)
        if not cols:
            # No numeric columns: fall back to every column
            cols = list(df.columns)
        x_def = cols[0] if len(cols) > 0 else "(請選擇)"
        y_def = cols[1] if len(cols) > 1 else x_def
        z_def = cols[2] if len(cols) > 2 else x_def
        t_def = cols[-1] if len(cols) > 0 else x_def
        return (cols, x_def), (cols, y_def), (cols, z_def), (cols, t_def)
    except Exception as e:
        print("讀檔失敗:", e)
        return fallback, fallback, fallback, fallback
| # = 二變數反應面/等高線圖(3D Surface/Contour)= | |
def plot_surface_and_contour(file, x_col, y_col, z_col, n_grid=40):
    """Interpolate scattered (x, y, z) samples onto a regular grid (cubic
    griddata) and render a 3D surface plus a labeled contour plot.

    Returns (surface_fig, contour_fig); blank white figures when input is
    missing, columns are absent, or interpolation fails.
    """
    blank_surface = go.Figure()
    blank_surface.update_layout(template="plotly_white")
    blank_contour = go.Figure()
    blank_contour.update_layout(template="plotly_white")
    if file is None or not x_col or not y_col or not z_col:
        return blank_surface, blank_contour
    try:
        frame = pd.read_csv(file.name if hasattr(file, "name") else file)
        if any(c not in frame.columns for c in (x_col, y_col, z_col)):
            return blank_surface, blank_contour
        xs = frame[x_col].values
        ys = frame[y_col].values
        zs = frame[z_col].values
        # Regular grid spanning the observed x/y ranges
        grid_x, grid_y = np.meshgrid(
            np.linspace(xs.min(), xs.max(), n_grid),
            np.linspace(ys.min(), ys.max(), n_grid),
        )
        grid_z = griddata((xs, ys), zs, (grid_x, grid_y), method="cubic")
        # 3D surface over the interpolated grid
        fig_surface = go.Figure(data=[
            go.Surface(x=grid_x, y=grid_y, z=grid_z, colorscale="Viridis", opacity=0.93, showscale=True)
        ])
        fig_surface.update_layout(
            title=f"3D 曲面圖:{z_col} vs {x_col}, {y_col}",
            scene=dict(
                xaxis_title=x_col,
                yaxis_title=y_col,
                zaxis_title=z_col
            ),
            margin=dict(l=0, r=0, b=0, t=40)
        )
        # Matching 2D contour view of the same grid
        fig_contour = go.Figure(data=[
            go.Contour(
                x=grid_x[0], y=grid_y[:, 0], z=grid_z,
                colorscale="Viridis",
                contours=dict(showlabels=True),
                colorbar=dict(title=z_col)
            )
        ])
        fig_contour.update_layout(
            title=f"等高線圖:{z_col} vs {x_col}, {y_col}",
            xaxis_title=x_col,
            yaxis_title=y_col,
            margin=dict(l=0, r=0, b=0, t=40)
        )
        return fig_surface, fig_contour
    except Exception as e:
        print(f"3D surface/contour plot error: {e}")
        return blank_surface, blank_contour
| # [複合y批次多模型回歸/交互作用] | |
| # = 根據模型名稱產生多y回歸可用的模型物件 = | |
def get_model(name):
    """Return a ready-to-fit multi-output regressor for the given model name.

    Every estimator except PLSRegression is wrapped in MultiOutputRegressor
    so it can predict several y columns at once; PLS handles multi-output
    natively.

    Raises:
        ValueError: if *name* is not one of the recognised model labels.
    """
    # Dispatch table of lazy factories — nothing is instantiated until the
    # requested entry is called.
    factories = {
        "Random Forest": lambda: MultiOutputRegressor(
            RandomForestRegressor(n_estimators=100, random_state=0)),
        "XGBoost": lambda: MultiOutputRegressor(
            XGBRegressor(n_estimators=100, random_state=0)),
        "PLS Regression": lambda: PLSRegression(n_components=2),
        "Ridge": lambda: MultiOutputRegressor(Ridge()),
        "Lasso": lambda: MultiOutputRegressor(Lasso()),
        "ElasticNet": lambda: MultiOutputRegressor(ElasticNet()),
        "Linear Regression": lambda: MultiOutputRegressor(LinearRegression()),
        "SVR": lambda: MultiOutputRegressor(SVR()),
    }
    if name not in factories:
        raise ValueError(f"Unknown model: {name}")
    return factories[name]()
| # = 根據關鍵字從資料中自動偵測多y欄位 = | |
def detect_y_columns(file, keywords_str):
    """Suggest target (y) columns whose names contain any of the keywords.

    Returns a gr.update that sets the CheckboxGroup choices to every column
    of the uploaded CSV and pre-selects the case-insensitive keyword matches.
    """
    import re
    if file is None:
        return gr.update(choices=[], value=[])
    path = file.name if hasattr(file, 'name') else file
    df = pd.read_csv(path)
    keywords = [k.strip() for k in keywords_str.split(",") if k.strip()]
    # NOTE: with no keywords the pattern is "" and matches every column.
    pattern = re.compile("|".join(re.escape(k) for k in keywords), re.IGNORECASE)
    matched = [col for col in df.columns if pattern.search(str(col))]
    return gr.update(choices=list(df.columns), value=matched)
| # = 多y/交互作用/多模型的批次回歸主程式 = | |
def run_multi_y(before_file, after_file, linear, nonlinear, ylist, add_inter, add_y_inter, degree):
    """Batch multi-target regression across several AI models.

    Reads the uploaded DoE CSV, optionally expands the feature matrix with
    pairwise interaction terms (PolynomialFeatures) and the target matrix
    with pairwise y*y products, then fits every selected model and reports
    per-target R2/RMSE plus feature-importance and true-vs-predicted
    distribution figures for up to 8 (model, y) combinations.

    Note: ``after_file`` is accepted for UI-interface compatibility but is
    not used by the current implementation.

    Returns:
        summary DataFrame (or a message string), then N titles, N tables,
        N importance figures and N distribution figures (N = 8).
    """
    N = 8  # number of detail tabs wired up in the UI
    df = pd.read_csv(before_file.name if hasattr(before_file, 'name') else before_file)
    if not ylist or not (linear or nonlinear):
        # BUGFIX: the error path must match the success-path arity
        # (1 summary + N titles + 3*N panels); it previously returned
        # only 1 + 4 + 12 values, breaking the Gradio output binding.
        return "請選擇目標y欄位與模型", *[""] * N, *[None] * (3 * N)
    X = df.drop(columns=ylist)
    # BUGFIX: copy so the y-interaction columns added below never mutate a
    # df slice (pandas chained-assignment hazard).
    Y = df[ylist].copy()
    X = X.select_dtypes(include=[np.number])
    # 1. optional pairwise feature interaction terms
    if add_inter and int(degree) > 1:
        poly = PolynomialFeatures(degree=int(degree), interaction_only=True, include_bias=False)
        X_inter = pd.DataFrame(poly.fit_transform(X), columns=poly.get_feature_names_out(X.columns))
    else:
        X_inter = X.copy()
    # 2. optional pairwise y interaction targets (y1*y2)
    if add_y_inter and len(ylist) > 1:
        for i in range(len(ylist)):
            for j in range(i + 1, len(ylist)):
                Y[f"{ylist[i]}*{ylist[j]}"] = Y[ylist[i]] * Y[ylist[j]]
    X_train, X_test, Y_train, Y_test = train_test_split(X_inter, Y, test_size=0.2, random_state=42)
    # 3. fit and evaluate every selected model
    model_names = (linear or []) + (nonlinear or [])
    results, tab_results = [], []
    for m in model_names:
        model = get_model(m)
        model.fit(X_train, Y_train)
        # predict() may return an ndarray — normalise to a DataFrame
        # (the original had an if/else with two identical branches here).
        pred = pd.DataFrame(np.asarray(model.predict(X_test)), columns=Y.columns)
        scores = {y: r2_score(Y_test[y], pred[y]) for y in Y.columns}
        rmses = {y: np.sqrt(mean_squared_error(Y_test[y], pred[y])) for y in Y.columns}
        model_summary = pd.DataFrame({
            "Model": [m] * len(Y.columns),
            "y": list(Y.columns),
            "R2": [scores[y] for y in Y.columns],
            "RMSE": [rmses[y] for y in Y.columns]
        })
        results.append(model_summary)
        for i, y in enumerate(Y.columns[:4]):
            # ---- robust feature-importance extraction ----
            # MultiOutputRegressor keeps one fitted estimator per target.
            if hasattr(model, "estimators_"):
                est = model.estimators_[i]
            else:
                est = model
            if hasattr(est, "feature_importances_"):
                # tree-based models
                importances = est.feature_importances_
            elif hasattr(est, "coef_"):
                # linear models — coef_ may be 1D or 2D depending on the estimator
                coef = est.coef_
                if coef.ndim > 1:
                    importances = np.abs(coef[i])
                else:
                    importances = np.abs(coef)
            elif hasattr(est, "x_weights_"):
                # PLS first-component weights.
                # NOTE(review): PLSRegression also exposes coef_, so this
                # branch is likely unreachable for PLS — confirm intent.
                importances = np.abs(est.x_weights_[:, 0])
            else:
                # unknown estimator type — no importances available
                importances = np.zeros(X_inter.shape[1])
            feat_names = X_inter.columns
            # ---- figures ----
            fig_feat = go.Figure([go.Bar(x=feat_names, y=importances)])
            fig_feat.update_layout(title=f"{m} {y} Feature Importances", height=440)
            fig_dist = go.Figure()
            fig_dist.add_trace(go.Histogram(x=Y_test[y], name='True', opacity=0.7))
            fig_dist.add_trace(go.Histogram(x=pred[y], name='Pred', opacity=0.7))
            fig_dist.update_layout(barmode='overlay', title=f"{m} {y} True vs Pred Dist", height=440)
            tab_results.append((f"【{m}】y: {y}", model_summary, fig_feat, fig_dist))
    # pad/truncate to the N fixed UI slots
    out_titles, out_tables, out_feats, out_ydists = [], [], [], []
    for i in range(N):
        if i < len(tab_results):
            title, table, feat_fig, dist_fig = tab_results[i]
        else:
            title, table, feat_fig, dist_fig = "", None, None, None
        out_titles.append(title)
        out_tables.append(table)
        out_feats.append(feat_fig)
        out_ydists.append(dist_fig)
    summary = pd.concat(results, ignore_index=True) if results else ""
    return summary, *out_titles, *out_tables, *out_feats, *out_ydists
| # [自動function管理核心] | |
| import re | |
def extract_tab_ui_and_function(pyfile="app.py"):
    """Scan a Gradio app source file and report, per Tab, which UI components
    it creates and which functions/lambdas/callbacks are bound to interaction
    events (click/change/submit/select).

    Args:
        pyfile: path of the Python source file to scan.

    Returns:
        A Markdown report string, or an error string ("❌ ...") if the file
        cannot be read.
    """
    try:
        with open(pyfile, encoding="utf-8") as f:
            code = f.read()
    except Exception as e:
        return f"❌ 讀取 {pyfile} 失敗:{e}"
    # (removed an unused `defs = set(re.findall(...))` that was never read)
    # (1) every `with gr.Tab(...)` / `with gr.TabItem(...)` block, up to the
    #     next tab or end of file
    tab_pattern = re.compile(
        r'with gr\.Tab(?:Item)?\(\s*[\'"](.+?)[\'"]\s*\):([\s\S]*?)(?=with gr\.Tab|with gr\.TabItem|\Z)', re.MULTILINE)
    # (2) UI component constructors (gr.Markdown, gr.Dataframe, gr.Button, ...)
    ui_pattern = re.compile(r'(gr\.[A-Za-z_]+)\s*\(')
    # (3) interaction events bound to a named function or a lambda
    event_pattern = re.compile(
        r'([a-zA-Z_][\w\d_]*)\.(click|change|submit|select)\(\s*([a-zA-Z_][\w\d_]*|lambda)(.*?)(\)|$)', re.DOTALL)
    output = "# 🧩 各Tab UI元件與function/lambda/callback mapping\n\n"
    for m in tab_pattern.finditer(code):
        tab_name, tab_code = m.group(1), m.group(2)
        output += f"## {tab_name}\n"
        # 1. collect UI component names and count occurrences per type
        ui_list = ui_pattern.findall(tab_code)
        ui_count = {}
        for u in ui_list:
            ui_count[u] = ui_count.get(u, 0) + 1
        if ui_list:
            output += "### 本Tab使用UI元件:\n"
            for u in sorted(set(ui_list)):
                output += f"- `{u}` x {ui_count[u]}\n"
        else:
            output += "- (本Tab沒有任何UI元件)\n"
        # 2. scan interaction callbacks within this tab
        func_map = []
        for ev in event_pattern.findall(tab_code):
            obj, trigger, fn, args, _ = ev
            if fn == "lambda":
                func_map.append(f"{obj}.{trigger} → lambda(匿名)")
            else:
                func_map.append(f"{obj}.{trigger} → {fn}()")
            # keyword-style callback arguments (e.g. _js=..., _preprocess=...)
            cb_matches = re.findall(r'(_callback|_js|_preprocess|_postprocess)\s*=\s*([a-zA-Z_][\w\d_]*|lambda[^\),]*)', args)
            for cb_type, cb_fn in cb_matches:
                if cb_fn.strip().startswith("lambda"):
                    func_map.append(f"{obj}.{cb_type} → lambda")
                else:
                    func_map.append(f"{obj}.{cb_type} → {cb_fn.strip()}()")
        if func_map:
            output += "\n### 有callback的元件/方法:\n"
            for item in func_map:
                output += f"- {item}\n"
        else:
            output += "\n- 本Tab所有UI皆為純靜態,無綁定function\n"
        output += "\n"
    return output
def extract_tab_functions_with_lambda_and_callback(pyfile="app.py"):
    """Report, per Gradio Tab in *pyfile*, the UI components created and the
    function/lambda/callback bound to each interaction event.

    NOTE(review): this is a byte-level duplicate of
    extract_tab_ui_and_function; consider consolidating the two.

    Args:
        pyfile: path of the Python source file to scan.

    Returns:
        A Markdown report string, or an error string ("❌ ...") if the file
        cannot be read.
    """
    try:
        with open(pyfile, encoding="utf-8") as f:
            code = f.read()
    except Exception as e:
        return f"❌ 讀取 {pyfile} 失敗:{e}"
    # (removed an unused `defs = set(re.findall(...))` that was never read)
    # (1) every `with gr.Tab(...)` / `with gr.TabItem(...)` block
    tab_pattern = re.compile(
        r'with gr\.Tab(?:Item)?\(\s*[\'"](.+?)[\'"]\s*\):([\s\S]*?)(?=with gr\.Tab|with gr\.TabItem|\Z)', re.MULTILINE)
    # (2) UI component constructors (gr.Markdown, gr.Dataframe, gr.Button, ...)
    ui_pattern = re.compile(r'(gr\.[A-Za-z_]+)\s*\(')
    # (3) interaction events bound to a named function or a lambda
    event_pattern = re.compile(
        r'([a-zA-Z_][\w\d_]*)\.(click|change|submit|select)\(\s*([a-zA-Z_][\w\d_]*|lambda)(.*?)(\)|$)', re.DOTALL)
    output = "# 🧩 各Tab UI元件與function/lambda/callback mapping\n\n"
    for m in tab_pattern.finditer(code):
        tab_name, tab_code = m.group(1), m.group(2)
        output += f"## {tab_name}\n"
        # 1. collect UI component names and count occurrences per type
        ui_list = ui_pattern.findall(tab_code)
        ui_count = {}
        for u in ui_list:
            ui_count[u] = ui_count.get(u, 0) + 1
        if ui_list:
            output += "### 本Tab使用UI元件:\n"
            for u in sorted(set(ui_list)):
                output += f"- `{u}` x {ui_count[u]}\n"
        else:
            output += "- (本Tab沒有任何UI元件)\n"
        # 2. scan interaction callbacks within this tab
        func_map = []
        for ev in event_pattern.findall(tab_code):
            obj, trigger, fn, args, _ = ev
            if fn == "lambda":
                func_map.append(f"{obj}.{trigger} → lambda(匿名)")
            else:
                func_map.append(f"{obj}.{trigger} → {fn}()")
            # keyword-style callback arguments (e.g. _js=..., _preprocess=...)
            cb_matches = re.findall(r'(_callback|_js|_preprocess|_postprocess)\s*=\s*([a-zA-Z_][\w\d_]*|lambda[^\),]*)', args)
            for cb_type, cb_fn in cb_matches:
                if cb_fn.strip().startswith("lambda"):
                    func_map.append(f"{obj}.{cb_type} → lambda")
                else:
                    func_map.append(f"{obj}.{cb_type} → {cb_fn.strip()}()")
        if func_map:
            output += "\n### 有callback的元件/方法:\n"
            for item in func_map:
                output += f"- {item}\n"
        else:
            output += "\n- 本Tab所有UI皆為純靜態,無綁定function\n"
        output += "\n"
    return output
def extract_all_functions(pyfile="app.py"):
    """Produce a Markdown summary of every top-level function in *pyfile*.

    Each function is listed with its name and body; bodies longer than ten
    lines are truncated with an ellipsis marker.  Returns an error string if
    the file cannot be read, or a notice string if no functions are found.
    """
    import re
    try:
        with open(pyfile, encoding="utf-8") as f:
            code = f.read()
    except Exception as e:
        return f"❌ 讀取 {pyfile} 失敗:{e}"
    # A top-level def plus all directly following indented lines.
    func_pattern = re.compile(
        r"^(def [a-zA-Z_][\w\d_]*\(.*?\):(?:\n(?: |\t).*)*)",
        re.MULTILINE)
    matches = func_pattern.findall(code)
    if not matches:
        return "❗ 沒有抓到任何 function (def)!"
    parts = ["## 📃 所有 function 定義\n"]
    for func in matches:
        name_match = re.match(r"def ([a-zA-Z_][\w\d_]*)", func)
        label = name_match.group(1) if name_match else "?"
        parts.append(f"---\n### `{label}()`\n")
        body_lines = func.split("\n")
        if len(body_lines) > 10:
            parts.append("```python\n" + "\n".join(body_lines[:10]) + "\n... (略)\n```\n")
        else:
            parts.append("```python\n" + func + "\n```\n")
    return "".join(parts)
# ======================== Gradio multi-tab main UI ========================
# NOTE(review): source indentation was lost in transit; nesting below was
# reconstructed from the `with` structure — confirm against the original file.
with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("## <span style='color:#2675ff;font-weight:bold'> AI化實驗設計與數據分析平台 </span>")
    with gr.Tabs():
        # 1️⃣ Standard DoE design distributions
        with gr.Tab("1️⃣ 標準DoE "):
            gr.Markdown("""
### 🧪 標準DoE設計分布
- 支援自動產生參數空間內的多種經典DoE設計法(LHS, Sobol, Halton, Uniform)
- 可視覺化設計點分布、產生對應的設計參數表
- 支援下載CSV檔、表格內容可複製
**如何使用:**
- 1️⃣ 填寫參數名稱、範圍及步進
- 2️⃣ 設定要產生的組數與亂數種子(可選)
- 3️⃣ 點選「產生設計+分布圖」
- 4️⃣ 下方各分頁可檢視不同設計法的結果、分布圖與自動摘要
**注意事項:**
- 所有參數名稱需唯一、不得重複
- 組數愈大,運算與繪圖所需時間會增加
- 請檢查參數範圍、步進格式是否正確
""")
            with gr.Row():
                with gr.Column(scale=1, min_width=240):
                    with gr.Accordion("參數設定", open=True):
                        # parameter table: name / min / max / step per row
                        param_table = gr.Dataframe(
                            headers=["名稱", "最小值", "最大值", "間隔(step)"],
                            datatype=["str", "number", "number", "number"],
                            row_count=(3, "dynamic"),
                            col_count=(4, "fixed"),
                            value=[["A", 10, 20, 2], ["B", 100, 200, 25], ["C", 1, 2, 0.5]],
                            label="參數設定"
                        )
                        n_samples = gr.Number(label="組數", value=8, precision=0)
                        seed = gr.Number(label="亂數種子(留空或0為隨機)", value=42, precision=0)
                        btn = gr.Button("🪄 產生設計與分布圖", elem_classes=["main-btn"])
                with gr.Column(scale=2):
                    with gr.Accordion("分布結果/圖表/摘要", open=True):
                        tabs = []
                        # five output slots per design method:
                        # table, plot, csv download, description, summary
                        for name in ["LHS", "Sobol", "Halton", "Uniform"]:
                            with gr.Tab(name):
                                df = gr.Dataframe(label=f"{name} 設計點表格")
                                fig = gr.Plot(label=f"{name} 設計分布")
                                csv = gr.File(label="下載CSV📥")
                                desc = gr.Markdown()
                                summary = gr.Markdown()
                                tabs.extend([df, fig, csv, desc, summary])
            btn.click(compare_all_designs, inputs=[param_table, n_samples, seed], outputs=tabs)
        # 2️⃣ Advanced DoE (Box-Behnken / CCD)
        with gr.Tab("2️⃣ 進階DoE(Box-Behnken/CCD)"):
            gr.Markdown("""
### 🧪 進階DoE (Box-Behnken/CCD)
- 支援 Box-Behnken 與 中心組合設計 (CCD) 兩種進階DoE設計法
- 同步產生標準化設計矩陣與對應實際參數表
- 提供一鍵下載CSV,方便後續AI建模
**如何使用:**
- 1️⃣ 設定各參數的最小、最大值及間隔
- 2️⃣ 選擇所需的設計法(Box-Behnken或CCD)
- 3️⃣ 點「產生進階DoE設計」即自動產生全部設計點
**注意事項:**
- 參數欄位請完整填寫,勿留空
- 各參數區間需合理,否則設計點數量可能異常
- 若需大規模設計點,運算會稍久,請耐心等待
""")
            with gr.Row():
                with gr.Column(scale=1, min_width=240):
                    with gr.Accordion("參數設定", open=True):
                        # parameter table: name / min / max / step per row
                        param_table2 = gr.Dataframe(
                            headers=["名稱", "最小值", "最大值", "間隔"],
                            datatype=["str", "number", "number", "number"],
                            row_count=(3, "dynamic"),
                            col_count=(4, "fixed"),
                            value=[["溫度", 80, 120, 10], ["壓力", 1, 5, 1], ["pH", 6, 8, 1]],
                            label="參數設定"
                        )
                        design_type = gr.Radio(["Box-Behnken", "CCD"], value="Box-Behnken", label="設計法")
                        run_btn = gr.Button("🪄 產生進階DoE設計", elem_classes=["main-btn"])
                with gr.Column(scale=2):
                    with gr.Accordion("設計矩陣/參數表", open=True):
                        out_std = gr.Dataframe(label="標準化設計矩陣")
                        download_std = gr.File(label="下載標準矩陣CSV📥")
                        out_real = gr.Dataframe(label="實際參數表")
                        download_real = gr.File(label="下載參數表CSV📥")
            run_btn.click(
                advanced_doe_with_mapping,
                inputs=[param_table2, design_type],
                outputs=[out_std, out_real, download_std, download_real]
            )
        # 3️⃣ AI modeling / feature importance / SHAP
        with gr.Tab("3️⃣ AI建模/特徵重要性/SHAP"):
            gr.Markdown("""
### 🧠 AI建模/特徵重要性/SHAP
- 支援多種線性、非線性AI回歸模型,自動化訓練與模型評估
- 一鍵產生預測結果、模型效能指標、特徵重要性圖、SHAP全圖解釋
- 輕鬆檢視哪些參數對y預測最關鍵
**如何使用:**
- 1️⃣ 上傳DoE結果CSV,選擇目標y欄位
- 2️⃣ 勾選需比較的AI模型(可多選)
- 3️⃣ 可選擇是否顯示SHAP解釋圖
- 4️⃣ 點「一鍵訓練+特徵重要性」,即可檢視全部結果
**注意事項:**
- 資料需為數值型且無遺漏值
- 目標y欄位不可有重複
- 資料量太小時,部分模型可能無法有效學習
""")
            with gr.Row():
                with gr.Column(scale=1, min_width=320):
                    with gr.Accordion("上傳/選模型", open=True):
                        datafile = gr.File(label="上傳DoE結果CSV📤", file_types=[".csv"])
                        test_ratio = gr.Slider(label="測試集比例", value=0.3, minimum=0.1, maximum=0.5, step=0.05)
                        algo_linear = gr.CheckboxGroup(
                            ["Linear Regression", "Lasso", "Ridge", "ElasticNet"],
                            value=[], label="線性回歸"
                        )
                        algo_nonlinear = gr.CheckboxGroup(
                            ["Random Forest", "XGBoost", "LightGBM", "SVR"],
                            value=["Random Forest"], label="非線性回歸"
                        )
                        show_shap = gr.Checkbox(label="進階SHAP解釋", value=False)
                        btn_ai = gr.Button("🚀 一鍵訓練", elem_classes=["main-btn"])
                with gr.Column(scale=2):
                    with gr.Accordion("預測/重要性圖", open=True):
                        predfig = gr.Plot(label="📊 預測/實際對比圖")
                        met_df = gr.Dataframe(label="模型效能指標", datatype="auto")
                        summary = gr.Markdown(visible=True)
                        feat_summary = gr.Markdown(visible=True)
                        feat_fig = gr.Plot(label="特徵重要性")
                        shap_img = gr.Image(label="SHAP解釋圖")
            # merge the linear + nonlinear selections into one model list
            btn_ai.click(
                lambda file, lin, nonlin, ratio, shap_flag:
                    train_and_predict_with_importance(
                        file, (lin or []) + (nonlin or []), ratio, True, shap_flag
                    ),
                inputs=[datafile, algo_linear, algo_nonlinear, test_ratio, show_shap],
                outputs=[predfig, met_df, summary, feat_summary, feat_fig, shap_img]
            )
        # 4️⃣ Multi-plot data visualization + 2D/3D/contour
        with gr.Tab("4️⃣ 數值資料視覺化處理"):
            gr.Markdown("""
### 📊 多圖資料視覺化 + 2D/3D/等高線分析
- 多種常用資料視覺化工具(熱圖、pairplot、直方圖、PCA等)
- 支援三維散點、曲面、2D等高線等專業圖形
- 可自選圖形、快速比較變數分布
**如何使用:**
- 1️⃣ 上傳資料CSV,選擇要產生的視覺化圖種類
- 2️⃣ 點「產生多圖分析」可一次顯示多種圖表
- 3️⃣ 三維分布:指定x、y、z軸變數(或目標欄位),生成3D散點/曲面圖
- 4️⃣ 2D/3D反應面:輸入要分析的變數組合,產生等高線/曲面圖
**注意事項:**
- 欄位名稱需為英文/數字,不支援特殊字元
- 缺值過多資料會自動忽略
- 若圖形異常請檢查欄位型態及範圍
""")
            # --- (1) multi-plot section
            with gr.Row():
                with gr.Column(scale=1, min_width=230):
                    upfile2 = gr.File(label="上傳資料CSV📤")
                    plot_select = gr.CheckboxGroup(
                        ["Heatmap", "Pairplot", "Histogram", "Scatter Matrix", "PCA"],
                        value=["Heatmap", "Pairplot", "Histogram", "PCA"], label="視覺化圖"
                    )
                    vizbtn = gr.Button("📊 產生多圖分析", elem_classes=["main-btn"])
                with gr.Column(scale=3):
                    vis_outs = []
                    # three output slots (plot + two markdown notes) per figure
                    for i in range(5):
                        vis_outs.extend([gr.Plot(label=f"圖像{i+1}"), gr.Markdown(), gr.Markdown()])
                    recomm_card = gr.Markdown(visible=True, value="", elem_id="recommend-card")
            vizbtn.click(
                lambda f, t: (*multi_viz(f, t), auto_recommendation(f)),
                inputs=[upfile2, plot_select],
                outputs=vis_outs + [recomm_card]
            )
            # --- (2) 3D variable distribution / surface
            gr.Markdown("#### 🧬 三維分析:3D變數分布/曲面圖")
            with gr.Row():
                with gr.Column(scale=1, min_width=260):
                    columns_md = gr.Markdown(label="資料欄位", value="請先上傳資料CSV,欄位將自動顯示")
                    x_col = gr.Textbox(label="X軸欄位", placeholder="如 A", interactive=True)
                    y_col = gr.Textbox(label="Y軸欄位", placeholder="如 B", interactive=True)
                    z_col = gr.Textbox(label="Z軸欄位", placeholder="如 C", interactive=True)
                    target_col = gr.Textbox(label="目標Y/顏色", placeholder="如 y", interactive=True)
                    surface_flag = gr.Checkbox(label="顯示三維曲面", value=False)
                    plot_btn = gr.Button("🧊 生成3D散點/曲面圖", elem_classes=["main-btn"])
                with gr.Column(scale=3):
                    fig_scatter_out = gr.Plot(label="3D散點圖")
                    fig_surface_out = gr.Plot(label="3D預測曲面圖")
            # NOTE(review): the column hint is hard-coded ("A, B, C, y") rather
            # than read from the uploaded file — presumably a placeholder.
            upfile2.change(
                lambda file: f"資料欄位:A, B, C, y" if file else "請先上傳資料",
                inputs=[upfile2],
                outputs=[columns_md]
            )
            plot_btn.click(
                plot_3d_scatter_surface,
                inputs=[upfile2, x_col, y_col, z_col, target_col, surface_flag],
                outputs=[fig_scatter_out, fig_surface_out]
            )
            # --- (3) two-variable 3D response surface / contour
            gr.Markdown("#### 🧬 二變數 3D 反應面/等高線圖")
            with gr.Row():
                with gr.Column(scale=1, min_width=260):
                    columns_md2 = gr.Markdown(label="資料欄位", value="請先上傳資料CSV,欄位將自動顯示")
                    x_col2 = gr.Textbox(label="X軸欄位", placeholder="如 A", interactive=True)
                    y_col2 = gr.Textbox(label="Y軸欄位", placeholder="如 B", interactive=True)
                    z_col2 = gr.Textbox(label="目標Z(反應/產率/預測)", placeholder="如 y", interactive=True)
                    surface2_btn = gr.Button("🧊 生成3D曲面+等高線圖", elem_classes=["main-btn"])
                with gr.Column(scale=3):
                    fig_surface2 = gr.Plot(label="3D曲面圖")
                    fig_contour2 = gr.Plot(label="等高線圖")
            upfile2.change(
                lambda file: f"資料欄位:A, B, y" if file else "請先上傳資料",
                inputs=[upfile2],
                outputs=[columns_md2]
            )
            surface2_btn.click(
                plot_surface_and_contour,
                inputs=[upfile2, x_col2, y_col2, z_col2],
                outputs=[fig_surface2, fig_contour2]
            )
        # 5️⃣ Hyperparameter / Bayesian optimization
        with gr.Tab("5️⃣ 超參數/貝葉斯優化"):
            gr.Markdown("""
### 🏆 超參數/貝葉斯優化
- 自動執行各類AI模型的超參數最佳化(貝葉斯法)
- 即時繪出優化歷程,提供最佳參數組合與效能摘要
**如何使用:**
- 1️⃣ 上傳DoE實驗結果CSV
- 2️⃣ 勾選要優化的模型(可複選),設定最大迭代次數
- 3️⃣ 點「執行Bayes超參數優化」,自動開始優化並顯示所有歷程
**注意事項:**
- 請確認資料充足且欄位型態正確
- 較複雜模型/高維空間下,優化需較多時間
- 迭代次數過少時,最佳值可能不穩定
""")
            with gr.Row():
                with gr.Column(scale=1, min_width=230):
                    with gr.Accordion("上傳/模型選擇", open=True):
                        upfile3 = gr.File(label="📤 上傳DoE結果CSV", file_types=[".csv"])
                        model_sel = gr.CheckboxGroup(
                            ["Random Forest", "XGBoost", "LightGBM", "SVR"], value=["XGBoost"], label="模型選擇(可複選)"
                        )
                        n_iter = gr.Number(label="最大迭代次數", value=16, precision=0)
                        bayes_btn = gr.Button("🚀 執行Bayes超參數優化", elem_classes=["main-btn"])
                with gr.Column(scale=2):
                    with gr.Accordion("優化歷程/結果", open=True):
                        multi_fig = gr.Plot(label="所有模型Bayes優化歷程 (CV RMSE)")
                        tab_figs = []
                        # three output slots (history, summary, best params) per model
                        for mtype in ["Random Forest", "XGBoost", "LightGBM", "SVR"]:
                            with gr.Tab(mtype):
                                fig = gr.Plot(label=f"{mtype} 優化歷程")
                                summary = gr.Markdown()
                                best_param = gr.Markdown()
                                tab_figs.extend([fig, summary, best_param])
            bayes_btn.click(
                run_multi_bayes_optimization,
                inputs=[upfile3, model_sel, n_iter],
                outputs=[multi_fig] + tab_figs
            )
        # 6️⃣ Smart recommendation / hybrid strategy / merge-backfill
        with gr.Tab("6️⃣ 智能推薦/混合策略/合併回填"):
            gr.Markdown("""
### 🌟 智能推薦/混合策略/合併回填
- AI自動推薦新實驗點、混合策略智能選點
- 一鍵搜尋最佳組合、合併新舊DoE資料
**如何使用:**
- 1️⃣ 上傳現有DoE資料,選擇推薦模式與模型
- 2️⃣ 指定推薦點數與是否排除重複
- 3️⃣ 點「產生推薦點組合」可直接下載推薦點
- 4️⃣ 新舊資料合併:上傳原始與新實驗CSV,自動合併去重
**注意事項:**
- 請確認欄位名稱一致、資料格式正確
- 合併時將以欄位名為主,自動排除重複點
- 推薦模式可同時多選,增加實驗多樣性
""")
            # --- (1) multi-model optimum search
            with gr.Accordion("多模型最佳化搜尋", open=True):
                with gr.Row():
                    with gr.Column(scale=1, min_width=280):
                        opt_file = gr.File(label="📤 上傳DoE資料(CSV)", file_types=[".csv"])
                        opt_model_sel = gr.CheckboxGroup(
                            ["Random Forest", "XGBoost", "LightGBM", "SVR"],
                            value=["Random Forest", "XGBoost"], label="最佳化用模型"
                        )
                        direction = gr.Radio(["最大化", "最小化"], value="最大化", label="目標")
                        is_discrete = gr.Checkbox(label="全部參數視為離散", value=False)
                        n_iter2 = gr.Number(label="搜尋迭代次數", value=28, precision=0)
                        btn_opt = gr.Button("🏆 搜尋AI預測最佳條件", elem_classes=["main-btn"])
                    with gr.Column(scale=2):
                        opt_df = gr.Dataframe(label="最佳參數組合", datatype="auto")
                        opt_txt = gr.Markdown()
                        opt_desc = gr.Markdown()
                        opt_sum = gr.Markdown()
                btn_opt.click(
                    optimize_conditions,
                    inputs=[opt_file, opt_model_sel, direction, is_discrete, n_iter2],
                    outputs=[opt_df, opt_txt, opt_desc, opt_sum]
                )
            # --- (2) new-point recommendation
            with gr.Accordion("新點推薦", open=False):
                with gr.Row():
                    with gr.Column(scale=1, min_width=280):
                        rec_file = gr.File(label="📤 請上傳DoE資料(CSV)", file_types=[".csv"])
                        recommend_mode = gr.CheckboxGroup(
                            ["探索型推薦", "混合策略推薦"],
                            value=["探索型推薦"], label="推薦模式(可複選)"
                        )
                        recommend_models = gr.CheckboxGroup(
                            ["Random Forest", "XGBoost", "LightGBM", "SVR"],
                            value=["Random Forest", "XGBoost"], label="模型選擇"
                        )
                        recommend_n = gr.Number(label="推薦點數", value=4, precision=0)
                        recommend_exclude = gr.Checkbox(label="排除現有點", value=True)
                        recommend_btn = gr.Button("🎯 產生推薦點組合", elem_classes=["main-btn"])
                    with gr.Column(scale=2):
                        recommend_out = gr.Markdown(label="推薦結果", value="")
                        recommend_download_file = gr.File(label="📥 下載推薦點(回填y用)", interactive=False)
                recommend_btn.click(
                    make_recommended_points,
                    inputs=[rec_file, recommend_models, recommend_mode, recommend_n, recommend_exclude],
                    outputs=[recommend_out, recommend_download_file]
                )
            # --- (3) merge back-filled data
            with gr.Accordion("合併回填資料", open=False):
                with gr.Row():
                    with gr.Column(scale=1, min_width=320):
                        base_csv = gr.File(label="原始DoE資料(CSV)")
                        new_csv = gr.File(label="新實驗資料(推薦點CSV)")
                        merge_btn = gr.Button("🧩 自動合併/去重", elem_classes=["main-btn"])
                        merge_out = gr.File(label="📥 下載合併後資料")
                merge_btn.click(
                    merge_csvs,
                    inputs=[base_csv, new_csv],
                    outputs=merge_out
                )
        # 7️⃣ AI model regression analysis (multi-target batch: interactions / multi-model / multi-y)
        with gr.Tab("7️⃣ AI模型回歸分析"):
            gr.Markdown("""
### 🧠 AI模型回歸分析(多目標/多模型/交互作用)
- 批次執行多種AI模型,支援多y、多特徵交互作用
- 各y可獨立檢視效能指標、重要性對比與預測分布
- 自動比較前後回填資料對AI效能之提升/變化
**如何使用:**
- 1️⃣ 上傳原始DoE資料(CSV)及合併新點(CSV,可選)
- 2️⃣ 選擇目標y欄位,可自動偵測或自行調整
- 3️⃣ 勾選所需AI模型、設定特徵交互作用階數
- 4️⃣ 點「批次回歸分析」,下方各分頁顯示每y結果
**注意事項:**
- 交互作用階數設定愈高,特徵數量愈多,模型訓練愈慢
- 目標欄位過多,僅顯示前8個y的詳細結果
- 回填資料須與原始資料欄位一致
""")
            before_file = gr.File(label="原始DoE資料(CSV)")
            after_file = gr.File(label="🧩 合併新點DoE(CSV)")
            # NOTE(review): rebinds the names algo_linear/algo_nonlinear/run_btn/
            # summary already used by earlier tabs; earlier event bindings keep
            # their own component references, so this shadowing is benign.
            algo_linear = gr.CheckboxGroup(
                ["Linear Regression", "Lasso", "Ridge", "ElasticNet"],
                value=[], label="線性回歸"
            )
            algo_nonlinear = gr.CheckboxGroup(
                ["Random Forest", "XGBoost", "PLS Regression", "SVR"],
                value=["Random Forest"], label="非線性回歸"
            )
            # interaction degree control
            degree_select = gr.Dropdown([1, 2, 3], value=1, label="特徵交互作用階數 (degree)")
            add_inter = gr.Checkbox(label="特徵間交互作用 (x1*x2)", value=True)
            add_y_inter = gr.Checkbox(label="y間交互作用 (y1*y2)", value=False)
            y_keywords = gr.Textbox(label="目標欄位關鍵字 (逗號分隔)", value="y,目標,output,target")
            y_columns = gr.CheckboxGroup(label="目標y欄位 (可複選)", choices=[], value=[])
            # re-detect candidate y columns whenever the file or keywords change
            before_file.change(
                detect_y_columns,
                inputs=[before_file, y_keywords],
                outputs=y_columns
            )
            y_keywords.change(
                detect_y_columns,
                inputs=[before_file, y_keywords],
                outputs=y_columns
            )
            run_btn = gr.Button("🚀 批次回歸分析", elem_classes=["main-btn"])
            summary_md = gr.Dataframe(label="所有模型-y效能總表")
            y_titles, y_tables, y_feats, y_ydists = [], [], [], []
            # eight fixed result tabs; run_multi_y supplies one title, table and
            # two plots per slot (unused slots are blank)
            with gr.Tabs() as tabs_container:
                for idx in range(8):
                    with gr.TabItem(f"Tab{idx+1}"):
                        y_title = gr.Markdown(value="")
                        y_table = gr.Dataframe(label="模型效能比較表")
                        with gr.Row():
                            y_feat = gr.Plot(label="特徵重要性對比圖")
                            y_ydist = gr.Plot(label="y 分布對比圖")
                        y_titles.append(y_title)
                        y_tables.append(y_table)
                        y_feats.append(y_feat)
                        y_ydists.append(y_ydist)
            run_btn.click(
                run_multi_y,
                inputs=[before_file, after_file, algo_linear, algo_nonlinear, y_columns, add_inter, add_y_inter, degree_select],
                outputs=[summary_md, *y_titles, *y_tables, *y_feats, *y_ydists]
            )
        # Platform guide / index tab (static documentation only)
        with gr.Tab("⚙️平台說明與索引"):
            gr.Markdown("""
## 🧭 功能說明 & 導航指南
本平台整合「自動實驗設計(DoE)」、「AI建模」、「資料視覺化」、「超參數優化」、「智能推薦」等模組,專為化學/材料/製程等工程應用打造,協助您**從設計點產生、數據分析到模型推薦,全流程自動化**!
---
### 🧰 主要功能分頁
- **1️⃣ 標準DoE設計分布**
- 產生經典設計法(LHS、Sobol等)的多維參數設計點,便於建立模型訓練用基礎資料。
- 直觀展示每種設計點分布、支援結果下載。
- **2️⃣ 進階DoE(Box-Behnken/CCD)**
- 支援正交型、中心組合等進階設計法,方便進行曲面反應分析(RSM)。
- 產生標準化設計矩陣、對應實際參數表。
- **3️⃣ AI建模/特徵重要性/SHAP**
- 一鍵啟動多模型AI訓練、交叉驗證、特徵重要性排序、SHAP解釋。
- 適用於尋找關鍵變數與預測能力評估。
- **4️⃣ 多圖資料視覺化 + 2D/3D/等高線**
- 提供各類視覺化工具(熱圖、pairplot、PCA、3D曲面/等高線)協助多角度理解數據分布。
- 支援高維資料降維與多種圖表疊合分析。
- **5️⃣ 超參數/貝葉斯優化**
- 針對各種AI回歸模型自動進行超參數優化(如Random Forest、XGBoost等),即時檢視優化歷程與最佳參數。
- **6️⃣ 智能推薦/混合策略/合併回填**
- 結合AI預測與探索性搜尋,自動推薦新實驗條件,並支援資料自動合併與去重。
- 適合推進次輪實驗設計及自動補齊數據。
- **7️⃣ AI模型回歸分析(多目標/多模型/交互作用)**
- 支援多y欄位、多模型批次建模、特徵交互作用分析,詳細呈現各y的訓練/預測效能。
---
### 📝 操作建議與常見注意事項
- **所有欄位均可直接點擊或複製表格內容,並可一鍵下載分析結果**
- **CSV資料須為純數值型(中文欄位會自動支援,但建議用英文/數字命名)**
- **回歸與建模功能建議資料組數大於10,避免過度擬合或模型效能不穩定**
- **每個Tab下方皆有詳細分頁說明、注意事項,建議操作前先閱讀上方說明**
---
### 🏠 應用情境
- 多參數製程最佳化
- 原型實驗規劃(探索型/補點/混合設計)
- 關鍵因子敏感度分析
- 自動推薦新實驗組合
- AI輔助反應機制推論與模型精度提升
---
**本平台持續優化,歡迎多加利用!**
""")
        # Function-management tab: self-inspects this source file ("app.py")
        # at build time and renders the UI/function mapping reports.
        with gr.Tab("🧩 Function管理"):
            gr.Markdown("#### 自動偵測各分頁 UI 綁定 function / lambda / callback")
            mapping = extract_tab_functions_with_lambda_and_callback("app.py")
            gr.Markdown(value=mapping)
            gr.Markdown("#### 本程式所有 function 定義 (摘要)")
            gr.Markdown(value=extract_all_functions("app.py"))
    gr.Markdown("<div id='footer'> 本平台由T100團隊設計,歡迎交流建議 │ 2025_</div>")
demo.launch()