# Requirements (pip):
#   gradio>=4.17.0, pandas>=2.0.0, numpy>=1.24.0, matplotlib, scikit-learn>=1.2.0,
#   pyDOE2, Pillow, xgboost>=2.0.0, lightgbm>=4.0.0, seaborn, scipy>=1.10.0,
#   plotly>=5.16.0, scikit-optimize>=0.9.0, optuna, shap, tabulate

# [Gradio basic UI]
import gradio as gr

# [Numerics / data handling]
import numpy as np
import pandas as pd

# [DoE design / sampling]
from pyDOE2 import lhs, bbdesign, ccdesign
from scipy.stats.qmc import Sobol, Halton

# [AI/ML models]
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
import xgboost as xgb
import lightgbm as lgb
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

# [Model training / evaluation / feature tooling]
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.multioutput import MultiOutputRegressor
from sklearn.cross_decomposition import PLSRegression

# [Visualization / statistics]
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from scipy.stats import skew, kurtosis, shapiro
from scipy.interpolate import griddata
import shap

# [Misc utilities / temp files]
import io
import tempfile
import warnings

warnings.filterwarnings("ignore")

# [Bayesian / automated optimization]
from skopt import BayesSearchCV
from skopt.space import Real, Integer, Categorical
import optuna
import optuna.visualization.matplotlib
from scipy.optimize import minimize

# [Interactive page styling]
custom_css = """
/* 主卡片 */
/* 用於主要內容區塊(如自動摘要卡片、資訊框)美化,圓角+陰影 */
.gr-card { background: #f7fafd; border-radius: 18px; box-shadow: 0 2px 8px #0002; }
/* 按鈕樣式 */
/* 主要操作按鈕樣式:漸層、圓角、陰影、字體放大,滑鼠懸停有亮度與色調變化 */
.main-btn { font-size: 1.14em; padding: 10px 28px 11px 28px; border-radius: 999px;
margin: 14px 0 8px 0; background: linear-gradient(90deg, #5b8cff, #76e7ff 70%); color: #fff; font-weight: 600; box-shadow: 0 2px 12px #3976d855; border: none; transition: .2s; }
.main-btn:hover { filter: brightness(1.08); box-shadow: 0 2px 18px #5b8cff33; background: linear-gradient(90deg, #2259c9 55%, #4fa7d9); }
/* 全平台字體 */
/* 全域字型套用 Noto Sans JP(優先日系風格),備用 Segoe UI 和 Arial,維持專案一致性 */
.gradio-container { font-family: 'Noto Sans JP', 'Segoe UI', Arial, sans-serif; }
/* 區塊標題 */
/* 區塊或章節標題顯眼化,藍色、字重加粗 */
.section-title { font-size: 1.26em; font-weight: 700; color: #3976d8; margin-bottom: 6px; }
/* Tab & Row 區塊間距 */
/* Tab、列區塊加上下間距,避免UI太擁擠 */
.tab-pane, .gr-row { padding: 8px 0; }
/* 推薦小卡 */
/* 固定右下角推薦訊息小卡,用於快速提醒或建議,帶有柔和背景、陰影 */
#recommend-card { position: fixed; right: 28px; bottom: 28px; max-width: 360px; background: #f8fbffde; border-radius: 13px; box-shadow: 0 2px 14px #5ab2fa29; border-left: 5px solid #88a3e6; padding: 12px 18px 10px 14px; font-size: 1.07em; z-index: 9999; color: #285078; }
/* 分隔線 */
/* 水平線設計:加厚、色彩淡藍灰,分隔內容用 */
hr { border: 0; border-bottom: 2.5px solid #e4e8f0; margin: 28px 0 22px 0; }
/* Accordion 動畫 */
/* 摺疊區塊 summary 動畫與配色,展開時加上陰影強調 */
.accordion > summary { transition: .25s; background: #f4f8ff; }
.accordion[open] > summary { background: #cfeaff; }
.accordion[open] { box-shadow: 0 8px 24px #1ca7ec25; }
/* Tab高亮 */
/* 當前選中 Tab 標籤高亮顯示,底色、字色、粗體 */
div[role="tab"][aria-selected="true"] { background: #e3f1ff !important; font-weight: bold; color: #2675ff; }
/* 頁腳 */
/* Footer字體顏色與大小(淡灰、較小字) */
#footer { color: #888; font-size: 0.98em; }
"""


# [UI style helpers / auto-summary cards]
# = Render a titled tip list as an HTML card =
def make_card(title, tips):
    """Return an HTML card (styled via .gr-card in custom_css) with a bold
    title and one bullet per tip.

    FIX: the previous version interpolated only ``title`` and silently
    discarded ``tips``, so every card rendered empty of content.
    """
    items = "".join(f"<li>{tip}</li>" for tip in tips)
    return f"""<div class="gr-card"><b>{title}</b><ul>{items}</ul></div>"""


# = Per-plot explanation card, keyed by visualization name =
def get_viz_desc(name):
    """Return an HTML card describing the given plot type; empty card for
    unknown names."""
    idx = {
        "DoE Main": ("DoE設計分布圖", [
            "檢查設計點分布是否均勻覆蓋整個空間",
            "若集中/離散,代表參數區間或設計法可優化",
            "高維會以降維(PCA)檢視主變異結構",
        ]),
        "Heatmap": ("Heatmap(相關係數)", [
            "檢查所有數值變數的正負相關",
            "紅色:強正相關;藍色:強負相關",
            "相關係數>0.7為高度相關,< -0.7為強負相關",
        ]),
        "Pairplot": ("Pairplot(成對散點)", [
            "展示任兩變數間的散點型態",
            "斜線型=高相關,圓形=低相關",
            "可發現集群、離群或特定結構",
        ]),
        "Histogram": ("Histogram(直方圖)", [
            "單變數分布形態檢查",
            "偏態、長尾、極端值需注意",
            "單峰/多峰、常態/非對稱可判斷資料型態",
        ]),
        "Scatter Matrix": ("Scatter Matrix(全變數關聯)", [
            "類似Pairplot但一次顯示所有成對分布",
            "對角線顯示每欄分布直方圖",
            "可發現明顯群集、離群",
        ]),
        "PCA": ("PCA(主成分分布)", [
            "多維特徵壓縮到2D",
            "檢查主變異來源、潛在群集",
            "可輔助檢查是否有明顯離群",
        ]),
        "AI Predict": ("AI預測對比圖", [
            "預測y與實際y對比,點貼近對角線代表高精度",
            "偏離對角線代表模型誤差大,建議優化特徵或模型",
        ]),
        "Bayesian Optimization": ("貝葉斯優化", [
            "自動搜尋最佳參數組合,減少無效試驗",
            "可用於AI模型超參數、或實驗設計優化",
            "優化歷程圖:看最佳值逐步收斂",
        ]),
    }
    return make_card(*idx.get(name, ("", [])))


# = Auto-generated conclusion card for a visualization type =
def auto_conclude_viz(df, vtype):
    """Inspect ``df`` according to the plot type and return an HTML
    conclusion card.

    FIX: correlation/skew checks now restrict to numeric columns —
    ``DataFrame.corr`` raises on non-numeric data under pandas>=2.
    """
    out = []
    if vtype == "Heatmap":
        cor = df.select_dtypes(include=np.number).corr()
        # Upper triangle only, so each pair is reported once.
        highcorr = cor.where(np.triu(np.ones(cor.shape), 1).astype(bool)).stack()
        hc = highcorr[abs(highcorr) > 0.7]
        if not hc.empty:
            for idx, v in hc.items():
                out.append(f"「{idx[0]}」與「{idx[1]}」高度相關 (corr={v:.2f})")
        else:
            out.append("無明顯高度相關特徵")
    elif vtype == "Histogram":
        for col in df.select_dtypes(include=np.number).columns:
            sk = skew(df[col].dropna())
            if abs(sk) > 2:
                out.append(f"「{col}」極端偏態({sk:.2f})")
        if not out:
            out.append("欄位分布大致對稱")
    elif vtype == "PCA":
        pca = PCA(n_components=2).fit(df.values)
        expvar = pca.explained_variance_ratio_.sum()
        out.append(f"前2主成分共解釋 {expvar*100:.1f}% 變異")
    elif vtype == "Pairplot":
        out.append("請留意有無線性排列(高相關)或明顯群集/異常點")
    elif vtype == "Scatter Matrix":
        out.append("集群或離群點可從圖中直接辨識")
    else:
        out.append("資料檢查完成")
    return make_card("AI自動結論", out)


# = Auto-summary card for a regression model's test-set performance =
def auto_conclude_ai(y_test, y_pred, name):
    """Return an HTML card with RMSE/R² of ``y_pred`` vs ``y_test``."""
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    out = [
        f"模型:{name}",
        f"測試集 RMSE={rmse:.3g}",
        f"R²={r2:.2f}",
        ("模型表現佳" if r2 > 0.8 else "可進一步優化特徵/資料量"),
    ]
    return make_card("AI自動結論", out)
# = Auto-interpretation of a Bayesian-optimization RMSE curve =
def auto_conclude_bayes_curve(rmse_curve, model_name=None):
    """Classify the shape of an RMSE-vs-iteration curve and return a short
    markdown verdict (lowest RMSE, convergence pattern, recommendation)."""
    rmse_curve = np.array(rmse_curve)
    minv = np.min(rmse_curve)
    lastv = rmse_curve[-1]
    diff = np.ptp(rmse_curve)
    std = np.std(rmse_curve)
    trend = "平穩"
    if np.allclose(rmse_curve, rmse_curve[0], atol=0.2 * std):
        trend = "幾乎無變化"
    elif rmse_curve[0] > minv and lastv > rmse_curve[0] and lastv > minv + std:
        trend = "尾端上升"
    elif np.argmin(rmse_curve) < len(rmse_curve) // 2:
        trend = "快速下降收斂"
    elif std > 0.2 * minv and diff > 0.3 * minv:
        trend = "波動起伏"
    if trend == "快速下降收斂":
        comment = "RMSE 隨迭代明顯下降,代表最佳化收斂,已找到較佳參數組合。"
    elif trend == "幾乎無變化":
        comment = "RMSE 變動極小,代表模型/資料難以藉由超參數優化提升。"
    elif trend == "尾端上升":
        comment = "最後幾點 RMSE 明顯上升,建議忽略尾端結果,以最低點作最佳選擇。"
    elif trend == "波動起伏":
        comment = "RMSE 震盪明顯,代表模型不穩定或參數空間設過寬,建議縮小搜尋區間。"
    else:
        comment = "RMSE 變動趨勢平穩,可依最低點選定最佳參數。"
    model_str = f"【{model_name}】" if model_name else ""
    return f"{model_str}最低RMSE:**{minv:.3f}**\n- 收斂型態:**{trend}**\n- 建議:{comment}\n"


# [Data-quality checks / AutoML recommendation]
# = Detect data-quality problems per column =
def auto_data_quality_check(datafile):
    """Scan the uploaded CSV for missing values, constant columns and heavy
    skew; return a markdown bullet list.

    FIX: the previous version built ``tips`` and then returned only the bare
    header string, discarding every finding.
    """
    if datafile is None:
        return "> 尚未上傳資料"
    df = pd.read_csv(datafile.name if hasattr(datafile, "name") else datafile)
    tips = []
    for col in df.columns:
        if df[col].isnull().sum() > 0:
            tips.append(f"「{col}」有缺值,建議補值或刪除")
        if df[col].nunique() == 1:
            tips.append(f"「{col}」為常數欄,建議刪除")
        if pd.api.types.is_numeric_dtype(df[col]):
            sk = (df[col].dropna().skew() if hasattr(df[col], "skew") else 0)
            if abs(sk) > 2:
                tips.append(f"「{col}」嚴重偏態(skew={sk:.2f}),建議標準化")
    if not tips:
        tips = ["資料品質良好,無明顯異常。"]
    return "資料品質偵測:\n- " + "\n- ".join(tips)


# = Simplified AutoML: recommend a model and emit sample code =
def automl_leaderboard(datafile):
    """(Simplified) Could call TPOT / auto-sklearn or compare models;
    here a model name is picked at random.

    FIX: the sample code was a single line whose leading ``#`` commented out
    every statement; it is now a proper multi-line snippet.
    """
    best = np.random.choice(["XGBoost", "Random Forest", "LightGBM", "SVR"])
    code = (
        "# 範例BestModel\n"
        "from xgboost import XGBRegressor\n"
        "model = XGBRegressor(n_estimators=120, random_state=0)\n"
        "model.fit(X_train, y_train)"
    )
    return f"最佳模型推薦:{best}", code


# [DoE design / next-point recommendation]
# = Check whether a candidate point duplicates an existing one (vector distance) =
def is_close_to_existing(xrow, existing_X, tol=1e-4):
    """True when ``xrow`` is element-wise within ``tol`` of any row of
    ``existing_X``; False for an empty reference set."""
    existing_X = np.asarray(existing_X)
    if existing_X.size == 0:
        return False
    diffs = np.abs(existing_X - np.array(xrow))
    if diffs.ndim == 1:
        return np.all(diffs < tol)
    return np.any(np.all(diffs < tol, axis=1))


# = Multi-model AI suggestion of the next batch of DoE points =
def suggest_next_doe_points_batch(
    datafile,
    model_types=None,
    mode="最大化",
    n_points=3,
    exclude_existing=True,
    random_seed=42,
    max_attempts_factor=30,
    return_df=False,
):
    """Fit an ensemble of surrogate models on the uploaded CSV (last column =
    target) and propose ``n_points`` new design points by optimizing the
    ensemble mean (max/min) or its std ("不確定性").

    FIX: the mutable default list argument is replaced with ``None``; the
    markdown output now lists each candidate's coordinates instead of an
    empty string.
    """
    if model_types is None:
        model_types = ["Random Forest", "XGBoost", "LightGBM", "SVR"]
    if datafile is None:
        if return_df:
            return pd.DataFrame()
        return "> 尚未上傳資料"
    df = pd.read_csv(datafile.name if hasattr(datafile, "name") else datafile)
    X, y = df.iloc[:, :-1], df.iloc[:, -1]
    colnames = X.columns
    # Train the surrogate ensemble.
    models = []
    for t in model_types:
        if t == "Random Forest":
            models.append(RandomForestRegressor(n_estimators=120, random_state=random_seed))
        elif t == "XGBoost":
            models.append(XGBRegressor(n_estimators=120, random_state=random_seed, verbosity=0))
        elif t == "LightGBM":
            models.append(LGBMRegressor(n_estimators=120, random_state=random_seed))
        elif t == "SVR":
            models.append(make_pipeline(StandardScaler(), SVR()))
    for m in models:
        m.fit(X, y)
    bounds = [(X[c].min(), X[c].max()) for c in colnames]

    def ensemble_pred(xrow):
        preds = [m.predict(np.array(xrow).reshape(1, -1))[0] for m in models]
        return np.mean(preds), np.std(preds)

    if mode == "最大化":
        def obj(x):
            return -ensemble_pred(x)[0]
    elif mode == "最小化":
        def obj(x):
            return ensemble_pred(x)[0]
    elif mode == "不確定性":
        def obj(x):
            return -ensemble_pred(x)[1]
    else:
        def obj(x):
            return -ensemble_pred(x)[0]

    found_points, preds_mean, preds_std = [], [], []
    attempts = 0
    max_attempts = n_points * max_attempts_factor
    np.random.seed(random_seed)
    while len(found_points) < n_points and attempts < max_attempts:
        x0 = np.random.uniform([b[0] for b in bounds], [b[1] for b in bounds])
        res = minimize(obj, x0, bounds=bounds)
        best_x = res.x
        # Skip duplicates of training data or already-found candidates.
        exist = False
        if exclude_existing:
            if is_close_to_existing(best_x, X.values) or is_close_to_existing(best_x, np.array(found_points)):
                exist = True
        if exist:
            attempts += 1
            continue
        found_points.append(best_x)
        mean_pred, std_pred = ensemble_pred(best_x)
        preds_mean.append(mean_pred)
        preds_std.append(std_pred)
        attempts += 1
    if not found_points:
        if return_df:
            return pd.DataFrame()
        return "> 無法自動產生新點(參數範圍或步階過細、模型表現過於平坦)"
    # == DataFrame format (for download) ==
    if return_df:
        df_points = pd.DataFrame(found_points, columns=colnames)
        df_points["y"] = ""  # left blank for the user to fill in
        df_points["模型平均預測"] = preds_mean
        df_points["不確定性(std)"] = preds_std
        df_points["推薦策略"] = mode
        return df_points
    # == Markdown text version ==
    if mode == "最大化":
        best_idx = int(np.argmax(preds_mean))
    elif mode == "最小化":
        best_idx = int(np.argmin(preds_mean))
    elif mode == "不確定性":
        best_idx = int(np.argmax(preds_std))
    else:
        best_idx = 0
    out = "推薦次一輪DoE設計點(Top N):\n"
    for i, (best_x, mu, std) in enumerate(zip(found_points, preds_mean, preds_std), 1):
        flag = " 🏆 【最推薦】" if (i - 1) == best_idx else ""
        vals = ", ".join(f"{c}={v:.4g}" for c, v in zip(colnames, best_x))
        out += f"候選{i}{flag}:{vals}(預測={mu:.4g}, 不確定性={std:.4g})\n"
    return out
# = Smart mixed-strategy DoE point recommendation (max / min / uncertainty / random) =
def suggest_mixed_doe_points(
    datafile,
    model_types=None,
    n_total=4,  # total number of recommended points
    exclude_existing=True,
    random_seed=2025,
    return_df=False,
):
    """Recommend ``n_total`` new DoE points using a mix of strategies:
    one exploit-maximize, one exploit-minimize, one exploration point with
    maximal ensemble uncertainty, then random points to fill the quota.

    FIX: mutable default list replaced with ``None``; the ``return_df``
    output now carries the same prediction/strategy columns as
    ``suggest_next_doe_points_batch`` so concatenated downloads do not end
    up with NaN-filled columns; markdown lists candidate coordinates.
    """
    if model_types is None:
        model_types = ["Random Forest", "XGBoost", "LightGBM", "SVR"]
    if datafile is None:
        if return_df:
            return pd.DataFrame()
        return "> 尚未上傳資料"
    df = pd.read_csv(datafile.name if hasattr(datafile, "name") else datafile)
    X, y = df.iloc[:, :-1], df.iloc[:, -1]
    colnames = X.columns
    # Train the surrogate ensemble.
    models = []
    for t in model_types:
        if t == "Random Forest":
            models.append(RandomForestRegressor(n_estimators=120, random_state=random_seed))
        elif t == "XGBoost":
            models.append(XGBRegressor(n_estimators=120, random_state=random_seed, verbosity=0))
        elif t == "LightGBM":
            models.append(LGBMRegressor(n_estimators=120, random_state=random_seed))
        elif t == "SVR":
            models.append(make_pipeline(StandardScaler(), SVR()))
    for m in models:
        m.fit(X, y)
    bounds = [(X[c].min(), X[c].max()) for c in colnames]

    def ensemble_pred(xrow):
        preds = [m.predict(np.array(xrow).reshape(1, -1))[0] for m in models]
        return np.mean(preds), np.std(preds)

    def obj_max(x):
        return -ensemble_pred(x)[0]

    def obj_min(x):
        return ensemble_pred(x)[0]

    def obj_uncert(x):
        return -ensemble_pred(x)[1]

    np.random.seed(random_seed)

    def _rand_start():
        # Uniform random starting point inside the observed bounds.
        return np.random.uniform([b[0] for b in bounds], [b[1] for b in bounds])

    found_points, point_types, mu_list, std_list = [], [], [], []
    attempts = 0
    max_attempts = n_total * 30
    # 1. exploit: maximize the ensemble mean
    x1 = minimize(obj_max, _rand_start(), bounds=bounds).x
    if not (exclude_existing and is_close_to_existing(x1, X.values)):
        found_points.append(x1)
        point_types.append("最大化(exploit)")
        mu, std = ensemble_pred(x1)
        mu_list.append(mu)
        std_list.append(std)
    # 2. exploit: minimize the ensemble mean
    x2 = minimize(obj_min, _rand_start(), bounds=bounds).x
    if not (exclude_existing and (is_close_to_existing(x2, X.values) or is_close_to_existing(x2, np.array(found_points)))):
        found_points.append(x2)
        point_types.append("最小化(exploit)")
        mu, std = ensemble_pred(x2)
        mu_list.append(mu)
        std_list.append(std)
    # 3. exploration: maximize ensemble disagreement (std)
    x3 = minimize(obj_uncert, _rand_start(), bounds=bounds).x
    if not (exclude_existing and (is_close_to_existing(x3, X.values) or is_close_to_existing(x3, np.array(found_points)))):
        found_points.append(x3)
        point_types.append("最大不確定性(exploration)")
        mu, std = ensemble_pred(x3)
        mu_list.append(mu)
        std_list.append(std)
    # 4. random exploration to fill the quota
    while len(found_points) < n_total and attempts < max_attempts:
        x0 = _rand_start()
        if exclude_existing and (is_close_to_existing(x0, X.values) or is_close_to_existing(x0, np.array(found_points))):
            attempts += 1
            continue
        found_points.append(x0)
        point_types.append("隨機探索")
        mu, std = ensemble_pred(x0)
        mu_list.append(mu)
        std_list.append(std)
        attempts += 1
    # === DataFrame for CSV download ===
    if return_df:
        df_points = pd.DataFrame(found_points, columns=colnames)
        df_points["y"] = ""  # left blank for the user to fill in
        df_points["模型平均預測"] = mu_list
        df_points["不確定性(std)"] = std_list
        df_points["推薦策略"] = point_types
        return df_points
    # === Markdown display ===
    out = "智能推薦多重新DoE設計點(混合策略)\n"
    for i, (best_x, mu, std, label) in enumerate(zip(found_points, mu_list, std_list, point_types), 1):
        flag = " 🏆 【最推薦】" if label.startswith("最大化") else ""
        vals = ", ".join(f"{c}={v:.4g}" for c, v in zip(colnames, best_x))
        out += f"候選{i}{flag}:{label}:{vals}(預測={mu:.4g}, 不確定性={std:.4g})\n"
    return out


# = Combine the selected recommendation strategies into one downloadable file =
def make_recommended_points(file, models, modes, n, exclude):
    """Run each selected strategy, return the joined markdown report plus a
    CSV path holding all de-duplicated recommended points.

    FIX: temp files are now closed before pandas writes to the same path
    (the open handle made the write unreliable on Windows).
    """
    outs = []
    df_list = []
    n = int(n)
    if file is None or not modes or not models:
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode="w", encoding="utf-8")
        tmp.close()
        pd.DataFrame().to_csv(tmp.name, index=False)
        return "請確認已上傳資料、選模式與模型", tmp.name
    for mode in modes:
        if mode == "探索型推薦":
            out = suggest_next_doe_points_batch(file, models, "最大化", n, exclude, return_df=False)
            df = suggest_next_doe_points_batch(file, models, "最大化", n, exclude, return_df=True)
            outs.append(f"【探索型推薦】\n{out}")
            if isinstance(df, pd.DataFrame) and not df.empty:
                df_list.append(df)
        elif mode == "混合策略推薦":
            out = suggest_mixed_doe_points(file, models, n, exclude, return_df=False)
            df = suggest_mixed_doe_points(file, models, n, exclude, return_df=True)
            outs.append(f"【混合策略推薦】\n{out}")
            if isinstance(df, pd.DataFrame) and not df.empty:
                df_list.append(df)
    if df_list:
        all_df = pd.concat(df_list, ignore_index=True).drop_duplicates()
    else:
        all_df = pd.DataFrame()
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode="w", encoding="utf-8-sig")
    tmp.close()
    all_df.to_csv(tmp.name, index=False)
    return "\n\n".join(outs), tmp.name  # two outputs: markdown + CSV path
".join(outs), tmp.name # ok, 兩個 output # = 下載推薦DoE點組合(CSV)= def download_recommended_points(file, models, mode, n, exclude): # 支援模式切換 if mode == "混合策略推薦": df_points = suggest_mixed_doe_points(file, models, int(n), exclude, return_df=True) else: # 預設探索型推薦 df_points = suggest_next_doe_points_batch(file, models, mode, int(n), exclude, return_df=True) if df_points is None or len(df_points) == 0: return None # 存成臨時檔 with tempfile.NamedTemporaryFile(suffix=".csv", mode="w", delete=False, encoding="utf-8-sig") as f: df_points.to_csv(f.name, index=False) return f.name # [資料合併/CSV工具] # = 兩份CSV資料自動合併、去重、優先保留已填y者 = def merge_csvs(base_csv, new_csv): import pandas as pd import tempfile if base_csv is None or new_csv is None: return None df1 = pd.read_csv(base_csv.name if hasattr(base_csv, "name") else base_csv) df2 = pd.read_csv(new_csv.name if hasattr(new_csv, "name") else new_csv) # 將y(目標)欄位填過的優先保留,未填y只補點不覆蓋 key_cols = [c for c in df1.columns if c != "y"] merged = pd.concat([df1, df2], ignore_index=True) # 去重優先保留已填y者 merged = merged.sort_values(by=["y"], ascending=[False]).drop_duplicates(subset=key_cols, keep="first") merged = merged.reset_index(drop=True) tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode="w", encoding="utf-8-sig") merged.to_csv(tmp.name, index=False) tmp.flush() tmp.close() return tmp.name # [標準DoE設計/分布比較] # = 檢查輸入參數列是否有效 = def is_valid_row(row): if not isinstance(row, (list, tuple)) or len(row) < 4: return False try: if str(row[0]).strip() == "": return False float(row[1]) float(row[2]) float(row[3]) return True except Exception: return False # = 產生指定類型的標準DoE設計點(LHS/Sobol/Halton/Uniform)= def gen_design(design_type, n_params, n_samples, param_lows, param_highs, param_steps, seed): if seed is not None and str(seed).strip() != "" and int(seed) != 0: my_seed = int(seed) else: my_seed = None if design_type == "LHS": if my_seed is not None: np.random.seed(my_seed) design = lhs(n_params, samples=n_samples, criterion='maximin') elif design_type 
== "Sobol": sampler = Sobol(d=n_params, scramble=True, seed=my_seed) design = sampler.random(n_samples) elif design_type == "Halton": sampler = Halton(d=n_params, scramble=True, seed=my_seed) design = sampler.random(n_samples) elif design_type == "Uniform": if my_seed is not None: np.random.seed(my_seed) design = np.random.rand(n_samples, n_params) else: raise ValueError("Unknown SFD type!") real_samples = np.zeros_like(design) for idx, (low, high, step) in enumerate(zip(param_lows, param_highs, param_steps)): real_samples[:, idx] = design[:, idx] * (high - low) + low if step > 0: real_samples[:, idx] = np.round((real_samples[:, idx] - low) / step) * step + low else: decimals = str(step)[::-1].find('.') real_samples[:, idx] = np.round(real_samples[:, idx], decimals) real_samples[:, idx] = np.clip(real_samples[:, idx], low, high) return pd.DataFrame(real_samples) # = 2D參數分布圖(Plotly)= def plot_scatter_2d(df, title): fig = px.scatter(df, x=df.columns[0], y=df.columns[1], title=title) return fig # = 3D參數分布圖(Plotly)= def plot_scatter_3d(df, title): fig = px.scatter_3d(df, x=df.columns[0], y=df.columns[1], z=df.columns[2], title=title) return fig # = 多維參數成對散點圖(Plotly)= def plot_pairplot(df, title): return px.scatter_matrix(df, title=title) # = PCA主成分降維分布圖(Plotly)= def plot_pca(df, title): X = df.values pca = PCA(n_components=2) X_pca = pca.fit_transform(X) df_pca = pd.DataFrame(X_pca, columns=['PCA1', 'PCA2']) return px.scatter(df_pca, x='PCA1', y='PCA2', title=title + " (PCA降維)") # = 依參數設定產生所有主流程設計法(四種)並比較分布 = def compare_all_designs(param_table, n_samples, seed): all_types = ["LHS", "Sobol", "Halton", "Uniform"] outs = [] if isinstance(param_table, pd.DataFrame): param_table = param_table.values.tolist() param_names, param_lows, param_highs, param_steps = [], [], [], [] for row in param_table: if not is_valid_row(row): continue try: param_names.append(str(row[0]).strip()) param_lows.append(float(row[1])) param_highs.append(float(row[2])) 
param_steps.append(float(row[3])) except Exception: continue n_params = len(param_names) if n_params == 0: return pd.DataFrame({"提醒": ["請正確輸入至少一列參數"]}), None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None for des in all_types: df = gen_design(des, n_params, int(n_samples), param_lows, param_highs, param_steps, seed) df.columns = param_names # 主分布圖 if n_params == 2: mainfig = plot_scatter_2d(df, des + " 分布圖") elif n_params == 3: mainfig = plot_scatter_3d(df, des + " 分布圖") elif n_params >= 4 and n_params <= 8: mainfig = plot_pairplot(df, des + " Pairplot") else: mainfig = plot_pca(df, des + " PCA") with tempfile.NamedTemporaryFile(delete=False, suffix=f'_{des}_design.csv', mode='w', encoding='utf-8-sig') as tmpfile: df.to_csv(tmpfile, index=False) outs.extend([df, mainfig, tmpfile.name, get_viz_desc("DoE Main"), auto_conclude_viz(df, "DoE Main")]) return outs # [進階DoE: Box-Behnken/CCD設計] # = 將標準化DoE設計矩陣轉換為實際參數 = def doe_normal_to_actual(doe_matrix, param_info): df = pd.DataFrame(doe_matrix) df_out = pd.DataFrame() param_names = [] for i, info in enumerate(param_info): name, pmin, pmax, *_ = info param_names.append(name) pmin = float(pmin) pmax = float(pmax) vals = df.iloc[:, i].values mid = (pmin + pmax) / 2 half_range = (pmax - pmin) / 2 df_out[name] = mid + vals * half_range df_out = df_out.round(6) return df_out # = 產生Box-Behnken或CCD設計法的標準化/實際參數表 = def advanced_doe_with_mapping(param_table, design_type): param_list = [] if isinstance(param_table, pd.DataFrame): values = param_table.values.tolist() else: values = param_table for row in values: try: if not str(row[0]).strip(): continue param_list.append([row[0], float(row[1]), float(row[2]), float(row[3])]) except Exception: continue n_param = len(param_list) if n_param < 2: return (pd.DataFrame({"提醒":["請至少輸入2個參數"]}), pd.DataFrame(), None, None) # 產生DoE矩陣 if design_type == "Box-Behnken": mat = bbdesign(n_param, center=1) elif design_type == "CCD": mat = 
# [AI modeling / feature importance / SHAP]
# = Feature-importance bar chart plus one-line summary =
def get_feature_importance(model, feature_names):
    """Return (Plotly bar figure, summary string) for estimators exposing
    ``feature_importances_``; (None, notice) otherwise."""
    if hasattr(model, "feature_importances_"):
        importances = model.feature_importances_
        indices = np.argsort(importances)[::-1]
        fig = px.bar(
            x=[feature_names[i] for i in indices],
            y=importances[indices],
            orientation='v',
            title="特徵重要性(Feature Importance)",
            labels={"x": "特徵", "y": "重要性"},
        )
        top3 = ", ".join([feature_names[i] for i in indices[:3]])
        return fig, f"最重要特徵前三名:{top3}"
    return None, "此模型無 feature_importances_"


# = Save a SHAP summary plot to a PNG temp file, return its path =
def get_shap_summary(model, X, feature_names):
    """Render a SHAP summary for a tree model and return the PNG path.

    FIX: the temp-file handle was kept open while (and after) matplotlib
    wrote to it; it is now closed first and savefig writes by path.
    """
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X)
    plt.figure(figsize=(7, 4))
    shap.summary_plot(shap_values, X, feature_names=feature_names, show=False)
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
    tmp.close()
    plt.tight_layout()
    plt.savefig(tmp.name, format="png", bbox_inches='tight')
    plt.close()
    return tmp.name


# = Unwrap a sklearn Pipeline to its final estimator (pass-through otherwise) =
def _final_estimator(model):
    # Pipelines expose named_steps; take the last step, else the model itself.
    if hasattr(model, "named_steps"):
        return model.named_steps.get(list(model.named_steps)[-1], model)
    return model


# = Train the selected models, plot predictions, report metrics/importance =
def train_and_predict_with_importance(datafile, algos, test_ratio, show_importance=True, show_shap=False):
    """Train each selected algorithm on the CSV (last column = target) and
    return (prediction figure, metrics df, plot description, conclusions
    markdown, feature-importance figure, SHAP image path)."""
    if datafile is None or algos is None or len(algos) == 0:
        return None, pd.DataFrame({"提醒": ["請上傳DoE資料並選擇演算法"]}), "", "", None, ""
    df = pd.read_csv(datafile.name if hasattr(datafile, "name") else datafile)
    X = df.iloc[:, :-1].values
    y = df.iloc[:, -1].values
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_ratio, random_state=42)
    feature_names = list(df.columns[:-1])
    results = []
    y_pred_dict = {}
    outlines = []
    feature_fig = None
    feature_summary = ""
    shap_img = None
    for algo in algos:
        try:
            # Scaling pipelines keep small datasets stable for linear/SVR models.
            if algo == "Random Forest":
                model = RandomForestRegressor(n_estimators=150, random_state=0)
            elif algo == "XGBoost":
                model = xgb.XGBRegressor(n_estimators=120, random_state=0, verbosity=0)
            elif algo == "LightGBM":
                model = lgb.LGBMRegressor(n_estimators=120, random_state=0)
            elif algo == "SVR":
                model = make_pipeline(StandardScaler(), SVR())
            elif algo == "Linear Regression":
                model = make_pipeline(StandardScaler(), LinearRegression())
            elif algo == "Lasso":
                model = make_pipeline(StandardScaler(), Lasso(alpha=0.0001, max_iter=5000))
            elif algo == "Ridge":
                model = make_pipeline(StandardScaler(), Ridge())
            elif algo == "ElasticNet":
                model = make_pipeline(StandardScaler(), ElasticNet(alpha=0.0001))
            else:
                continue
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            # Guard against scalar predictions.
            if np.isscalar(y_pred):
                y_pred = np.full_like(y_test, y_pred)
            rmse = np.sqrt(mean_squared_error(y_test, y_pred))
            r2 = r2_score(y_test, y_pred)
            results.append({
                "模型": algo,
                "測試RMSE": rmse,
                "測試R²": r2,
                "訓練數": len(X_train),
                "測試數": len(X_test),
            })
            y_pred_dict[algo] = y_pred
            outlines.append(auto_conclude_ai(y_test, y_pred, algo))
            # Only the first model with importances feeds the side panels.
            has_importance = hasattr(getattr(model, "named_steps", model), "feature_importances_")
            if show_importance and feature_fig is None and has_importance:
                feature_fig, feature_summary = get_feature_importance(_final_estimator(model), feature_names)
            if show_shap and shap_img is None and has_importance:
                shap_img = get_shap_summary(_final_estimator(model), X_test, feature_names)
        except Exception as e:
            print(f"模型 {algo} 失敗: {e}")
            continue
    res_df = pd.DataFrame(results)
    model_colors = {
        "Random Forest": "#7c82f6", "XGBoost": "#ff686b", "LightGBM": "#54b984",
        "SVR": "#7e4a99", "Linear Regression": "#229aff", "Lasso": "#f8d90f",
        "Ridge": "#9edafe", "ElasticNet": "#f9a15b",
    }
    model_markers = {
        "Random Forest": "circle", "XGBoost": "diamond", "LightGBM": "triangle-up",
        "SVR": "square", "Linear Regression": "star", "Lasso": "cross",
        "Ridge": "hexagon", "ElasticNet": "x",
    }
    fig = px.scatter()
    for algo, y_pred in y_pred_dict.items():
        # Only plot valid prediction vectors.
        if y_pred is not None and len(y_pred) == len(y_test) and not np.isnan(y_pred).all():
            fig.add_scatter(
                x=y_test, y=y_pred, mode='markers', name=algo,
                marker=dict(
                    size=13,
                    color=model_colors.get(algo, "#888"),
                    symbol=model_markers.get(algo, "circle"),
                    line=dict(width=1.5, color="#222"),
                ),
                showlegend=True,
            )
    minv, maxv = np.min(y_test), np.max(y_test)
    fig.add_scatter(
        x=[minv, maxv], y=[minv, maxv], mode='lines', name='Ideal',
        line=dict(dash='dash', color='black'), showlegend=True,
    )
    fig.update_layout(
        title="Test Set Prediction(預測/實際)",
        xaxis_title="True Output",
        yaxis_title="Predicted",
        legend=dict(font=dict(size=17)),
        margin=dict(l=40, r=20, t=60, b=40),
    )
    fig.update_yaxes(scaleanchor="x", scaleratio=1)
    return fig, res_df, get_viz_desc("AI Predict"), "\n".join(outlines), feature_fig, shap_img


# [Multi-plot data visualization / auto-suggestion cards]
# = Build up to five visualizations plus per-plot descriptions/summaries =
def multi_viz(file, plot_types):
    """Return a flat 15-item list: (figure, description, conclusion) x 5,
    padded with (None, "", "") slots."""
    figs, descs, sums = [], [], []
    # Guard: no file or no selection -> 15 empty outputs.
    if file is None or not plot_types:
        return [None, "", ""] * 5
    df = pd.read_csv(file.name if hasattr(file, "name") else file)
    plot_types = plot_types[:5]  # cap at five plots
    # Each plot type is attempted independently; failures yield empty slots.
    for t in plot_types:
        fig, desc, summ = None, "", ""
        try:
            if t == "Heatmap":
                fig = px.imshow(df.corr(), text_auto=True, title="相關係數Heatmap")
                desc = get_viz_desc("Heatmap")
                summ = auto_conclude_viz(df, "Heatmap")
            elif t == "Pairplot":
                fig = px.scatter_matrix(df, title="資料 Pairplot")
                desc = get_viz_desc("Pairplot")
                summ = auto_conclude_viz(df, "Pairplot")
            elif t == "Histogram":
                fig = px.histogram(df, nbins=10, title="資料 Histogram")
                desc = get_viz_desc("Histogram")
                summ = auto_conclude_viz(df, "Histogram")
            elif t == "Scatter Matrix":
                fig = px.scatter_matrix(df, title="Scatter Matrix")
                desc = get_viz_desc("Scatter Matrix")
                summ = auto_conclude_viz(df, "Scatter Matrix")
            elif t == "PCA":
                X = df.values
                pca = PCA(n_components=2)
                X_pca = pca.fit_transform(X)
                df_pca = pd.DataFrame(X_pca, columns=['PCA1', 'PCA2'])
                fig = px.scatter(df_pca, x='PCA1', y='PCA2', title="PCA降維")
                desc = get_viz_desc("PCA")
                summ = auto_conclude_viz(df, "PCA")
        except Exception as e:
            print(f"[multi_viz error]: {t}", e)
            fig, desc, summ = None, "", ""
        figs.append(fig)
        descs.append(desc)
        sums.append(summ)
    # Pad to exactly five plot slots.
    while len(figs) < 5:
        figs.append(None)
        descs.append("")
        sums.append("")
    # Interleave into [fig, desc, sum, ...].
    outs = []
    for i in range(5):
        outs.extend([figs[i], descs[i], sums[i]])
    assert len(outs) == 15, f"multi_viz 輸出長度異常:{len(outs)}"
    return outs
# = Auto-generated system suggestions from the uploaded columns =
def auto_recommendation(file):
    """Scan the CSV for missing values, constant columns, heavy skew and
    highly correlated pairs; return a markdown bullet list (max 5 tips).

    FIX: tips were computed and then discarded (only the header string was
    returned); correlation now uses numeric columns only and reports each
    pair once instead of twice.
    """
    df = pd.read_csv(file.name if hasattr(file, "name") else file)
    tips = []
    for col in df.columns:
        if df[col].isnull().sum() > 0:
            tips.append(f"「{col}」有缺值,建議補值或刪除")
        if df[col].nunique() == 1:
            tips.append(f"「{col}」為常數欄,建議刪除")
        if pd.api.types.is_numeric_dtype(df[col]):
            sk = skew(df[col].dropna())
            if abs(sk) > 2:
                tips.append(f"「{col}」嚴重偏態(skew={sk:.2f}),建議標準化")
    cor = df.select_dtypes(include=np.number).corr().abs()
    cols = list(cor.columns)
    for i, c1 in enumerate(cols):
        for c2 in cols[i + 1:]:
            if cor.loc[c1, c2] > 0.8:
                tips.append(f"「{c1}」與「{c2}」高度相關,建議後續特徵選擇")
    if not tips:
        tips = ["資料品質良好,無明顯異常。"]
    else:
        tips = tips[:5]
    return "系統建議:\n- " + "\n- ".join(tips)


# = Bind multi-plot visualization and the suggestion card together =
def multi_viz_and_recommend(file, plot_types):
    """Return multi_viz's 15 outputs plus the recommendation markdown."""
    vis = multi_viz(file, plot_types)
    recomm = auto_recommendation(file)
    return (*vis, recomm)


with gr.Blocks() as demo:
    upfile2 = gr.File(label="上傳檔案")
    plot_select = gr.CheckboxGroup(
        choices=["Heatmap", "Pairplot", "Histogram", "Scatter Matrix", "PCA"],
        value=["Heatmap", "Pairplot", "Histogram", "PCA"],
        label="選擇圖像類型",
    )
    vis_outs = [gr.Plot() for _ in range(5)] + [gr.Markdown() for _ in range(10)]
    recomm_card = gr.Markdown()
    vizbtn = gr.Button("產生多圖分析", elem_classes=["main-btn"])
    vizbtn.click(
        lambda f, t: (*multi_viz(f, t), auto_recommendation(f)),
        inputs=[upfile2, plot_select],
        outputs=vis_outs + [recomm_card],
    )


# [Bayesian optimization / hyper-parameter search]
# (model, skopt search space) per supported model name.
model_spaces = {
    "XGBoost": (
        XGBRegressor(verbosity=0, random_state=42),
        {
            "max_depth": Integer(2, 10),
            "n_estimators": Integer(50, 300),
            "learning_rate": Real(0.01, 0.2, prior="log-uniform"),
        },
    ),
    "Random Forest": (
        RandomForestRegressor(random_state=42),
        {
            "max_depth": Integer(2, 15),
            "n_estimators": Integer(50, 300),
        },
    ),
    "LightGBM": (
        LGBMRegressor(random_state=42),
        {
            "max_depth": Integer(2, 15),
            "n_estimators": Integer(50, 300),
            "learning_rate": Real(0.01, 0.2, prior="log-uniform"),
        },
    ),
    "SVR": (
        SVR(),
        {
            "C": Real(0.01, 100, prior="log-uniform"),
            "gamma": Real(0.001, 1.0, prior="log-uniform"),
        },
    ),
}


# = Markdown summary of the best hyper-parameter set =
def format_best_params(best_params, best_score):
    """Return markdown listing each best parameter and the best CV RMSE.

    FIX: the original string lacked the ``f`` prefix (it printed the literal
    text "{best_score:.4f}") and never used ``best_params`` at all.
    """
    lines = "\n".join(f"- {k}: {v}" for k, v in dict(best_params).items())
    return f"最佳參數組合:\n{lines}\n\n最佳CV RMSE: {best_score:.4f}"


# = Bayesian hyper-parameter optimization for one model + convergence plot =
def run_bayes_optimization(datafile, model_name, n_iter=20):
    """Run BayesSearchCV on the chosen model; return (best-params markdown,
    RMSE-convergence figure, plot description, auto conclusion)."""
    if datafile is None:
        return "請上傳CSV資料", None, "", ""
    df = pd.read_csv(datafile.name if hasattr(datafile, "name") else datafile)
    X, y = df.iloc[:, :-1], df.iloc[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    model, search_space = model_spaces.get(model_name, (None, None))
    if model is None:
        return "不支援的模型", None, "", ""
    if not search_space:
        # No tunable space: report plain test-set performance instead.
        model.fit(X_train, y_train)
        score = model.score(X_test, y_test)
        return f"{model_name}為單純模型,無可優化超參數。\n測試集R2={score:.3f}", None, "", ""
    opt = BayesSearchCV(
        model,
        search_spaces=search_space,
        n_iter=n_iter,
        scoring='neg_root_mean_squared_error',
        cv=3,
        n_jobs=-1,
        random_state=42,
        verbose=0,
    )
    opt.fit(X_train, y_train)
    best_params = opt.best_params_
    best_score = -opt.best_score_
    # Negate the sklearn "neg RMSE" scores back to positive RMSE.
    rmse_curve = -1 * opt.cv_results_["mean_test_score"]
    fig, ax = plt.subplots(figsize=(6, 3))
    ax.plot(rmse_curve, marker='o')
    ax.set_title("優化歷程 (CV RMSE)")
    ax.set_xlabel("Iteration")
    ax.set_ylabel("CV RMSE")
    ax.grid(True)
    plt.tight_layout()
    plt.close(fig)  # detach from pyplot; the figure object is still renderable
    auto_summary = auto_conclude_bayes_curve(rmse_curve, model_name)
    return (
        format_best_params(best_params, best_score),  # best-params markdown
        fig,                                          # RMSE convergence figure
        get_viz_desc("Bayesian Optimization"),        # plot description card
        auto_summary,                                 # auto conclusion
    )


# = Bayesian optimization for several models, with combined convergence plot =
def run_multi_bayes_optimization(datafile, model_types, n_iter=20):
    """Run BayesSearchCV per selected model and return a fixed-length output
    list: [combined figure] + [per-model figure, summary, best-params] x 4
    in MODEL_ORDER (unselected models yield empty slots)."""
    MODEL_ORDER = ["Random Forest", "XGBoost", "LightGBM", "SVR"]
    if datafile is None or not model_types:
        # No data / no selection: all empty, length still fixed.
        return [None] + [None, "", ""] * len(MODEL_ORDER)
    df = pd.read_csv(datafile.name if hasattr(datafile, "name") else datafile)
    X, y = df.iloc[:, :-1], df.iloc[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    rmse_curves = {}
    summaries = {}
    best_params_dict = {}
    for mtype in model_types:
        model, search_space = model_spaces.get(mtype, (None, None))
        if model is None or not search_space:
            rmse_curves[mtype] = [np.nan]
            summaries[mtype] = f"{mtype} 無法執行最佳化"
            best_params_dict[mtype] = f"{mtype} 不支援"
            continue
        opt = BayesSearchCV(
            model,
            search_spaces=search_space,
            n_iter=n_iter,
            scoring='neg_root_mean_squared_error',
            cv=3,
            n_jobs=-1,
            random_state=42,
            verbose=0,
        )
        opt.fit(X_train, y_train)
        rmse_curve = -1 * opt.cv_results_["mean_test_score"]
        rmse_curves[mtype] = rmse_curve
        summaries[mtype] = auto_conclude_bayes_curve(rmse_curve, mtype)
        best_params_dict[mtype] = format_best_params(opt.best_params_, -opt.best_score_)
    # Combined interactive figure across models.
    fig = go.Figure()
    for mtype in model_types:
        curve = rmse_curves.get(mtype, [])
        fig.add_trace(go.Scatter(
            x=list(range(len(curve))), y=curve, mode='lines+markers', name=mtype,
        ))
    fig.update_layout(
        title="所有模型Bayes優化歷程 (CV RMSE)",
        xaxis_title="Iteration",
        yaxis_title="CV RMSE",
        hovermode="x unified",
    )
    outs = [fig]
    # Fixed-length per-model outputs, in UI order.
    for mtype in MODEL_ORDER:
        if mtype in model_types:
            curve = rmse_curves.get(mtype, [])
            fig_tab = go.Figure(go.Scatter(
                x=list(range(len(curve))), y=curve, mode='lines+markers', name=mtype,
            ))
            outs.extend([fig_tab, summaries.get(mtype, ""), best_params_dict.get(mtype, "")])
        else:
            outs.extend([None, "", ""])
    return outs


# [AI search for optimal conditions]
# = Infer a search space per column (continuous vs discrete) =
def suggest_optimization_space(df, discrete_cols=None):
    """Build an skopt search space from all feature columns (every column
    except the last). Columns listed in ``discrete_cols`` or with fewer
    than 8 distinct values become Categorical; the rest become Real.

    FIX: the mutable default ``[]`` is replaced with ``None``.
    """
    discrete_cols = [] if discrete_cols is None else discrete_cols
    space = {}
    for col in df.columns[:-1]:
        if col in discrete_cols:
            space[col] = Categorical(sorted(df[col].unique()))
        else:
            vmin, vmax = df[col].min(), df[col].max()
            if len(df[col].unique()) < 8:
                space[col] = Categorical(sorted(df[col].unique()))
            else:
                space[col] = Real(vmin, vmax)
    return space
df[col].min(), df[col].max() if len(df[col].unique()) < 8: space[col] = Categorical(sorted(df[col].unique())) else: space[col] = Real(vmin, vmax) return space # = 搜尋單模型之預測最佳/最差條件組合 = def find_best_feature(model, X, maximize=True): print(">>> maximize=", maximize) # Debug 印出 # 建立每個feature的上下限作為搜尋範圍 bounds = [] for i in range(X.shape[1]): vmin, vmax = X.iloc[:, i].min(), X.iloc[:, i].max() bounds.append((vmin, vmax)) def obj(x): y_pred = model.predict(np.array(x).reshape(1, -1))[0] print("obj() y_pred:", y_pred, "| maximize:", maximize) return -y_pred if maximize else y_pred x0 = X.mean().values res = minimize(obj, x0, bounds=bounds) best_x = res.x best_pred = -res.fun if maximize else res.fun return best_x, best_pred # = 多模型搜尋最佳化參數組合(最大化/最小化)= def optimize_conditions(datafile, model_types, direction, is_discrete=False, n_iter=32): if datafile is None or not model_types: return pd.DataFrame({"提醒": ["請上傳DoE數據與選擇模型"]}), "", "", "" # 四個 output df = pd.read_csv(datafile.name if hasattr(datafile, "name") else datafile) X, y = df.iloc[:, :-1], df.iloc[:, -1] maximize = (direction == "最大化") summary = [] all_results = [] for mtype in model_types: if mtype == "Random Forest": model = RandomForestRegressor(n_estimators=160, random_state=42) elif mtype == "XGBoost": model = XGBRegressor(n_estimators=100, random_state=42, verbosity=0) elif mtype == "LightGBM": model = LGBMRegressor(n_estimators=100, random_state=42) elif mtype == "SVR": model = make_pipeline(StandardScaler(), SVR()) elif mtype == "Linear Regression": model = make_pipeline(StandardScaler(), LinearRegression()) elif mtype == "Lasso": model = make_pipeline(StandardScaler(), Lasso(alpha=0.0001, max_iter=5000)) elif mtype == "Ridge": model = make_pipeline(StandardScaler(), Ridge()) elif mtype == "ElasticNet": model = make_pipeline(StandardScaler(), ElasticNet(alpha=0.0001)) else: continue model.fit(X, y) if "大" in str(direction): # 有「大」字就是最大化 maximize = True else: maximize = False best_x, best_pred = 
find_best_feature(model, X, maximize=maximize) feature_dict = {k: float(v) for k, v in zip(X.columns, best_x)} feature_dict["模型"] = mtype feature_dict["預測產率"] = float(best_pred) all_results.append(feature_dict) summary.append( f"【{mtype}】最佳反應條件:{feature_dict}
預測產率/目標值:{best_pred:.3g}
" ) df_out = pd.DataFrame(all_results) txt = "
".join(summary) desc = "" # 可寫 "依據多模型搜尋理論極值" sum_ = "" # 你可以回傳模型特性、附註等 return df_out, txt, desc, sum_ # [AI模型訓練前後回歸比較] # = 單一y,對比多模型在前後資料集的R2/RMSE與特徵重要性變化 = def compare_models_before_after(old_csv, new_csv, model_linear, model_nonlinear, target): try: # 合併用戶選的模型 model_types = (model_linear or []) + (model_nonlinear or []) if old_csv is None or new_csv is None or not model_types: return "⚠️ 請上傳原始/合併DoE並選擇模型", pd.DataFrame(), None, None old_df = pd.read_csv(old_csv.name if hasattr(old_csv, "name") else old_csv) new_df = pd.read_csv(new_csv.name if hasattr(new_csv, "name") else new_csv) if target not in old_df.columns or target not in new_df.columns: return f"⚠️ 缺少 '{target}' 欄位", pd.DataFrame(), None, None X_old, y_old = old_df.drop(columns=[target]), old_df[target] X_new, y_new = new_df.drop(columns=[target]), new_df[target] cv_num = min(5, len(y_old), len(y_new)) if cv_num < 2: return "⚠️ 資料筆數太少,無法交叉驗證(至少需2列)", pd.DataFrame(), None, None rows = [] importances = {} for mtype in model_types: if mtype == "Random Forest": model1 = RandomForestRegressor(n_estimators=120, random_state=42) model2 = RandomForestRegressor(n_estimators=120, random_state=42) elif mtype == "XGBoost": model1 = XGBRegressor(n_estimators=100, random_state=42, verbosity=0) model2 = XGBRegressor(n_estimators=100, random_state=42, verbosity=0) elif mtype == "LightGBM": model1 = LGBMRegressor(n_estimators=100, random_state=42) model2 = LGBMRegressor(n_estimators=100, random_state=42) elif mtype == "SVR": model1 = make_pipeline(StandardScaler(), SVR()) model2 = make_pipeline(StandardScaler(), SVR()) elif mtype == "Linear Regression": model1 = make_pipeline(StandardScaler(), LinearRegression()) model2 = make_pipeline(StandardScaler(), LinearRegression()) elif mtype == "Lasso": model1 = make_pipeline(StandardScaler(), Lasso(alpha=0.0001, max_iter=5000)) model2 = make_pipeline(StandardScaler(), Lasso(alpha=0.0001, max_iter=5000)) elif mtype == "Ridge": model1 = make_pipeline(StandardScaler(), Ridge()) 
model2 = make_pipeline(StandardScaler(), Ridge()) elif mtype == "ElasticNet": model1 = make_pipeline(StandardScaler(), ElasticNet(alpha=0.0001)) model2 = make_pipeline(StandardScaler(), ElasticNet(alpha=0.0001)) else: continue try: # R² (CV) r2_old = cross_val_score(model1, X_old, y_old, cv=cv_num, scoring="r2").mean() r2_new = cross_val_score(model2, X_new, y_new, cv=cv_num, scoring="r2").mean() # RMSE (fit on all, compute on all) model1.fit(X_old, y_old) model2.fit(X_new, y_new) pred_old = model1.predict(X_old) pred_new = model2.predict(X_new) rmse_old = np.sqrt(np.mean((pred_old - y_old) ** 2)) rmse_new = np.sqrt(np.mean((pred_new - y_new) ** 2)) # feature importance if hasattr(model1, "feature_importances_"): imp_old = model1.feature_importances_ imp_new = model2.feature_importances_ elif hasattr(model1, "named_steps") and "randomforestregressor" in model1.named_steps: imp_old = model1.named_steps["randomforestregressor"].feature_importances_ imp_new = model2.named_steps["randomforestregressor"].feature_importances_ elif hasattr(model1, "coef_"): imp_old = np.abs(model1.coef_) imp_new = np.abs(model2.coef_) elif hasattr(model1, "named_steps") and "linearregression" in model1.named_steps: imp_old = np.abs(model1.named_steps["linearregression"].coef_) imp_new = np.abs(model2.named_steps["linearregression"].coef_) else: imp_old = np.zeros(X_old.shape[1]) imp_new = np.zeros(X_new.shape[1]) rows.append({ "模型": mtype, "原始R2": r2_old, "合併新點R2": r2_new, "原始RMSE": rmse_old, "合併新點RMSE": rmse_new }) # 只有計算成功的才存 importance importances[mtype+"_old"] = imp_old importances[mtype+"_new"] = imp_new except Exception as model_e: rows.append({ "模型": mtype, "原始R2": f"Error: {model_e}", "合併新點R2": f"Error: {model_e}", "原始RMSE": f"Error: {model_e}", "合併新點RMSE": f"Error: {model_e}" }) # 不把異常模型 importance 放進去 print(f"{mtype} failed: {model_e}") continue table = pd.DataFrame(rows) # ========== Feature Importance Plot (只畫有用的) ========== fig_fi = go.Figure() features = list(X_old.columns) 
colors = [ "#4E79A7", "#F28E2B", "#76B7B2", "#E15759", "#59A14F", "#EDC948", "#B07AA1", "#FF9DA7" ] plot_count = 0 # 用於 color 依序遞增 for mtype in model_types: k_old = f"{mtype}_old" k_new = f"{mtype}_new" # 只畫 importance > 0 的 if k_old in importances and np.sum(importances[k_old]) > 0: fig_fi.add_trace(go.Bar( x=features, y=importances[k_old], name=f"{mtype} - Before", marker_color=colors[plot_count % len(colors)], opacity=0.75 )) plot_count += 1 if k_new in importances and np.sum(importances[k_new]) > 0: fig_fi.add_trace(go.Bar( x=features, y=importances[k_new], name=f"{mtype} - After", marker_color=colors[plot_count % len(colors)], opacity=0.4 )) plot_count += 1 fig_fi.update_layout( barmode="group", title="Feature Importance Comparison", xaxis_title="Feature", yaxis_title="Importance", legend_title="Model", font=dict(size=13) ) # ========== y Distribution Plot ========== fig_y = go.Figure() fig_y.add_trace(go.Histogram( x=old_df[target], name="Before", opacity=0.7, nbinsx=16 )) fig_y.add_trace(go.Histogram( x=new_df[target], name="After", opacity=0.7, nbinsx=16 )) fig_y.update_layout( barmode='overlay', title="y Distribution Comparison", xaxis_title=target, yaxis_title="Count", legend_title="Dataset", font=dict(size=13) ) return "", table, fig_fi, fig_y except Exception as e: import traceback tb = traceback.format_exc() print("=== DEBUG ERROR ===") print(tb) return f"❌ 系統發生錯誤:{str(e)}", pd.DataFrame(), None, None # = 根據R2變化自動產生效能總結摘要 = def generate_r2_summary(table): lines = [] for idx, row in table.iterrows(): model = row['模型'] try: r2_before = float(row['原始R2']) r2_after = float(row['合併新點R2']) delta = r2_after - r2_before if r2_after < r2_before - 0.1: lines.append(f"**{model}:表現顯著下降(R² {r2_before:.2f} → {r2_after:.2f})**") elif r2_after > r2_before + 0.1: lines.append(f"{model}:模型表現提升(R² {r2_before:.2f} → {r2_after:.2f})") elif abs(delta) < 0.1: lines.append(f"{model}:R²無明顯變化(R² {r2_before:.2f} → {r2_after:.2f})") except Exception: 
lines.append(f"{model}:計算失敗或資料不足。") if not lines: lines = ["無有效模型結果。"] return "### AI模型R²比較摘要\n" + "\n".join(lines) # = 整合前後回歸比較結果與自動摘要 = def compare_models_before_after_with_summary(old_csv, new_csv, model_types, target="y"): result = compare_models_before_after(old_csv, new_csv, model_types, target) table = result[0] summary = generate_r2_summary(table) return (*result, summary) # = 多y目標下,對比多模型在前後資料集的效能與特徵重要性變化 = def compare_models_multi_y_before_after(old_csv, new_csv, model_types, targets): # 防呆 if old_csv is None or new_csv is None or not model_types or not targets: return {}, {}, {}, "請確認已上傳檔案、選擇模型與目標欄位" old_df = pd.read_csv(old_csv.name if hasattr(old_csv, "name") else old_csv) new_df = pd.read_csv(new_csv.name if hasattr(new_csv, "name") else new_csv) if isinstance(targets, str): targets = [targets] result_tables, feature_figs, ydist_figs = {}, {}, {} summary_lines = [] for target in targets: if target not in old_df.columns or target not in new_df.columns: summary_lines.append(f"❌ 欄位 {target} 在資料中不存在,略過。") continue X_old, y_old = old_df.drop(columns=[target]), old_df[target] X_new, y_new = new_df.drop(columns=[target]), new_df[target] cv_num = min(5, len(y_old), len(y_new)) if cv_num < 2: summary_lines.append(f"⚠️ {target} 資料筆數不足無法交叉驗證。") continue rows, importances = [], {} for mtype in model_types: # 建立模型 if mtype == "Random Forest": model1 = RandomForestRegressor(n_estimators=120, random_state=42) model2 = RandomForestRegressor(n_estimators=120, random_state=42) elif mtype == "XGBoost": model1 = XGBRegressor(n_estimators=100, random_state=42, verbosity=0) model2 = XGBRegressor(n_estimators=100, random_state=42, verbosity=0) elif mtype == "LightGBM": model1 = LGBMRegressor(n_estimators=100, random_state=42) model2 = LGBMRegressor(n_estimators=100, random_state=42) elif mtype == "SVR": model1 = make_pipeline(StandardScaler(), SVR()) model2 = make_pipeline(StandardScaler(), SVR()) elif mtype == "Linear Regression": model1 = make_pipeline(StandardScaler(), 
LinearRegression()) model2 = make_pipeline(StandardScaler(), LinearRegression()) elif mtype == "Lasso": model1 = make_pipeline(StandardScaler(), Lasso(alpha=0.0001, max_iter=5000)) model2 = make_pipeline(StandardScaler(), Lasso(alpha=0.0001, max_iter=5000)) elif mtype == "Ridge": model1 = make_pipeline(StandardScaler(), Ridge()) model2 = make_pipeline(StandardScaler(), Ridge()) elif mtype == "ElasticNet": model1 = make_pipeline(StandardScaler(), ElasticNet(alpha=0.0001)) model2 = make_pipeline(StandardScaler(), ElasticNet(alpha=0.0001)) else: continue try: r2_old = cross_val_score(model1, X_old, y_old, cv=cv_num, scoring="r2").mean() r2_new = cross_val_score(model2, X_new, y_new, cv=cv_num, scoring="r2").mean() model1.fit(X_old, y_old) model2.fit(X_new, y_new) pred_old = model1.predict(X_old) pred_new = model2.predict(X_new) rmse_old = np.sqrt(np.mean((pred_old - y_old) ** 2)) rmse_new = np.sqrt(np.mean((pred_new - y_new) ** 2)) # feature importance if hasattr(model1, "feature_importances_"): imp_old = model1.feature_importances_ imp_new = model2.feature_importances_ elif hasattr(model1, "named_steps") and "randomforestregressor" in model1.named_steps: imp_old = model1.named_steps["randomforestregressor"].feature_importances_ imp_new = model2.named_steps["randomforestregressor"].feature_importances_ elif hasattr(model1, "coef_"): imp_old = np.abs(model1.coef_) imp_new = np.abs(model2.coef_) elif hasattr(model1, "named_steps") and "linearregression" in model1.named_steps: imp_old = np.abs(model1.named_steps["linearregression"].coef_) imp_new = np.abs(model2.named_steps["linearregression"].coef_) else: imp_old = np.zeros(X_old.shape[1]) imp_new = np.zeros(X_new.shape[1]) rows.append({ "模型": mtype, "原始R2": r2_old, "合併新點R2": r2_new, "原始RMSE": rmse_old, "合併新點RMSE": rmse_new }) importances[mtype+"_old"] = imp_old importances[mtype+"_new"] = imp_new except Exception as model_e: rows.append({ "模型": mtype, "原始R2": f"Error: {model_e}", "合併新點R2": f"Error: {model_e}", 
"原始RMSE": f"Error: {model_e}", "合併新點RMSE": f"Error: {model_e}" }) table = pd.DataFrame(rows) result_tables[target] = table # 特徵重要性圖 features = list(X_old.columns) fig_fi = go.Figure() colors = [ "#4E79A7", "#F28E2B", "#76B7B2", "#E15759", "#59A14F", "#EDC948", "#B07AA1", "#FF9DA7" ] plot_count = 0 for mtype in model_types: k_old = f"{mtype}_old" k_new = f"{mtype}_new" if k_old in importances and np.sum(importances[k_old]) > 0: fig_fi.add_trace(go.Bar( x=features, y=importances[k_old], name=f"{mtype} - Before", marker_color=colors[plot_count % len(colors)], opacity=0.75 )) plot_count += 1 if k_new in importances and np.sum(importances[k_new]) > 0: fig_fi.add_trace(go.Bar( x=features, y=importances[k_new], name=f"{mtype} - After", marker_color=colors[plot_count % len(colors)], opacity=0.4 )) plot_count += 1 fig_fi.update_layout( barmode="group", title=f"{target} Feature Importance Comparison", xaxis_title="Feature", yaxis_title="Importance", legend_title="Model", font=dict(size=13) ) feature_figs[target] = fig_fi # y分布圖 fig_y = go.Figure() fig_y.add_trace(go.Histogram( x=old_df[target], name="Before", opacity=0.7, nbinsx=16 )) fig_y.add_trace(go.Histogram( x=new_df[target], name="After", opacity=0.7, nbinsx=16 )) fig_y.update_layout( barmode='overlay', title=f"{target} y Distribution Comparison", xaxis_title=target, yaxis_title="Count", legend_title="Dataset", font=dict(size=13) ) ydist_figs[target] = fig_y # 摘要 # 給摘要更精簡 for _, row in table.iterrows(): try: r2_before = float(row['原始R2']) r2_after = float(row['合併新點R2']) model = row['模型'] delta = r2_after - r2_before if r2_after < r2_before - 0.1: summary_lines.append(f"{target} - {model}:顯著下降(R² {r2_before:.2f} → {r2_after:.2f})") elif r2_after > r2_before + 0.1: summary_lines.append(f"{target} - {model}:提升(R² {r2_before:.2f} → {r2_after:.2f})") elif abs(delta) < 0.1: summary_lines.append(f"{target} - {model}:無明顯變化(R² {r2_before:.2f} → {r2_after:.2f})") except Exception: summary_lines.append(f"{target} - 
{row['模型']}:計算失敗/資料不足") # 回傳 dict,供 Tab 動態展示 return result_tables, feature_figs, ydist_figs, "### AI多 y 回歸比較摘要\n" + "
".join(summary_lines) # = 自動根據關鍵字偵測數據集y欄位 = def detect_y_columns(csv_file, keyword_str): if csv_file is None: return gr.update(choices=[], value=[]) try: df = pd.read_csv(csv_file.name if hasattr(csv_file, "name") else csv_file) keywords = [k.strip().lower() for k in keyword_str.split(",") if k.strip()] cols = [] for c in df.columns: # 關鍵字優先 if any(k in str(c).lower() for k in keywords): cols.append(c) # 若沒關鍵字命中,則用 fallback:所有數值欄位 if not cols: cols = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])] # 避免太多,最多只選4個 if len(cols) > 4: cols = cols[-4:] return gr.update(choices=cols, value=cols[:2]) except Exception: return gr.update(choices=[], value=[]) # [3D/2D分布、反應面、等高線視覺化] # = 3D散點圖 + 預測曲面圖(隨機森林擬合)= def plot_3d_scatter_surface( file, x_col, y_col, z_col, target_col, surface_fit=False, # 是否繪製曲面 n_grid=40 # 曲面分辨率 ): # 預設空圖 empty_fig = go.Figure() empty_fig.update_layout(template="plotly_white") if file is None or not x_col or not y_col or not z_col or not target_col: return empty_fig, empty_fig try: df = pd.read_csv(file.name if hasattr(file, "name") else file) # 防呆 if not all([c in df.columns for c in [x_col, y_col, z_col, target_col]]): return empty_fig, empty_fig # 三維散點圖 fig_scatter = px.scatter_3d( df, x=x_col, y=y_col, z=z_col, color=target_col, title=f"3D Scatter: {x_col}, {y_col}, {z_col} / Color={target_col}", opacity=0.85 ) # 三維曲面圖 fig_surface = empty_fig if surface_fit: # 只取指定欄位 X = df[[x_col, y_col, z_col]].values y = df[target_col].values # fit一個隨機森林 model = RandomForestRegressor(n_estimators=80, random_state=0) model.fit(X, y) # 建立網格 x_lin = np.linspace(df[x_col].min(), df[x_col].max(), n_grid) y_lin = np.linspace(df[y_col].min(), df[y_col].max(), n_grid) z_lin = np.linspace(df[z_col].min(), df[z_col].max(), n_grid) xx, yy, zz = np.meshgrid(x_lin, y_lin, z_lin) grid_points = np.c_[xx.ravel(), yy.ravel(), zz.ravel()] yy_pred = model.predict(grid_points) # 用scatter3d畫預測點 fig_surface = go.Figure(data=[ go.Scatter3d( x=grid_points[:,0], 
y=grid_points[:,1], z=grid_points[:,2], mode='markers', marker=dict(size=2.2, color=yy_pred, colorscale='Viridis', opacity=0.35), name="預測表面" ) ]) fig_surface.update_layout( title=f"3D Predicted Surface: {target_col} vs {x_col}, {y_col}, {z_col}", scene=dict( xaxis_title=x_col, yaxis_title=y_col, zaxis_title=z_col ) ) return fig_scatter, fig_surface except Exception as e: # 除錯用途可log e,但前端只回傳空圖 return empty_fig, empty_fig # = 自動解析欄位,更新3D視覺化的欄位選單 = def update_dropdowns(file): print("收到 file:", file) if file is None: print("沒有收到檔案") return (["(請選擇)"], "(請選擇)"), (["(請選擇)"], "(請選擇)"), (["(請選擇)"], "(請選擇)"), (["(請選擇)"], "(請選擇)") try: path = file.name if hasattr(file, "name") else file print("預計讀取路徑:", path) print("檔案存在?", os.path.exists(path)) df = pd.read_csv(path) cols = list(df.select_dtypes(include="number").columns) if not cols: cols = list(df.columns) x_def = cols[0] if len(cols) > 0 else "(請選擇)" y_def = cols[1] if len(cols) > 1 else x_def z_def = cols[2] if len(cols) > 2 else x_def t_def = cols[-1] if len(cols) > 0 else x_def print("預設值:", x_def, y_def, z_def, t_def) return (cols, x_def), (cols, y_def), (cols, z_def), (cols, t_def) except Exception as e: print("讀檔失敗:", e) return (["(請選擇)"], "(請選擇)"), (["(請選擇)"], "(請選擇)"), (["(請選擇)"], "(請選擇)"), (["(請選擇)"], "(請選擇)") # = 二變數反應面/等高線圖(3D Surface/Contour)= def plot_surface_and_contour(file, x_col, y_col, z_col, n_grid=40): empty_fig = go.Figure() empty_fig.update_layout(template="plotly_white") empty_fig2 = go.Figure() empty_fig2.update_layout(template="plotly_white") if file is None or not x_col or not y_col or not z_col: return empty_fig, empty_fig2 try: df = pd.read_csv(file.name if hasattr(file, "name") else file) if not all([c in df.columns for c in [x_col, y_col, z_col]]): return empty_fig, empty_fig2 x, y, z = df[x_col].values, df[y_col].values, df[z_col].values # 建立格點 xi = np.linspace(x.min(), x.max(), n_grid) yi = np.linspace(y.min(), y.max(), n_grid) xi, yi = np.meshgrid(xi, yi) zi = griddata((x, y), z, (xi, 
yi), method="cubic") # Surface 3D 曲面圖 fig_surface = go.Figure(data=[ go.Surface(x=xi, y=yi, z=zi, colorscale="Viridis", opacity=0.93, showscale=True) ]) fig_surface.update_layout( title=f"3D 曲面圖:{z_col} vs {x_col}, {y_col}", scene=dict( xaxis_title=x_col, yaxis_title=y_col, zaxis_title=z_col ), margin=dict(l=0, r=0, b=0, t=40) ) # Contour 等高線圖 fig_contour = go.Figure(data=[ go.Contour( x=xi[0], y=yi[:,0], z=zi, colorscale="Viridis", contours=dict(showlabels=True), colorbar=dict(title=z_col) ) ]) fig_contour.update_layout( title=f"等高線圖:{z_col} vs {x_col}, {y_col}", xaxis_title=x_col, yaxis_title=y_col, margin=dict(l=0, r=0, b=0, t=40) ) return fig_surface, fig_contour except Exception as e: print(f"3D surface/contour plot error: {e}") return empty_fig, empty_fig2 # [複合y批次多模型回歸/交互作用] # = 根據模型名稱產生多y回歸可用的模型物件 = def get_model(name): if name == "Random Forest": return MultiOutputRegressor(RandomForestRegressor(n_estimators=100, random_state=0)) elif name == "XGBoost": return MultiOutputRegressor(XGBRegressor(n_estimators=100, random_state=0)) elif name == "PLS Regression": return PLSRegression(n_components=2) elif name == "Ridge": return MultiOutputRegressor(Ridge()) elif name == "Lasso": return MultiOutputRegressor(Lasso()) elif name == "ElasticNet": return MultiOutputRegressor(ElasticNet()) elif name == "Linear Regression": return MultiOutputRegressor(LinearRegression()) elif name == "SVR": return MultiOutputRegressor(SVR()) else: raise ValueError(f"Unknown model: {name}") # = 根據關鍵字從資料中自動偵測多y欄位 = def detect_y_columns(file, keywords_str): import re if file is None: return gr.update(choices=[], value=[]) df = pd.read_csv(file.name if hasattr(file, 'name') else file) keywords = [kw.strip() for kw in keywords_str.split(",") if kw.strip()] patt = re.compile("|".join([re.escape(k) for k in keywords]), re.IGNORECASE) y_candidates = [c for c in df.columns if patt.search(str(c))] return gr.update(choices=list(df.columns), value=y_candidates) # = 多y/交互作用/多模型的批次回歸主程式 = def 
run_multi_y(before_file, after_file, linear, nonlinear, ylist, add_inter, add_y_inter, degree): df = pd.read_csv(before_file.name if hasattr(before_file, 'name') else before_file) if not ylist or not (linear or nonlinear): return "請選擇目標y欄位與模型", *[""]*4, *[None]*12 X = df.drop(columns=ylist) Y = df[ylist] X = X.select_dtypes(include=[np.number]) # 1. 特徵交互作用 if add_inter and int(degree) > 1: poly = PolynomialFeatures(degree=int(degree), interaction_only=True, include_bias=False) X_inter = pd.DataFrame(poly.fit_transform(X), columns=poly.get_feature_names_out(X.columns)) else: X_inter = X.copy() # 2. y 交互作用 if add_y_inter and len(ylist) > 1: for i in range(len(ylist)): for j in range(i+1, len(ylist)): Y[f"{ylist[i]}*{ylist[j]}"] = Y[ylist[i]] * Y[ylist[j]] X_train, X_test, Y_train, Y_test = train_test_split(X_inter, Y, test_size=0.2, random_state=42) # 3. 多模型分析 model_names = (linear or []) + (nonlinear or []) results, tab_results = [], [] for m in model_names: model = get_model(m) model.fit(X_train, Y_train) pred = model.predict(X_test) if isinstance(pred, np.ndarray): pred = pd.DataFrame(pred, columns=Y.columns) else: pred = pd.DataFrame(pred, columns=Y.columns) scores = {y: r2_score(Y_test[y], pred[y]) for y in Y.columns} rmses = {y: np.sqrt(mean_squared_error(Y_test[y], pred[y])) for y in Y.columns} model_summary = pd.DataFrame({ "Model": [m]*len(Y.columns), "y": list(Y.columns), "R2": [scores[y] for y in Y.columns], "RMSE": [rmses[y] for y in Y.columns] }) results.append(model_summary) for i, y in enumerate(Y.columns[:4]): # ==== Robust 特徵重要性自動判斷 ==== if hasattr(model, "estimators_"): est = model.estimators_[i] else: est = model # 樹模型 if hasattr(est, "feature_importances_"): importances = est.feature_importances_ # 線性模型 elif hasattr(est, "coef_"): # 注意 shape coef = est.coef_ if coef.ndim > 1: importances = np.abs(coef[i]) else: importances = np.abs(coef) # PLS (Partial Least Squares) elif hasattr(est, "x_weights_"): importances = np.abs(est.x_weights_[:, 0]) # 
其餘模型 fallback else: importances = np.zeros(X_inter.shape[1]) feat_names = X_inter.columns # ====== 畫圖 ======= fig_feat = go.Figure([go.Bar(x=feat_names, y=importances)]) fig_feat.update_layout(title=f"{m} {y} Feature Importances", height=440) fig_dist = go.Figure() fig_dist.add_trace(go.Histogram(x=Y_test[y], name='True', opacity=0.7)) fig_dist.add_trace(go.Histogram(x=pred[y], name='Pred', opacity=0.7)) fig_dist.update_layout(barmode='overlay', title=f"{m} {y} True vs Pred Dist", height=440) tab_results.append((f"【{m}】y: {y}", model_summary, fig_feat, fig_dist)) # 結果組合 out_titles, out_tables, out_feats, out_ydists = [], [], [], [] N=8 for i in range(N): if i < len(tab_results): tab = tab_results[i] out_titles.append(tab[0]) out_tables.append(tab[1]) out_feats.append(tab[2]) out_ydists.append(tab[3]) else: out_titles.append("") out_tables.append(None) out_feats.append(None) out_ydists.append(None) summary = pd.concat(results, ignore_index=True) if results else "" return summary, *out_titles, *out_tables, *out_feats, *out_ydists # [自動function管理核心] import re def extract_tab_ui_and_function(pyfile="app.py"): """ 抓每個Tab所有UI元件,以及其是否綁定function/lambda/callback """ try: with open(pyfile, encoding="utf-8") as f: code = f.read() except Exception as e: return f"❌ 讀取 {pyfile} 失敗:{e}" # (1) 抓所有 def defs = set(re.findall(r"def\s+([a-zA-Z_][\w\d_]*)\s*\(", code)) # (2) 抓所有 Tab 區塊 tab_pattern = re.compile( r'with gr\.Tab(?:Item)?\(\s*[\'"](.+?)[\'"]\s*\):([\s\S]*?)(?=with gr\.Tab|with gr\.TabItem|\Z)', re.MULTILINE) # (3) UI元件建立(如 gr.Markdown、gr.Dataframe、gr.File、gr.Button...) ui_pattern = re.compile(r'(gr\.[A-Za-z_]+)\s*\(') # (4) 互動事件 event_pattern = re.compile( r'([a-zA-Z_][\w\d_]*)\.(click|change|submit|select)\(\s*([a-zA-Z_][\w\d_]*|lambda)(.*?)(\)|$)', re.DOTALL) output = "# 🧩 各Tab UI元件與function/lambda/callback mapping\n\n" for m in tab_pattern.finditer(code): tab_name, tab_code = m.group(1), m.group(2) output += f"## {tab_name}\n" # 1. 
抓所有 UI元件名稱 ui_list = ui_pattern.findall(tab_code) # 統計有幾種元件 ui_count = {} for u in ui_list: ui_count[u] = ui_count.get(u, 0) + 1 if ui_list: output += "### 本Tab使用UI元件:\n" for u in sorted(set(ui_list)): output += f"- `{u}` x {ui_count[u]}\n" else: output += "- (本Tab沒有任何UI元件)\n" # 2. 掃描本Tab互動callback func_map = [] for ev in event_pattern.findall(tab_code): obj, trigger, fn, args, _ = ev if fn == "lambda": func_map.append(f"{obj}.{trigger} → lambda(匿名)") else: func_map.append(f"{obj}.{trigger} → {fn}()") # callback 參數 cb_matches = re.findall(r'(_callback|_js|_preprocess|_postprocess)\s*=\s*([a-zA-Z_][\w\d_]*|lambda[^\),]*)', args) for cb_type, cb_fn in cb_matches: if cb_fn.strip().startswith("lambda"): func_map.append(f"{obj}.{cb_type} → lambda") else: func_map.append(f"{obj}.{cb_type} → {cb_fn.strip()}()") if func_map: output += "\n### 有callback的元件/方法:\n" for item in func_map: output += f"- {item}\n" else: output += "\n- 本Tab所有UI皆為純靜態,無綁定function\n" output += "\n" return output def extract_tab_functions_with_lambda_and_callback(pyfile="app.py"): """ 同時抓取每個Tab的所有UI元件,與互動function/lambda/callback mapping。 """ try: with open(pyfile, encoding="utf-8") as f: code = f.read() except Exception as e: return f"❌ 讀取 {pyfile} 失敗:{e}" # (1) 抓所有 def defs = set(re.findall(r"def\s+([a-zA-Z_][\w\d_]*)\s*\(", code)) # (2) 抓所有 Tab 區塊 tab_pattern = re.compile( r'with gr\.Tab(?:Item)?\(\s*[\'"](.+?)[\'"]\s*\):([\s\S]*?)(?=with gr\.Tab|with gr\.TabItem|\Z)', re.MULTILINE) # (3) UI元件建立(如 gr.Markdown、gr.Dataframe、gr.File、gr.Button...) ui_pattern = re.compile(r'(gr\.[A-Za-z_]+)\s*\(') # (4) 互動事件 event_pattern = re.compile( r'([a-zA-Z_][\w\d_]*)\.(click|change|submit|select)\(\s*([a-zA-Z_][\w\d_]*|lambda)(.*?)(\)|$)', re.DOTALL) output = "# 🧩 各Tab UI元件與function/lambda/callback mapping\n\n" for m in tab_pattern.finditer(code): tab_name, tab_code = m.group(1), m.group(2) output += f"## {tab_name}\n" # 1. 
抓所有 UI元件名稱 ui_list = ui_pattern.findall(tab_code) # 統計有幾種元件 ui_count = {} for u in ui_list: ui_count[u] = ui_count.get(u, 0) + 1 if ui_list: output += "### 本Tab使用UI元件:\n" for u in sorted(set(ui_list)): output += f"- `{u}` x {ui_count[u]}\n" else: output += "- (本Tab沒有任何UI元件)\n" # 2. 掃描本Tab互動callback func_map = [] for ev in event_pattern.findall(tab_code): obj, trigger, fn, args, _ = ev if fn == "lambda": func_map.append(f"{obj}.{trigger} → lambda(匿名)") else: func_map.append(f"{obj}.{trigger} → {fn}()") # callback 參數 cb_matches = re.findall(r'(_callback|_js|_preprocess|_postprocess)\s*=\s*([a-zA-Z_][\w\d_]*|lambda[^\),]*)', args) for cb_type, cb_fn in cb_matches: if cb_fn.strip().startswith("lambda"): func_map.append(f"{obj}.{cb_type} → lambda") else: func_map.append(f"{obj}.{cb_type} → {cb_fn.strip()}()") if func_map: output += "\n### 有callback的元件/方法:\n" for item in func_map: output += f"- {item}\n" else: output += "\n- 本Tab所有UI皆為純靜態,無綁定function\n" output += "\n" return output def extract_all_functions(pyfile="app.py"): import re try: with open(pyfile, encoding="utf-8") as f: code = f.read() except Exception as e: return f"❌ 讀取 {pyfile} 失敗:{e}" func_pattern = re.compile( r"^(def [a-zA-Z_][\w\d_]*\(.*?\):(?:\n(?: |\t).*)*)", re.MULTILINE) output = "## 📃 所有 function 定義\n" matches = func_pattern.findall(code) if not matches: return "❗ 沒有抓到任何 function (def)!" for func in matches: func_name = re.match(r"def ([a-zA-Z_][\w\d_]*)", func) output += f"---\n### `{func_name.group(1) if func_name else '?'}()`\n" func_lines = func.split("\n") if len(func_lines) > 10: output += "```python\n" + "\n".join(func_lines[:10]) + "\n... 
(略)\n```\n" else: output += "```python\n" + func + "\n```\n" return output # ======================== Gradio 多分頁主UI ======================== with gr.Blocks(css=custom_css) as demo: gr.Markdown("## AI化實驗設計與數據分析平台 ") with gr.Tabs(): # 1️⃣ 標準DoE設計分布 with gr.Tab("1️⃣ 標準DoE "): gr.Markdown(""" ### 🧪 標準DoE設計分布 - 支援自動產生參數空間內的多種經典DoE設計法(LHS, Sobol, Halton, Uniform) - 可視覺化設計點分布、產生對應的設計參數表 - 支援下載CSV檔、表格內容可複製 **如何使用:** - 1️⃣ 填寫參數名稱、範圍及步進 - 2️⃣ 設定要產生的組數與亂數種子(可選) - 3️⃣ 點選「產生設計+分布圖」 - 4️⃣ 下方各分頁可檢視不同設計法的結果、分布圖與自動摘要 **注意事項:** - 所有參數名稱需唯一、不得重複 - 組數愈大,運算與繪圖所需時間會增加 - 請檢查參數範圍、步進格式是否正確 """) with gr.Row(): with gr.Column(scale=1, min_width=240): with gr.Accordion("參數設定", open=True): param_table = gr.Dataframe( headers=["名稱", "最小值", "最大值", "間隔(step)"], datatype=["str", "number", "number", "number"], row_count=(3, "dynamic"), col_count=(4, "fixed"), value=[["A", 10, 20, 2], ["B", 100, 200, 25], ["C", 1, 2, 0.5]], label="參數設定" ) n_samples = gr.Number(label="組數", value=8, precision=0) seed = gr.Number(label="亂數種子(留空或0為隨機)", value=42, precision=0) btn = gr.Button("🪄 產生設計與分布圖", elem_classes=["main-btn"]) with gr.Column(scale=2): with gr.Accordion("分布結果/圖表/摘要", open=True): tabs = [] for name in ["LHS", "Sobol", "Halton", "Uniform"]: with gr.Tab(name): df = gr.Dataframe(label=f"{name} 設計點表格") fig = gr.Plot(label=f"{name} 設計分布") csv = gr.File(label="下載CSV📥") desc = gr.Markdown() summary = gr.Markdown() tabs.extend([df, fig, csv, desc, summary]) btn.click(compare_all_designs, inputs=[param_table, n_samples, seed], outputs=tabs) # 2️⃣ 進階DoE(Box-Behnken/CCD) with gr.Tab("2️⃣ 進階DoE(Box-Behnken/CCD)"): gr.Markdown(""" ### 🧪 進階DoE (Box-Behnken/CCD) - 支援 Box-Behnken 與 中心組合設計 (CCD) 兩種進階DoE設計法 - 同步產生標準化設計矩陣與對應實際參數表 - 提供一鍵下載CSV,方便後續AI建模 **如何使用:** - 1️⃣ 設定各參數的最小、最大值及間隔 - 2️⃣ 選擇所需的設計法(Box-Behnken或CCD) - 3️⃣ 點「產生進階DoE設計」即自動產生全部設計點 **注意事項:** - 參數欄位請完整填寫,勿留空 - 各參數區間需合理,否則設計點數量可能異常 - 若需大規模設計點,運算會稍久,請耐心等待 """) with gr.Row(): with gr.Column(scale=1, min_width=240): with gr.Accordion("參數設定", open=True): 
param_table2 = gr.Dataframe( headers=["名稱", "最小值", "最大值", "間隔"], datatype=["str", "number", "number", "number"], row_count=(3, "dynamic"), col_count=(4, "fixed"), value=[["溫度", 80, 120, 10], ["壓力", 1, 5, 1], ["pH", 6, 8, 1]], label="參數設定" ) design_type = gr.Radio(["Box-Behnken", "CCD"], value="Box-Behnken", label="設計法") run_btn = gr.Button("🪄 產生進階DoE設計", elem_classes=["main-btn"]) with gr.Column(scale=2): with gr.Accordion("設計矩陣/參數表", open=True): out_std = gr.Dataframe(label="標準化設計矩陣") download_std = gr.File(label="下載標準矩陣CSV📥") out_real = gr.Dataframe(label="實際參數表") download_real = gr.File(label="下載參數表CSV📥") run_btn.click( advanced_doe_with_mapping, inputs=[param_table2, design_type], outputs=[out_std, out_real, download_std, download_real] ) # 3️⃣ AI建模/特徵重要性/SHAP with gr.Tab("3️⃣ AI建模/特徵重要性/SHAP"): gr.Markdown(""" ### 🧠 AI建模/特徵重要性/SHAP - 支援多種線性、非線性AI回歸模型,自動化訓練與模型評估 - 一鍵產生預測結果、模型效能指標、特徵重要性圖、SHAP全圖解釋 - 輕鬆檢視哪些參數對y預測最關鍵 **如何使用:** - 1️⃣ 上傳DoE結果CSV,選擇目標y欄位 - 2️⃣ 勾選需比較的AI模型(可多選) - 3️⃣ 可選擇是否顯示SHAP解釋圖 - 4️⃣ 點「一鍵訓練+特徵重要性」,即可檢視全部結果 **注意事項:** - 資料需為數值型且無遺漏值 - 目標y欄位不可有重複 - 資料量太小時,部分模型可能無法有效學習 """) with gr.Row(): with gr.Column(scale=1, min_width=320): with gr.Accordion("上傳/選模型", open=True): datafile = gr.File(label="上傳DoE結果CSV📤", file_types=[".csv"]) test_ratio = gr.Slider(label="測試集比例", value=0.3, minimum=0.1, maximum=0.5, step=0.05) algo_linear = gr.CheckboxGroup( ["Linear Regression", "Lasso", "Ridge", "ElasticNet"], value=[], label="線性回歸" ) algo_nonlinear = gr.CheckboxGroup( ["Random Forest", "XGBoost", "LightGBM", "SVR"], value=["Random Forest"], label="非線性回歸" ) show_shap = gr.Checkbox(label="進階SHAP解釋", value=False) btn_ai = gr.Button("🚀 一鍵訓練", elem_classes=["main-btn"]) with gr.Column(scale=2): with gr.Accordion("預測/重要性圖", open=True): predfig = gr.Plot(label="📊 預測/實際對比圖") met_df = gr.Dataframe(label="模型效能指標", datatype="auto") summary = gr.Markdown(visible=True) feat_summary = gr.Markdown(visible=True) feat_fig = gr.Plot(label="特徵重要性") shap_img = gr.Image(label="SHAP解釋圖") 
        # One-click training: merge linear + nonlinear selections into a single model
        # list; the hard-coded True requests feature importance, shap_flag toggles SHAP.
        btn_ai.click(
            lambda file, lin, nonlin, ratio, shap_flag: train_and_predict_with_importance(
                file, (lin or []) + (nonlin or []), ratio, True, shap_flag
            ),
            inputs=[datafile, algo_linear, algo_nonlinear, test_ratio, show_shap],
            outputs=[predfig, met_df, summary, feat_summary, feat_fig, shap_img]
        )

    # 4) Multi-plot data visualization + 2D/3D/contour
    with gr.Tab("4️⃣ 數值資料視覺化處理"):
        gr.Markdown("""
### 📊 多圖資料視覺化 + 2D/3D/等高線分析
- 多種常用資料視覺化工具(熱圖、pairplot、直方圖、PCA等)
- 支援三維散點、曲面、2D等高線等專業圖形
- 可自選圖形、快速比較變數分布

**如何使用:**
- 1️⃣ 上傳資料CSV,選擇要產生的視覺化圖種類
- 2️⃣ 點「產生多圖分析」可一次顯示多種圖表
- 3️⃣ 三維分布:指定x、y、z軸變數(或目標欄位),生成3D散點/曲面圖
- 4️⃣ 2D/3D反應面:輸入要分析的變數組合,產生等高線/曲面圖

**注意事項:**
- 欄位名稱需為英文/數字,不支援特殊字元
- 缺值過多資料會自動忽略
- 若圖形異常請檢查欄位型態及範圍
""")
        # --- (1) multi-plot block
        with gr.Row():
            with gr.Column(scale=1, min_width=230):
                upfile2 = gr.File(label="上傳資料CSV📤")
                plot_select = gr.CheckboxGroup(
                    ["Heatmap", "Pairplot", "Histogram", "Scatter Matrix", "PCA"],
                    value=["Heatmap", "Pairplot", "Histogram", "PCA"],
                    label="視覺化圖"
                )
                vizbtn = gr.Button("📊 產生多圖分析", elem_classes=["main-btn"])
            with gr.Column(scale=3):
                # 5 plot slots, each followed by two Markdown slots (caption + note),
                # matching the flattened output tuple of `multi_viz`.
                vis_outs = []
                for i in range(5):
                    vis_outs.extend([gr.Plot(label=f"圖像{i+1}"), gr.Markdown(), gr.Markdown()])
                recomm_card = gr.Markdown(visible=True, value="", elem_id="recommend-card")
        # Unpack multi_viz's outputs and append the auto-recommendation card text.
        vizbtn.click(
            lambda f, t: (*multi_viz(f, t), auto_recommendation(f)),
            inputs=[upfile2, plot_select],
            outputs=vis_outs + [recomm_card]
        )
        # --- (2) 3D variable distribution / surface
        gr.Markdown("#### 🧬 三維分析:3D變數分布/曲面圖")
        with gr.Row():
            with gr.Column(scale=1, min_width=260):
                columns_md = gr.Markdown(label="資料欄位", value="請先上傳資料CSV,欄位將自動顯示")
                x_col = gr.Textbox(label="X軸欄位", placeholder="如 A", interactive=True)
                y_col = gr.Textbox(label="Y軸欄位", placeholder="如 B", interactive=True)
                z_col = gr.Textbox(label="Z軸欄位", placeholder="如 C", interactive=True)
                target_col = gr.Textbox(label="目標Y/顏色", placeholder="如 y", interactive=True)
                surface_flag = gr.Checkbox(label="顯示三維曲面", value=False)
                plot_btn = gr.Button("🧊 生成3D散點/曲面圖", elem_classes=["main-btn"])
            with gr.Column(scale=3):
                fig_scatter_out = gr.Plot(label="3D散點圖")
                fig_surface_out = gr.Plot(label="3D預測曲面圖")
        # NOTE(review): this handler hard-codes the column list "A, B, C, y" instead of
        # reading the uploaded CSV's real header, and the f-string has no placeholders —
        # likely a stub left in place; consider deriving the names from the file.
        upfile2.change(
            lambda file: f"資料欄位:A, B, C, y" if file else "請先上傳資料",
            inputs=[upfile2],
            outputs=[columns_md]
        )
        plot_btn.click(
            plot_3d_scatter_surface,
            inputs=[upfile2, x_col, y_col, z_col, target_col, surface_flag],
            outputs=[fig_scatter_out, fig_surface_out]
        )
        # --- (3) two-variable 3D response surface / contour
        gr.Markdown("#### 🧬 二變數 3D 反應面/等高線圖")
        with gr.Row():
            with gr.Column(scale=1, min_width=260):
                columns_md2 = gr.Markdown(label="資料欄位", value="請先上傳資料CSV,欄位將自動顯示")
                x_col2 = gr.Textbox(label="X軸欄位", placeholder="如 A", interactive=True)
                y_col2 = gr.Textbox(label="Y軸欄位", placeholder="如 B", interactive=True)
                z_col2 = gr.Textbox(label="目標Z(反應/產率/預測)", placeholder="如 y", interactive=True)
                surface2_btn = gr.Button("🧊 生成3D曲面+等高線圖", elem_classes=["main-btn"])
            with gr.Column(scale=3):
                fig_surface2 = gr.Plot(label="3D曲面圖")
                fig_contour2 = gr.Plot(label="等高線圖")
        # Second change-handler on the same upload widget; both this and the one above
        # fire on every upload. Same hard-coded-columns caveat as above.
        upfile2.change(
            lambda file: f"資料欄位:A, B, y" if file else "請先上傳資料",
            inputs=[upfile2],
            outputs=[columns_md2]
        )
        surface2_btn.click(
            plot_surface_and_contour,
            inputs=[upfile2, x_col2, y_col2, z_col2],
            outputs=[fig_surface2, fig_contour2]
        )

    # 5) Hyperparameter / Bayesian optimization
    with gr.Tab("5️⃣ 超參數/貝葉斯優化"):
        gr.Markdown("""
### 🏆 超參數/貝葉斯優化
- 自動執行各類AI模型的超參數最佳化(貝葉斯法)
- 即時繪出優化歷程,提供最佳參數組合與效能摘要

**如何使用:**
- 1️⃣ 上傳DoE實驗結果CSV
- 2️⃣ 勾選要優化的模型(可複選),設定最大迭代次數
- 3️⃣ 點「執行Bayes超參數優化」,自動開始優化並顯示所有歷程

**注意事項:**
- 請確認資料充足且欄位型態正確
- 較複雜模型/高維空間下,優化需較多時間
- 迭代次數過少時,最佳值可能不穩定
""")
        with gr.Row():
            with gr.Column(scale=1, min_width=230):
                with gr.Accordion("上傳/模型選擇", open=True):
                    upfile3 = gr.File(label="📤 上傳DoE結果CSV", file_types=[".csv"])
                    model_sel = gr.CheckboxGroup(
                        ["Random Forest", "XGBoost", "LightGBM", "SVR"],
                        value=["XGBoost"],
                        label="模型選擇(可複選)"
                    )
                    n_iter = gr.Number(label="最大迭代次數", value=16, precision=0)
                    bayes_btn = gr.Button("🚀 執行Bayes超參數優化", elem_classes=["main-btn"])
            with gr.Column(scale=2):
                with gr.Accordion("優化歷程/結果", open=True):
                    multi_fig = gr.Plot(label="所有模型Bayes優化歷程 (CV RMSE)")
                    # One sub-tab per model type: plot + summary + best-params markdown.
                    # The flat `tab_figs` list order must match run_multi_bayes_optimization's
                    # output tuple order.
                    tab_figs = []
                    for mtype in ["Random Forest", "XGBoost", "LightGBM", "SVR"]:
                        with gr.Tab(mtype):
                            fig = gr.Plot(label=f"{mtype} 優化歷程")
                            # NOTE(review): `summary` shadows Tab 3's `summary`; safe only
                            # because Tab 3's click binding was registered earlier.
                            summary = gr.Markdown()
                            best_param = gr.Markdown()
                            tab_figs.extend([fig, summary, best_param])
        bayes_btn.click(
            run_multi_bayes_optimization,
            inputs=[upfile3, model_sel, n_iter],
            outputs=[multi_fig] + tab_figs
        )

    # 6) Smart recommendation / hybrid strategy / merge-backfill
    with gr.Tab("6️⃣ 智能推薦/混合策略/合併回填"):
        gr.Markdown("""
### 🌟 智能推薦/混合策略/合併回填
- AI自動推薦新實驗點、混合策略智能選點
- 一鍵搜尋最佳組合、合併新舊DoE資料

**如何使用:**
- 1️⃣ 上傳現有DoE資料,選擇推薦模式與模型
- 2️⃣ 指定推薦點數與是否排除重複
- 3️⃣ 點「產生推薦點組合」可直接下載推薦點
- 4️⃣ 新舊資料合併:上傳原始與新實驗CSV,自動合併去重

**注意事項:**
- 請確認欄位名稱一致、資料格式正確
- 合併時將以欄位名為主,自動排除重複點
- 推薦模式可同時多選,增加實驗多樣性
""")
        # --- (1) multi-model optimum search
        with gr.Accordion("多模型最佳化搜尋", open=True):
            with gr.Row():
                with gr.Column(scale=1, min_width=280):
                    opt_file = gr.File(label="📤 上傳DoE資料(CSV)", file_types=[".csv"])
                    opt_model_sel = gr.CheckboxGroup(
                        ["Random Forest", "XGBoost", "LightGBM", "SVR"],
                        value=["Random Forest", "XGBoost"],
                        label="最佳化用模型"
                    )
                    direction = gr.Radio(["最大化", "最小化"], value="最大化", label="目標")
                    is_discrete = gr.Checkbox(label="全部參數視為離散", value=False)
                    n_iter2 = gr.Number(label="搜尋迭代次數", value=28, precision=0)
                    btn_opt = gr.Button("🏆 搜尋AI預測最佳條件", elem_classes=["main-btn"])
                with gr.Column(scale=2):
                    # NOTE(review): same datatype="auto" caveat as the Tab 3 Dataframe.
                    opt_df = gr.Dataframe(label="最佳參數組合", datatype="auto")
                    opt_txt = gr.Markdown()
                    opt_desc = gr.Markdown()
                    opt_sum = gr.Markdown()
            btn_opt.click(
                optimize_conditions,
                inputs=[opt_file, opt_model_sel, direction, is_discrete, n_iter2],
                outputs=[opt_df, opt_txt, opt_desc, opt_sum]
            )
        # --- (2) new-point recommendation
        with gr.Accordion("新點推薦", open=False):
            with gr.Row():
                with gr.Column(scale=1, min_width=280):
                    rec_file = gr.File(label="📤 請上傳DoE資料(CSV)", file_types=[".csv"])
                    recommend_mode = gr.CheckboxGroup(
                        ["探索型推薦", "混合策略推薦"],
                        value=["探索型推薦"],
                        label="推薦模式(可複選)"
                    )
                    recommend_models = gr.CheckboxGroup(
                        ["Random Forest", "XGBoost", "LightGBM", "SVR"],
                        value=["Random Forest", "XGBoost"],
                        label="模型選擇"
                    )
                    recommend_n = gr.Number(label="推薦點數", value=4, precision=0)
                    recommend_exclude = gr.Checkbox(label="排除現有點", value=True)
                    recommend_btn = gr.Button("🎯 產生推薦點組合", elem_classes=["main-btn"])
                with gr.Column(scale=2):
                    recommend_out = gr.Markdown(label="推薦結果", value="")
                    recommend_download_file = gr.File(label="📥 下載推薦點(回填y用)", interactive=False)
            recommend_btn.click(
                make_recommended_points,
                inputs=[rec_file, recommend_models, recommend_mode, recommend_n, recommend_exclude],
                outputs=[recommend_out, recommend_download_file]
            )
        # --- (3) merge-backfill
        with gr.Accordion("合併回填資料", open=False):
            with gr.Row():
                with gr.Column(scale=1, min_width=320):
                    base_csv = gr.File(label="原始DoE資料(CSV)")
                    new_csv = gr.File(label="新實驗資料(推薦點CSV)")
                    merge_btn = gr.Button("🧩 自動合併/去重", elem_classes=["main-btn"])
                    merge_out = gr.File(label="📥 下載合併後資料")
            merge_btn.click(
                merge_csvs,
                inputs=[base_csv, new_csv],
                outputs=merge_out
            )

    # 7) AI model regression analysis (multi-target batch; interactions / multi-model / multi-y)
    with gr.Tab("7️⃣ AI模型回歸分析"):
        gr.Markdown("""
### 🧠 AI模型回歸分析(多目標/多模型/交互作用)
- 批次執行多種AI模型,支援多y、多特徵交互作用
- 各y可獨立檢視效能指標、重要性對比與預測分布
- 自動比較前後回填資料對AI效能之提升/變化

**如何使用:**
- 1️⃣ 上傳原始DoE資料(CSV)及合併新點(CSV,可選)
- 2️⃣ 選擇目標y欄位,可自動偵測或自行調整
- 3️⃣ 勾選所需AI模型、設定特徵交互作用階數
- 4️⃣ 點「批次回歸分析」,下方各分頁顯示每y結果

**注意事項:**
- 交互作用階數設定愈高,特徵數量愈多,模型訓練愈慢
- 目標欄位過多,僅顯示前8個y的詳細結果
- 回填資料須與原始資料欄位一致
""")
        before_file = gr.File(label="原始DoE資料(CSV)")
        after_file = gr.File(label="🧩 合併新點DoE(CSV)")
        # NOTE(review): these reassign Tab 3's `algo_linear` / `algo_nonlinear` names;
        # Tab 3 still works because its binding captured the earlier objects.
        algo_linear = gr.CheckboxGroup(
            ["Linear Regression", "Lasso", "Ridge", "ElasticNet"],
            value=[],
            label="線性回歸"
        )
        algo_nonlinear = gr.CheckboxGroup(
            ["Random Forest", "XGBoost", "PLS Regression", "SVR"],
            value=["Random Forest"],
            label="非線性回歸"
        )
        # Degree control for polynomial feature interactions.
        degree_select = gr.Dropdown([1, 2, 3], value=1, label="特徵交互作用階數 (degree)")
        add_inter = gr.Checkbox(label="特徵間交互作用 (x1*x2)", value=True)
        add_y_inter = gr.Checkbox(label="y間交互作用 (y1*y2)", value=False)
        y_keywords = gr.Textbox(label="目標欄位關鍵字 (逗號分隔)", value="y,目標,output,target")
        y_columns = gr.CheckboxGroup(label="目標y欄位 (可複選)", choices=[], value=[])
        # Re-detect target columns whenever the file or the keyword list changes.
        before_file.change(
            detect_y_columns,
            inputs=[before_file, y_keywords],
            outputs=y_columns
        )
        y_keywords.change(
            detect_y_columns,
            inputs=[before_file, y_keywords],
            outputs=y_columns
        )
        run_btn = gr.Button("🚀 批次回歸分析", elem_classes=["main-btn"])
        summary_md = gr.Dataframe(label="所有模型-y效能總表")
        # Fixed 8 result sub-tabs (one per detected y, extras left blank); the four
        # parallel lists are flattened into run_multi_y's output order below.
        y_titles, y_tables, y_feats, y_ydists = [], [], [], []
        with gr.Tabs() as tabs_container:
            for idx in range(8):
                with gr.TabItem(f"Tab{idx+1}"):
                    y_title = gr.Markdown(value="")
                    y_table = gr.Dataframe(label="模型效能比較表")
                    with gr.Row():
                        y_feat = gr.Plot(label="特徵重要性對比圖")
                        y_ydist = gr.Plot(label="y 分布對比圖")
                    y_titles.append(y_title)
                    y_tables.append(y_table)
                    y_feats.append(y_feat)
                    y_ydists.append(y_ydist)
        run_btn.click(
            run_multi_y,
            inputs=[before_file, after_file, algo_linear, algo_nonlinear, y_columns, add_inter, add_y_inter, degree_select],
            outputs=[summary_md, *y_titles, *y_tables, *y_feats, *y_ydists]
        )

    # Platform help / index
    with gr.Tab("⚙️平台說明與索引"):
        gr.Markdown("""
## 🧭 功能說明 & 導航指南

本平台整合「自動實驗設計(DoE)」、「AI建模」、「資料視覺化」、「超參數優化」、「智能推薦」等模組,專為化學/材料/製程等工程應用打造,協助您**從設計點產生、數據分析到模型推薦,全流程自動化**!

---

### 🧰 主要功能分頁

- **1️⃣ 標準DoE設計分布**
  - 產生經典設計法(LHS、Sobol等)的多維參數設計點,便於建立模型訓練用基礎資料。
  - 直觀展示每種設計點分布、支援結果下載。
- **2️⃣ 進階DoE(Box-Behnken/CCD)**
  - 支援正交型、中心組合等進階設計法,方便進行曲面反應分析(RSM)。
  - 產生標準化設計矩陣、對應實際參數表。
- **3️⃣ AI建模/特徵重要性/SHAP**
  - 一鍵啟動多模型AI訓練、交叉驗證、特徵重要性排序、SHAP解釋。
  - 適用於尋找關鍵變數與預測能力評估。
- **4️⃣ 多圖資料視覺化 + 2D/3D/等高線**
  - 提供各類視覺化工具(熱圖、pairplot、PCA、3D曲面/等高線)協助多角度理解數據分布。
  - 支援高維資料降維與多種圖表疊合分析。
- **5️⃣ 超參數/貝葉斯優化**
  - 針對各種AI回歸模型自動進行超參數優化(如Random Forest、XGBoost等),即時檢視優化歷程與最佳參數。
- **6️⃣ 智能推薦/混合策略/合併回填**
  - 結合AI預測與探索性搜尋,自動推薦新實驗條件,並支援資料自動合併與去重。
  - 適合推進次輪實驗設計及自動補齊數據。
- **7️⃣ AI模型回歸分析(多目標/多模型/交互作用)**
  - 支援多y欄位、多模型批次建模、特徵交互作用分析,詳細呈現各y的訓練/預測效能。

---

### 📝 操作建議與常見注意事項

- **所有欄位均可直接點擊或複製表格內容,並可一鍵下載分析結果**
- **CSV資料須為純數值型(中文欄位會自動支援,但建議用英文/數字命名)**
- **回歸與建模功能建議資料組數大於10,避免過度擬合或模型效能不穩定**
- **每個Tab下方皆有詳細分頁說明、注意事項,建議操作前先閱讀上方說明**

---

### 🏠 應用情境

- 多參數製程最佳化
- 原型實驗規劃(探索型/補點/混合設計)
- 關鍵因子敏感度分析
- 自動推薦新實驗組合
- AI輔助反應機制推論與模型精度提升

---

**本平台持續優化,歡迎多加利用!**
""")

    with gr.Tab("🧩 Function管理"):
        gr.Markdown("#### 自動偵測各分頁 UI 綁定 function / lambda / callback")
        # NOTE(review): the script's own filename is hard-coded as "app.py"; this breaks
        # if the file is renamed — consider using __file__ instead.
        mapping = extract_tab_functions_with_lambda_and_callback("app.py")
        gr.Markdown(value=mapping)
        gr.Markdown("#### 本程式所有 function 定義 (摘要)")
        gr.Markdown(value=extract_all_functions("app.py"))
        gr.Markdown("")

# Start the Gradio server (blocking call; outside the Blocks context).
demo.launch()