# Requirements (pip)
# gradio>=4.17.0
# pandas>=2.0.0
# numpy>=1.24.0
# matplotlib
# scikit-learn>=1.2.0
# pyDOE2
# Pillow
# xgboost>=2.0.0
# lightgbm>=4.0.0
# seaborn
# scipy>=1.10.0
# plotly>=5.16.0
# scikit-optimize>=0.9.0
# optuna (不需要)
# tpot>=0.12 (目前沒用到)
# shap
# tabulate


# import blocks

# [Gradio 基本UI]
import gradio as gr

# [數據處理/科學計算]
import numpy as np
import pandas as pd

# [DoE設計/取樣法]
from pyDOE2 import lhs, bbdesign, ccdesign
from scipy.stats.qmc import Sobol, Halton

# [AI/ML建模]
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
import xgboost as xgb
import lightgbm as lgb
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

# [模型訓練/評估/特徵選擇]
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.multioutput import MultiOutputRegressor
from sklearn.cross_decomposition import PLSRegression
from sklearn.preprocessing import PolynomialFeatures

# [可視化/統計分析]
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from scipy.stats import skew, kurtosis, shapiro
from scipy.interpolate import griddata
import shap

# [雜項工具/暫存檔]
import tempfile
import warnings
warnings.filterwarnings("ignore")
import io

# [貝葉斯/自動化優化]
from skopt import BayesSearchCV
from skopt.space import Real, Integer, Categorical
import optuna  # ← 若要支援optuna，這裡一定要加
import optuna.visualization.matplotlib
from scipy.optimize import minimize


# [互動頁面樣式]
custom_css = """
/* 主卡片 */
/* 用於主要內容區塊（如自動摘要卡片、資訊框）美化，圓角+陰影 */
.gr-card {
    background: #f7fafd;
    border-radius: 18px;
    box-shadow: 0 2px 8px #0002;
}

/* 按鈕樣式 */
/* 主要操作按鈕樣式：漸層、圓角、陰影、字體放大，滑鼠懸停有亮度與色調變化 */
.main-btn {
    font-size: 1.14em;
    padding: 10px 28px 11px 28px;
    border-radius: 999px;
    margin: 14px 0 8px 0;
    background: linear-gradient(90deg, #5b8cff, #76e7ff 70%);
    color: #fff;
    font-weight: 600;
    box-shadow: 0 2px 12px #3976d855;
    border: none;
    transition: .2s;
}
.main-btn:hover {
    filter: brightness(1.08);
    box-shadow: 0 2px 18px #5b8cff33;
    background: linear-gradient(90deg, #2259c9 55%, #4fa7d9);
}

/* 全平台字體 */
/* 全域字型套用 Noto Sans JP（優先日系風格），備用 Segoe UI 和 Arial，維持專案一致性 */
.gradio-container {
    font-family: 'Noto Sans JP', 'Segoe UI', Arial, sans-serif;
}

/* 區塊標題 */
/* 區塊或章節標題顯眼化，藍色、字重加粗 */
.section-title {
    font-size: 1.26em;
    font-weight: 700;
    color: #3976d8;
    margin-bottom: 6px;
}

/* Tab & Row 區塊間距 */
/* Tab、列區塊加上下間距，避免UI太擁擠 */
.tab-pane, .gr-row {
    padding: 8px 0;
}

/* 推薦小卡 */
/* 固定右下角推薦訊息小卡，用於快速提醒或建議，帶有柔和背景、陰影 */
#recommend-card {
    position: fixed; right: 28px; bottom: 28px; max-width: 360px;
    background: #f8fbffde; border-radius: 13px; box-shadow: 0 2px 14px #5ab2fa29;
    border-left: 5px solid #88a3e6; padding: 12px 18px 10px 14px; font-size: 1.07em;
    z-index: 9999; color: #285078;
}

/* 分隔線 */
/* 水平線設計：加厚、色彩淡藍灰，分隔內容用 */
hr {
    border: 0;
    border-bottom: 2.5px solid #e4e8f0;
    margin: 28px 0 22px 0;
}

/* Accordion 動畫 */
/* 摺疊區塊 summary 動畫與配色，展開時加上陰影強調 */
.accordion > summary {
    transition: .25s;
    background: #f4f8ff;
}
.accordion[open] > summary {
    background: #cfeaff;
}
.accordion[open] {
    box-shadow: 0 8px 24px #1ca7ec25;
}

/* Tab高亮 */
/* 當前選中 Tab 標籤高亮顯示，底色、字色、粗體 */
div[role="tab"][aria-selected="true"] {
    background: #e3f1ff !important;
    font-weight: bold;
    color: #2675ff;
}

/* 頁腳 */
/* Footer字體顏色與大小（淡灰、較小字） */
#footer {
    color: #888;
    font-size: 0.98em;
}
"""


# [UI 樣式工具/自動摘要卡片]

# = 產生HTML卡片說明 =
def make_card(title, tips):
    return f"""<div class='gr-card'><b>{title}</b><ul style='margin:0 0 0 12px'>{''.join(['<li>'+str(x)+'</li>' for x in tips])}</ul></div>"""

# = 根據圖形類型產生分圖說明卡片 =
def get_viz_desc(name):
    idx = {
        "DoE Main": ("DoE設計分布圖", [
            "檢查設計點分布是否均勻覆蓋整個空間",
            "若集中/離散，代表參數區間或設計法可優化",
            "高維會以降維(PCA)檢視主變異結構"
        ]),
        "Heatmap": ("Heatmap(相關係數)", [
            "檢查所有數值變數的正負相關",
            "紅色：強正相關；藍色：強負相關",
            "相關係數>0.7為高度相關，< -0.7為強負相關"
        ]),
        "Pairplot": ("Pairplot(成對散點)", [
            "展示任兩變數間的散點型態",
            "斜線型=高相關，圓形=低相關",
            "可發現集群、離群或特定結構"
        ]),
        "Histogram": ("Histogram(直方圖)", [
            "單變數分布形態檢查",
            "偏態、長尾、極端值需注意",
            "單峰/多峰、常態/非對稱可判斷資料型態"
        ]),
        "Scatter Matrix": ("Scatter Matrix(全變數關聯)", [
            "類似Pairplot但一次顯示所有成對分布",
            "對角線顯示每欄分布直方圖",
            "可發現明顯群集、離群"
        ]),
        "PCA": ("PCA(主成分分布)", [
            "多維特徵壓縮到2D",
            "檢查主變異來源、潛在群集",
            "可輔助檢查是否有明顯離群"
        ]),
        "AI Predict": ("AI預測對比圖", [
            "預測y與實際y對比，點貼近對角線代表高精度",
            "偏離對角線代表模型誤差大，建議優化特徵或模型"
        ]),
        "Bayesian Optimization": ("貝葉斯優化", [
            "自動搜尋最佳參數組合，減少無效試驗",
            "可用於AI模型超參數、或實驗設計優化",
            "優化歷程圖：看最佳值逐步收斂"
        ])
    }
    return make_card(*idx.get(name, ("", [])))

# = 根據視覺化類型自動產生總結 =
def auto_conclude_viz(df, vtype):
    out = []
    if vtype == "Heatmap":
        cor = df.corr()
        highcorr = cor.where(np.triu(np.ones(cor.shape), 1).astype(bool)).stack()
        hc = highcorr[abs(highcorr) > 0.7]
        if not hc.empty:
            for idx, v in hc.items():
                out.append(f"「{idx[0]}」與「{idx[1]}」高度相關 (corr={v:.2f})")
        else:
            out.append("無明顯高度相關特徵")
    elif vtype == "Histogram":
        for col in df.select_dtypes(include=np.number).columns:
            sk = skew(df[col].dropna())
            if abs(sk) > 2:
                out.append(f"「{col}」極端偏態({sk:.2f})")
        if not out:
            out.append("欄位分布大致對稱")
    elif vtype == "PCA":
        pca = PCA(n_components=2).fit(df.values)
        expvar = pca.explained_variance_ratio_.sum()
        out.append(f"前2主成分共解釋 {expvar*100:.1f}% 變異")
    elif vtype == "Pairplot":
        out.append("請留意有無線性排列（高相關）或明顯群集/異常點")
    elif vtype == "Scatter Matrix":
        out.append("集群或離群點可從圖中直接辨識")
    else:
        out.append("資料檢查完成")
    return make_card("AI自動結論", out)

# = 產生AI回歸自動總結卡片 =
def auto_conclude_ai(y_test, y_pred, name):
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    out = [
        f"模型：{name}",
        f"測試集 RMSE={rmse:.3g}",
        f"R²={r2:.2f}",
        ("模型表現佳" if r2 > 0.8 else "可進一步優化特徵/資料量")
    ]
    return make_card("AI自動結論", out)

# = 貝葉斯優化RMSE曲線自動解讀摘要 =
def auto_conclude_bayes_curve(rmse_curve, model_name=None):
    import numpy as np
    rmse_curve = np.array(rmse_curve)
    minv = np.min(rmse_curve)
    lastv = rmse_curve[-1]
    diff = np.ptp(rmse_curve)
    std = np.std(rmse_curve)
    trend = "平穩"
    if np.allclose(rmse_curve, rmse_curve[0], atol=0.2*std):
        trend = "幾乎無變化"
    elif rmse_curve[0] > minv and lastv > rmse_curve[0] and lastv > minv + std:
        trend = "尾端上升"
    elif np.argmin(rmse_curve) < len(rmse_curve)//2:
        trend = "快速下降收斂"
    elif std > 0.2 * minv and diff > 0.3 * minv:
        trend = "波動起伏"
    if trend == "快速下降收斂":
        comment = "RMSE 隨迭代明顯下降，代表最佳化收斂，已找到較佳參數組合。"
    elif trend == "幾乎無變化":
        comment = "RMSE 變動極小，代表模型/資料難以藉由超參數優化提升。"
    elif trend == "尾端上升":
        comment = "最後幾點 RMSE 明顯上升，建議忽略尾端結果，以最低點作最佳選擇。"
    elif trend == "波動起伏":
        comment = "RMSE 震盪明顯，代表模型不穩定或參數空間設過寬，建議縮小搜尋區間。"
    else:
        comment = "RMSE 變動趨勢平穩，可依最低點選定最佳參數。"
    model_str = f"【{model_name}】" if model_name else ""
    return f"{model_str}最低RMSE：**{minv:.3f}**\n- 收斂型態：**{trend}**\n- 建議：{comment}\n"


# [資料品質檢查/AutoML推薦]

# = 自動偵測資料品質與欄位異常 =
def auto_data_quality_check(datafile):
    if datafile is None:
        return "> 尚未上傳資料"
    df = pd.read_csv(datafile.name if hasattr(datafile, "name") else datafile)
    tips = []
    for col in df.columns:
        if df[col].isnull().sum() > 0:
            tips.append(f"「{col}」有缺值，建議補值或刪除")
        if df[col].nunique() == 1:
            tips.append(f"「{col}」為常數欄，建議刪除")
        if pd.api.types.is_numeric_dtype(df[col]):
            sk = (df[col].dropna().skew() if hasattr(df[col], "skew") else 0)
            if abs(sk) > 2:
                tips.append(f"「{col}」嚴重偏態（skew={sk:.2f}），建議標準化")
    if not tips:
        tips = ["資料品質良好，無明顯異常。"]
    return "<b>資料品質偵測：</b><ul style='margin:0 0 0 12px'>" + "".join([f"<li>{t}</li>" for t in tips]) + "</ul>"

# = 簡易AutoML，推薦最佳模型並產生程式碼 =
def automl_leaderboard(datafile):
    # (簡化) 這裡你可以調用 TPOT、auto-sklearn 或自己比較多組模型
    # 這裡直接隨機選一個
    best = np.random.choice(["XGBoost", "Random Forest", "LightGBM", "SVR"])
    code = f"""# 範例BestModel
from xgboost import XGBRegressor
model = XGBRegressor(n_estimators=120, random_state=0)
model.fit(X_train, y_train)"""
    return f"最佳模型推薦：<b>{best}</b>", code


# [DoE設計/推薦點生成]

# = 檢查新點是否與已存在點重複（向量距離法）=
def is_close_to_existing(xrow, existing_X, tol=1e-4):
    existing_X = np.asarray(existing_X)
    if existing_X.size == 0:
        return False
    diffs = np.abs(existing_X - np.array(xrow))
    if diffs.ndim == 1:
        return np.all(diffs < tol)
    return np.any(np.all(diffs < tol, axis=1))

# = 多模型AI推薦下一批DoE設計點（探索/最大化等模式）=
def suggest_next_doe_points_batch(
    datafile, 
    model_types = ["Random Forest", "XGBoost", "LightGBM", "SVR"], 
    mode = "最大化", n_points = 3, 
    exclude_existing = True,
    random_seed = 42,
    max_attempts_factor = 30,
    return_df = False
):
    import numpy as np
    from scipy.optimize import minimize
    import pandas as pd

    if datafile is None:
        if return_df:
            return pd.DataFrame()
        return "> 尚未上傳資料"
    df = pd.read_csv(datafile.name if hasattr(datafile, "name") else datafile)
    X, y = df.iloc[:, :-1], df.iloc[:, -1]
    colnames = X.columns

    # 訓練多模型
    models = []
    for t in model_types:
        if t == "Random Forest":
            models.append(RandomForestRegressor(n_estimators=120, random_state=random_seed))
        elif t == "XGBoost":
            models.append(XGBRegressor(n_estimators=120, random_state=random_seed, verbosity=0))
        elif t == "LightGBM":
            models.append(LGBMRegressor(n_estimators=120, random_state=random_seed))
        elif t == "SVR":
            models.append(make_pipeline(StandardScaler(), SVR()))
    for m in models:
        m.fit(X, y)

    bounds = [(X[c].min(), X[c].max()) for c in colnames]

    def ensemble_pred(xrow):
        preds = [m.predict(np.array(xrow).reshape(1, -1))[0] for m in models]
        return np.mean(preds), np.std(preds)

    if mode == "最大化":
        def obj(x): return -ensemble_pred(x)[0]
    elif mode == "最小化":
        def obj(x): return ensemble_pred(x)[0]
    elif mode == "不確定性":
        def obj(x): return -ensemble_pred(x)[1]
    else:
        def obj(x): return -ensemble_pred(x)[0]

    found_points, preds_mean, preds_std = [], [], []
    attempts = 0
    max_attempts = n_points * max_attempts_factor
    np.random.seed(random_seed)
    while len(found_points) < n_points and attempts < max_attempts:
        x0 = np.random.uniform([b[0] for b in bounds], [b[1] for b in bounds])
        res = minimize(obj, x0, bounds=bounds)
        best_x = res.x
        # 排除重複點
        exist = False
        if exclude_existing:
            if is_close_to_existing(best_x, X.values) or is_close_to_existing(best_x, np.array(found_points)):
                exist = True
        if exist:
            attempts += 1
            continue
        found_points.append(best_x)
        mean_pred, std_pred = ensemble_pred(best_x)
        preds_mean.append(mean_pred)
        preds_std.append(std_pred)
        attempts += 1

    if not found_points:
        if return_df:
            return pd.DataFrame()
        return "> 無法自動產生新點（參數範圍或步階過細、模型表現過於平坦）"

    # == DataFrame格式（for下載用）==
    if return_df:
        df_points = pd.DataFrame(found_points, columns=colnames)
        df_points["y"] = ""    # 給user回填
        df_points["模型平均預測"] = preds_mean
        df_points["不確定性(std)"] = preds_std
        df_points["推薦策略"] = mode
        return df_points

    # == Markdown文字版 ==
    if mode == "最大化":
        best_idx = int(np.argmax(preds_mean))
    elif mode == "最小化":
        best_idx = int(np.argmin(preds_mean))
    elif mode == "不確定性":
        best_idx = int(np.argmax(preds_std))
    else:
        best_idx = 0

    out = "<b>推薦次一輪DoE設計點（Top N）：</b><br>"
    for i, (best_x, mu, std) in enumerate(zip(found_points, preds_mean, preds_std), 1):
        flag = " 🏆 <b>【最推薦】</b>" if (i-1) == best_idx else ""
        out += f"<b>候選{i}{flag}：</b>"
        out += "<ul style='margin-top:0;margin-bottom:6px;'>"
        for c, v in zip(colnames, best_x):
            out += f"<li>{c} = {v:.3f}</li>"
        out += f"<li>平均預測產率：<b>{mu:.3f}</b></li><li>不確定性（std）：{std:.3f}</li>"
        if mode == "不確定性":
            out += "<li><i>（此點模型間意見分歧最大）</i></li>"
        out += "</ul>"
    return out

# = 智能混合策略推薦DoE新點（最大化、最小化、不確定性、隨機）=
def suggest_mixed_doe_points(
    datafile, 
    model_types = ["Random Forest", "XGBoost", "LightGBM", "SVR"], 
    n_total = 4,   # 推薦總點數
    exclude_existing = True,
    random_seed = 2025,
    return_df = False
):
    import numpy as np
    from scipy.optimize import minimize
    import pandas as pd

    if datafile is None:
        if return_df:
            return pd.DataFrame()
        return "> 尚未上傳資料"
    df = pd.read_csv(datafile.name if hasattr(datafile, "name") else datafile)
    X, y = df.iloc[:, :-1], df.iloc[:, -1]
    colnames = X.columns

    # 訓練多模型
    models = []
    for t in model_types:
        if t == "Random Forest":
            models.append(RandomForestRegressor(n_estimators=120, random_state=random_seed))
        elif t == "XGBoost":
            models.append(XGBRegressor(n_estimators=120, random_state=random_seed, verbosity=0))
        elif t == "LightGBM":
            models.append(LGBMRegressor(n_estimators=120, random_state=random_seed))
        elif t == "SVR":
            models.append(make_pipeline(StandardScaler(), SVR()))
    for m in models:
        m.fit(X, y)

    bounds = [(X[c].min(), X[c].max()) for c in colnames]

    def ensemble_pred(xrow):
        preds = [m.predict(np.array(xrow).reshape(1, -1))[0] for m in models]
        return np.mean(preds), np.std(preds)

    def obj_max(x): return -ensemble_pred(x)[0]
    def obj_min(x): return ensemble_pred(x)[0]
    def obj_uncert(x): return -ensemble_pred(x)[1]

    np.random.seed(random_seed)
    found_points, point_types, mu_list, std_list = [], [], [], []
    attempts = 0
    max_attempts = n_total * 30

    # 1. 最大化
    res1 = minimize(obj_max, np.random.uniform([b[0] for b in bounds], [b[1] for b in bounds]), bounds=bounds)
    x1 = res1.x
    if not (exclude_existing and is_close_to_existing(x1, X.values)):
        found_points.append(x1)
        point_types.append("最大化（exploit）")
        mu, std = ensemble_pred(x1)
        mu_list.append(mu)
        std_list.append(std)

    # 2. 最小化
    res2 = minimize(obj_min, np.random.uniform([b[0] for b in bounds], [b[1] for b in bounds]), bounds=bounds)
    x2 = res2.x
    if not (exclude_existing and (is_close_to_existing(x2, X.values) or is_close_to_existing(x2, np.array(found_points)))):
        found_points.append(x2)
        point_types.append("最小化（exploit）")
        mu, std = ensemble_pred(x2)
        mu_list.append(mu)
        std_list.append(std)

    # 3. 最大不確定性
    res3 = minimize(obj_uncert, np.random.uniform([b[0] for b in bounds], [b[1] for b in bounds]), bounds=bounds)
    x3 = res3.x
    if not (exclude_existing and (is_close_to_existing(x3, X.values) or is_close_to_existing(x3, np.array(found_points)))):
        found_points.append(x3)
        point_types.append("最大不確定性（exploration）")
        mu, std = ensemble_pred(x3)
        mu_list.append(mu)
        std_list.append(std)

    # 4. 隨機探索補滿
    while len(found_points) < n_total and attempts < max_attempts:
        x0 = np.random.uniform([b[0] for b in bounds], [b[1] for b in bounds])
        if exclude_existing and (is_close_to_existing(x0, X.values) or is_close_to_existing(x0, np.array(found_points))):
            attempts += 1
            continue
        found_points.append(x0)
        point_types.append("隨機探索")
        mu, std = ensemble_pred(x0)
        mu_list.append(mu)
        std_list.append(std)
        attempts += 1

    # === DataFrame for CSV 下載 ===
    if return_df:
        df_points = pd.DataFrame(found_points, columns=colnames)
        df_points["y"] = ""
        return df_points

    # === Markdown展示 ===
    out = "<b>智能推薦多重新DoE設計點（混合策略）</b><br>"
    for i, (best_x, mu, std, label) in enumerate(zip(found_points, mu_list, std_list, point_types), 1):
        flag = " 🏆 <b>【最推薦】</b>" if label.startswith("最大化") else ""
        out += f"<b>候選{i}{flag}：</b><i>{label}</i>"
        out += "<ul style='margin-top:0;margin-bottom:6px;'>"
        for c, v in zip(colnames, best_x):
            out += f"<li>{c} = {v:.3f}</li>"
        out += f"<li>平均預測產率：<b>{mu:.3f}</b></li><li>不確定性（std）：{std:.3f}</li>"
        if label.startswith("最大不確定性"):
            out += "<li><i>（模型分歧最大，探索新區域）</i></li>"
        out += "</ul>"
    return out


# = 組合各推薦策略生成新點，合併成單一下載檔 =
def make_recommended_points(file, models, modes, n, exclude):
    import pandas as pd
    import tempfile
    outs = []
    df_list = []
    n = int(n)
    if file is None or not modes or not models:
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode="w", encoding="utf-8")
        pd.DataFrame().to_csv(tmp.name, index=False)
        return "請確認已上傳資料、選模式與模型", tmp.name
    for mode in modes:
        if mode == "探索型推薦":
            out = suggest_next_doe_points_batch(file, models, "最大化", n, exclude, return_df=False)
            df = suggest_next_doe_points_batch(file, models, "最大化", n, exclude, return_df=True)
            outs.append(f"<b>【探索型推薦】</b><br>{out}")
            if isinstance(df, pd.DataFrame) and not df.empty:
                df_list.append(df)
        elif mode == "混合策略推薦":
            out = suggest_mixed_doe_points(file, models, n, exclude, return_df=False)
            df = suggest_mixed_doe_points(file, models, n, exclude, return_df=True)
            outs.append(f"<b>【混合策略推薦】</b><br>{out}")
            if isinstance(df, pd.DataFrame) and not df.empty:
                df_list.append(df)
    if df_list:
        all_df = pd.concat(df_list, ignore_index=True).drop_duplicates()
    else:
        all_df = pd.DataFrame()
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode="w", encoding="utf-8-sig")
    all_df.to_csv(tmp.name, index=False)
    tmp.flush()
    tmp.close()
    return "<br><hr>".join(outs), tmp.name   # ok, 兩個 output


# = 下載推薦DoE點組合（CSV）=
def download_recommended_points(file, models, mode, n, exclude):
    # 支援模式切換
    if mode == "混合策略推薦":
        df_points = suggest_mixed_doe_points(file, models, int(n), exclude, return_df=True)
    else:
        # 預設探索型推薦
        df_points = suggest_next_doe_points_batch(file, models, mode, int(n), exclude, return_df=True)
    if df_points is None or len(df_points) == 0:
        return None
    # 存成臨時檔
    with tempfile.NamedTemporaryFile(suffix=".csv", mode="w", delete=False, encoding="utf-8-sig") as f:
        df_points.to_csv(f.name, index=False)
        return f.name

# [資料合併/CSV工具]

# = 兩份CSV資料自動合併、去重、優先保留已填y者 =
def merge_csvs(base_csv, new_csv):
    import pandas as pd
    import tempfile
    if base_csv is None or new_csv is None:
        return None
    df1 = pd.read_csv(base_csv.name if hasattr(base_csv, "name") else base_csv)
    df2 = pd.read_csv(new_csv.name if hasattr(new_csv, "name") else new_csv)
    # 將y(目標)欄位填過的優先保留，未填y只補點不覆蓋
    key_cols = [c for c in df1.columns if c != "y"]
    merged = pd.concat([df1, df2], ignore_index=True)
    # 去重優先保留已填y者
    merged = merged.sort_values(by=["y"], ascending=[False]).drop_duplicates(subset=key_cols, keep="first")
    merged = merged.reset_index(drop=True)
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode="w", encoding="utf-8-sig")
    merged.to_csv(tmp.name, index=False)
    tmp.flush()
    tmp.close()
    return tmp.name

# [標準DoE設計/分布比較]

# = 檢查輸入參數列是否有效 =
def is_valid_row(row):
    if not isinstance(row, (list, tuple)) or len(row) < 4:
        return False
    try:
        if str(row[0]).strip() == "":
            return False
        float(row[1])
        float(row[2])
        float(row[3])
        return True
    except Exception:
        return False

# = 產生指定類型的標準DoE設計點（LHS/Sobol/Halton/Uniform）=
def gen_design(design_type, n_params, n_samples, param_lows, param_highs, param_steps, seed):
    if seed is not None and str(seed).strip() != "" and int(seed) != 0:
        my_seed = int(seed)
    else:
        my_seed = None

    if design_type == "LHS":
        if my_seed is not None:
            np.random.seed(my_seed)
        design = lhs(n_params, samples=n_samples, criterion='maximin')
    elif design_type == "Sobol":
        sampler = Sobol(d=n_params, scramble=True, seed=my_seed)
        design = sampler.random(n_samples)
    elif design_type == "Halton":
        sampler = Halton(d=n_params, scramble=True, seed=my_seed)
        design = sampler.random(n_samples)
    elif design_type == "Uniform":
        if my_seed is not None:
            np.random.seed(my_seed)
        design = np.random.rand(n_samples, n_params)
    else:
        raise ValueError("Unknown SFD type!")
    real_samples = np.zeros_like(design)
    for idx, (low, high, step) in enumerate(zip(param_lows, param_highs, param_steps)):
        real_samples[:, idx] = design[:, idx] * (high - low) + low
        if step > 0:
            real_samples[:, idx] = np.round((real_samples[:, idx] - low) / step) * step + low
        else:
            decimals = str(step)[::-1].find('.')
            real_samples[:, idx] = np.round(real_samples[:, idx], decimals)
        real_samples[:, idx] = np.clip(real_samples[:, idx], low, high)
    return pd.DataFrame(real_samples)

# = 2D參數分布圖（Plotly）=
def plot_scatter_2d(df, title):
    fig = px.scatter(df, x=df.columns[0], y=df.columns[1], title=title)
    return fig
    
# = 3D參數分布圖（Plotly）=
def plot_scatter_3d(df, title):
    fig = px.scatter_3d(df, x=df.columns[0], y=df.columns[1], z=df.columns[2], title=title)
    return fig

# = 多維參數成對散點圖（Plotly）=
def plot_pairplot(df, title):
    return px.scatter_matrix(df, title=title)

# = PCA主成分降維分布圖（Plotly）=
def plot_pca(df, title):
    X = df.values
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X)
    df_pca = pd.DataFrame(X_pca, columns=['PCA1', 'PCA2'])
    return px.scatter(df_pca, x='PCA1', y='PCA2', title=title + " (PCA降維)")

# = 依參數設定產生所有主流程設計法（四種）並比較分布 =
def compare_all_designs(param_table, n_samples, seed):
    all_types = ["LHS", "Sobol", "Halton", "Uniform"]
    outs = []
    if isinstance(param_table, pd.DataFrame):
        param_table = param_table.values.tolist()
    param_names, param_lows, param_highs, param_steps = [], [], [], []
    for row in param_table:
        if not is_valid_row(row):
            continue
        try:
            param_names.append(str(row[0]).strip())
            param_lows.append(float(row[1]))
            param_highs.append(float(row[2]))
            param_steps.append(float(row[3]))
        except Exception:
            continue
    n_params = len(param_names)
    if n_params == 0:
        return pd.DataFrame({"提醒": ["請正確輸入至少一列參數"]}), None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None
    for des in all_types:
        df = gen_design(des, n_params, int(n_samples), param_lows, param_highs, param_steps, seed)
        df.columns = param_names
        # 主分布圖
        if n_params == 2:
            mainfig = plot_scatter_2d(df, des + " 分布圖")
        elif n_params == 3:
            mainfig = plot_scatter_3d(df, des + " 分布圖")
        elif n_params >= 4 and n_params <= 8:
            mainfig = plot_pairplot(df, des + " Pairplot")
        else:
            mainfig = plot_pca(df, des + " PCA")
        with tempfile.NamedTemporaryFile(delete=False, suffix=f'_{des}_design.csv', mode='w', encoding='utf-8-sig') as tmpfile:
            df.to_csv(tmpfile, index=False)
            outs.extend([df, mainfig, tmpfile.name, get_viz_desc("DoE Main"), auto_conclude_viz(df, "DoE Main")])
    return outs

# [進階DoE: Box-Behnken/CCD設計]

# = 將標準化DoE設計矩陣轉換為實際參數 =
def doe_normal_to_actual(doe_matrix, param_info):
    df = pd.DataFrame(doe_matrix)
    df_out = pd.DataFrame()
    param_names = []
    for i, info in enumerate(param_info):
        name, pmin, pmax, *_ = info
        param_names.append(name)
        pmin = float(pmin)
        pmax = float(pmax)
        vals = df.iloc[:, i].values
        mid = (pmin + pmax) / 2
        half_range = (pmax - pmin) / 2
        df_out[name] = mid + vals * half_range
    df_out = df_out.round(6)
    return df_out

# = 產生Box-Behnken或CCD設計法的標準化/實際參數表 =
def advanced_doe_with_mapping(param_table, design_type):
    param_list = []
    if isinstance(param_table, pd.DataFrame):
        values = param_table.values.tolist()
    else:
        values = param_table
    for row in values:
        try:
            if not str(row[0]).strip(): continue
            param_list.append([row[0], float(row[1]), float(row[2]), float(row[3])])
        except Exception:
            continue
    n_param = len(param_list)
    if n_param < 2:
        return (pd.DataFrame({"提醒":["請至少輸入2個參數"]}),
                pd.DataFrame(),
                None, None)

    # 產生DoE矩陣
    if design_type == "Box-Behnken":
        mat = bbdesign(n_param, center=1)
    elif design_type == "CCD":
        mat = ccdesign(n_param, center=(1,1), face='ccc')
    else:
        return (pd.DataFrame({"提醒":["不支援的設計法"]}),
                pd.DataFrame(),
                None, None)
    
    # 產生標準化表
    colnames = [f"X{i+1}" for i in range(n_param)]
    df_std = pd.DataFrame(mat, columns=colnames)
    
    # 實際參數表
    df_real = doe_normal_to_actual(mat, param_list)
    
    # CSV暫存檔
    std_fd, std_path = tempfile.mkstemp(suffix="_std.csv")
    df_std.to_csv(std_path, index=False)
    real_fd, real_path = tempfile.mkstemp(suffix="_real.csv")
    df_real.to_csv(real_path, index=False)
    
    return df_std, df_real, std_path, real_path

# [AI建模/特徵重要性/SHAP]

# = 獲取模型特徵重要性圖與摘要 =
def get_feature_importance(model, feature_names):
    if hasattr(model, "feature_importances_"):
        importances = model.feature_importances_
        indices = np.argsort(importances)[::-1]
        fig = px.bar(
            x=[feature_names[i] for i in indices], 
            y=importances[indices], 
            orientation='v', title="特徵重要性(Feature Importance)",
            labels={"x":"特徵","y":"重要性"}
        )
        top3 = ", ".join([feature_names[i] for i in indices[:3]])
        summary = f"最重要特徵前三名：{top3}"
        return fig, summary
    return None, "此模型無 feature_importances_"

# = 產生SHAP特徵解釋圖像檔 =
def get_shap_summary(model, X, feature_names):
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X)
    plt.figure(figsize=(7,4))
    shap.summary_plot(shap_values, X, feature_names=feature_names, show=False)
    buf = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
    plt.tight_layout()
    plt.savefig(buf, format="png", bbox_inches='tight')
    plt.close()
    return buf.name

# = 訓練多模型、產生預測與特徵重要性/SHAP圖，返回各指標 =
def train_and_predict_with_importance(datafile, algos, test_ratio, show_importance=True, show_shap=False):
    if datafile is None or algos is None or len(algos) == 0:
        return None, pd.DataFrame({"提醒": ["請上傳DoE資料並選擇演算法"]}), "", "", None, ""
    df = pd.read_csv(datafile.name if hasattr(datafile, "name") else datafile)
    X = df.iloc[:, :-1].values
    y = df.iloc[:, -1].values
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_ratio, random_state=42)
    feature_names = list(df.columns[:-1])
    results = []
    y_pred_dict = {}
    outlines = []
    feature_fig = None
    feature_summary = ""
    shap_img = None

    for algo in algos:
        try:
            # 使用標準化管線讓小資料集下更穩定
            if algo == "Random Forest":
                model = RandomForestRegressor(n_estimators=150, random_state=0)
            elif algo == "XGBoost":
                model = xgb.XGBRegressor(n_estimators=120, random_state=0, verbosity=0)
            elif algo == "LightGBM":
                model = lgb.LGBMRegressor(n_estimators=120, random_state=0)
            elif algo == "SVR":
                model = make_pipeline(StandardScaler(), SVR())
            elif algo == "Linear Regression":
                model = make_pipeline(StandardScaler(), LinearRegression())
            elif algo == "Lasso":
                model = make_pipeline(StandardScaler(), Lasso(alpha=0.0001, max_iter=5000))
            elif algo == "Ridge":
                model = make_pipeline(StandardScaler(), Ridge())
            elif algo == "ElasticNet":
                model = make_pipeline(StandardScaler(), ElasticNet(alpha=0.0001))
            else:
                continue
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            # 避免y_pred為scalar
            if np.isscalar(y_pred):
                y_pred = np.full_like(y_test, y_pred)
            rmse = np.sqrt(mean_squared_error(y_test, y_pred))
            r2 = r2_score(y_test, y_pred)
            results.append({
                "模型": algo,
                "測試RMSE": rmse,
                "測試R²": r2,
                "訓練數": len(X_train),
                "測試數": len(X_test),
            })
            y_pred_dict[algo] = y_pred
            outlines.append(auto_conclude_ai(y_test, y_pred, algo))
            if show_importance and feature_fig is None and hasattr(getattr(model, "named_steps", model), "feature_importances_"):
                base_model = model.named_steps.get(list(model.named_steps)[-1], model) if hasattr(model, "named_steps") else model
                feature_fig, feature_summary = get_feature_importance(base_model, feature_names)
            if show_shap and shap_img is None and hasattr(getattr(model, "named_steps", model), "feature_importances_"):
                base_model = model.named_steps.get(list(model.named_steps)[-1], model) if hasattr(model, "named_steps") else model
                shap_img = get_shap_summary(base_model, X_test, feature_names)
        except Exception as e:
            print(f"模型 {algo} 失敗: {e}")
            continue

    res_df = pd.DataFrame(results)
    model_colors = {
        "Random Forest": "#7c82f6", "XGBoost": "#ff686b", "LightGBM": "#54b984", "SVR": "#7e4a99",
        "Linear Regression": "#229aff", "Lasso": "#f8d90f", "Ridge": "#9edafe", "ElasticNet": "#f9a15b"
    }
    model_markers = {
        "Random Forest": "circle", "XGBoost": "diamond", "LightGBM": "triangle-up", "SVR": "square",
        "Linear Regression": "star", "Lasso": "cross", "Ridge": "hexagon", "ElasticNet": "x"
    }

    fig = px.scatter()
    for algo, y_pred in y_pred_dict.items():
        # 只畫有效的預測點
        if y_pred is not None and len(y_pred) == len(y_test) and not np.isnan(y_pred).all():
            fig.add_scatter(
                x=y_test, y=y_pred, mode='markers', name=algo,
                marker=dict(
                    size=13, color=model_colors.get(algo, "#888"),
                    symbol=model_markers.get(algo, "circle"),
                    line=dict(width=1.5, color="#222")
                ),
                showlegend=True
            )
    minv, maxv = np.min(y_test), np.max(y_test)
    fig.add_scatter(
        x=[minv, maxv], y=[minv, maxv], mode='lines', name='Ideal',
        line=dict(dash='dash', color='black'), showlegend=True
    )
    fig.update_layout(
        title="Test Set Prediction（預測/實際）",
        xaxis_title="True Output", yaxis_title="Predicted",
        legend=dict(font=dict(size=17)), 
        margin=dict(l=40, r=20, t=60, b=40)
    )
    fig.update_yaxes(scaleanchor="x", scaleratio=1)
    return fig, res_df, get_viz_desc("AI Predict"), "<br>".join(outlines), feature_fig, shap_img

# [多圖資料視覺化/自動建議卡片]

# = 產生多種資料視覺化圖與摘要（最多五種）=
def multi_viz(file, plot_types):
    # 準備三個 list 裝圖、描述、總結
    figs, descs, sums = [], [], []
    # 防呆：如果沒檔案或沒選圖，直接回傳 15 個空 output
    if file is None or not plot_types:
        return [None, "", ""] * 5

    # 載入資料
    df = pd.read_csv(file.name if hasattr(file, "name") else file)
    plot_types = plot_types[:5]  # 最多五種

    # 每一種圖型各自 try，出錯補空
    for t in plot_types:
        fig, desc, summ = None, "", ""
        try:
            if t == "Heatmap":
                fig = px.imshow(df.corr(), text_auto=True, title="相關係數Heatmap")
                desc = get_viz_desc("Heatmap")
                summ = auto_conclude_viz(df, "Heatmap")
            elif t == "Pairplot":
                fig = px.scatter_matrix(df, title="資料 Pairplot")
                desc = get_viz_desc("Pairplot")
                summ = auto_conclude_viz(df, "Pairplot")
            elif t == "Histogram":
                fig = px.histogram(df, nbins=10, title="資料 Histogram")
                desc = get_viz_desc("Histogram")
                summ = auto_conclude_viz(df, "Histogram")
            elif t == "Scatter Matrix":
                fig = px.scatter_matrix(df, title="Scatter Matrix")
                desc = get_viz_desc("Scatter Matrix")
                summ = auto_conclude_viz(df, "Scatter Matrix")
            elif t == "PCA":
                X = df.values
                pca = PCA(n_components=2)
                X_pca = pca.fit_transform(X)
                df_pca = pd.DataFrame(X_pca, columns=['PCA1', 'PCA2'])
                fig = px.scatter(df_pca, x='PCA1', y='PCA2', title="PCA降維")
                desc = get_viz_desc("PCA")
                summ = auto_conclude_viz(df, "PCA")
        except Exception as e:
            print(f"[multi_viz error]: {t}", e)
            fig, desc, summ = None, "", ""
        figs.append(fig)
        descs.append(desc)
        sums.append(summ)

    # 不足5組的話補空
    while len(figs) < 5:
        figs.append(None)
        descs.append("")
        sums.append("")

    # 將三個 list interleave 成 [fig, desc, sum, ...]
    outs = []
    for i in range(5):
        outs.extend([figs[i], descs[i], sums[i]])

    # Debug：確認長度一定是15
    print("multi_viz 輸出數量：", len(outs))
    assert len(outs) == 15, f"multi_viz 輸出長度異常：{len(outs)}"

    return outs

# = 根據資料欄位自動產生系統建議 =
def auto_recommendation(file):
    df = pd.read_csv(file.name if hasattr(file, "name") else file)
    tips = []
    for col in df.columns:
        if df[col].isnull().sum() > 0:
            tips.append(f"「{col}」有缺值，建議補值或刪除")
        if df[col].nunique() == 1:
            tips.append(f"「{col}」為常數欄，建議刪除")
        if pd.api.types.is_numeric_dtype(df[col]):
            sk = skew(df[col].dropna())
            if abs(sk) > 2:
                tips.append(f"「{col}」嚴重偏態（skew={sk:.2f}），建議標準化")
    cor = df.corr().abs()
    for c1 in cor.columns:
        for c2 in cor.columns:
            if c1 != c2 and cor.loc[c1, c2] > 0.8:
                tips.append(f"「{c1}」與「{c2}」高度相關，建議後續特徵選擇")
    if not tips:
        tips = ["資料品質良好，無明顯異常。"]
    else:
        tips = tips[:5]
    return "<b>系統建議：</b><ul style='margin:0 0 0 12px'>" + "".join([f"<li>{t}</li>" for t in tips]) + "</ul>"

# = 多圖視覺化+自動建議卡片綁定 =
def multi_viz_and_recommend(file, plot_types):
    vis = multi_viz(file, plot_types)
    recomm = auto_recommendation(file)
    print("vis型態", type(vis), "vis長度", len(vis))
    result = (*vis, recomm)
    print("最終回傳型態", type(result), "長度", len(result))
    return result

with gr.Blocks() as demo:
    upfile2 = gr.File(label="上傳檔案")
    plot_select = gr.CheckboxGroup(
        choices=["Heatmap", "Pairplot", "Histogram", "Scatter Matrix", "PCA"],
        value=["Heatmap", "Pairplot", "Histogram", "PCA"],
        label="選擇圖像類型"
    )
    vis_outs = [gr.Plot() for _ in range(5)] + [gr.Markdown() for _ in range(10)]
    recomm_card = gr.Markdown()
    vizbtn = gr.Button("產生多圖分析", elem_classes=["main-btn"])
    vizbtn.click(
        lambda f, t: (*multi_viz(f, t), auto_recommendation(f)),
        inputs=[upfile2, plot_select],
        outputs=vis_outs + [recomm_card]
    )


# [貝葉斯優化/超參數搜尋]

model_spaces = {
    "XGBoost": (
        XGBRegressor(verbosity=0, random_state=42),
        {
            "max_depth": Integer(2, 10),
            "n_estimators": Integer(50, 300),
            "learning_rate": Real(0.01, 0.2, prior="log-uniform")
        }
    ),
    "Random Forest": (
        RandomForestRegressor(random_state=42),
        {
            "max_depth": Integer(2, 15),
            "n_estimators": Integer(50, 300)
        }
    ),
    "LightGBM": (
        LGBMRegressor(random_state=42),
        {
            "max_depth": Integer(2, 15),
            "n_estimators": Integer(50, 300),
            "learning_rate": Real(0.01, 0.2, prior="log-uniform")
        }
    ),
    "SVR": (
        SVR(),
        {
            "C": Real(0.01, 100, prior="log-uniform"),
            "gamma": Real(0.001, 1.0, prior="log-uniform")
        }
    )
}

# = 格式化最佳參數組合摘要（Markdown）=
def format_best_params(best_params, best_score):
    txt = "<b>最佳參數組合：</b><ul style='margin:0 0 0 16px;'>"
    for k, v in best_params.items():
        txt += f"<li><b>{k}</b>: {v}</li>"
    txt += f"</ul><b>最佳CV RMSE：</b> {best_score:.4f}"
    return txt

# = 單模型貝葉斯優化超參數、產生收斂曲線與自動摘要 =
def run_bayes_optimization(datafile, model_name, n_iter=20):
    if datafile is None:
        return "請上傳CSV資料", None, "", ""
    df = pd.read_csv(datafile.name if hasattr(datafile, "name") else datafile)
    X, y = df.iloc[:, :-1], df.iloc[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    model, search_space = model_spaces.get(model_name, (None, None))
    if model is None:
        return "不支援的模型", None, "", ""
    if not search_space:
        model.fit(X_train, y_train)
        score = model.score(X_test, y_test)
        return f"{model_name}為單純模型，無可優化超參數。\n測試集R2={score:.3f}", None, "", ""
    
    opt = BayesSearchCV(
        model,
        search_spaces=search_space,
        n_iter=n_iter,
        scoring='neg_root_mean_squared_error',
        cv=3,
        n_jobs=-1,
        random_state=42,
        verbose=0
    )
    opt.fit(X_train, y_train)
    best_params = opt.best_params_
    best_score = -opt.best_score_

    results = opt.cv_results_["mean_test_score"]
    rmse_curve = -1 * results  # 轉正的RMSE

    fig, ax = plt.subplots(figsize=(6,3))
    ax.plot(rmse_curve, marker='o')
    ax.set_title("優化歷程 (CV RMSE)")
    ax.set_xlabel("Iteration")
    ax.set_ylabel("CV RMSE")
    ax.grid(True)
    plt.tight_layout()
    plt.close(fig)

    # === 自動判讀收斂趨勢，產生 markdown ===
    auto_summary = auto_conclude_bayes_curve(rmse_curve, model_name)

    return (
        format_best_params(best_params, best_score),  # 超參數最佳組合摘要（markdown）
        fig,                                         # RMSE收斂圖
        get_viz_desc("Bayesian Optimization"),        # 圖說
        auto_summary                                 # << 這裡就是自動結論
    )


# = 多模型同時Bayes優化，產生多條收斂曲線 =
def run_multi_bayes_optimization(datafile, model_types, n_iter=20):

    # UI要的模型順序
    MODEL_ORDER = ["Random Forest", "XGBoost", "LightGBM", "SVR"]
    N_PER_MODEL = 3  # [fig, summary, best_param]

    if datafile is None or not model_types:
        # 沒資料或沒選模型，直接全空
        return [None] + [None, "", ""] * len(MODEL_ORDER)
    
    df = pd.read_csv(datafile.name if hasattr(datafile, "name") else datafile)
    X, y = df.iloc[:, :-1], df.iloc[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    rmse_curves = {}
    summaries = {}
    best_params_dict = {}

    for mtype in model_types:
        model, search_space = model_spaces.get(mtype, (None, None))
        if model is None or not search_space:
            rmse_curves[mtype] = [np.nan]
            summaries[mtype] = f"{mtype} 無法執行最佳化"
            best_params_dict[mtype] = f"{mtype} 不支援"
            continue
        opt = BayesSearchCV(
            model,
            search_spaces=search_space,
            n_iter=n_iter,
            scoring='neg_root_mean_squared_error',
            cv=3,
            n_jobs=-1,
            random_state=42,
            verbose=0
        )
        opt.fit(X_train, y_train)
        rmse_curve = -1 * opt.cv_results_["mean_test_score"]
        rmse_curves[mtype] = rmse_curve
        summaries[mtype] = auto_conclude_bayes_curve(rmse_curve, mtype)
        best_params_dict[mtype] = format_best_params(opt.best_params_, -opt.best_score_)

    # 多模型共線互動圖
    fig = go.Figure()
    for mtype in model_types:
        curve = rmse_curves.get(mtype, [])
        fig.add_trace(go.Scatter(
            x=list(range(len(curve))),
            y=curve,
            mode='lines+markers',
            name=mtype
        ))
    fig.update_layout(
        title="所有模型Bayes優化歷程 (CV RMSE)",
        xaxis_title="Iteration",
        yaxis_title="CV RMSE",
        hovermode="x unified"
    )
    outs = [fig]
    # 確保 outputs 長度一定固定
    for mtype in MODEL_ORDER:
        if mtype in model_types:
            # 有選就給內容
            curve = rmse_curves.get(mtype, [])
            fig_tab = go.Figure(go.Scatter(
                x=list(range(len(curve))),
                y=curve,
                mode='lines+markers',
                name=mtype
            ))
            summary = summaries.get(mtype, "")
            best_param = best_params_dict.get(mtype, "")
            outs.extend([fig_tab, summary, best_param])
        else:
            outs.extend([None, "", ""])
    return outs

# [AI搜尋最佳化條件]

# = 自動判斷欄位型態（連續/離散）以建立搜尋空間 =
def suggest_optimization_space(df, discrete_cols=[]):
    space = {}
    for col in df.columns[:-1]:
        if col in discrete_cols:
            vals = sorted(df[col].unique())
            space[col] = Categorical(vals)
        else:
            vmin, vmax = df[col].min(), df[col].max()
            if len(df[col].unique()) < 8:
                space[col] = Categorical(sorted(df[col].unique()))
            else:
                space[col] = Real(vmin, vmax)
    return space

# = 搜尋單模型之預測最佳/最差條件組合 =
def find_best_feature(model, X, maximize=True):
    print(">>> maximize=", maximize)  # Debug 印出
    # 建立每個feature的上下限作為搜尋範圍
    bounds = []
    for i in range(X.shape[1]):
        vmin, vmax = X.iloc[:, i].min(), X.iloc[:, i].max()
        bounds.append((vmin, vmax))
    def obj(x):
        y_pred = model.predict(np.array(x).reshape(1, -1))[0]
        print("obj() y_pred:", y_pred, "| maximize:", maximize)
        return -y_pred if maximize else y_pred
    x0 = X.mean().values
    res = minimize(obj, x0, bounds=bounds)
    best_x = res.x
    best_pred = -res.fun if maximize else res.fun
    return best_x, best_pred

# = 多模型搜尋最佳化參數組合（最大化/最小化）=
def optimize_conditions(datafile, model_types, direction, is_discrete=False, n_iter=32):
    if datafile is None or not model_types:
        return pd.DataFrame({"提醒": ["請上傳DoE數據與選擇模型"]}), "", "", ""  # 四個 output
    df = pd.read_csv(datafile.name if hasattr(datafile, "name") else datafile)
    X, y = df.iloc[:, :-1], df.iloc[:, -1]
    maximize = (direction == "最大化")

    summary = []
    all_results = []
    for mtype in model_types:
        if mtype == "Random Forest":
            model = RandomForestRegressor(n_estimators=160, random_state=42)
        elif mtype == "XGBoost":
            model = XGBRegressor(n_estimators=100, random_state=42, verbosity=0)
        elif mtype == "LightGBM":
            model = LGBMRegressor(n_estimators=100, random_state=42)
        elif mtype == "SVR":
            model = make_pipeline(StandardScaler(), SVR())
        elif mtype == "Linear Regression":
            model = make_pipeline(StandardScaler(), LinearRegression())
        elif mtype == "Lasso":
            model = make_pipeline(StandardScaler(), Lasso(alpha=0.0001, max_iter=5000))
        elif mtype == "Ridge":
            model = make_pipeline(StandardScaler(), Ridge())
        elif mtype == "ElasticNet":
            model = make_pipeline(StandardScaler(), ElasticNet(alpha=0.0001))
        else:
            continue
        model.fit(X, y)
        if "大" in str(direction):   # 有「大」字就是最大化
            maximize = True
        else:
            maximize = False
        best_x, best_pred = find_best_feature(model, X, maximize=maximize)
        feature_dict = {k: float(v) for k, v in zip(X.columns, best_x)}
        feature_dict["模型"] = mtype
        feature_dict["預測產率"] = float(best_pred)
        all_results.append(feature_dict)
        summary.append(
            f"【{mtype}】最佳反應條件：{feature_dict}<br>預測產率/目標值：<b>{best_pred:.3g}</b><br>"
        )

    df_out = pd.DataFrame(all_results)
    txt = "<br>".join(summary)
    desc = ""   # 可寫 "依據多模型搜尋理論極值"
    sum_ = ""   # 你可以回傳模型特性、附註等
    return df_out, txt, desc, sum_

# [AI模型訓練前後回歸比較]

# = 單一y，對比多模型在前後資料集的R2/RMSE與特徵重要性變化 =
def compare_models_before_after(old_csv, new_csv, model_linear, model_nonlinear, target):
    try:
        # 合併用戶選的模型
        model_types = (model_linear or []) + (model_nonlinear or [])
        if old_csv is None or new_csv is None or not model_types:
            return "⚠️ 請上傳原始/合併DoE並選擇模型", pd.DataFrame(), None, None

        old_df = pd.read_csv(old_csv.name if hasattr(old_csv, "name") else old_csv)
        new_df = pd.read_csv(new_csv.name if hasattr(new_csv, "name") else new_csv)

        if target not in old_df.columns or target not in new_df.columns:
            return f"⚠️ 缺少 '{target}' 欄位", pd.DataFrame(), None, None

        X_old, y_old = old_df.drop(columns=[target]), old_df[target]
        X_new, y_new = new_df.drop(columns=[target]), new_df[target]

        cv_num = min(5, len(y_old), len(y_new))
        if cv_num < 2:
            return "⚠️ 資料筆數太少，無法交叉驗證（至少需2列）", pd.DataFrame(), None, None

        rows = []
        importances = {}

        for mtype in model_types:
            if mtype == "Random Forest":
                model1 = RandomForestRegressor(n_estimators=120, random_state=42)
                model2 = RandomForestRegressor(n_estimators=120, random_state=42)
            elif mtype == "XGBoost":
                model1 = XGBRegressor(n_estimators=100, random_state=42, verbosity=0)
                model2 = XGBRegressor(n_estimators=100, random_state=42, verbosity=0)
            elif mtype == "LightGBM":
                model1 = LGBMRegressor(n_estimators=100, random_state=42)
                model2 = LGBMRegressor(n_estimators=100, random_state=42)
            elif mtype == "SVR":
                model1 = make_pipeline(StandardScaler(), SVR())
                model2 = make_pipeline(StandardScaler(), SVR())
            elif mtype == "Linear Regression":
                model1 = make_pipeline(StandardScaler(), LinearRegression())
                model2 = make_pipeline(StandardScaler(), LinearRegression())
            elif mtype == "Lasso":
                model1 = make_pipeline(StandardScaler(), Lasso(alpha=0.0001, max_iter=5000))
                model2 = make_pipeline(StandardScaler(), Lasso(alpha=0.0001, max_iter=5000))
            elif mtype == "Ridge":
                model1 = make_pipeline(StandardScaler(), Ridge())
                model2 = make_pipeline(StandardScaler(), Ridge())
            elif mtype == "ElasticNet":
                model1 = make_pipeline(StandardScaler(), ElasticNet(alpha=0.0001))
                model2 = make_pipeline(StandardScaler(), ElasticNet(alpha=0.0001))
            else:
                continue

            try:
                # R² (CV)
                r2_old = cross_val_score(model1, X_old, y_old, cv=cv_num, scoring="r2").mean()
                r2_new = cross_val_score(model2, X_new, y_new, cv=cv_num, scoring="r2").mean()
                # RMSE (fit on all, compute on all)
                model1.fit(X_old, y_old)
                model2.fit(X_new, y_new)
                pred_old = model1.predict(X_old)
                pred_new = model2.predict(X_new)
                rmse_old = np.sqrt(np.mean((pred_old - y_old) ** 2))
                rmse_new = np.sqrt(np.mean((pred_new - y_new) ** 2))
                # feature importance
                if hasattr(model1, "feature_importances_"):
                    imp_old = model1.feature_importances_
                    imp_new = model2.feature_importances_
                elif hasattr(model1, "named_steps") and "randomforestregressor" in model1.named_steps:
                    imp_old = model1.named_steps["randomforestregressor"].feature_importances_
                    imp_new = model2.named_steps["randomforestregressor"].feature_importances_
                elif hasattr(model1, "coef_"):
                    imp_old = np.abs(model1.coef_)
                    imp_new = np.abs(model2.coef_)
                elif hasattr(model1, "named_steps") and "linearregression" in model1.named_steps:
                    imp_old = np.abs(model1.named_steps["linearregression"].coef_)
                    imp_new = np.abs(model2.named_steps["linearregression"].coef_)
                else:
                    imp_old = np.zeros(X_old.shape[1])
                    imp_new = np.zeros(X_new.shape[1])

                rows.append({
                    "模型": mtype,
                    "原始R2": r2_old,
                    "合併新點R2": r2_new,
                    "原始RMSE": rmse_old,
                    "合併新點RMSE": rmse_new
                })
                # 只有計算成功的才存 importance
                importances[mtype+"_old"] = imp_old
                importances[mtype+"_new"] = imp_new

            except Exception as model_e:
                rows.append({
                    "模型": mtype,
                    "原始R2": f"Error: {model_e}",
                    "合併新點R2": f"Error: {model_e}",
                    "原始RMSE": f"Error: {model_e}",
                    "合併新點RMSE": f"Error: {model_e}"
                })
                # 不把異常模型 importance 放進去
                print(f"{mtype} failed: {model_e}")
                continue

        table = pd.DataFrame(rows)

        # ========== Feature Importance Plot (只畫有用的) ==========
        fig_fi = go.Figure()
        features = list(X_old.columns)
        colors = [
            "#4E79A7", "#F28E2B", "#76B7B2", "#E15759",
            "#59A14F", "#EDC948", "#B07AA1", "#FF9DA7"
        ]
        plot_count = 0  # 用於 color 依序遞增
        for mtype in model_types:
            k_old = f"{mtype}_old"
            k_new = f"{mtype}_new"
            # 只畫 importance > 0 的
            if k_old in importances and np.sum(importances[k_old]) > 0:
                fig_fi.add_trace(go.Bar(
                    x=features, y=importances[k_old],
                    name=f"{mtype} - Before", marker_color=colors[plot_count % len(colors)], opacity=0.75
                ))
                plot_count += 1
            if k_new in importances and np.sum(importances[k_new]) > 0:
                fig_fi.add_trace(go.Bar(
                    x=features, y=importances[k_new],
                    name=f"{mtype} - After", marker_color=colors[plot_count % len(colors)], opacity=0.4
                ))
                plot_count += 1
        fig_fi.update_layout(
            barmode="group",
            title="Feature Importance Comparison",
            xaxis_title="Feature",
            yaxis_title="Importance",
            legend_title="Model",
            font=dict(size=13)
        )

        # ========== y Distribution Plot ==========
        fig_y = go.Figure()
        fig_y.add_trace(go.Histogram(
            x=old_df[target], name="Before", opacity=0.7, nbinsx=16
        ))
        fig_y.add_trace(go.Histogram(
            x=new_df[target], name="After", opacity=0.7, nbinsx=16
        ))
        fig_y.update_layout(
            barmode='overlay',
            title="y Distribution Comparison",
            xaxis_title=target,
            yaxis_title="Count",
            legend_title="Dataset",
            font=dict(size=13)
        )

        return "", table, fig_fi, fig_y

    except Exception as e:
        import traceback
        tb = traceback.format_exc()
        print("=== DEBUG ERROR ===")
        print(tb)
        return f"❌ 系統發生錯誤：{str(e)}", pd.DataFrame(), None, None

# = 根據R2變化自動產生效能總結摘要 =        
def generate_r2_summary(table):
    lines = []
    for idx, row in table.iterrows():
        model = row['模型']
        try:
            r2_before = float(row['原始R2'])
            r2_after = float(row['合併新點R2'])
            delta = r2_after - r2_before
            if r2_after < r2_before - 0.1:
                lines.append(f"**{model}：表現顯著下降（R² {r2_before:.2f} → {r2_after:.2f}）**")
            elif r2_after > r2_before + 0.1:
                lines.append(f"{model}：模型表現提升（R² {r2_before:.2f} → {r2_after:.2f}）")
            elif abs(delta) < 0.1:
                lines.append(f"{model}：R²無明顯變化（R² {r2_before:.2f} → {r2_after:.2f}）")
        except Exception:
            lines.append(f"{model}：計算失敗或資料不足。")
    if not lines:
        lines = ["無有效模型結果。"]
    return "### AI模型R²比較摘要\n" + "\n".join(lines)

# = 整合前後回歸比較結果與自動摘要 =
def compare_models_before_after_with_summary(old_csv, new_csv, model_types, target="y"):
    result = compare_models_before_after(old_csv, new_csv, model_types, target)
    table = result[0]
    summary = generate_r2_summary(table)
    return (*result, summary)

# = 多y目標下，對比多模型在前後資料集的效能與特徵重要性變化 =
def compare_models_multi_y_before_after(old_csv, new_csv, model_types, targets):
    # 防呆
    if old_csv is None or new_csv is None or not model_types or not targets:
        return {}, {}, {}, "請確認已上傳檔案、選擇模型與目標欄位"

    old_df = pd.read_csv(old_csv.name if hasattr(old_csv, "name") else old_csv)
    new_df = pd.read_csv(new_csv.name if hasattr(new_csv, "name") else new_csv)
    if isinstance(targets, str): targets = [targets]

    result_tables, feature_figs, ydist_figs = {}, {}, {}
    summary_lines = []

    for target in targets:
        if target not in old_df.columns or target not in new_df.columns:
            summary_lines.append(f"❌ 欄位 {target} 在資料中不存在，略過。")
            continue

        X_old, y_old = old_df.drop(columns=[target]), old_df[target]
        X_new, y_new = new_df.drop(columns=[target]), new_df[target]
        cv_num = min(5, len(y_old), len(y_new))
        if cv_num < 2:
            summary_lines.append(f"⚠️ {target} 資料筆數不足無法交叉驗證。")
            continue

        rows, importances = [], {}
        for mtype in model_types:
            # 建立模型
            if mtype == "Random Forest":
                model1 = RandomForestRegressor(n_estimators=120, random_state=42)
                model2 = RandomForestRegressor(n_estimators=120, random_state=42)
            elif mtype == "XGBoost":
                model1 = XGBRegressor(n_estimators=100, random_state=42, verbosity=0)
                model2 = XGBRegressor(n_estimators=100, random_state=42, verbosity=0)
            elif mtype == "LightGBM":
                model1 = LGBMRegressor(n_estimators=100, random_state=42)
                model2 = LGBMRegressor(n_estimators=100, random_state=42)
            elif mtype == "SVR":
                model1 = make_pipeline(StandardScaler(), SVR())
                model2 = make_pipeline(StandardScaler(), SVR())
            elif mtype == "Linear Regression":
                model1 = make_pipeline(StandardScaler(), LinearRegression())
                model2 = make_pipeline(StandardScaler(), LinearRegression())
            elif mtype == "Lasso":
                model1 = make_pipeline(StandardScaler(), Lasso(alpha=0.0001, max_iter=5000))
                model2 = make_pipeline(StandardScaler(), Lasso(alpha=0.0001, max_iter=5000))
            elif mtype == "Ridge":
                model1 = make_pipeline(StandardScaler(), Ridge())
                model2 = make_pipeline(StandardScaler(), Ridge())
            elif mtype == "ElasticNet":
                model1 = make_pipeline(StandardScaler(), ElasticNet(alpha=0.0001))
                model2 = make_pipeline(StandardScaler(), ElasticNet(alpha=0.0001))
            else:
                continue

            try:
                r2_old = cross_val_score(model1, X_old, y_old, cv=cv_num, scoring="r2").mean()
                r2_new = cross_val_score(model2, X_new, y_new, cv=cv_num, scoring="r2").mean()
                model1.fit(X_old, y_old)
                model2.fit(X_new, y_new)
                pred_old = model1.predict(X_old)
                pred_new = model2.predict(X_new)
                rmse_old = np.sqrt(np.mean((pred_old - y_old) ** 2))
                rmse_new = np.sqrt(np.mean((pred_new - y_new) ** 2))

                # feature importance
                if hasattr(model1, "feature_importances_"):
                    imp_old = model1.feature_importances_
                    imp_new = model2.feature_importances_
                elif hasattr(model1, "named_steps") and "randomforestregressor" in model1.named_steps:
                    imp_old = model1.named_steps["randomforestregressor"].feature_importances_
                    imp_new = model2.named_steps["randomforestregressor"].feature_importances_
                elif hasattr(model1, "coef_"):
                    imp_old = np.abs(model1.coef_)
                    imp_new = np.abs(model2.coef_)
                elif hasattr(model1, "named_steps") and "linearregression" in model1.named_steps:
                    imp_old = np.abs(model1.named_steps["linearregression"].coef_)
                    imp_new = np.abs(model2.named_steps["linearregression"].coef_)
                else:
                    imp_old = np.zeros(X_old.shape[1])
                    imp_new = np.zeros(X_new.shape[1])

                rows.append({
                    "模型": mtype,
                    "原始R2": r2_old,
                    "合併新點R2": r2_new,
                    "原始RMSE": rmse_old,
                    "合併新點RMSE": rmse_new
                })
                importances[mtype+"_old"] = imp_old
                importances[mtype+"_new"] = imp_new
            except Exception as model_e:
                rows.append({
                    "模型": mtype,
                    "原始R2": f"Error: {model_e}",
                    "合併新點R2": f"Error: {model_e}",
                    "原始RMSE": f"Error: {model_e}",
                    "合併新點RMSE": f"Error: {model_e}"
                })

        table = pd.DataFrame(rows)
        result_tables[target] = table

        # 特徵重要性圖
        features = list(X_old.columns)
        fig_fi = go.Figure()
        colors = [
            "#4E79A7", "#F28E2B", "#76B7B2", "#E15759",
            "#59A14F", "#EDC948", "#B07AA1", "#FF9DA7"
        ]
        plot_count = 0
        for mtype in model_types:
            k_old = f"{mtype}_old"
            k_new = f"{mtype}_new"
            if k_old in importances and np.sum(importances[k_old]) > 0:
                fig_fi.add_trace(go.Bar(
                    x=features, y=importances[k_old],
                    name=f"{mtype} - Before", marker_color=colors[plot_count % len(colors)], opacity=0.75
                ))
                plot_count += 1
            if k_new in importances and np.sum(importances[k_new]) > 0:
                fig_fi.add_trace(go.Bar(
                    x=features, y=importances[k_new],
                    name=f"{mtype} - After", marker_color=colors[plot_count % len(colors)], opacity=0.4
                ))
                plot_count += 1
        fig_fi.update_layout(
            barmode="group",
            title=f"{target} Feature Importance Comparison",
            xaxis_title="Feature",
            yaxis_title="Importance",
            legend_title="Model",
            font=dict(size=13)
        )
        feature_figs[target] = fig_fi

        # y分布圖
        fig_y = go.Figure()
        fig_y.add_trace(go.Histogram(
            x=old_df[target], name="Before", opacity=0.7, nbinsx=16
        ))
        fig_y.add_trace(go.Histogram(
            x=new_df[target], name="After", opacity=0.7, nbinsx=16
        ))
        fig_y.update_layout(
            barmode='overlay',
            title=f"{target} y Distribution Comparison",
            xaxis_title=target,
            yaxis_title="Count",
            legend_title="Dataset",
            font=dict(size=13)
        )
        ydist_figs[target] = fig_y

        # 摘要
        # 給摘要更精簡
        for _, row in table.iterrows():
            try:
                r2_before = float(row['原始R2'])
                r2_after = float(row['合併新點R2'])
                model = row['模型']
                delta = r2_after - r2_before
                if r2_after < r2_before - 0.1:
                    summary_lines.append(f"<b>{target} - {model}</b>：顯著下降（R² {r2_before:.2f} → {r2_after:.2f}）")
                elif r2_after > r2_before + 0.1:
                    summary_lines.append(f"<b>{target} - {model}</b>：提升（R² {r2_before:.2f} → {r2_after:.2f}）")
                elif abs(delta) < 0.1:
                    summary_lines.append(f"<b>{target} - {model}</b>：無明顯變化（R² {r2_before:.2f} → {r2_after:.2f}）")
            except Exception:
                summary_lines.append(f"<b>{target} - {row['模型']}</b>：計算失敗/資料不足")
    # 回傳 dict，供 Tab 動態展示
    return result_tables, feature_figs, ydist_figs, "### AI多 y 回歸比較摘要\n" + "<br>".join(summary_lines)

# = 自動根據關鍵字偵測數據集y欄位 =
def detect_y_columns(csv_file, keyword_str):
    if csv_file is None:
        return gr.update(choices=[], value=[])
    try:
        df = pd.read_csv(csv_file.name if hasattr(csv_file, "name") else csv_file)
        keywords = [k.strip().lower() for k in keyword_str.split(",") if k.strip()]
        cols = []
        for c in df.columns:
            # 關鍵字優先
            if any(k in str(c).lower() for k in keywords):
                cols.append(c)
        # 若沒關鍵字命中，則用 fallback：所有數值欄位
        if not cols:
            cols = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
        # 避免太多，最多只選4個
        if len(cols) > 4:
            cols = cols[-4:]
        return gr.update(choices=cols, value=cols[:2])
    except Exception:
        return gr.update(choices=[], value=[])


# [3D/2D分布、反應面、等高線視覺化]

# = 3D散點圖 + 預測曲面圖（隨機森林擬合）=
def plot_3d_scatter_surface(
    file, 
    x_col, y_col, z_col, target_col, 
    surface_fit=False,  # 是否繪製曲面
    n_grid=40           # 曲面分辨率
):

    # 預設空圖
    empty_fig = go.Figure()
    empty_fig.update_layout(template="plotly_white")

    if file is None or not x_col or not y_col or not z_col or not target_col:
        return empty_fig, empty_fig
    try:
        df = pd.read_csv(file.name if hasattr(file, "name") else file)
        # 防呆
        if not all([c in df.columns for c in [x_col, y_col, z_col, target_col]]):
            return empty_fig, empty_fig

        # 三維散點圖
        fig_scatter = px.scatter_3d(
            df, x=x_col, y=y_col, z=z_col, color=target_col,
            title=f"3D Scatter: {x_col}, {y_col}, {z_col} / Color={target_col}",
            opacity=0.85
        )

        # 三維曲面圖
        fig_surface = empty_fig
        if surface_fit:
            # 只取指定欄位
            X = df[[x_col, y_col, z_col]].values
            y = df[target_col].values
            # fit一個隨機森林
            model = RandomForestRegressor(n_estimators=80, random_state=0)
            model.fit(X, y)
            # 建立網格
            x_lin = np.linspace(df[x_col].min(), df[x_col].max(), n_grid)
            y_lin = np.linspace(df[y_col].min(), df[y_col].max(), n_grid)
            z_lin = np.linspace(df[z_col].min(), df[z_col].max(), n_grid)
            xx, yy, zz = np.meshgrid(x_lin, y_lin, z_lin)
            grid_points = np.c_[xx.ravel(), yy.ravel(), zz.ravel()]
            yy_pred = model.predict(grid_points)
            # 用scatter3d畫預測點
            fig_surface = go.Figure(data=[
                go.Scatter3d(
                    x=grid_points[:,0], y=grid_points[:,1], z=grid_points[:,2],
                    mode='markers',
                    marker=dict(size=2.2, color=yy_pred, colorscale='Viridis', opacity=0.35),
                    name="預測表面"
                )
            ])
            fig_surface.update_layout(
                title=f"3D Predicted Surface: {target_col} vs {x_col}, {y_col}, {z_col}",
                scene=dict(
                    xaxis_title=x_col, yaxis_title=y_col, zaxis_title=z_col
                )
            )
        return fig_scatter, fig_surface
    except Exception as e:
        # 除錯用途可log e，但前端只回傳空圖
        return empty_fig, empty_fig

# = 自動解析欄位，更新3D視覺化的欄位選單 =
def update_dropdowns(file):
    print("收到 file:", file)
    if file is None:
        print("沒有收到檔案")
        return (["（請選擇）"], "（請選擇）"), (["（請選擇）"], "（請選擇）"), (["（請選擇）"], "（請選擇）"), (["（請選擇）"], "（請選擇）")
    try:
        path = file.name if hasattr(file, "name") else file
        print("預計讀取路徑:", path)
        print("檔案存在?", os.path.exists(path))
        df = pd.read_csv(path)
        cols = list(df.select_dtypes(include="number").columns)
        if not cols:
            cols = list(df.columns)
        x_def = cols[0] if len(cols) > 0 else "（請選擇）"
        y_def = cols[1] if len(cols) > 1 else x_def
        z_def = cols[2] if len(cols) > 2 else x_def
        t_def = cols[-1] if len(cols) > 0 else x_def
        print("預設值:", x_def, y_def, z_def, t_def)
        return (cols, x_def), (cols, y_def), (cols, z_def), (cols, t_def)
    except Exception as e:
        print("讀檔失敗：", e)
        return (["（請選擇）"], "（請選擇）"), (["（請選擇）"], "（請選擇）"), (["（請選擇）"], "（請選擇）"), (["（請選擇）"], "（請選擇）")


# = 二變數反應面/等高線圖（3D Surface/Contour）=
def plot_surface_and_contour(file, x_col, y_col, z_col, n_grid=40):

    empty_fig = go.Figure()
    empty_fig.update_layout(template="plotly_white")
    empty_fig2 = go.Figure()
    empty_fig2.update_layout(template="plotly_white")

    if file is None or not x_col or not y_col or not z_col:
        return empty_fig, empty_fig2

    try:
        df = pd.read_csv(file.name if hasattr(file, "name") else file)
        if not all([c in df.columns for c in [x_col, y_col, z_col]]):
            return empty_fig, empty_fig2

        x, y, z = df[x_col].values, df[y_col].values, df[z_col].values

        # 建立格點
        xi = np.linspace(x.min(), x.max(), n_grid)
        yi = np.linspace(y.min(), y.max(), n_grid)
        xi, yi = np.meshgrid(xi, yi)
        zi = griddata((x, y), z, (xi, yi), method="cubic")

        # Surface 3D 曲面圖
        fig_surface = go.Figure(data=[
            go.Surface(x=xi, y=yi, z=zi, colorscale="Viridis", opacity=0.93, showscale=True)
        ])
        fig_surface.update_layout(
            title=f"3D 曲面圖：{z_col} vs {x_col}, {y_col}",
            scene=dict(
                xaxis_title=x_col,
                yaxis_title=y_col,
                zaxis_title=z_col
            ),
            margin=dict(l=0, r=0, b=0, t=40)
        )

        # Contour 等高線圖
        fig_contour = go.Figure(data=[
            go.Contour(
                x=xi[0], y=yi[:,0], z=zi,
                colorscale="Viridis",
                contours=dict(showlabels=True),
                colorbar=dict(title=z_col)
            )
        ])
        fig_contour.update_layout(
            title=f"等高線圖：{z_col} vs {x_col}, {y_col}",
            xaxis_title=x_col,
            yaxis_title=y_col,
            margin=dict(l=0, r=0, b=0, t=40)
        )

        return fig_surface, fig_contour
    except Exception as e:
        print(f"3D surface/contour plot error: {e}")
        return empty_fig, empty_fig2


# [複合y批次多模型回歸/交互作用] 

# = 根據模型名稱產生多y回歸可用的模型物件 =
def get_model(name):
    if name == "Random Forest":
        return MultiOutputRegressor(RandomForestRegressor(n_estimators=100, random_state=0))
    elif name == "XGBoost":
        return MultiOutputRegressor(XGBRegressor(n_estimators=100, random_state=0))
    elif name == "PLS Regression":
        return PLSRegression(n_components=2)
    elif name == "Ridge":
        return MultiOutputRegressor(Ridge())
    elif name == "Lasso":
        return MultiOutputRegressor(Lasso())
    elif name == "ElasticNet":
        return MultiOutputRegressor(ElasticNet())
    elif name == "Linear Regression":
        return MultiOutputRegressor(LinearRegression())
    elif name == "SVR":
        return MultiOutputRegressor(SVR())
    else:
        raise ValueError(f"Unknown model: {name}")

# = 根據關鍵字從資料中自動偵測多y欄位 =
def detect_y_columns(file, keywords_str):
    import re
    if file is None:
        return gr.update(choices=[], value=[])
    df = pd.read_csv(file.name if hasattr(file, 'name') else file)
    keywords = [kw.strip() for kw in keywords_str.split(",") if kw.strip()]
    patt = re.compile("|".join([re.escape(k) for k in keywords]), re.IGNORECASE)
    y_candidates = [c for c in df.columns if patt.search(str(c))]
    return gr.update(choices=list(df.columns), value=y_candidates)

# = 多y/交互作用/多模型的批次回歸主程式 =
def run_multi_y(before_file, after_file, linear, nonlinear, ylist, add_inter, add_y_inter, degree):

    df = pd.read_csv(before_file.name if hasattr(before_file, 'name') else before_file)
    if not ylist or not (linear or nonlinear):
        return "請選擇目標y欄位與模型", *[""]*4, *[None]*12

    X = df.drop(columns=ylist)
    Y = df[ylist]
    X = X.select_dtypes(include=[np.number])
    # 1. 特徵交互作用
    if add_inter and int(degree) > 1:
        poly = PolynomialFeatures(degree=int(degree), interaction_only=True, include_bias=False)
        X_inter = pd.DataFrame(poly.fit_transform(X), columns=poly.get_feature_names_out(X.columns))
    else:
        X_inter = X.copy()
    # 2. y 交互作用
    if add_y_inter and len(ylist) > 1:
        for i in range(len(ylist)):
            for j in range(i+1, len(ylist)):
                Y[f"{ylist[i]}*{ylist[j]}"] = Y[ylist[i]] * Y[ylist[j]]
    X_train, X_test, Y_train, Y_test = train_test_split(X_inter, Y, test_size=0.2, random_state=42)
    # 3. 多模型分析
    model_names = (linear or []) + (nonlinear or [])
    results, tab_results = [], []
    for m in model_names:
        model = get_model(m)
        model.fit(X_train, Y_train)
        pred = model.predict(X_test)
        if isinstance(pred, np.ndarray):
            pred = pd.DataFrame(pred, columns=Y.columns)
        else:
            pred = pd.DataFrame(pred, columns=Y.columns)
        scores = {y: r2_score(Y_test[y], pred[y]) for y in Y.columns}
        rmses = {y: np.sqrt(mean_squared_error(Y_test[y], pred[y])) for y in Y.columns}
        model_summary = pd.DataFrame({
            "Model": [m]*len(Y.columns),
            "y": list(Y.columns),
            "R2": [scores[y] for y in Y.columns],
            "RMSE": [rmses[y] for y in Y.columns]
        })
        results.append(model_summary)
        for i, y in enumerate(Y.columns[:4]):
            # ==== Robust 特徵重要性自動判斷 ====
            if hasattr(model, "estimators_"):
                est = model.estimators_[i]
            else:
                est = model
            # 樹模型
            if hasattr(est, "feature_importances_"):
                importances = est.feature_importances_
            # 線性模型
            elif hasattr(est, "coef_"):
                # 注意 shape
                coef = est.coef_
                if coef.ndim > 1:
                    importances = np.abs(coef[i])
                else:
                    importances = np.abs(coef)
            # PLS (Partial Least Squares)
            elif hasattr(est, "x_weights_"):
                importances = np.abs(est.x_weights_[:, 0])
            # 其餘模型 fallback
            else:
                importances = np.zeros(X_inter.shape[1])
            feat_names = X_inter.columns
            # ====== 畫圖 =======
            fig_feat = go.Figure([go.Bar(x=feat_names, y=importances)])
            fig_feat.update_layout(title=f"{m} {y} Feature Importances", height=440)
            fig_dist = go.Figure()
            fig_dist.add_trace(go.Histogram(x=Y_test[y], name='True', opacity=0.7))
            fig_dist.add_trace(go.Histogram(x=pred[y], name='Pred', opacity=0.7))
            fig_dist.update_layout(barmode='overlay', title=f"{m} {y} True vs Pred Dist", height=440)
            tab_results.append((f"【{m}】y: {y}", model_summary, fig_feat, fig_dist))
    # 結果組合
    out_titles, out_tables, out_feats, out_ydists = [], [], [], []
    N=8
    for i in range(N):
        if i < len(tab_results):
            tab = tab_results[i]
            out_titles.append(tab[0])
            out_tables.append(tab[1])
            out_feats.append(tab[2])
            out_ydists.append(tab[3])
        else:
            out_titles.append("")
            out_tables.append(None)
            out_feats.append(None)
            out_ydists.append(None)
    summary = pd.concat(results, ignore_index=True) if results else ""
    return summary, *out_titles, *out_tables, *out_feats, *out_ydists


# [自動function管理核心]
import re

def extract_tab_ui_and_function(pyfile="app.py"):
    """
    抓每個Tab所有UI元件，以及其是否綁定function/lambda/callback
    """
    try:
        with open(pyfile, encoding="utf-8") as f:
            code = f.read()
    except Exception as e:
        return f"❌ 讀取 {pyfile} 失敗：{e}"

    # (1) 抓所有 def
    defs = set(re.findall(r"def\s+([a-zA-Z_][\w\d_]*)\s*\(", code))

    # (2) 抓所有 Tab 區塊
    tab_pattern = re.compile(
        r'with gr\.Tab(?:Item)?\(\s*[\'"](.+?)[\'"]\s*\):([\s\S]*?)(?=with gr\.Tab|with gr\.TabItem|\Z)', re.MULTILINE)

    # (3) UI元件建立（如 gr.Markdown、gr.Dataframe、gr.File、gr.Button...）
    ui_pattern = re.compile(r'(gr\.[A-Za-z_]+)\s*\(')
    # (4) 互動事件
    event_pattern = re.compile(
        r'([a-zA-Z_][\w\d_]*)\.(click|change|submit|select)\(\s*([a-zA-Z_][\w\d_]*|lambda)(.*?)(\)|$)', re.DOTALL)

    output = "# 🧩 各Tab UI元件與function/lambda/callback mapping\n\n"
    for m in tab_pattern.finditer(code):
        tab_name, tab_code = m.group(1), m.group(2)
        output += f"## {tab_name}\n"
        # 1. 抓所有 UI元件名稱
        ui_list = ui_pattern.findall(tab_code)
        # 統計有幾種元件
        ui_count = {}
        for u in ui_list:
            ui_count[u] = ui_count.get(u, 0) + 1
        if ui_list:
            output += "### 本Tab使用UI元件：\n"
            for u in sorted(set(ui_list)):
                output += f"- `{u}` x {ui_count[u]}\n"
        else:
            output += "- (本Tab沒有任何UI元件)\n"

        # 2. 掃描本Tab互動callback
        func_map = []
        for ev in event_pattern.findall(tab_code):
            obj, trigger, fn, args, _ = ev
            if fn == "lambda":
                func_map.append(f"{obj}.{trigger} → lambda（匿名）")
            else:
                func_map.append(f"{obj}.{trigger} → {fn}()")
            # callback 參數
            cb_matches = re.findall(r'(_callback|_js|_preprocess|_postprocess)\s*=\s*([a-zA-Z_][\w\d_]*|lambda[^\),]*)', args)
            for cb_type, cb_fn in cb_matches:
                if cb_fn.strip().startswith("lambda"):
                    func_map.append(f"{obj}.{cb_type} → lambda")
                else:
                    func_map.append(f"{obj}.{cb_type} → {cb_fn.strip()}()")
        if func_map:
            output += "\n### 有callback的元件/方法：\n"
            for item in func_map:
                output += f"- {item}\n"
        else:
            output += "\n- 本Tab所有UI皆為純靜態，無綁定function\n"
        output += "\n"
    return output

def extract_tab_functions_with_lambda_and_callback(pyfile="app.py"):
    """
    同時抓取每個Tab的所有UI元件，與互動function/lambda/callback mapping。
    """
    try:
        with open(pyfile, encoding="utf-8") as f:
            code = f.read()
    except Exception as e:
        return f"❌ 讀取 {pyfile} 失敗：{e}"

    # (1) 抓所有 def
    defs = set(re.findall(r"def\s+([a-zA-Z_][\w\d_]*)\s*\(", code))

    # (2) 抓所有 Tab 區塊
    tab_pattern = re.compile(
        r'with gr\.Tab(?:Item)?\(\s*[\'"](.+?)[\'"]\s*\):([\s\S]*?)(?=with gr\.Tab|with gr\.TabItem|\Z)', re.MULTILINE)

    # (3) UI元件建立（如 gr.Markdown、gr.Dataframe、gr.File、gr.Button...）
    ui_pattern = re.compile(r'(gr\.[A-Za-z_]+)\s*\(')
    # (4) 互動事件
    event_pattern = re.compile(
        r'([a-zA-Z_][\w\d_]*)\.(click|change|submit|select)\(\s*([a-zA-Z_][\w\d_]*|lambda)(.*?)(\)|$)', re.DOTALL)

    output = "# 🧩 各Tab UI元件與function/lambda/callback mapping\n\n"
    for m in tab_pattern.finditer(code):
        tab_name, tab_code = m.group(1), m.group(2)
        output += f"## {tab_name}\n"
        # 1. 抓所有 UI元件名稱
        ui_list = ui_pattern.findall(tab_code)
        # 統計有幾種元件
        ui_count = {}
        for u in ui_list:
            ui_count[u] = ui_count.get(u, 0) + 1
        if ui_list:
            output += "### 本Tab使用UI元件：\n"
            for u in sorted(set(ui_list)):
                output += f"- `{u}` x {ui_count[u]}\n"
        else:
            output += "- (本Tab沒有任何UI元件)\n"

        # 2. 掃描本Tab互動callback
        func_map = []
        for ev in event_pattern.findall(tab_code):
            obj, trigger, fn, args, _ = ev
            if fn == "lambda":
                func_map.append(f"{obj}.{trigger} → lambda（匿名）")
            else:
                func_map.append(f"{obj}.{trigger} → {fn}()")
            # callback 參數
            cb_matches = re.findall(r'(_callback|_js|_preprocess|_postprocess)\s*=\s*([a-zA-Z_][\w\d_]*|lambda[^\),]*)', args)
            for cb_type, cb_fn in cb_matches:
                if cb_fn.strip().startswith("lambda"):
                    func_map.append(f"{obj}.{cb_type} → lambda")
                else:
                    func_map.append(f"{obj}.{cb_type} → {cb_fn.strip()}()")
        if func_map:
            output += "\n### 有callback的元件/方法：\n"
            for item in func_map:
                output += f"- {item}\n"
        else:
            output += "\n- 本Tab所有UI皆為純靜態，無綁定function\n"
        output += "\n"
    return output

def extract_all_functions(pyfile="app.py"):
    import re
    try:
        with open(pyfile, encoding="utf-8") as f:
            code = f.read()
    except Exception as e:
        return f"❌ 讀取 {pyfile} 失敗：{e}"

    func_pattern = re.compile(
        r"^(def [a-zA-Z_][\w\d_]*\(.*?\):(?:\n(?:    |\t).*)*)",
        re.MULTILINE)
    output = "## 📃 所有 function 定義\n"
    matches = func_pattern.findall(code)
    if not matches:
        return "❗ 沒有抓到任何 function (def)！"
    for func in matches:
        func_name = re.match(r"def ([a-zA-Z_][\w\d_]*)", func)
        output += f"---\n### `{func_name.group(1) if func_name else '?'}()`\n"
        func_lines = func.split("\n")
        if len(func_lines) > 10:
            output += "```python\n" + "\n".join(func_lines[:10]) + "\n... (略)\n```\n"
        else:
            output += "```python\n" + func + "\n```\n"
    return output


# ======================== Gradio 多分頁主UI ========================

with gr.Blocks(css=custom_css) as demo:

    gr.Markdown("## <span style='color:#2675ff;font-weight:bold'> AI化實驗設計與數據分析平台 </span>")

    with gr.Tabs():
# 1️⃣ 標準DoE設計分布
        with gr.Tab("1️⃣ 標準DoE "):
            gr.Markdown("""
            ### 🧪 標準DoE設計分布
            - 支援自動產生參數空間內的多種經典DoE設計法（LHS, Sobol, Halton, Uniform）
            - 可視覺化設計點分布、產生對應的設計參數表
            - 支援下載CSV檔、表格內容可複製
            
            **如何使用：**
            - 1️⃣ 填寫參數名稱、範圍及步進
            - 2️⃣ 設定要產生的組數與亂數種子（可選）
            - 3️⃣ 點選「產生設計+分布圖」
            - 4️⃣ 下方各分頁可檢視不同設計法的結果、分布圖與自動摘要
            
            **注意事項：**
            - 所有參數名稱需唯一、不得重複
            - 組數愈大，運算與繪圖所需時間會增加
            - 請檢查參數範圍、步進格式是否正確
            """)
            with gr.Row():
                with gr.Column(scale=1, min_width=240):
                    with gr.Accordion("參數設定", open=True):
                        param_table = gr.Dataframe(
                            headers=["名稱", "最小值", "最大值", "間隔(step)"],
                            datatype=["str", "number", "number", "number"],
                            row_count=(3, "dynamic"),
                            col_count=(4, "fixed"),
                            value=[["A", 10, 20, 2], ["B", 100, 200, 25], ["C", 1, 2, 0.5]],
                            label="參數設定"
                        )
                        n_samples = gr.Number(label="組數", value=8, precision=0)
                        seed = gr.Number(label="亂數種子（留空或0為隨機）", value=42, precision=0)
                        btn = gr.Button("🪄 產生設計與分布圖", elem_classes=["main-btn"])
                with gr.Column(scale=2):
                    with gr.Accordion("分布結果/圖表/摘要", open=True):
                        tabs = []
                        for name in ["LHS", "Sobol", "Halton", "Uniform"]:
                            with gr.Tab(name):
                                df = gr.Dataframe(label=f"{name} 設計點表格")
                                fig = gr.Plot(label=f"{name} 設計分布")
                                csv = gr.File(label="下載CSV📥")
                                desc = gr.Markdown()
                                summary = gr.Markdown()
                                tabs.extend([df, fig, csv, desc, summary])
                        btn.click(compare_all_designs, inputs=[param_table, n_samples, seed], outputs=tabs)

# 2️⃣ 進階DoE（Box-Behnken/CCD）
        with gr.Tab("2️⃣ 進階DoE(Box-Behnken/CCD)"):
            gr.Markdown("""
            ### 🧪 進階DoE (Box-Behnken/CCD)
            - 支援 Box-Behnken 與 中心組合設計 (CCD) 兩種進階DoE設計法
            - 同步產生標準化設計矩陣與對應實際參數表
            - 提供一鍵下載CSV，方便後續AI建模
            
            **如何使用：**
            - 1️⃣ 設定各參數的最小、最大值及間隔
            - 2️⃣ 選擇所需的設計法（Box-Behnken或CCD）
            - 3️⃣ 點「產生進階DoE設計」即自動產生全部設計點
            
            **注意事項：**
            - 參數欄位請完整填寫，勿留空
            - 各參數區間需合理，否則設計點數量可能異常
            - 若需大規模設計點，運算會稍久，請耐心等待
            """)
            with gr.Row():
                with gr.Column(scale=1, min_width=240):
                    with gr.Accordion("參數設定", open=True):
                        param_table2 = gr.Dataframe(
                            headers=["名稱", "最小值", "最大值", "間隔"],
                            datatype=["str", "number", "number", "number"],
                            row_count=(3, "dynamic"),
                            col_count=(4, "fixed"),
                            value=[["溫度", 80, 120, 10], ["壓力", 1, 5, 1], ["pH", 6, 8, 1]],
                            label="參數設定"
                        )
                        design_type = gr.Radio(["Box-Behnken", "CCD"], value="Box-Behnken", label="設計法")
                        run_btn = gr.Button("🪄 產生進階DoE設計", elem_classes=["main-btn"])
                with gr.Column(scale=2):
                    with gr.Accordion("設計矩陣/參數表", open=True):
                        out_std = gr.Dataframe(label="標準化設計矩陣")
                        download_std = gr.File(label="下載標準矩陣CSV📥")
                        out_real = gr.Dataframe(label="實際參數表")
                        download_real = gr.File(label="下載參數表CSV📥")
                run_btn.click(
                    advanced_doe_with_mapping,
                    inputs=[param_table2, design_type],
                    outputs=[out_std, out_real, download_std, download_real]
                )

# 3️⃣ AI建模/特徵重要性/SHAP
        with gr.Tab("3️⃣ AI建模/特徵重要性/SHAP"):
            gr.Markdown("""
            ### 🧠 AI建模/特徵重要性/SHAP
            - 支援多種線性、非線性AI回歸模型，自動化訓練與模型評估
            - 一鍵產生預測結果、模型效能指標、特徵重要性圖、SHAP全圖解釋
            - 輕鬆檢視哪些參數對y預測最關鍵
            
            **如何使用：**
            - 1️⃣ 上傳DoE結果CSV，選擇目標y欄位
            - 2️⃣ 勾選需比較的AI模型（可多選）
            - 3️⃣ 可選擇是否顯示SHAP解釋圖
            - 4️⃣ 點「一鍵訓練+特徵重要性」，即可檢視全部結果
            
            **注意事項：**
            - 資料需為數值型且無遺漏值
            - 目標y欄位不可有重複
            - 資料量太小時，部分模型可能無法有效學習
            """)
            with gr.Row():
                with gr.Column(scale=1, min_width=320):
                    with gr.Accordion("上傳/選模型", open=True):
                        datafile = gr.File(label="上傳DoE結果CSV📤", file_types=[".csv"])
                        test_ratio = gr.Slider(label="測試集比例", value=0.3, minimum=0.1, maximum=0.5, step=0.05)
                        algo_linear = gr.CheckboxGroup(
                            ["Linear Regression", "Lasso", "Ridge", "ElasticNet"],
                            value=[], label="線性回歸"
                        )
                        algo_nonlinear = gr.CheckboxGroup(
                            ["Random Forest", "XGBoost", "LightGBM", "SVR"],
                            value=["Random Forest"], label="非線性回歸"
                        )
                        show_shap = gr.Checkbox(label="進階SHAP解釋", value=False)
                        btn_ai = gr.Button("🚀 一鍵訓練", elem_classes=["main-btn"])
                with gr.Column(scale=2):
                    with gr.Accordion("預測/重要性圖", open=True):
                        predfig = gr.Plot(label="📊 預測/實際對比圖")
                        met_df = gr.Dataframe(label="模型效能指標", datatype="auto")
                        summary = gr.Markdown(visible=True)
                        feat_summary = gr.Markdown(visible=True)
                        feat_fig = gr.Plot(label="特徵重要性")
                        shap_img = gr.Image(label="SHAP解釋圖")
                btn_ai.click(
                    lambda file, lin, nonlin, ratio, shap_flag:
                        train_and_predict_with_importance(
                            file, (lin or []) + (nonlin or []), ratio, True, shap_flag
                        ),
                    inputs=[datafile, algo_linear, algo_nonlinear, test_ratio, show_shap],
                    outputs=[predfig, met_df, summary, feat_summary, feat_fig, shap_img]
                )

# 4️⃣ 多圖資料視覺化+2D/3D/等高線
        with gr.Tab("4️⃣ 數值資料視覺化處理"):
            gr.Markdown("""
            ### 📊 多圖資料視覺化 + 2D/3D/等高線分析
            - 多種常用資料視覺化工具（熱圖、pairplot、直方圖、PCA等）
            - 支援三維散點、曲面、2D等高線等專業圖形
            - 可自選圖形、快速比較變數分布
            
            **如何使用：**
            - 1️⃣ 上傳資料CSV，選擇要產生的視覺化圖種類
            - 2️⃣ 點「產生多圖分析」可一次顯示多種圖表
            - 3️⃣ 三維分布：指定x、y、z軸變數（或目標欄位），生成3D散點/曲面圖
            - 4️⃣ 2D/3D反應面：輸入要分析的變數組合，產生等高線/曲面圖
            
            **注意事項：**
            - 欄位名稱需為英文/數字，不支援特殊字元
            - 缺值過多資料會自動忽略
            - 若圖形異常請檢查欄位型態及範圍
            """)
            # --- (1) 多圖區塊
            with gr.Row():
                with gr.Column(scale=1, min_width=230):
                    upfile2 = gr.File(label="上傳資料CSV📤")
                    plot_select = gr.CheckboxGroup(
                        ["Heatmap", "Pairplot", "Histogram", "Scatter Matrix", "PCA"],
                        value=["Heatmap", "Pairplot", "Histogram", "PCA"], label="視覺化圖"
                    )
                    vizbtn = gr.Button("📊 產生多圖分析", elem_classes=["main-btn"])
                with gr.Column(scale=3):
                    vis_outs = []
                    for i in range(5):
                        vis_outs.extend([gr.Plot(label=f"圖像{i+1}"), gr.Markdown(), gr.Markdown()])
                    recomm_card = gr.Markdown(visible=True, value="", elem_id="recommend-card")
            vizbtn.click(
                lambda f, t: (*multi_viz(f, t), auto_recommendation(f)),
                inputs=[upfile2, plot_select],
                outputs=vis_outs + [recomm_card]
            )
            # --- (2) 3D變數分布/曲面
            gr.Markdown("#### 🧬 三維分析：3D變數分布/曲面圖")
            with gr.Row():
                with gr.Column(scale=1, min_width=260):
                    columns_md = gr.Markdown(label="資料欄位", value="請先上傳資料CSV，欄位將自動顯示")
                    x_col = gr.Textbox(label="X軸欄位", placeholder="如 A", interactive=True)
                    y_col = gr.Textbox(label="Y軸欄位", placeholder="如 B", interactive=True)
                    z_col = gr.Textbox(label="Z軸欄位", placeholder="如 C", interactive=True)
                    target_col = gr.Textbox(label="目標Y/顏色", placeholder="如 y", interactive=True)
                    surface_flag = gr.Checkbox(label="顯示三維曲面", value=False)
                    plot_btn = gr.Button("🧊 生成3D散點/曲面圖", elem_classes=["main-btn"])
                with gr.Column(scale=3):
                    fig_scatter_out = gr.Plot(label="3D散點圖")
                    fig_surface_out = gr.Plot(label="3D預測曲面圖")
            upfile2.change(
                lambda file: f"資料欄位：A, B, C, y" if file else "請先上傳資料",
                inputs=[upfile2],
                outputs=[columns_md]
            )
            plot_btn.click(
                plot_3d_scatter_surface,
                inputs=[upfile2, x_col, y_col, z_col, target_col, surface_flag],
                outputs=[fig_scatter_out, fig_surface_out]
            )
            # --- (3) 二變數 3D 反應面/等高線
            gr.Markdown("#### 🧬 二變數 3D 反應面/等高線圖")
            with gr.Row():
                with gr.Column(scale=1, min_width=260):
                    columns_md2 = gr.Markdown(label="資料欄位", value="請先上傳資料CSV，欄位將自動顯示")
                    x_col2 = gr.Textbox(label="X軸欄位", placeholder="如 A", interactive=True)
                    y_col2 = gr.Textbox(label="Y軸欄位", placeholder="如 B", interactive=True)
                    z_col2 = gr.Textbox(label="目標Z(反應/產率/預測)", placeholder="如 y", interactive=True)
                    surface2_btn = gr.Button("🧊 生成3D曲面+等高線圖", elem_classes=["main-btn"])
                with gr.Column(scale=3):
                    fig_surface2 = gr.Plot(label="3D曲面圖")
                    fig_contour2 = gr.Plot(label="等高線圖")
            upfile2.change(
                lambda file: f"資料欄位：A, B, y" if file else "請先上傳資料",
                inputs=[upfile2],
                outputs=[columns_md2]
            )
            surface2_btn.click(
                plot_surface_and_contour,
                inputs=[upfile2, x_col2, y_col2, z_col2],
                outputs=[fig_surface2, fig_contour2]
            )

# 5️⃣ 超參數/貝葉斯優化
        with gr.Tab("5️⃣ 超參數/貝葉斯優化"):
            gr.Markdown("""
            ### 🏆 超參數/貝葉斯優化
            - 自動執行各類AI模型的超參數最佳化（貝葉斯法）
            - 即時繪出優化歷程，提供最佳參數組合與效能摘要
            
            **如何使用：**
            - 1️⃣ 上傳DoE實驗結果CSV
            - 2️⃣ 勾選要優化的模型（可複選），設定最大迭代次數
            - 3️⃣ 點「執行Bayes超參數優化」，自動開始優化並顯示所有歷程
            
            **注意事項：**
            - 請確認資料充足且欄位型態正確
            - 較複雜模型/高維空間下，優化需較多時間
            - 迭代次數過少時，最佳值可能不穩定
            """)
            with gr.Row():
                with gr.Column(scale=1, min_width=230):
                    with gr.Accordion("上傳/模型選擇", open=True):
                        upfile3 = gr.File(label="📤 上傳DoE結果CSV", file_types=[".csv"])
                        model_sel = gr.CheckboxGroup(
                            ["Random Forest", "XGBoost", "LightGBM", "SVR"], value=["XGBoost"], label="模型選擇（可複選）"
                        )
                        n_iter = gr.Number(label="最大迭代次數", value=16, precision=0)
                        bayes_btn = gr.Button("🚀 執行Bayes超參數優化", elem_classes=["main-btn"])
                with gr.Column(scale=2):
                    with gr.Accordion("優化歷程/結果", open=True):
                        multi_fig = gr.Plot(label="所有模型Bayes優化歷程 (CV RMSE)")
                        tab_figs = []
                        for mtype in ["Random Forest", "XGBoost", "LightGBM", "SVR"]:
                            with gr.Tab(mtype):
                                fig = gr.Plot(label=f"{mtype} 優化歷程")
                                summary = gr.Markdown()
                                best_param = gr.Markdown()
                                tab_figs.extend([fig, summary, best_param])
            bayes_btn.click(
                run_multi_bayes_optimization,
                inputs=[upfile3, model_sel, n_iter],
                outputs=[multi_fig] + tab_figs
            )

    
# 6️⃣ 智能推薦/混合策略/合併回填
        with gr.Tab("6️⃣ 智能推薦/混合策略/合併回填"):
            gr.Markdown("""
            ### 🌟 智能推薦/混合策略/合併回填
            - AI自動推薦新實驗點、混合策略智能選點
            - 一鍵搜尋最佳組合、合併新舊DoE資料
            
            **如何使用：**
            - 1️⃣ 上傳現有DoE資料，選擇推薦模式與模型
            - 2️⃣ 指定推薦點數與是否排除重複
            - 3️⃣ 點「產生推薦點組合」可直接下載推薦點
            - 4️⃣ 新舊資料合併：上傳原始與新實驗CSV，自動合併去重
            
            **注意事項：**
            - 請確認欄位名稱一致、資料格式正確
            - 合併時將以欄位名為主，自動排除重複點
            - 推薦模式可同時多選，增加實驗多樣性
            """)
            # --- (1) 多模型最佳化搜尋
            with gr.Accordion("多模型最佳化搜尋", open=True):
                with gr.Row():
                    with gr.Column(scale=1, min_width=280):
                        opt_file = gr.File(label="📤 上傳DoE資料（CSV）", file_types=[".csv"])
                        opt_model_sel = gr.CheckboxGroup(
                            ["Random Forest", "XGBoost", "LightGBM", "SVR"],
                            value=["Random Forest", "XGBoost"], label="最佳化用模型"
                        )
                        direction = gr.Radio(["最大化", "最小化"], value="最大化", label="目標")
                        is_discrete = gr.Checkbox(label="全部參數視為離散", value=False)
                        n_iter2 = gr.Number(label="搜尋迭代次數", value=28, precision=0)
                        btn_opt = gr.Button("🏆 搜尋AI預測最佳條件", elem_classes=["main-btn"])
                    with gr.Column(scale=2):
                        opt_df = gr.Dataframe(label="最佳參數組合", datatype="auto")
                        opt_txt = gr.Markdown()
                        opt_desc = gr.Markdown()
                        opt_sum = gr.Markdown()
            btn_opt.click(
                optimize_conditions,
                inputs=[opt_file, opt_model_sel, direction, is_discrete, n_iter2],
                outputs=[opt_df, opt_txt, opt_desc, opt_sum]
            )
            # --- (2) 新點推薦
            with gr.Accordion("新點推薦", open=False):
                with gr.Row():
                    with gr.Column(scale=1, min_width=280):
                        rec_file = gr.File(label="📤 請上傳DoE資料（CSV）", file_types=[".csv"])
                        recommend_mode = gr.CheckboxGroup(
                            ["探索型推薦", "混合策略推薦"],
                            value=["探索型推薦"], label="推薦模式（可複選）"
                        )
                        recommend_models = gr.CheckboxGroup(
                            ["Random Forest", "XGBoost", "LightGBM", "SVR"],
                            value=["Random Forest", "XGBoost"], label="模型選擇"
                        )
                        recommend_n = gr.Number(label="推薦點數", value=4, precision=0)
                        recommend_exclude = gr.Checkbox(label="排除現有點", value=True)
                        recommend_btn = gr.Button("🎯 產生推薦點組合", elem_classes=["main-btn"])
                    with gr.Column(scale=2):
                        recommend_out = gr.Markdown(label="推薦結果", value="")
                        recommend_download_file = gr.File(label="📥 下載推薦點（回填y用）", interactive=False)
                recommend_btn.click(
                    make_recommended_points,
                    inputs=[rec_file, recommend_models, recommend_mode, recommend_n, recommend_exclude],
                    outputs=[recommend_out, recommend_download_file]
                )
            # --- (3) 合併回填
            with gr.Accordion("合併回填資料", open=False):
                with gr.Row():
                    with gr.Column(scale=1, min_width=320):
                        base_csv = gr.File(label="原始DoE資料（CSV）")
                        new_csv = gr.File(label="新實驗資料（推薦點CSV）")
                        merge_btn = gr.Button("🧩 自動合併/去重", elem_classes=["main-btn"])
                        merge_out = gr.File(label="📥 下載合併後資料")
                merge_btn.click(
                    merge_csvs,
                    inputs=[base_csv, new_csv],
                    outputs=merge_out
                )

# AI模型回歸分析（多目標批次，支援交互作用/多模型/多y）
        with gr.Tab("7️⃣ AI模型回歸分析"):
            gr.Markdown("""
            ### 🧠 AI模型回歸分析（多目標/多模型/交互作用）
            - 批次執行多種AI模型，支援多y、多特徵交互作用
            - 各y可獨立檢視效能指標、重要性對比與預測分布
            - 自動比較前後回填資料對AI效能之提升/變化
            
            **如何使用：**
            - 1️⃣ 上傳原始DoE資料（CSV）及合併新點（CSV，可選）
            - 2️⃣ 選擇目標y欄位，可自動偵測或自行調整
            - 3️⃣ 勾選所需AI模型、設定特徵交互作用階數
            - 4️⃣ 點「批次回歸分析」，下方各分頁顯示每y結果
            
            **注意事項：**
            - 交互作用階數設定愈高，特徵數量愈多，模型訓練愈慢
            - 目標欄位過多，僅顯示前8個y的詳細結果
            - 回填資料須與原始資料欄位一致
            """)
            before_file = gr.File(label="原始DoE資料（CSV）")
            after_file = gr.File(label="🧩 合併新點DoE（CSV）")
            algo_linear = gr.CheckboxGroup(
                ["Linear Regression", "Lasso", "Ridge", "ElasticNet"],
                value=[], label="線性回歸"
            )
            algo_nonlinear = gr.CheckboxGroup(
                ["Random Forest", "XGBoost", "PLS Regression", "SVR"],
                value=["Random Forest"], label="非線性回歸"
            )
            # Degree控制
            degree_select = gr.Dropdown([1, 2, 3], value=1, label="特徵交互作用階數 (degree)")
            add_inter = gr.Checkbox(label="特徵間交互作用 (x1*x2)", value=True)
            add_y_inter = gr.Checkbox(label="y間交互作用 (y1*y2)", value=False)
        
            y_keywords = gr.Textbox(label="目標欄位關鍵字 (逗號分隔)", value="y,目標,output,target")
            y_columns = gr.CheckboxGroup(label="目標y欄位 (可複選)", choices=[], value=[])
            before_file.change(
                detect_y_columns,
                inputs=[before_file, y_keywords],
                outputs=y_columns
            )
            y_keywords.change(
                detect_y_columns,
                inputs=[before_file, y_keywords],
                outputs=y_columns
            )
            run_btn = gr.Button("🚀 批次回歸分析", elem_classes=["main-btn"])
            summary_md = gr.Dataframe(label="所有模型-y效能總表")
            y_titles, y_tables, y_feats, y_ydists = [], [], [], []
            with gr.Tabs() as tabs_container:
                for idx in range(8):
                    with gr.TabItem(f"Tab{idx+1}"):
                        y_title = gr.Markdown(value="")
                        y_table = gr.Dataframe(label="模型效能比較表")
                        with gr.Row():
                            y_feat = gr.Plot(label="特徵重要性對比圖")
                            y_ydist = gr.Plot(label="y 分布對比圖")
                        y_titles.append(y_title)
                        y_tables.append(y_table)
                        y_feats.append(y_feat)
                        y_ydists.append(y_ydist)
            run_btn.click(
                run_multi_y,
                inputs=[before_file, after_file, algo_linear, algo_nonlinear, y_columns, add_inter, add_y_inter, degree_select],
                outputs=[summary_md, *y_titles, *y_tables, *y_feats, *y_ydists]
            )

# 平台說明/索引

        with gr.Tab("⚙️平台說明與索引"):
            gr.Markdown("""
            ## 🧭 功能說明 & 導航指南
            
            本平台整合「自動實驗設計（DoE）」、「AI建模」、「資料視覺化」、「超參數優化」、「智能推薦」等模組，專為化學/材料/製程等工程應用打造，協助您**從設計點產生、數據分析到模型推薦，全流程自動化**！
            
            ---
            
            ### 🧰 主要功能分頁
            
            - **1️⃣ 標準DoE設計分布**  
              - 產生經典設計法（LHS、Sobol等）的多維參數設計點，便於建立模型訓練用基礎資料。
              - 直觀展示每種設計點分布、支援結果下載。
            
            - **2️⃣ 進階DoE(Box-Behnken/CCD)**
              - 支援正交型、中心組合等進階設計法，方便進行曲面反應分析（RSM）。
              - 產生標準化設計矩陣、對應實際參數表。
            
            - **3️⃣ AI建模/特徵重要性/SHAP**
              - 一鍵啟動多模型AI訓練、交叉驗證、特徵重要性排序、SHAP解釋。
              - 適用於尋找關鍵變數與預測能力評估。
            
            - **4️⃣ 多圖資料視覺化 + 2D/3D/等高線**
              - 提供各類視覺化工具（熱圖、pairplot、PCA、3D曲面/等高線）協助多角度理解數據分布。
              - 支援高維資料降維與多種圖表疊合分析。
            
            - **5️⃣ 超參數/貝葉斯優化**
              - 針對各種AI回歸模型自動進行超參數優化（如Random Forest、XGBoost等），即時檢視優化歷程與最佳參數。
            
            - **6️⃣ 智能推薦/混合策略/合併回填**
              - 結合AI預測與探索性搜尋，自動推薦新實驗條件，並支援資料自動合併與去重。
              - 適合推進次輪實驗設計及自動補齊數據。
            
            - **7️⃣ AI模型回歸分析（多目標/多模型/交互作用）**
              - 支援多y欄位、多模型批次建模、特徵交互作用分析，詳細呈現各y的訓練/預測效能。
            
            ---
            
            ### 📝 操作建議與常見注意事項
            
            - **所有欄位均可直接點擊或複製表格內容，並可一鍵下載分析結果**
            - **CSV資料須為純數值型（中文欄位會自動支援，但建議用英文/數字命名）**
            - **回歸與建模功能建議資料組數大於10，避免過度擬合或模型效能不穩定**
            - **每個Tab下方皆有詳細分頁說明、注意事項，建議操作前先閱讀上方說明**
            
            ---
            
            ### 🏠 應用情境
            - 多參數製程最佳化
            - 原型實驗規劃（探索型/補點/混合設計）
            - 關鍵因子敏感度分析
            - 自動推薦新實驗組合
            - AI輔助反應機制推論與模型精度提升
            
            ---
              
            **本平台持續優化，歡迎多加利用！**
            
            """)


        with gr.Tab("🧩 Function管理"):
            gr.Markdown("#### 自動偵測各分頁 UI 綁定 function / lambda / callback")
            mapping = extract_tab_functions_with_lambda_and_callback("app.py")
            gr.Markdown(value=mapping)
            gr.Markdown("#### 本程式所有 function 定義 (摘要)")
            gr.Markdown(value=extract_all_functions("app.py"))

        
    gr.Markdown("<div id='footer'> 本平台由T100團隊設計，歡迎交流建議 │ 2025_</div>")
demo.launch()