"""Evaluate the per-product inverse XGBoost models for the dashboard.

For every product the module loads a saved bundle (model + polynomial
transformer), predicts the target column from the raw features, computes
R2 / RMSE / MAE and draws an actual-vs-predicted scatter plot.
"""
import os
from typing import Optional

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# =========================================================
# GLOBAL CONFIGURATION
# =========================================================
DATA_FILENAME = r'C:\Dokumen\One To Many_17_10_2025\MMBTU\DASHBOARD\One To Many\disagregasi_data_spraydryer_terbaru_10_17_2025.csv'
MODEL_FOLDER = r'C:\Dokumen\One To Many_17_10_2025\MMBTU\DASHBOARD\One To Many\MODEL CHECKPOINT FOR INVERSE MODEL'
TARGET_COLUMN = 'GAS_MMBTU_Disaggregated'
PRODUCT_LIST = [
    'BMR BASE',
    'CKP BASE',
    'CKR BASE',
    'CMR BASE',
    'MORIGRO BASE',
]
FEATURES = [
    'D101330TT',
    'D102260TIC_CV',
    'D102265TIC_PV',
    'D102265TIC_CV',
    'D102266TIC',
    'D101264FTSCL',
]
PREDICTION_COLUMN = 'Prediksi_Gas'
MODEL_FILENAME_TEMPLATE = 'model_checkpoint_xgb_{}.joblib'

# Column layout of the summary table returned to the dashboard.
_SUMMARY_COLUMNS = ['Product', 'R²', 'RMSE', 'MAE']


# =========================================================
# UTILITY FUNCTIONS
# =========================================================
def calculate_metrics(y_true, y_pred):
    """Return ``(r2, rmse, mae)`` for the given true and predicted values.

    RMSE is computed as the square root of scikit-learn's mean squared error.
    """
    r2 = r2_score(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    return r2, rmse, mae


def _load_model_for_product(model_dir, product):
    """Load the saved bundle for one product.

    The bundle file name is ``MODEL_FILENAME_TEMPLATE`` formatted with
    *product*, looked up inside *model_dir*.

    Returns:
        Tuple ``(model, poly_transformer, poly_feature_names)``.

    Raises:
        FileNotFoundError: the bundle file does not exist.
        KeyError: the bundle lacks one of the three required entries.
    """
    model_path = os.path.join(model_dir, MODEL_FILENAME_TEMPLATE.format(product))
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"File model tidak ditemukan: {model_path}")

    # NOTE(security): joblib.load unpickles the file and can therefore run
    # arbitrary code; model_dir must only contain trusted files.
    deployment_bundle = joblib.load(model_path)
    model = deployment_bundle.get('model')
    poly_transformer = deployment_bundle.get('poly_transformer')
    poly_feature_names = deployment_bundle.get('poly_feature_names')

    if model is None or poly_transformer is None or poly_feature_names is None:
        raise KeyError(
            "Bundle model tidak lengkap. Pastikan berisi "
            "'model', 'poly_transformer', dan 'poly_feature_names'."
        )
    return model, poly_transformer, poly_feature_names


def _empty_summary():
    """Return an empty summary table with the standard columns."""
    return pd.DataFrame(columns=_SUMMARY_COLUMNS)


def _load_evaluation_data(data_path, data_df):
    """Return the evaluation DataFrame, or ``None`` if the CSV cannot be read.

    A DataFrame passed in by the caller takes priority and is copied so the
    caller's object is never mutated; otherwise *data_path* is read as CSV.
    """
    if data_df is not None:
        return data_df.copy()
    try:
        return pd.read_csv(data_path)
    except FileNotFoundError:
        print(f"[ERROR] Data file tidak ditemukan di: {data_path}")
    except Exception as e:  # reporting boundary: any read failure -> empty result
        print(f"[ERROR] Gagal memuat data: {e}")
    return None


def _build_product_figure(product, product_data, metrics, target_col):
    """Draw the actual-vs-predicted scatter plot for one product.

    *product_data* must hold non-null ``Actual`` and ``Predicted`` columns and
    *metrics* the ``'R²'``, ``'RMSE'`` and ``'MAE'`` values shown in the title.
    """
    title = (f'{product}\n'
             f'$R^2$: {metrics["R²"]:.3f}, '
             f'RMSE: {metrics["RMSE"]:.3f}, '
             f'MAE: {metrics["MAE"]:.3f}')

    min_val = min(product_data['Actual'].min(), product_data['Predicted'].min())
    max_val = max(product_data['Actual'].max(), product_data['Predicted'].max())
    span = max_val - min_val
    if span > 0:
        margin = span * 0.05
    else:
        # All points coincide: a zero margin would give identical axis limits.
        margin = abs(max_val) * 0.05 or 1.0
    plot_range = [min_val - margin, max_val + margin]

    fig = plt.figure(figsize=(8, 6))
    ax = fig.add_subplot(111)
    sns.scatterplot(x='Actual', y='Predicted', data=product_data, ax=ax, alpha=0.6)
    ax.plot(plot_range, plot_range, 'r--', label='Ideal (Actual = Predicted)')
    ax.set_xlim(plot_range)
    ax.set_ylim(plot_range)
    ax.set_title(title)
    ax.set_xlabel(f'Actual {target_col}')
    ax.set_ylabel(f'Predicted {target_col}')
    ax.legend()

    # Unregister the figure from pyplot so repeated calls do not accumulate
    # open figures; the Figure object itself is still returned to the caller.
    plt.close(fig)
    return fig


# =========================================================
# MAIN FUNCTION FOR THE DASHBOARD
# =========================================================
def evaluate_models_for_dashboard(
    data_path: str = DATA_FILENAME,
    model_dir: str = MODEL_FOLDER,
    products: Optional[list] = None,
    features: Optional[list] = None,
    target_col: str = TARGET_COLUMN,
    data_df: Optional[pd.DataFrame] = None,
):
    """Evaluate every product model and build its diagnostic figure.

    Data source priority:
        1. ``data_df`` when it is not ``None`` (e.g. a dataset uploaded through
           Streamlit); it is copied, never modified in place.
        2. Otherwise the CSV file at ``data_path``.

    Args:
        data_path: CSV file used when ``data_df`` is ``None``.
        model_dir: Directory holding the per-product model bundles.
        products: Product names to evaluate; defaults to ``PRODUCT_LIST``.
            Duplicates are ignored.
        features: Raw feature columns; defaults to ``FEATURES``.
        target_col: Name of the column holding the true target values.
        data_df: Optional pre-loaded DataFrame.

    Returns:
        Tuple ``(summary_df, product_figs)``:
            * ``summary_df`` - DataFrame with columns
              ``['Product', 'R²', 'RMSE', 'MAE']``, one row per evaluated
              product, ordered as in ``products``.
            * ``product_figs`` - dict mapping product name to its
              ``matplotlib.figure.Figure``.
        When the data cannot be loaded, required columns are missing, or no
        product can be evaluated, an empty summary and an empty dict are
        returned; problems are reported with ``print``.
    """
    products = PRODUCT_LIST if products is None else products
    features = FEATURES if features is None else features
    # pd.Categorical below rejects duplicate categories, and evaluating the
    # same product twice is pointless, so de-duplicate while keeping order.
    products = list(dict.fromkeys(products))

    # --- 1. Load data ---
    df = _load_evaluation_data(data_path, data_df)
    if df is None:
        return _empty_summary(), {}

    missing_columns = [c for c in ('Product', target_col) if c not in df.columns]
    if missing_columns:
        print(f"[ERROR] Kolom wajib tidak ditemukan pada data: {missing_columns}")
        return _empty_summary(), {}

    # Parse Date_time (when present) so rows can be ordered chronologically.
    has_datetime = 'Date_time' in df.columns
    if has_datetime:
        df['Date_time'] = pd.to_datetime(df['Date_time'], errors='coerce')

    # Every per-product slice shares df's columns, so check the features once.
    missing_features = [f for f in features if f not in df.columns]

    summary_results = []
    plot_frames = {}

    # --- 2. Loop over products ---
    for product in products:
        df_prod = df[df['Product'] == product].copy()
        if len(df_prod) < 2:
            continue

        if missing_features:
            print(f"[WARN] Fitur hilang untuk {product}: {missing_features}")
            continue

        if has_datetime:
            df_prod = df_prod.sort_values('Date_time')

        # Rows without a target value cannot be scored and would make the
        # scikit-learn metrics raise, so leave them out.
        df_prod = df_prod.dropna(subset=[target_col])
        if len(df_prod) < 2:
            continue

        X_raw = df_prod[features]
        y_true = df_prod[target_col]

        # --- 2a. Load the product model ---
        try:
            model, poly_transformer, poly_feature_names = _load_model_for_product(
                model_dir, product
            )
        except Exception as e:
            print(f"[WARN] Gagal load model untuk {product}: {e}")
            continue

        # --- 2b. Transform and predict ---
        try:
            X_transformed_np = poly_transformer.transform(X_raw)
            X_transformed_df = pd.DataFrame(
                X_transformed_np,
                columns=poly_feature_names,
                index=X_raw.index,
            )
            y_pred = model.predict(X_transformed_df)
        except Exception as e:
            print(f"[WARN] Gagal transform/predict untuk {product}: {e}")
            continue

        # --- 2c. Compute metrics ---
        try:
            r2, rmse, mae = calculate_metrics(y_true, y_pred)
        except Exception as e:
            # e.g. non-numeric targets or non-finite predictions: skip this
            # product instead of aborting the whole evaluation.
            print(f"[WARN] Gagal menghitung metrik untuk {product}: {e}")
            continue

        summary_results.append({
            'Product': product,
            'R²': r2,
            'RMSE': rmse,
            'MAE': mae,
        })

        # --- 2d. Keep the data needed for the plot ---
        plot_frames[product] = pd.DataFrame({
            'Actual': y_true.values,
            'Predicted': y_pred,
            'Product': product,
        })

    # --- 3. Build summary_df ---
    if not summary_results:
        return _empty_summary(), {}

    summary_df = pd.DataFrame(summary_results)
    summary_df['Product'] = pd.Categorical(
        summary_df['Product'], categories=products, ordered=True
    )
    summary_df = summary_df.sort_values('Product').reset_index(drop=True)

    # --- 4. Generate one figure per product (for Streamlit) ---
    sns.set_style("whitegrid")
    product_figs = {}
    for metrics in summary_results:
        product = metrics['Product']
        product_data = plot_frames[product].dropna()
        if product_data.empty:
            continue
        product_figs[product] = _build_product_figure(
            product, product_data, metrics, target_col
        )

    return summary_df, product_figs


def _format_summary(summary_df):
    """Render the summary table as text with 4 decimals.

    ``DataFrame.to_markdown`` depends on the optional ``tabulate`` package;
    fall back to ``to_string`` when it is not installed.
    """
    try:
        return summary_df.to_markdown(index=False, floatfmt=".4f")
    except ImportError:
        return summary_df.to_string(
            index=False, float_format=lambda value: f"{value:.4f}"
        )


# =========================================================
# OPTIONAL: CLI MODE
# =========================================================
if __name__ == "__main__":
    print("Memulai Evaluasi Performa Model Inverse...")
    summary_df, figs = evaluate_models_for_dashboard()

    print("\n" + "="*40)
    print("=== Ringkasan Performa Model ===")
    print("="*40)

    if not summary_df.empty:
        print(_format_summary(summary_df))
    else:
        print("Gagal memproses data atau model. Periksa pesan error di atas.")