Spaces:
Configuration error
Configuration error
| import pandas as pd | |
| import numpy as np | |
| import joblib | |
| from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import os | |
# ---------------------------------------------------------
# Global configuration (unchanged)
# ---------------------------------------------------------
# Default CSV read when no DataFrame is supplied, and the folder searched for
# the per-product model files.
DATA_FILENAME = r"C:\Dokumen\One To Many_17_10_2025\MMBTU\DASHBOARD\One To Many\disagregasi_data_spraydryer_terbaru_10_17_2025.csv"
MODEL_FOLDER = r"C:\Dokumen\One To Many_17_10_2025\MMBTU\DASHBOARD\One To Many\MODEL CHECKPOINT FOR INVERSE MODEL"

# Column holding the ground-truth value the models are scored against.
TARGET_COLUMN = "GAS_MMBTU_Disaggregated"

# Values of the 'Product' column that are evaluated by default, in report order.
PRODUCT_LIST = ["BMR BASE", "CKP BASE", "CKR BASE", "CMR BASE", "MORIGRO BASE"]

# Raw input columns fed to the polynomial transformer, in this order.
FEATURES = [
    "D101330TT",
    "D102260TIC_CV",
    "D102265TIC_PV",
    "D102265TIC_CV",
    "D102266TIC",
    "D101264FTSCL",
]

PREDICTION_COLUMN = "Prediksi_Gas"

# The '{}' placeholder is filled with the product name to get the file name.
MODEL_FILENAME_TEMPLATE = "model_checkpoint_xgb_{}.joblib"
| # ========================================================= | |
| # FUNGSI UTILITAS (tetap) | |
| # ========================================================= | |
def calculate_metrics(y_true, y_pred):
    """Compute R2, RMSE and MAE for a set of predictions.

    Args:
        y_true: Observed target values.
        y_pred: Predicted values aligned with ``y_true``.

    Returns:
        A ``(r2, rmse, mae)`` tuple, where RMSE is the square root of the
        mean squared error.
    """
    return (
        r2_score(y_true, y_pred),
        np.sqrt(mean_squared_error(y_true, y_pred)),
        mean_absolute_error(y_true, y_pred),
    )
def _load_model_for_product(model_dir, product):
    """Load the model bundle saved for one product.

    The file name is built from ``MODEL_FILENAME_TEMPLATE`` and looked up
    inside ``model_dir``. The loaded object must expose ``.get`` (i.e. behave
    like a dict) and provide non-None values for the keys ``'model'``,
    ``'poly_transformer'`` and ``'poly_feature_names'``.

    Args:
        model_dir: Directory that contains the model files.
        product: Product name substituted into the file-name template.

    Returns:
        A ``(model, poly_transformer, poly_feature_names)`` tuple.

    Raises:
        FileNotFoundError: The expected model file does not exist.
        TypeError: The loaded object is not dict-like.
        KeyError: One or more required components are missing or None.
    """
    model_path = os.path.join(model_dir, MODEL_FILENAME_TEMPLATE.format(product))
    # isfile (rather than exists) also rejects a directory with that name.
    if not os.path.isfile(model_path):
        raise FileNotFoundError(f"File model tidak ditemukan: {model_path}")

    # NOTE(security): joblib.load unpickles the file and can therefore execute
    # arbitrary code; only point model_dir at trusted locations.
    deployment_bundle = joblib.load(model_path)

    # Fail with a clear message when the file holds something other than the
    # expected dict-like bundle (e.g. a bare estimator).
    getter = getattr(deployment_bundle, 'get', None)
    if not callable(getter):
        raise TypeError(
            "Bundle model harus berupa dict, tetapi ditemukan "
            f"{type(deployment_bundle).__name__}: {model_path}"
        )

    required_keys = ('model', 'poly_transformer', 'poly_feature_names')
    components = {key: getter(key) for key in required_keys}
    missing = [key for key in required_keys if components[key] is None]
    if missing:
        raise KeyError(
            "Bundle model tidak lengkap. Pastikan berisi "
            "'model', 'poly_transformer', dan 'poly_feature_names'. "
            f"Komponen yang hilang: {missing}"
        )

    return tuple(components[key] for key in required_keys)
| # ========================================================= | |
| # FUNGSI UTAMA UNTUK DASHBOARD (PERBAIKAN) | |
| # ========================================================= | |
def _empty_summary():
    """Return an empty summary table that still carries the metric columns."""
    return pd.DataFrame(columns=['Product', 'R²', 'RMSE', 'MAE'])


def _predict_for_product(df_prod, features, model_dir, product):
    """Load the product's model and predict; return None (after a warning) on failure."""
    try:
        model, poly_transformer, poly_feature_names = _load_model_for_product(model_dir, product)
    except Exception as e:
        print(f"[WARN] Gagal load model untuk {product}: {e}")
        return None

    try:
        X_raw = df_prod[features]
        X_transformed_df = pd.DataFrame(
            poly_transformer.transform(X_raw),
            columns=poly_feature_names,
            index=X_raw.index,
        )
        return model.predict(X_transformed_df)
    except Exception as e:
        print(f"[WARN] Gagal transform/predict untuk {product}: {e}")
        return None


def _build_scatter_figure(product_data, product, metrics, target_col):
    """Draw the Actual-vs-Predicted scatter plot for one product and return the Figure."""
    title = (f'{product}\n'
             f'$R^2$: {metrics["R²"]:.3f}, '
             f'RMSE: {metrics["RMSE"]:.3f}, '
             f'MAE: {metrics["MAE"]:.3f}')

    min_val = min(product_data['Actual'].min(), product_data['Predicted'].min())
    max_val = max(product_data['Actual'].max(), product_data['Predicted'].max())
    margin = (max_val - min_val) * 0.05
    if margin == 0:
        # All points share one value: fall back to a non-zero margin so the
        # axis limits are not identical.
        margin = abs(max_val) * 0.05 or 1.0
    plot_range = [min_val - margin, max_val + margin]

    fig = plt.figure(figsize=(8, 6))
    ax = fig.add_subplot(111)
    sns.scatterplot(x='Actual', y='Predicted', data=product_data, ax=ax, alpha=0.6)
    # Diagonal reference line: points on it are perfect predictions.
    ax.plot(plot_range, plot_range, 'r--', label='Ideal (Actual = Predicted)')
    ax.set_xlim(plot_range)
    ax.set_ylim(plot_range)
    ax.set_title(title)
    ax.set_xlabel(f'Actual {target_col}')
    ax.set_ylabel(f'Predicted {target_col}')
    ax.legend()
    # Deregister the figure from pyplot; the Figure object itself is still
    # returned to the caller.
    plt.close(fig)
    return fig


def evaluate_models_for_dashboard(
    data_path: str = DATA_FILENAME,
    model_dir: str = MODEL_FOLDER,
    products: list = None,
    features: list = None,
    target_col: str = TARGET_COLUMN,
    data_df=None,
):
    """Evaluate the per-product models and build one scatter plot per product.

    Data source priority:
        1. ``data_df`` when it is not None (e.g. a DataFrame uploaded through
           Streamlit); it is copied, never modified.
        2. Otherwise the CSV file at ``data_path``.

    A product is skipped (with a printed warning where applicable) when it
    has fewer than two rows, a feature column is missing, its model cannot be
    loaded, prediction fails, or the metrics cannot be computed. Rows whose
    target value is missing are excluded before scoring.

    Args:
        data_path: CSV path used when ``data_df`` is None.
        model_dir: Directory containing the saved model bundles.
        products: Product names to evaluate; defaults to ``PRODUCT_LIST``.
            Duplicates are ignored.
        features: Input feature columns; defaults to ``FEATURES``.
        target_col: Column holding the observed target values.
        data_df: Optional DataFrame that replaces the CSV file.

    Returns:
        ``(summary_df, product_figs)`` where ``summary_df`` has the columns
        ``Product``, ``R²``, ``RMSE``, ``MAE`` (ordered like ``products``)
        and ``product_figs`` maps product name to a matplotlib Figure. If the
        data cannot be loaded or nothing could be evaluated, an empty summary
        and an empty dict are returned.
    """
    # De-duplicate while keeping order: pd.Categorical rejects duplicate
    # categories, and a repeated product would be evaluated twice.
    products = list(dict.fromkeys(PRODUCT_LIST if products is None else products))
    # Force a list so that df[features] always selects multiple columns.
    features = list(FEATURES if features is None else features)

    # --- 1. Load data ---
    if data_df is not None:
        df = data_df.copy()
    else:
        try:
            df = pd.read_csv(data_path)
        except FileNotFoundError:
            print(f"[ERROR] Data file tidak ditemukan di: {data_path}")
            return _empty_summary(), {}
        except Exception as e:
            print(f"[ERROR] Gagal memuat data: {e}")
            return _empty_summary(), {}

    # Without these columns nothing can be filtered or scored; report it
    # instead of letting a KeyError escape to the dashboard.
    missing_required = [col for col in ('Product', target_col) if col not in df.columns]
    if missing_required:
        print(f"[ERROR] Kolom wajib tidak ditemukan di data: {missing_required}")
        return _empty_summary(), {}

    # Parse timestamps when present so rows can be ordered chronologically.
    has_timestamp = 'Date_time' in df.columns
    if has_timestamp:
        df['Date_time'] = pd.to_datetime(df['Date_time'], errors='coerce')

    # The column set is the same for every product, so check it once.
    missing_features = [f for f in features if f not in df.columns]

    summary_results = []
    plot_frames = {}

    # --- 2. Evaluate each product ---
    for product in products:
        df_prod = df[df['Product'] == product]
        if len(df_prod) < 2:
            continue

        if missing_features:
            print(f"[WARN] Fitur hilang untuk {product}: {missing_features}")
            continue

        # Rows without a target cannot be scored and would make the metric
        # functions raise, so leave them out.
        df_prod = df_prod.dropna(subset=[target_col])
        if len(df_prod) < 2:
            print(f"[WARN] Data target valid untuk {product} kurang dari 2 baris.")
            continue

        if has_timestamp:
            df_prod = df_prod.sort_values('Date_time')

        y_true = df_prod[target_col]
        y_pred = _predict_for_product(df_prod, features, model_dir, product)
        if y_pred is None:
            continue

        # Keep a metric failure (e.g. non-finite predictions) local to this
        # product, like the load and predict steps.
        try:
            r2, rmse, mae = calculate_metrics(y_true, y_pred)
        except ValueError as e:
            print(f"[WARN] Gagal menghitung metrik untuk {product}: {e}")
            continue

        summary_results.append({
            'Product': product,
            'R²': r2,
            'RMSE': rmse,
            'MAE': mae,
        })
        plot_frames[product] = pd.DataFrame({
            'Actual': y_true.values,
            'Predicted': y_pred,
        })

    # --- 3. Build the summary table ---
    if not summary_results:
        return _empty_summary(), {}

    summary_df = pd.DataFrame(summary_results)
    summary_df['Product'] = pd.Categorical(summary_df['Product'], categories=products, ordered=True)
    summary_df = summary_df.sort_values('Product').reset_index(drop=True)

    # --- 4. Build one figure per evaluated product ---
    sns.set_style("whitegrid")
    product_figs = {}
    for metrics in summary_results:
        product = metrics['Product']
        product_data = plot_frames[product].dropna()
        if product_data.empty:
            continue
        product_figs[product] = _build_scatter_figure(product_data, product, metrics, target_col)

    return summary_df, product_figs
| # ========================================================= | |
| # OPSIONAL: MODE CLI (tetap) | |
| # ========================================================= | |
if __name__ == "__main__":
    # Command-line mode: evaluate with the default configuration and print
    # the summary table; the returned figures are not used here.
    print("Memulai Evaluasi Performa Model Inverse...")
    summary_df, figs = evaluate_models_for_dashboard()

    print("\n" + "=" * 40)
    print("=== Ringkasan Performa Model ===")
    print("=" * 40)

    if summary_df.empty:
        print("Gagal memproses data atau model. Periksa pesan error di atas.")
    else:
        try:
            print(summary_df.to_markdown(index=False, floatfmt=".4f"))
        except ImportError:
            # to_markdown needs the optional 'tabulate' package; fall back to
            # a plain-text table instead of crashing after the evaluation.
            print(summary_df.to_string(index=False, float_format="{:.4f}".format))