# kmi_dashboard / MonitoringModel.py
# delima1234-Sunbright
# KMI Dashboard
# 5e0490f
import pandas as pd
import numpy as np
import joblib
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
import seaborn as sns
import os
# =========================================================
# GLOBAL CONFIGURATION (unchanged)
# =========================================================

# Default CSV read by evaluate_models_for_dashboard() when no DataFrame is
# passed in.
DATA_FILENAME = r'C:\Dokumen\One To Many_17_10_2025\MMBTU\DASHBOARD\One To Many\disagregasi_data_spraydryer_terbaru_10_17_2025.csv'

# Default folder searched for the per-product model checkpoint files.
MODEL_FOLDER = r'C:\Dokumen\One To Many_17_10_2025\MMBTU\DASHBOARD\One To Many\MODEL CHECKPOINT FOR INVERSE MODEL'

# Column holding the actual values the predictions are scored against.
TARGET_COLUMN = 'GAS_MMBTU_Disaggregated'

# Values of the 'Product' column evaluated by default, in display order.
PRODUCT_LIST = [
    'BMR BASE',
    'CKP BASE',
    'CKR BASE',
    'CMR BASE',
    'MORIGRO BASE',
]

# Raw input columns handed to each product's poly_transformer.
FEATURES = [
    'D101330TT',
    'D102260TIC_CV',
    'D102265TIC_PV',
    'D102265TIC_CV',
    'D102266TIC',
    'D101264FTSCL',
]

PREDICTION_COLUMN = 'Prediksi_Gas'

# Checkpoint file name; '{}' is filled with the product name.
MODEL_FILENAME_TEMPLATE = 'model_checkpoint_xgb_{}.joblib'
# =========================================================
# UTILITY FUNCTIONS (unchanged)
# =========================================================
def calculate_metrics(y_true, y_pred):
    """Return the regression scores ``(r2, rmse, mae)`` of a prediction.

    RMSE is the square root of scikit-learn's mean squared error; R² and MAE
    come straight from ``r2_score`` and ``mean_absolute_error``.
    """
    scores = (
        r2_score(y_true, y_pred),
        np.sqrt(mean_squared_error(y_true, y_pred)),
        mean_absolute_error(y_true, y_pred),
    )
    return scores
def _load_model_for_product(model_dir, product):
    """Load one product's checkpoint bundle and unpack its three parts.

    The file name is ``MODEL_FILENAME_TEMPLATE`` formatted with ``product``
    and is looked up inside ``model_dir``.

    Returns:
        The tuple ``(model, poly_transformer, poly_feature_names)`` read from
        the bundle entries of the same names.

    Raises:
        FileNotFoundError: The checkpoint file does not exist.
        KeyError: One of the three bundle entries is absent or ``None``.
    """
    checkpoint_path = os.path.join(model_dir, MODEL_FILENAME_TEMPLATE.format(product))
    if not os.path.exists(checkpoint_path):
        raise FileNotFoundError(f"File model tidak ditemukan: {checkpoint_path}")

    # NOTE(review): joblib.load unpickles arbitrary objects -- only point this
    # at checkpoint folders you trust.
    bundle = joblib.load(checkpoint_path)

    required_keys = ('model', 'poly_transformer', 'poly_feature_names')
    parts = tuple(bundle.get(key) for key in required_keys)
    if any(part is None for part in parts):
        raise KeyError(
            "Bundle model tidak lengkap. Pastikan berisi "
            "'model', 'poly_transformer', dan 'poly_feature_names'."
        )
    return parts
# =========================================================
# MAIN FUNCTION FOR THE DASHBOARD (REVISED)
# =========================================================
_SUMMARY_COLUMNS = ['Product', 'R²', 'RMSE', 'MAE']


def _empty_summary():
    """Return an empty summary DataFrame carrying the standard columns."""
    return pd.DataFrame(columns=_SUMMARY_COLUMNS)


def _load_evaluation_data(data_path, data_df):
    """Return a copy of ``data_df``, else the CSV at ``data_path``; None on failure."""
    if data_df is not None:
        # Copy so the Date_time conversion done later never touches the
        # caller's DataFrame.
        return data_df.copy()
    try:
        return pd.read_csv(data_path)
    except FileNotFoundError:
        print(f"[ERROR] Data file tidak ditemukan di: {data_path}")
    except Exception as e:
        print(f"[ERROR] Gagal memuat data: {e}")
    return None


def _evaluate_product(df, product, features, target_col, model_dir):
    """Score one product; return ``(metrics_dict, plot_df)`` or None if skipped."""
    df_prod = df[df['Product'] == product]

    # Rows without an actual value cannot be scored; scikit-learn's metric
    # functions raise on NaN input, so drop them up front.
    df_prod = df_prod.dropna(subset=[target_col]).copy()
    if len(df_prod) < 2:
        return None

    missing_features = [f for f in features if f not in df_prod.columns]
    if missing_features:
        print(f"[WARN] Fitur hilang untuk {product}: {missing_features}")
        return None

    if 'Date_time' in df_prod.columns:
        df_prod = df_prod.sort_values('Date_time')

    X_raw = df_prod[features]
    y_true = df_prod[target_col]

    try:
        model, poly_transformer, poly_feature_names = _load_model_for_product(model_dir, product)
    except Exception as e:
        print(f"[WARN] Gagal load model untuk {product}: {e}")
        return None

    try:
        X_transformed_df = pd.DataFrame(
            poly_transformer.transform(X_raw),
            columns=poly_feature_names,
            index=X_raw.index,
        )
        y_pred = model.predict(X_transformed_df)
    except Exception as e:
        print(f"[WARN] Gagal transform/predict untuk {product}: {e}")
        return None

    # Guarded like the steps above so one product with unusable values is
    # skipped instead of aborting the whole evaluation.
    try:
        r2, rmse, mae = calculate_metrics(y_true, y_pred)
    except Exception as e:
        print(f"[WARN] Gagal menghitung metrik untuk {product}: {e}")
        return None

    metrics = {'Product': product, 'R²': r2, 'RMSE': rmse, 'MAE': mae}
    plot_df = pd.DataFrame({'Actual': y_true.values, 'Predicted': y_pred})
    return metrics, plot_df


def _build_scatter_figure(product, plot_df, metrics, target_col):
    """Draw the Actual-vs-Predicted scatter for one product and return the Figure."""
    title = (f'{product}\n'
             f'$R^2$: {metrics["R²"]:.3f}, '
             f'RMSE: {metrics["RMSE"]:.3f}, '
             f'MAE: {metrics["MAE"]:.3f}')

    min_val = min(plot_df['Actual'].min(), plot_df['Predicted'].min())
    max_val = max(plot_df['Actual'].max(), plot_df['Predicted'].max())
    margin = (max_val - min_val) * 0.05
    if margin == 0:
        # Every point has the same value: fall back to a non-zero margin so
        # the two axis limits are not identical.
        margin = abs(max_val) * 0.05 or 1.0
    plot_range = [min_val - margin, max_val + margin]

    fig = plt.figure(figsize=(8, 6))
    ax = fig.add_subplot(111)
    sns.scatterplot(x='Actual', y='Predicted', data=plot_df, ax=ax, alpha=0.6)
    ax.plot(plot_range, plot_range, 'r--', label='Ideal (Actual = Predicted)')
    ax.set_xlim(plot_range)
    ax.set_ylim(plot_range)
    ax.set_title(title)
    ax.set_xlabel(f'Actual {target_col}')
    ax.set_ylabel(f'Predicted {target_col}')
    ax.legend()

    # Unregister the figure from pyplot so repeated calls do not pile up open
    # figures; the Figure object itself is still returned to the caller.
    plt.close(fig)
    return fig


def evaluate_models_for_dashboard(
    data_path: str = DATA_FILENAME,
    model_dir: str = MODEL_FOLDER,
    products: list = None,
    features: list = None,
    target_col: str = TARGET_COLUMN,
    data_df=None,  # optional DataFrame passed in directly (e.g. a Streamlit upload)
):
    """Evaluate every product's model and build the dashboard outputs.

    Data source priority:
        1. ``data_df`` when it is not None (a copy is used).
        2. Otherwise the CSV at ``data_path``.

    For each product the rows whose ``'Product'`` column equals the product
    are selected, rows with a missing target are dropped, the product's
    checkpoint is loaded from ``model_dir``, the features are transformed and
    predicted, and R²/RMSE/MAE are computed. A product is skipped (with a
    printed warning where applicable) when it has fewer than two usable rows,
    lacks a feature column, or any of load/transform/predict/metric steps
    raises.

    Args:
        data_path: CSV path used when ``data_df`` is None.
        model_dir: Folder containing the per-product checkpoints.
        products: Products to evaluate; defaults to ``PRODUCT_LIST``.
            Duplicates are ignored.
        features: Raw feature columns; defaults to ``FEATURES``.
        target_col: Column with the actual values.
        data_df: Optional DataFrame used instead of reading ``data_path``.

    Returns:
        ``(summary_df, product_figs)`` where ``summary_df`` has the columns
        ``['Product', 'R²', 'RMSE', 'MAE']`` ordered like ``products`` and
        ``product_figs`` maps product name to its matplotlib Figure. When the
        data cannot be loaded, a required column is missing, or no product
        could be evaluated, an empty summary and an empty dict are returned.
    """
    # De-duplicate while keeping order: pd.Categorical below rejects repeated
    # categories, and evaluating the same product twice is wasted work.
    products = list(dict.fromkeys(PRODUCT_LIST if products is None else products))
    features = list(FEATURES if features is None else features)

    # --- 1. Load data ---
    df = _load_evaluation_data(data_path, data_df)
    if df is None:
        return _empty_summary(), {}

    missing_columns = [col for col in ('Product', target_col) if col not in df.columns]
    if missing_columns:
        # Report the problem the same way as an unreadable file instead of
        # letting a KeyError escape to the dashboard.
        print(f"[ERROR] Kolom wajib tidak ditemukan di data: {missing_columns}")
        return _empty_summary(), {}

    # Date_time is optional; when present it is parsed so rows can be sorted
    # chronologically per product.
    if 'Date_time' in df.columns:
        df['Date_time'] = pd.to_datetime(df['Date_time'], errors='coerce')

    # --- 2. Evaluate each product ---
    evaluated = []
    for product in products:
        outcome = _evaluate_product(df, product, features, target_col, model_dir)
        if outcome is not None:
            evaluated.append(outcome)

    # --- 3. Build summary_df ---
    if not evaluated:
        return _empty_summary(), {}

    summary_df = pd.DataFrame([metrics for metrics, _ in evaluated])
    summary_df['Product'] = pd.Categorical(summary_df['Product'], categories=products, ordered=True)
    summary_df = summary_df.sort_values('Product').reset_index(drop=True)

    # --- 4. Build one figure per evaluated product ---
    sns.set_style("whitegrid")
    product_figs = {}
    for metrics, plot_df in evaluated:
        plot_df = plot_df.dropna()
        if plot_df.empty:
            continue
        product_figs[metrics['Product']] = _build_scatter_figure(
            metrics['Product'], plot_df, metrics, target_col
        )

    return summary_df, product_figs
# =========================================================
# OPTIONAL: CLI MODE (unchanged)
# =========================================================
if __name__ == "__main__":
    # Command-line entry point: run the evaluation with the default paths and
    # print the metric summary; the figures are not used here.
    print("Memulai Evaluasi Performa Model Inverse...")
    summary_df, _figs = evaluate_models_for_dashboard()

    print("\n" + "="*40)
    print("=== Ringkasan Performa Model ===")
    print("="*40)

    if not summary_df.empty:
        try:
            summary_table = summary_df.to_markdown(index=False, floatfmt=".4f")
        except ImportError:
            # DataFrame.to_markdown needs the optional 'tabulate' package;
            # fall back to a plain-text table rather than crashing after the
            # evaluation has already finished.
            summary_table = summary_df.to_string(index=False, float_format="{:.4f}".format)
        print(summary_table)
    else:
        print("Gagal memproses data atau model. Periksa pesan error di atas.")