Delima1 committed on
Commit
8d43b3c
·
verified ·
1 Parent(s): 3039730

Update src/MonitoringModel.py

Browse files
Files changed (1) hide show
  1. src/MonitoringModel.py +244 -244
src/MonitoringModel.py CHANGED
@@ -1,245 +1,245 @@
1
- import pandas as pd
2
- import numpy as np
3
- import joblib
4
- from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
5
- import matplotlib.pyplot as plt
6
- import seaborn as sns
7
- import os
8
-
9
- # =========================================================
10
- # KONFIGURASI GLOBAL (tetap)
11
- # =========================================================
12
- DATA_FILENAME = r'C:\Dokumen\One To Many_17_10_2025\MMBTU\DASHBOARD\One To Many\disagregasi_data_spraydryer_terbaru_10_17_2025.csv'
13
- MODEL_FOLDER = r'C:\Dokumen\One To Many_17_10_2025\MMBTU\DASHBOARD\One To Many\MODEL CHECKPOINT FOR INVERSE MODEL'
14
- TARGET_COLUMN = 'GAS_MMBTU_Disaggregated'
15
-
16
- PRODUCT_LIST = [
17
- 'BMR BASE',
18
- 'CKP BASE',
19
- 'CKR BASE',
20
- 'CMR BASE',
21
- 'MORIGRO BASE'
22
- ]
23
-
24
- FEATURES = [
25
- 'D101330TT',
26
- 'D102260TIC_CV',
27
- 'D102265TIC_PV',
28
- 'D102265TIC_CV',
29
- 'D102266TIC',
30
- 'D101264FTSCL'
31
- ]
32
-
33
- PREDICTION_COLUMN = 'Prediksi_Gas'
34
- MODEL_FILENAME_TEMPLATE = 'model_checkpoint_xgb_{}.joblib'
35
-
36
-
37
- # =========================================================
38
- # FUNGSI UTILITAS (tetap)
39
- # =========================================================
40
- def calculate_metrics(y_true, y_pred):
41
- """Menghitung R2, RMSE, dan MAE."""
42
- r2 = r2_score(y_true, y_pred)
43
- rmse = np.sqrt(mean_squared_error(y_true, y_pred))
44
- mae = mean_absolute_error(y_true, y_pred)
45
- return r2, rmse, mae
46
-
47
-
48
- def _load_model_for_product(model_dir, product):
49
- """Load model XGBoost + poly_transformer untuk satu produk."""
50
- model_path = os.path.join(model_dir, MODEL_FILENAME_TEMPLATE.format(product))
51
- if not os.path.exists(model_path):
52
- raise FileNotFoundError(f"File model tidak ditemukan: {model_path}")
53
-
54
- deployment_bundle = joblib.load(model_path)
55
-
56
- model = deployment_bundle.get('model')
57
- poly_transformer = deployment_bundle.get('poly_transformer')
58
- poly_feature_names = deployment_bundle.get('poly_feature_names')
59
-
60
- if model is None or poly_transformer is None or poly_feature_names is None:
61
- raise KeyError(
62
- "Bundle model tidak lengkap. Pastikan berisi "
63
- "'model', 'poly_transformer', dan 'poly_feature_names'."
64
- )
65
-
66
- return model, poly_transformer, poly_feature_names
67
-
68
-
69
- # =========================================================
70
- # FUNGSI UTAMA UNTUK DASHBOARD (PERBAIKAN)
71
- # =========================================================
72
- def evaluate_models_for_dashboard(
73
- data_path: str = DATA_FILENAME,
74
- model_dir: str = MODEL_FOLDER,
75
- products: list = None,
76
- features: list = None,
77
- target_col: str = TARGET_COLUMN,
78
- data_df=None, # <--- NEW: bisa kirim DataFrame langsung dari Streamlit
79
- ):
80
- """
81
- Fungsi utama yang melakukan evaluasi performa.
82
- Mengembalikan:
83
- - summary_df: DataFrame berisi [Product, R², RMSE, MAE]
84
- - product_figs: dict {product_name: matplotlib.figure.Figure}
85
-
86
- Prioritas data:
87
- 1) Jika data_df tidak None -> gunakan data_df (upload dari Streamlit)
88
- 2) Jika data_df None -> baca dari data_path (CSV default)
89
- """
90
- if products is None:
91
- products = PRODUCT_LIST
92
- if features is None:
93
- features = FEATURES
94
-
95
- # --- 1. Load data ---
96
- if data_df is not None:
97
- # Pakai dataset yang di-upload user (sudah dalam bentuk DataFrame)
98
- df = data_df.copy()
99
- else:
100
- # Fallback: baca dari CSV path seperti sebelumnya
101
- try:
102
- df = pd.read_csv(data_path)
103
- except FileNotFoundError:
104
- print(f"[ERROR] Data file tidak ditemukan di: {data_path}")
105
- return pd.DataFrame(columns=['Product', 'R²', 'RMSE', 'MAE']), {}
106
- except Exception as e:
107
- print(f"[ERROR] Gagal memuat data: {e}")
108
- return pd.DataFrame(columns=['Product', 'R²', 'RMSE', 'MAE']), {}
109
-
110
- # Pastikan Date_time ada dan dalam bentuk datetime (kalau mau pakai time-series)
111
- if 'Date_time' in df.columns:
112
- df['Date_time'] = pd.to_datetime(df['Date_time'], errors='coerce')
113
-
114
- summary_results = []
115
- plot_data_list = []
116
-
117
- # --- 2. Loop per produk ---
118
- for product in products:
119
- df_prod = df[df['Product'] == product].copy()
120
-
121
- if df_prod.empty or len(df_prod) < 2:
122
- continue
123
-
124
- missing_features = [f for f in features if f not in df_prod.columns]
125
- if missing_features:
126
- print(f"[WARN] Fitur hilang untuk {product}: {missing_features}")
127
- continue
128
-
129
- if 'Date_time' in df_prod.columns:
130
- df_prod = df_prod.sort_values('Date_time')
131
-
132
- X_raw = df_prod[features]
133
- y_true = df_prod[target_col]
134
-
135
- # --- 2a. Load model produk ---
136
- try:
137
- model, poly_transformer, poly_feature_names = _load_model_for_product(model_dir, product)
138
- except Exception as e:
139
- print(f"[WARN] Gagal load model untuk {product}: {e}")
140
- continue
141
-
142
- # --- 2b. Transformasi dan prediksi ---
143
- try:
144
- X_transformed_np = poly_transformer.transform(X_raw)
145
- X_transformed_df = pd.DataFrame(
146
- X_transformed_np,
147
- columns=poly_feature_names,
148
- index=X_raw.index
149
- )
150
- y_pred = model.predict(X_transformed_df)
151
- except Exception as e:
152
- print(f"[WARN] Gagal transform/predict untuk {product}: {e}")
153
- continue
154
-
155
- # --- 2c. Hitung metrik ---
156
- r2, rmse, mae = calculate_metrics(y_true, y_pred)
157
- summary_results.append({
158
- 'Product': product,
159
- 'R²': r2,
160
- 'RMSE': rmse,
161
- 'MAE': mae
162
- })
163
-
164
- # --- 2d. Siapkan data untuk plot ---
165
- plot_df = pd.DataFrame({
166
- 'Actual': y_true.values,
167
- 'Predicted': y_pred,
168
- 'Product': product
169
- })
170
- plot_data_list.append(plot_df)
171
-
172
- # --- 3. Buat summary_df ---
173
- if summary_results:
174
- summary_df = pd.DataFrame(summary_results)
175
- summary_df['Product'] = pd.Categorical(summary_df['Product'], categories=products, ordered=True)
176
- summary_df = summary_df.sort_values('Product').reset_index(drop=True)
177
- else:
178
- summary_df = pd.DataFrame(columns=['Product', 'R²', 'RMSE', 'MAE'])
179
- return summary_df, {}
180
-
181
- product_figs = {}
182
-
183
- # --- 4. Generate Figures (per produk, untuk Streamlit) ---
184
- if plot_data_list:
185
- all_plot_data = pd.concat(plot_data_list)
186
- products_evaluated = summary_df['Product'].tolist()
187
-
188
- sns.set_style("whitegrid")
189
-
190
- for product in products_evaluated:
191
- product_data = all_plot_data[all_plot_data['Product'] == product].dropna()
192
- if product_data.empty:
193
- continue
194
-
195
- metrics = summary_df[summary_df['Product'] == product].iloc[0]
196
- title = (f'{product}\n'
197
- f'$R^2$: {metrics["R²"]:.3f}, '
198
- f'RMSE: {metrics["RMSE"]:.3f}, '
199
- f'MAE: {metrics["MAE"]:.3f}')
200
-
201
- min_val = min(product_data['Actual'].min(), product_data['Predicted'].min())
202
- max_val = max(product_data['Actual'].max(), product_data['Predicted'].max())
203
- margin = (max_val - min_val) * 0.05
204
- plot_range = [min_val - margin, max_val + margin]
205
-
206
- # Figure tunggal per produk
207
- fig_single = plt.figure(figsize=(8, 6))
208
- ax_single = fig_single.add_subplot(111)
209
- sns.scatterplot(
210
- x='Actual',
211
- y='Predicted',
212
- data=product_data,
213
- ax=ax_single,
214
- alpha=0.6
215
- )
216
- ax_single.plot(plot_range, plot_range, 'r--', label='Ideal (Actual = Predicted)')
217
- ax_single.set_xlim(plot_range)
218
- ax_single.set_ylim(plot_range)
219
- ax_single.set_title(title)
220
- ax_single.set_xlabel(f'Actual {target_col}')
221
- ax_single.set_ylabel(f'Predicted {target_col}')
222
- ax_single.legend()
223
-
224
- product_figs[product] = fig_single
225
- plt.close(fig_single)
226
-
227
- return summary_df, product_figs
228
-
229
- # =========================================================
230
- # OPSIONAL: MODE CLI (tetap)
231
- # =========================================================
232
- if __name__ == "__main__":
233
-
234
- print("Memulai Evaluasi Performa Model Inverse...")
235
-
236
- summary_df, figs = evaluate_models_for_dashboard()
237
-
238
- print("\n" + "="*40)
239
- print("=== Ringkasan Performa Model ===")
240
- print("="*40)
241
-
242
- if not summary_df.empty:
243
- print(summary_df.to_markdown(index=False, floatfmt=".4f"))
244
- else:
245
  print("Gagal memproses data atau model. Periksa pesan error di atas.")
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import joblib
4
+ from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
5
+ import matplotlib.pyplot as plt
6
+ import seaborn as sns
7
+ import os
8
+
9
+ # =========================================================
10
+ # KONFIGURASI GLOBAL (tetap)
11
+ # =========================================================
12
+ DATA_FILENAME = r'src/disagregasi_data_spraydryer_terbaru_10_17_2025.csv'
13
+ MODEL_FOLDER = r'src/MODEL CHECKPOINT FOR INVERSE MODEL'
14
+ TARGET_COLUMN = 'GAS_MMBTU_Disaggregated'
15
+
16
+ PRODUCT_LIST = [
17
+ 'BMR BASE',
18
+ 'CKP BASE',
19
+ 'CKR BASE',
20
+ 'CMR BASE',
21
+ 'MORIGRO BASE'
22
+ ]
23
+
24
+ FEATURES = [
25
+ 'D101330TT',
26
+ 'D102260TIC_CV',
27
+ 'D102265TIC_PV',
28
+ 'D102265TIC_CV',
29
+ 'D102266TIC',
30
+ 'D101264FTSCL'
31
+ ]
32
+
33
+ PREDICTION_COLUMN = 'Prediksi_Gas'
34
+ MODEL_FILENAME_TEMPLATE = 'model_checkpoint_xgb_{}.joblib'
35
+
36
+
37
+ # =========================================================
38
+ # FUNGSI UTILITAS (tetap)
39
+ # =========================================================
40
+ def calculate_metrics(y_true, y_pred):
41
+ """Menghitung R2, RMSE, dan MAE."""
42
+ r2 = r2_score(y_true, y_pred)
43
+ rmse = np.sqrt(mean_squared_error(y_true, y_pred))
44
+ mae = mean_absolute_error(y_true, y_pred)
45
+ return r2, rmse, mae
46
+
47
+
48
+ def _load_model_for_product(model_dir, product):
49
+ """Load model XGBoost + poly_transformer untuk satu produk."""
50
+ model_path = os.path.join(model_dir, MODEL_FILENAME_TEMPLATE.format(product))
51
+ if not os.path.exists(model_path):
52
+ raise FileNotFoundError(f"File model tidak ditemukan: {model_path}")
53
+
54
+ deployment_bundle = joblib.load(model_path)
55
+
56
+ model = deployment_bundle.get('model')
57
+ poly_transformer = deployment_bundle.get('poly_transformer')
58
+ poly_feature_names = deployment_bundle.get('poly_feature_names')
59
+
60
+ if model is None or poly_transformer is None or poly_feature_names is None:
61
+ raise KeyError(
62
+ "Bundle model tidak lengkap. Pastikan berisi "
63
+ "'model', 'poly_transformer', dan 'poly_feature_names'."
64
+ )
65
+
66
+ return model, poly_transformer, poly_feature_names
67
+
68
+
69
+ # =========================================================
70
+ # FUNGSI UTAMA UNTUK DASHBOARD (PERBAIKAN)
71
+ # =========================================================
72
+ def evaluate_models_for_dashboard(
73
+ data_path: str = DATA_FILENAME,
74
+ model_dir: str = MODEL_FOLDER,
75
+ products: list = None,
76
+ features: list = None,
77
+ target_col: str = TARGET_COLUMN,
78
+ data_df=None, # <--- NEW: bisa kirim DataFrame langsung dari Streamlit
79
+ ):
80
+ """
81
+ Fungsi utama yang melakukan evaluasi performa.
82
+ Mengembalikan:
83
+ - summary_df: DataFrame berisi [Product, R², RMSE, MAE]
84
+ - product_figs: dict {product_name: matplotlib.figure.Figure}
85
+
86
+ Prioritas data:
87
+ 1) Jika data_df tidak None -> gunakan data_df (upload dari Streamlit)
88
+ 2) Jika data_df None -> baca dari data_path (CSV default)
89
+ """
90
+ if products is None:
91
+ products = PRODUCT_LIST
92
+ if features is None:
93
+ features = FEATURES
94
+
95
+ # --- 1. Load data ---
96
+ if data_df is not None:
97
+ # Pakai dataset yang di-upload user (sudah dalam bentuk DataFrame)
98
+ df = data_df.copy()
99
+ else:
100
+ # Fallback: baca dari CSV path seperti sebelumnya
101
+ try:
102
+ df = pd.read_csv(data_path)
103
+ except FileNotFoundError:
104
+ print(f"[ERROR] Data file tidak ditemukan di: {data_path}")
105
+ return pd.DataFrame(columns=['Product', 'R²', 'RMSE', 'MAE']), {}
106
+ except Exception as e:
107
+ print(f"[ERROR] Gagal memuat data: {e}")
108
+ return pd.DataFrame(columns=['Product', 'R²', 'RMSE', 'MAE']), {}
109
+
110
+ # Pastikan Date_time ada dan dalam bentuk datetime (kalau mau pakai time-series)
111
+ if 'Date_time' in df.columns:
112
+ df['Date_time'] = pd.to_datetime(df['Date_time'], errors='coerce')
113
+
114
+ summary_results = []
115
+ plot_data_list = []
116
+
117
+ # --- 2. Loop per produk ---
118
+ for product in products:
119
+ df_prod = df[df['Product'] == product].copy()
120
+
121
+ if df_prod.empty or len(df_prod) < 2:
122
+ continue
123
+
124
+ missing_features = [f for f in features if f not in df_prod.columns]
125
+ if missing_features:
126
+ print(f"[WARN] Fitur hilang untuk {product}: {missing_features}")
127
+ continue
128
+
129
+ if 'Date_time' in df_prod.columns:
130
+ df_prod = df_prod.sort_values('Date_time')
131
+
132
+ X_raw = df_prod[features]
133
+ y_true = df_prod[target_col]
134
+
135
+ # --- 2a. Load model produk ---
136
+ try:
137
+ model, poly_transformer, poly_feature_names = _load_model_for_product(model_dir, product)
138
+ except Exception as e:
139
+ print(f"[WARN] Gagal load model untuk {product}: {e}")
140
+ continue
141
+
142
+ # --- 2b. Transformasi dan prediksi ---
143
+ try:
144
+ X_transformed_np = poly_transformer.transform(X_raw)
145
+ X_transformed_df = pd.DataFrame(
146
+ X_transformed_np,
147
+ columns=poly_feature_names,
148
+ index=X_raw.index
149
+ )
150
+ y_pred = model.predict(X_transformed_df)
151
+ except Exception as e:
152
+ print(f"[WARN] Gagal transform/predict untuk {product}: {e}")
153
+ continue
154
+
155
+ # --- 2c. Hitung metrik ---
156
+ r2, rmse, mae = calculate_metrics(y_true, y_pred)
157
+ summary_results.append({
158
+ 'Product': product,
159
+ 'R²': r2,
160
+ 'RMSE': rmse,
161
+ 'MAE': mae
162
+ })
163
+
164
+ # --- 2d. Siapkan data untuk plot ---
165
+ plot_df = pd.DataFrame({
166
+ 'Actual': y_true.values,
167
+ 'Predicted': y_pred,
168
+ 'Product': product
169
+ })
170
+ plot_data_list.append(plot_df)
171
+
172
+ # --- 3. Buat summary_df ---
173
+ if summary_results:
174
+ summary_df = pd.DataFrame(summary_results)
175
+ summary_df['Product'] = pd.Categorical(summary_df['Product'], categories=products, ordered=True)
176
+ summary_df = summary_df.sort_values('Product').reset_index(drop=True)
177
+ else:
178
+ summary_df = pd.DataFrame(columns=['Product', 'R²', 'RMSE', 'MAE'])
179
+ return summary_df, {}
180
+
181
+ product_figs = {}
182
+
183
+ # --- 4. Generate Figures (per produk, untuk Streamlit) ---
184
+ if plot_data_list:
185
+ all_plot_data = pd.concat(plot_data_list)
186
+ products_evaluated = summary_df['Product'].tolist()
187
+
188
+ sns.set_style("whitegrid")
189
+
190
+ for product in products_evaluated:
191
+ product_data = all_plot_data[all_plot_data['Product'] == product].dropna()
192
+ if product_data.empty:
193
+ continue
194
+
195
+ metrics = summary_df[summary_df['Product'] == product].iloc[0]
196
+ title = (f'{product}\n'
197
+ f'$R^2$: {metrics["R²"]:.3f}, '
198
+ f'RMSE: {metrics["RMSE"]:.3f}, '
199
+ f'MAE: {metrics["MAE"]:.3f}')
200
+
201
+ min_val = min(product_data['Actual'].min(), product_data['Predicted'].min())
202
+ max_val = max(product_data['Actual'].max(), product_data['Predicted'].max())
203
+ margin = (max_val - min_val) * 0.05
204
+ plot_range = [min_val - margin, max_val + margin]
205
+
206
+ # Figure tunggal per produk
207
+ fig_single = plt.figure(figsize=(8, 6))
208
+ ax_single = fig_single.add_subplot(111)
209
+ sns.scatterplot(
210
+ x='Actual',
211
+ y='Predicted',
212
+ data=product_data,
213
+ ax=ax_single,
214
+ alpha=0.6
215
+ )
216
+ ax_single.plot(plot_range, plot_range, 'r--', label='Ideal (Actual = Predicted)')
217
+ ax_single.set_xlim(plot_range)
218
+ ax_single.set_ylim(plot_range)
219
+ ax_single.set_title(title)
220
+ ax_single.set_xlabel(f'Actual {target_col}')
221
+ ax_single.set_ylabel(f'Predicted {target_col}')
222
+ ax_single.legend()
223
+
224
+ product_figs[product] = fig_single
225
+ plt.close(fig_single)
226
+
227
+ return summary_df, product_figs
228
+
229
+ # =========================================================
230
+ # OPSIONAL: MODE CLI (tetap)
231
+ # =========================================================
232
+ if __name__ == "__main__":
233
+
234
+ print("Memulai Evaluasi Performa Model Inverse...")
235
+
236
+ summary_df, figs = evaluate_models_for_dashboard()
237
+
238
+ print("\n" + "="*40)
239
+ print("=== Ringkasan Performa Model ===")
240
+ print("="*40)
241
+
242
+ if not summary_df.empty:
243
+ print(summary_df.to_markdown(index=False, floatfmt=".4f"))
244
+ else:
245
  print("Gagal memproses data atau model. Periksa pesan error di atas.")