delima1234-Sunbright committed
Commit 5e0490f · 0 Parent(s):

KMI Dashboard

Dashboard.py ADDED
@@ -0,0 +1,1675 @@
# app.py
import streamlit as st
import pandas as pd
import numpy as np
import joblib
import os
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
import openpyxl
from scipy.optimize import differential_evolution
from MonitoringModel import (
    evaluate_models_for_dashboard,
    DATA_FILENAME,
    MODEL_FOLDER,
    PRODUCT_LIST,
    FEATURES,
    TARGET_COLUMN,
)
from eda_functions import (
    compute_eda_summary,
    create_line_plots,
    identify_outliers,
    compute_stats_table,
    compute_anomaly_table,
    compute_production_segments,
)

from Inverse_Model import (
    AVAILABLE_PRODUCTS,
    run_inverse_for_targets,
    results_to_dataframe,
)
from Disagregasi_mmbtu import run_disagregasi_pipeline
from filter_rule_engine import apply_rule_engine
from prediksi_model_inverse import predict_forward_from_params

st.set_page_config(
    page_title="Sistem Prediksi & Rekomendasi Parameter Gas (MMBTU)",
    layout="wide"
)

# Overrides the AVAILABLE_PRODUCTS imported from Inverse_Model above.
AVAILABLE_PRODUCTS = ["BMR BASE", "CKP BASE", "CKR BASE", "CMR BASE", "MORIGRO BASE"]

# The feature configuration is the same for every model
INPUT_FEATURES = [
    "D101330TT", "D102260TIC_CV", "D102265TIC_PV",
    "D102265TIC_CV", "D102266TIC", "D101264FTSCL"
]
LAG_STEPS = [1, 2, 3, 6, 12, 24]
ROLL_WINDOWS = [3, 6, 12, 24]
CONTEXT_WINDOW = max(max(LAG_STEPS), max(ROLL_WINDOWS))  # 24

# =========================
# UTILITY FUNCTIONS
# =========================
def create_temporal_features(df, lag_cols, rolling_cols):
    """Create time-based features (lag, rolling, and calendar features)."""
    df_featured = df.copy()

    # Calendar features
    if "Date_time" in df_featured.columns:
        dt = pd.to_datetime(df_featured["Date_time"], errors="coerce")
        df_featured["minute"] = dt.dt.minute
        df_featured["hour"] = dt.dt.hour
        df_featured["day_of_week"] = dt.dt.dayofweek
        df_featured["month"] = dt.dt.month
        df_featured["day_of_month"] = dt.dt.day

    # Lag features
    for col in lag_cols:
        if col in df_featured.columns:
            for lag in LAG_STEPS:
                df_featured[f"{col}_lag_{lag}"] = df_featured[col].shift(lag)

    # Rolling features
    for col in rolling_cols:
        if col in df_featured.columns:
            s = df_featured[col]
            for w in ROLL_WINDOWS:
                rolled = s.rolling(window=w, min_periods=w)
                df_featured[f"{col}_rolling_mean_{w}"] = rolled.mean()
                df_featured[f"{col}_rolling_std_{w}"] = rolled.std()
                df_featured[f"{col}_rolling_min_{w}"] = rolled.min()
                df_featured[f"{col}_rolling_max_{w}"] = rolled.max()

    return df_featured

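# --- Illustrative usage sketch (assumption; not part of the app's flow and
# never called). Shows what create_temporal_features adds to a toy frame:
# calendar columns plus one lag/rolling column family per entry in LAG_STEPS
# and ROLL_WINDOWS. ---
def _example_create_temporal_features():
    toy = pd.DataFrame({
        "Date_time": pd.date_range("2025-01-01", periods=30, freq="min"),
        "D101330TT": np.linspace(90.0, 100.0, 30),
    })
    featured = create_temporal_features(toy, lag_cols=["D101330TT"], rolling_cols=["D101330TT"])
    # Expect columns such as D101330TT_lag_1 ... D101330TT_lag_24 and
    # D101330TT_rolling_mean_3 ... D101330TT_rolling_max_24 (NaN until each
    # lag/window is fully available), plus minute/hour/day_of_week/month/day_of_month.
    return featured.columns.tolist()
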
def select_context_history(df_product_history, user_row_dict, input_cols, context_window=CONTEXT_WINDOW):
    """
    Find the most similar historical row, restricted to the data of the
    relevant product ONLY.
    Returns: context_df, index of the best match, minimum distance.
    """
    hist = df_product_history.copy()
    hist = hist.sort_values("Date_time").reset_index(drop=True)

    # Make sure there is enough data for scaling
    if len(hist) < 2:
        return hist, 0, 0.0  # Return the data as-is if there is not enough of it

    scaler = MinMaxScaler()
    hist_scaled = scaler.fit_transform(hist[input_cols])

    user_vec = np.array([[user_row_dict[col] for col in input_cols]], dtype=float)
    user_scaled = scaler.transform(user_vec)

    deltas = hist_scaled - user_scaled
    dists = np.sqrt(np.sum(deltas**2, axis=1))

    best_idx = int(np.argmin(dists))
    start_idx = max(0, best_idx - (context_window - 1))
    context_df = hist.iloc[start_idx:best_idx + 1].copy()

    return context_df, best_idx, float(dists[best_idx])

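# --- Illustrative sketch (assumption; never called). Demonstrates the lookup
# above on synthetic data: the returned context window ends at the historical
# row with the smallest min-max-scaled Euclidean distance to the query. ---
def _example_select_context_history():
    hist = pd.DataFrame({
        "Date_time": pd.date_range("2025-01-01", periods=50, freq="min"),
        "x1": np.linspace(0.0, 10.0, 50),
        "x2": np.linspace(5.0, 6.0, 50),
    })
    query = {"x1": 7.0, "x2": 5.7}
    ctx, best_idx, dist = select_context_history(hist, query, ["x1", "x2"], context_window=5)
    # ctx holds at most 5 rows ending at best_idx; dist is the scaled distance.
    return best_idx, dist, len(ctx)
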
def load_artifacts_and_history(model_folder, data_source, selected_product):
    """
    Load the right model and filter the historical data for the selected product.
    data_source can be:
    - a CSV path string
    - an UploadedFile from st.file_uploader
    """
    # The model file name is derived from the product, e.g. ckr_base_checkpoint.pkl
    product_file_name = f"{selected_product.lower().replace(' ', '_')}_checkpoint.pkl"
    model_path = os.path.join(model_folder, product_file_name)

    if not os.path.exists(model_path):
        st.error(f"❌ File model tidak ditemukan di: {model_path}")
        return None, None, None

    # Load the model
    artifacts = joblib.load(model_path)
    model = artifacts["model"]
    feature_columns = artifacts["features"]

    # Load the historical data
    try:
        # pd.read_csv accepts both a path string and a Streamlit UploadedFile
        df_raw = pd.read_csv(data_source)
    except Exception as e:
        st.error(f"❌ Gagal membaca file CSV historis. Error: {e}")
        return None, None, None

    if "Product" not in df_raw.columns:
        st.error("Kolom 'Product' tidak ditemukan di data historis.")
        return None, None, None

    df_product = df_raw[df_raw["Product"] == selected_product].copy()
    if df_product.empty:
        st.error(f"Tidak ada data historis untuk produk '{selected_product}'.")
        return None, None, None

    if "Date_time" not in df_product.columns:
        st.error("Kolom 'Date_time' tidak ditemukan di data historis.")
        return None, None, None

    df_product["Date_time"] = pd.to_datetime(df_product["Date_time"], errors="coerce")
    df_product = df_product.dropna(subset=["Date_time"]).sort_values("Date_time").reset_index(drop=True)

    return model, feature_columns, df_product

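# --- Illustrative sketch (assumption; never called). load_artifacts_and_history
# reads a joblib checkpoint that must contain at least the keys "model" and
# "features"; a compatible checkpoint could be produced like this hypothetical
# snippet (the real training code lives outside this file). ---
def _example_save_checkpoint(model, feature_columns, model_folder, product):
    path = os.path.join(model_folder, f"{product.lower().replace(' ', '_')}_checkpoint.pkl")
    joblib.dump({"model": model, "features": feature_columns}, path)
    return path
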
# =========================
# PAGE 1: Gas prediction from the 6 parameters
# =========================
def page_prediksi_gas_dari_6_parameter():
    st.subheader("1️⃣ Dashboard Prediksi Konsumsi Gas (MMBTU)")

    st.markdown(
        """
        Halaman ini digunakan untuk memprediksi **konsumsi gas (MMBTU)**
        berdasarkan **6 parameter proses** pada spray dryer.
        """
    )

    # ---------- Data & model configuration (on the main page, not the sidebar) ----------
    st.markdown("### 🔧 Konfigurasi Data & Model")

    config_col1, config_col2 = st.columns(2)

    with config_col1:
        selected_product = st.selectbox(
            "Pilih Produk",
            AVAILABLE_PRODUCTS,
            index=2  # default: CKR BASE
        )

        model_folder = st.text_input(
            "Folder Model Checkpoints",
            value="MODEL CHECKPOINT MANY TO ONE",
            help="Folder tempat file-file model *.pkl disimpan."
        )

    with config_col2:
        data_source_option = st.radio(
            "Sumber Data Historis",
            ["Path File CSV", "Upload File CSV"],
            horizontal=True
        )

        data_source = None
        if data_source_option == "Path File CSV":
            data_file_path = st.text_input(
                "Path File Data CSV (historis)",
                value=r"disagregasi_data_spraydryer_terbaru_10_17_2025.csv"
            )
            if data_file_path:
                data_source = data_file_path
        else:
            uploaded_file = st.file_uploader(
                "Upload File CSV Historis",
                type=["csv"]
            )
            if uploaded_file is not None:
                data_source = uploaded_file

    st.markdown("---")

    # If data_source is not set yet, do not continue to the prediction step
    if data_source is None:
        st.info("ℹ️ Silakan pilih sumber data historis (path atau upload CSV) untuk melanjutkan.")
        return

    # ---------- Load the model & historical data ----------
    with st.spinner("📦 Memuat model & data historis..."):
        model, feature_columns, df_history = load_artifacts_and_history(
            model_folder=model_folder,
            data_source=data_source,
            selected_product=selected_product
        )

    if (model is None) or (df_history is None):
        return  # the loader has already shown an error message

    if len(df_history) < CONTEXT_WINDOW:
        st.error(
            f"Data historis untuk '{selected_product}' kurang dari {CONTEXT_WINDOW} baris "
            f"(hanya {len(df_history)}). Prediksi mungkin tidak akurat."
        )
        return

    # ---------- User parameter input ----------
    st.markdown("### 🧪 Masukkan 6 Parameter Input (Data Baru)")

    c1, c2, c3 = st.columns(3)

    with c1:
        v_D101330TT = st.number_input("Temperature Outlet Chamber (D101330TT)", value=95.0, format="%.4f")
        v_D102265TIC_PV = st.number_input("Temperature Inlet Chamber (D102265TIC_PV)", value=185.0, format="%.4f")

    with c2:
        v_D102260TIC_CV = st.number_input("High Pressure Steam Damper (D102260TIC_CV)", value=45.0, format="%.4f")
        v_D102265TIC_CV = st.number_input("Low Pressure Steam Damper (D102265TIC_CV)", value=17.0, format="%.4f")

    with c3:
        v_D102266TIC = st.number_input("Dehumidifier Temperature (D102266TIC)", value=16.0, format="%.4f")
        v_D101264FTSCL = st.number_input("Flow Feed Dryer (D101264FTSCL)", value=3800.0, format="%.4f")

    st.markdown("---")

    # ---------- Predict button ----------
    if st.button("🔮 Prediksi Konsumsi Gas (MMBTU)", type="primary", use_container_width=True):
        # Assemble the user's input row
        user_row = {
            "Date_time": pd.to_datetime(datetime.now()),
            "Product": selected_product,
            "D101330TT": v_D101330TT,
            "D102260TIC_CV": v_D102260TIC_CV,
            "D102265TIC_PV": v_D102265TIC_PV,
            "D102265TIC_CV": v_D102265TIC_CV,
            "D102266TIC": v_D102266TIC,
            "D101264FTSCL": v_D101264FTSCL,
        }

        with st.spinner(f"🔎 Mencari konteks historis paling mirip di data '{selected_product}'..."):
            ctx_df, best_idx, best_dist = select_context_history(
                df_history,
                user_row,
                INPUT_FEATURES,
                context_window=CONTEXT_WINDOW
            )

        # st.info(
        #     f"Konteks historis paling mirip ditemukan pada index ke-**{best_idx}** "
        #     f"(jarak: **{best_dist:.6f}**) dengan timestamp: "
        #     f"**{ctx_df.iloc[-1]['Date_time']}**"
        # )

        with st.spinner("🧩 Membentuk fitur temporal & melakukan prediksi..."):
            # Combine the historical context with the user's row
            df_new = pd.DataFrame([user_row])
            df_combined = pd.concat([ctx_df, df_new], ignore_index=True)

            # Build the temporal features
            df_featured = create_temporal_features(
                df_combined,
                lag_cols=INPUT_FEATURES,
                rolling_cols=INPUT_FEATURES
            )

            # Take the last row as the final model input
            final_input_row = df_featured.tail(1)

            # Check the features the model needs
            missing = [c for c in feature_columns if c not in final_input_row.columns]
            if missing:
                st.error(f"⚠️ Beberapa fitur yang dibutuhkan model tidak tersedia: {missing}")
                return

            # Check for NaN
            if final_input_row[feature_columns].isnull().values.any():
                st.warning(
                    "Input akhir mengandung nilai NaN. "
                    "Ini bisa terjadi jika konteks historis tidak cukup panjang "
                    "atau data historis memiliki gap."
                )
                st.dataframe(final_input_row[feature_columns].T)
                return

            # Predict
            X_pred = final_input_row[feature_columns]
            y_pred = model.predict(X_pred)

        st.metric(
            f"✅ Hasil Prediksi Konsumsi GAS MMBTU untuk {selected_product}",
            f"{float(y_pred[0]):.6f} MMBTU"
        )

        with st.expander("🔍 Lihat Input Fitur Final yang Digunakan untuk Prediksi"):
            st.dataframe(X_pred)

        with st.expander("📈 Lihat Konteks Historis yang Dipakai"):
            st.dataframe(ctx_df.tail(CONTEXT_WINDOW))

def load_inverse_from_csv(csv_path: str):
    """
    Read the CSV file of inverse-model results (global across all products).
    Returns a DataFrame, or None if the file is missing or empty.
    """
    if not os.path.exists(csv_path):
        return None

    try:
        df = pd.read_csv(csv_path)
        if df.empty:
            return None
        return df
    except Exception as e:
        st.warning(f"Gagal membaca CSV hasil inverse: {e}")
        return None


def append_inverse_to_csv(df_new: pd.DataFrame, csv_path: str):
    """
    Append df_new to the CSV file of inverse-model results.

    Behaviour:
    - If the file does not exist yet → it is created
    - If it already exists → it is read, the column sets are aligned, and the
      concatenated result is written back
    - Columns missing from the old file are added automatically
    """
    if df_new is None or df_new.empty:
        return

    if os.path.exists(csv_path):
        try:
            df_existing = pd.read_csv(csv_path)
        except Exception as e:
            st.warning(
                f"Gagal membaca CSV lama, akan overwrite dengan hasil baru. Error: {e}"
            )
            df_new.to_csv(csv_path, index=False)
            return

        # Align the column sets of the existing and new frames
        all_cols = sorted(set(df_existing.columns).union(df_new.columns))
        df_existing = df_existing.reindex(columns=all_cols)
        df_new = df_new.reindex(columns=all_cols)

        df_all = pd.concat([df_existing, df_new], ignore_index=True)
        df_all.to_csv(csv_path, index=False)
    else:
        # The file does not exist yet → create it
        df_new.to_csv(csv_path, index=False)

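# --- Illustrative sketch (assumption; never called; writes a throwaway file).
# Two frames with different column sets appended in sequence: the second call
# aligns the union of columns, so the earlier row gets NaN in the new column. ---
def _example_append_inverse_to_csv(tmp_path="_example_inverse.csv"):
    a = pd.DataFrame([{"Product": "CKR BASE", "Target_MMBTU": 0.29}])
    b = pd.DataFrame([{"Product": "BMR BASE", "Target_MMBTU": 0.31, "Error": 0.001}])
    append_inverse_to_csv(a, tmp_path)
    append_inverse_to_csv(b, tmp_path)
    return pd.read_csv(tmp_path)  # two rows; "Error" is NaN for the first
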
# =========================
# PAGES 2–6
# =========================
def page_prediksi_parameter_dari_gas():
    st.subheader("2️⃣ Prediksi Parameter dari Gas (MMBTU)")
    st.markdown(
        """
        Halaman ini digunakan untuk **mencari kombinasi 6 parameter proses** yang paling optimal
        untuk mencapai **target konsumsi Gas (MMBTU)** tertentu, kemudian
        memvalidasi hasilnya menggunakan **forward model XGBoost many-to-one**.
        """
    )

    # ====== Two-column main layout ======
    col_left, col_right = st.columns([2, 1])

    # -----------------------------
    # LEFT COLUMN – INVERSE MODEL
    # -----------------------------
    with col_left:
        st.markdown("### 🔁 Inverse Model – Parameter Recommendation")

        # 1. Product selection
        selected_product = st.selectbox(
            "Pilih Produk",
            AVAILABLE_PRODUCTS,
            index=AVAILABLE_PRODUCTS.index("CKR BASE") if "CKR BASE" in AVAILABLE_PRODUCTS else 0,
            key="inv_product_select"
        )

        # 2. Gas target input
        target_mmbtu = st.number_input(
            "Target Gas Consumption (MMBTU)",
            min_value=0.10,
            max_value=0.50,
            value=0.29,
            step=0.0001,
            format="%.4f",
            key="inv_target_mmbtu"
        )

        # 3. Model path & results-CSV configuration
        st.markdown("#### ⚙️ Konfigurasi Model & Database Hasil")

        default_model_folder = r"MODEL CHECKPOINT FOR INVERSE MODEL"
        model_folder = st.text_input(
            "Folder Model Checkpoint XGBoost (Many-to-One)",
            value=default_model_folder,
            help="Folder berisi file model_checkpoint_xgb_{PRODUCT}.joblib"
        )

        default_csv_path = r"Hasil_Inverse_Model.csv"
        csv_path = st.text_input(
            "File CSV Hasil Inverse Model",
            value=default_csv_path,
            help="Semua produk disimpan dalam satu file CSV dengan kolom 'Product' dan 'Target_MMBTU'."
        )

        tol = st.number_input(
            "Toleransi pencarian target di CSV (±)",
            min_value=0.0,
            max_value=0.01,
            value=0.0005,
            step=0.0001,
            format="%.4f",
            help="Misal 0.0005 → akan mencari baris dengan |Target_MMBTU - target| ≤ 0.0005"
        )

        # Main button
        run_btn = st.button(
            "🔍 Cari / Optimasi Parameter",
            type="primary",
            use_container_width=True
        )

        # Result handed over to the right-hand column
        last_result_row = None

        if run_btn:
            # ------- 3. Look in the CSV cache first -------
            df_cache = load_inverse_from_csv(csv_path)
            found_from_cache = False
            last_result_row = None

            if df_cache is not None:
                # Make sure the Product column exists
                if "Product" not in df_cache.columns:
                    st.info(
                        "Kolom 'Product' tidak ditemukan di CSV hasil inverse. "
                        "Akan menjalankan optimasi baru."
                    )
                else:
                    df_prod = df_cache[df_cache["Product"] == selected_product].copy()
                    if df_prod.empty:
                        st.info(
                            f"Tidak ada histori inverse untuk produk '{selected_product}' "
                            f"di CSV. Akan menjalankan optimasi baru."
                        )
                    else:
                        # Find the target column (primary: 'Target_MMBTU'; fall back to other names)
                        target_col = None
                        for c in df_prod.columns:
                            if c.lower() in [
                                "target_mmbtu",
                                "target",
                                "target_gas",
                                "target_gas_mmbtu",
                                "target_input",
                            ]:
                                target_col = c
                                break

                        if target_col is not None:
                            diffs = (df_prod[target_col] - target_mmbtu).abs()
                            mask = diffs <= tol
                            if mask.any():
                                df_match = df_prod.loc[mask].copy()
                                df_match["__diff__"] = (df_match[target_col] - target_mmbtu).abs()
                                df_match = df_match.sort_values("__diff__")
                                row = df_match.iloc[0].drop(labels="__diff__")
                                last_result_row = row
                                found_from_cache = True
                                st.success(
                                    "✅ Rekomendasi parameter ditemukan di database CSV "
                                    "(tanpa perlu menjalankan Differential Evolution)."
                                )
                            else:
                                st.info(
                                    "ℹ️ Tidak ditemukan target yang mendekati di CSV. "
                                    "Akan menjalankan optimasi baru."
                                )
                        else:
                            st.info(
                                "Kolom target tidak ditemukan di CSV hasil inverse. "
                                "Akan menjalankan optimasi baru."
                            )
            else:
                st.info(
                    "File CSV hasil inverse belum ada. Akan dibuat setelah optimasi pertama."
                )

            # ------- 4. Not found in the CSV → run a real-time optimisation -------
            if not found_from_cache:
                model_filename = f"model_checkpoint_xgb_{selected_product}.joblib"
                model_path = os.path.join(model_folder, model_filename)

                if not os.path.exists(model_path):
                    st.error(f"❌ File model tidak ditemukan: {model_path}")
                    return

                st.info(
                    "Sedang mencari kombinasi parameter paling optimal untuk "
                    "mencapai target Gas Consumption Anda."
                )
                with st.spinner(
                    "Menjalankan Differential Evolution untuk inverse model..."
                ):
                    # Run the inverse model for this single target only
                    results = run_inverse_for_targets(
                        model_path, selected_product, [target_mmbtu]
                    )
                    df_new = results_to_dataframe(results, selected_product)

                    # --- Ensure the minimal column structure for the global CSV ---
                    # 1) Add the Product column
                    df_new["Product"] = selected_product

                    # 2) Normalise the target column → 'Target_MMBTU'
                    target_col = None
                    for c in df_new.columns:
                        if c.lower() in [
                            "target_mmbtu",
                            "target",
                            "target_gas",
                            "target_gas_mmbtu",
                            "target_input",
                        ]:
                            target_col = c
                            break

                    if target_col is None:
                        df_new["Target_MMBTU"] = float(target_mmbtu)
                    else:
                        if target_col != "Target_MMBTU":
                            df_new["Target_MMBTU"] = df_new[target_col]

                    # 3) Add the important columns if they are missing
                    required_cols = [
                        "Level",
                        "Predicted_MMBTU",
                        "Error",
                        "Error_Pct",
                        "Objective_Value",
                        "Converged",
                        "Iterations",
                        "Soft_Violations",
                    ]
                    for col in required_cols:
                        if col not in df_new.columns:
                            df_new[col] = np.nan

                    # Take the first row as the latest result
                    last_result_row = df_new.iloc[0]

                    # 4) Save to the CSV (append)
                    try:
                        append_inverse_to_csv(df_new, csv_path)
                        st.success(
                            "✅ Hasil optimasi baru berhasil disimpan ke CSV "
                            "(Hasil_Inverse_Model)."
                        )
                    except Exception as e:
                        st.error(f"Gagal menyimpan hasil ke CSV: {e}")

            # ------- 5. Display the result as a table -------
            if last_result_row is not None:
                # Store in session_state so the right-hand column can access it
                st.session_state["last_inverse_result"] = {
                    "product": selected_product,
                    "target": float(target_mmbtu),
                    "row": last_result_row.to_dict()
                }

                # Extract the parameter values
                row_dict = last_result_row.to_dict()

                # (Note) The prediction & error stored in the results file are no
                # longer used for the forward pass, but may still be shown as
                # historical information
                pred_col = None
                for c in row_dict.keys():
                    if c.lower() in ["predicted_mmbtu", "prediction", "prediction_mmbtu"]:
                        pred_col = c
                        break

                prediction_val = row_dict.get(pred_col, None)
                error_val = row_dict.get("Error", None)

                display_row = {
                    "D101330TT": row_dict.get("D101330TT", np.nan),
                    "D102260TIC_CV": row_dict.get("D102260TIC_CV", np.nan),
                    "D102265TIC_CV": row_dict.get("D102265TIC_CV", np.nan),
                    "D102265TIC_PV": row_dict.get("D102265TIC_PV", np.nan),
                    "D102266TIC": row_dict.get("D102266TIC", np.nan),
                    "D101264FTSCL": row_dict.get("D101264FTSCL", np.nan),
                    "Prediction (MMBTU) [Excel/Inverse]": prediction_val,
                    "Error (MMBTU) [Excel/Inverse]": error_val,
                    "Target Input": float(target_mmbtu),
                }

                st.markdown("#### 📊 Hasil Rekomendasi Parameter")
                st.dataframe(pd.DataFrame([display_row]), use_container_width=True)

                with st.expander("🔍 Detail Lengkap Hasil Inverse Model (Raw)"):
                    st.json(row_dict)
            else:
                st.warning("Tidak ada hasil yang bisa ditampilkan.")

    # ---------------------------------------------
    # RIGHT COLUMN – FORWARD MODELLING (VALIDATION)
    # ---------------------------------------------
    with col_right:
        st.markdown("### 📈 Forward Modelling – Validasi XGBoost Many-to-One")

        info_box = st.empty()

        if "last_inverse_result" not in st.session_state:
            info_box.info(
                "Belum ada hasil inverse model.\n\n"
                "Silakan jalankan **Cari / Optimasi Parameter** di sisi kiri terlebih dahulu."
            )
            return

        # Fetch the latest inverse-model result
        last_res = st.session_state["last_inverse_result"]
        product_name = last_res["product"]
        target_input = last_res["target"]
        row_dict = last_res["row"]

        info_box.success(f"Validasi forward model untuk **{product_name}** (Target: {target_input:.4f} MMBTU)")

        # Show the parameters fed into the forward model
        st.markdown("#### Parameter Input ke Forward Model")
        param_df = pd.DataFrame([{
            "D101330TT": row_dict.get("D101330TT", np.nan),
            "D102260TIC_CV": row_dict.get("D102260TIC_CV", np.nan),
            "D102265TIC_PV": row_dict.get("D102265TIC_PV", np.nan),
            "D102265TIC_CV": row_dict.get("D102265TIC_CV", np.nan),
            "D102266TIC": row_dict.get("D102266TIC", np.nan),
            "D101264FTSCL": row_dict.get("D101264FTSCL", np.nan),
        }])
        st.dataframe(param_df, use_container_width=True)

        # 🔁 Actually run the forward model (instead of reading stored results)
        st.markdown("#### ✅ Hasil Prediksi Forward Model (Recomputed)")

        try:
            forward_input = {
                "D101330TT": float(row_dict.get("D101330TT", np.nan)),
                "D102260TIC_CV": float(row_dict.get("D102260TIC_CV", np.nan)),
                "D102265TIC_PV": float(row_dict.get("D102265TIC_PV", np.nan)),
                "D102265TIC_CV": float(row_dict.get("D102265TIC_CV", np.nan)),
                "D102266TIC": float(row_dict.get("D102266TIC", np.nan)),
                "D101264FTSCL": float(row_dict.get("D101264FTSCL", np.nan)),
            }

            pred_val = predict_forward_from_params(product_name, forward_input, model_folder)
            err_val = float(pred_val) - float(target_input)

            m1, m2 = st.columns(2)
            with m1:
                st.metric("Prediksi GAS (MMBTU)", f"{pred_val:.6f}")
            with m2:
                st.metric("Error terhadap Target", f"{err_val:+.6f}")

        except Exception as e:
            st.error(f"Terjadi error saat menghitung ulang prediksi forward: {e}")

        # --------------------------------------------------
        # 🔽 SECTION: PREDICTION SIMULATION (FORWARD MODELLING)
        # --------------------------------------------------
        st.markdown("---")
        st.markdown("### 🧪 Simulasi Prediksi Konsumsi Gas (Forward Modelling)")

        st.caption(
            "Pilih produk dan masukkan nilai 6 parameter proses secara manual untuk mensimulasikan "
            "prediksi konsumsi gas (MMBTU) menggunakan model XGBoost Many-to-One."
        )

        # Product for the simulation – defaults to the product of the last inverse run
        sim_product = st.selectbox(
            "Produk untuk Simulasi Forward",
            AVAILABLE_PRODUCTS,
            index=AVAILABLE_PRODUCTS.index(product_name) if product_name in AVAILABLE_PRODUCTS else 0,
            key="sim_product_select"
        )

        # Default values:
        if sim_product == product_name:
            default_vals = {
                "D101330TT": float(row_dict.get("D101330TT", 95.0)),
                "D102260TIC_CV": float(row_dict.get("D102260TIC_CV", 45.0)),
                "D102265TIC_PV": float(row_dict.get("D102265TIC_PV", 185.0)),
                "D102265TIC_CV": float(row_dict.get("D102265TIC_CV", 17.0)),
                "D102266TIC": float(row_dict.get("D102266TIC", 16.0)),
                "D101264FTSCL": float(row_dict.get("D101264FTSCL", 3800.0)),
            }
        else:
            default_vals = {
                "D101330TT": 95.0,
                "D102260TIC_CV": 45.0,
                "D102265TIC_PV": 185.0,
                "D102265TIC_CV": 17.0,
                "D102266TIC": 16.0,
                "D101264FTSCL": 3800.0,
            }

        s1, s2, s3 = st.columns(3)
        with s1:
            sim_D101330TT = st.number_input(
                "D101330TT",
                value=default_vals["D101330TT"],
                format="%.4f",
                key="sim_D101330TT"
            )
            sim_D102265TIC_PV = st.number_input(
                "D102265TIC_PV",
                value=default_vals["D102265TIC_PV"],
                format="%.4f",
                key="sim_D102265TIC_PV"
            )
        with s2:
            sim_D102260TIC_CV = st.number_input(
                "D102260TIC_CV",
                value=default_vals["D102260TIC_CV"],
                format="%.4f",
                key="sim_D102260TIC_CV"
            )
            sim_D102265TIC_CV = st.number_input(
                "D102265TIC_CV",
                value=default_vals["D102265TIC_CV"],
                format="%.4f",
                key="sim_D102265TIC_CV"
            )
        with s3:
            sim_D102266TIC = st.number_input(
                "D102266TIC",
                value=default_vals["D102266TIC"],
                format="%.4f",
                key="sim_D102266TIC"
            )
            sim_D101264FTSCL = st.number_input(
                "D101264FTSCL",
                value=default_vals["D101264FTSCL"],
                format="%.4f",
                key="sim_D101264FTSCL"
            )

        sim_btn = st.button(
            "▶️ Jalankan Simulasi Prediksi GAS (MMBTU)",
            type="primary",
            use_container_width=True,
            key="sim_forward_btn"
        )

        if sim_btn:
            sim_input = {
                "D101330TT": sim_D101330TT,
                "D102260TIC_CV": sim_D102260TIC_CV,
                "D102265TIC_PV": sim_D102265TIC_PV,
                "D102265TIC_CV": sim_D102265TIC_CV,
                "D102266TIC": sim_D102266TIC,
                "D101264FTSCL": sim_D101264FTSCL,
            }

            try:
                y_sim = predict_forward_from_params(sim_product, sim_input, model_folder)

                diff_from_target = None
                if (sim_product == product_name) and (target_input is not None):
                    diff_from_target = float(y_sim) - float(target_input)

                st.success(f"✅ Simulasi prediksi konsumsi GAS untuk produk **{sim_product}** berhasil.")
                c_res1, c_res2 = st.columns(2)
                with c_res1:
                    st.metric(
                        "Prediksi Konsumsi GAS (MMBTU)",
                        f"{float(y_sim):.6f}"
                    )
                with c_res2:
                    if diff_from_target is not None:
                        st.metric(
                            "Selisih terhadap Target Inverse",
                            f"{diff_from_target:+.6f}"
                        )
                    else:
                        st.caption(
                            "Selisih terhadap target hanya dihitung jika produk simulasi sama dengan produk inverse terakhir."
                        )

            except FileNotFoundError as e:
                st.error(str(e))
            except Exception as e:
                st.error(f"Terjadi error saat menjalankan simulasi forward modelling: {e}")

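# --- Illustrative sketch (assumption; never called). The forward-model call
# used twice above, with hypothetical parameter values; predict_forward_from_params
# comes from prediksi_model_inverse and is called as (product, param_dict, model_folder). ---
def _example_forward_call(model_folder=r"MODEL CHECKPOINT FOR INVERSE MODEL"):
    params = {
        "D101330TT": 95.0, "D102260TIC_CV": 45.0, "D102265TIC_PV": 185.0,
        "D102265TIC_CV": 17.0, "D102266TIC": 16.0, "D101264FTSCL": 3800.0,
    }
    return predict_forward_from_params("CKR BASE", params, model_folder)
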
def page_monitoring_model():
    st.subheader("3️⃣ Evaluasi Performa Model Prediksi Gas (MMBTU) per Produk")

    st.markdown(
        """
        Halaman ini menampilkan **ringkasan metrik performa model XGBoost** untuk setiap produk,
        serta **grafik perbandingan Actual vs Predicted GAS_MMBTU**.

        Kamu bisa:
        - Menggunakan **dataset default** dari path lokal, atau
        - Meng-upload **dataset terbaru (CSV)** untuk dievaluasi dengan model yang sama.
        """
    )

    # --- Data-source & model configuration ---
    st.markdown("#### ⚙️ Konfigurasi Sumber Data & Model")
    col1, col2 = st.columns(2)
    with col1:
        data_path = st.text_input(
            "Path Data Disaggregated (default)",
            value=DATA_FILENAME,
            help="Dipakai jika tidak ada file yang di-upload."
        )
    with col2:
        model_dir = st.text_input(
            "Folder Model Checkpoint",
            value=MODEL_FOLDER,
            help="Folder berisi file model_checkpoint_xgb_{PRODUCT}.joblib"
        )

    st.markdown("#### 📂 Upload Dataset Terbaru (Opsional)")
    uploaded_file = st.file_uploader(
        "Upload file CSV baru (struktur kolom harus sama dengan dataset sebelumnya)",
        type=["csv"]
    )

    run_btn = st.button("🔎 Run Evaluation", type="primary", use_container_width=True)

    if not run_btn:
        st.info(
            "• Upload dataset baru (opsional), lalu klik **Run Evaluation**\n\n"
            "• Jika tidak upload apa-apa, sistem akan menggunakan **dataset default**"
        )
        return

    # --- Prepare data_df (if a file was uploaded) ---
    data_df = None
    if uploaded_file is not None:
        try:
            data_df = pd.read_csv(uploaded_file)
            st.success("✅ Dataset baru berhasil dibaca dan akan digunakan untuk evaluasi.")
        except Exception as e:
            st.error(f"❌ Gagal membaca file CSV yang di-upload: {e}")
            return
    else:
        st.warning("Tidak ada file yang di-upload. Sistem akan menggunakan dataset default dari path di atas.")

    # --- Run the evaluation ---
    with st.spinner("Menghitung metrik performa dan menyiapkan grafik..."):
        summary_df, product_figs = evaluate_models_for_dashboard(
            data_path=data_path,  # still passed as a fallback
            model_dir=model_dir,
            products=PRODUCT_LIST,
            features=FEATURES,
            target_col=TARGET_COLUMN,
            data_df=data_df  # <<-- NEW: if None → use data_path
        )

    if summary_df.empty:
        st.warning("Tidak ada hasil evaluasi yang dapat ditampilkan. Periksa kembali data dan model.")
        return

    # =====================================================
    # PART 1 – Performance summary table
    # =====================================================
    st.markdown("### 📊 Ringkasan Performa Model")

    df_display = summary_df.copy()
    df_display["R²"] = df_display["R²"].round(3)
    df_display["RMSE"] = df_display["RMSE"].round(3)
    df_display["MAE"] = df_display["MAE"].round(3)

    def color_r2(val):
        try:
            v = float(val)
        except Exception:
            return ""
        if v >= 0.90:
            return "background-color: #d4edda; color: #155724;"  # green
        elif v >= 0.80:
            return "background-color: #cce5ff; color: #004085;"  # light blue
        else:
            return "background-color: #fff3cd; color: #856404;"  # yellow

    styled = (
        df_display.style
        .applymap(color_r2, subset=["R²"])
        .format({"R²": "{:.3f}", "RMSE": "{:.3f}", "MAE": "{:.3f}"})
    )

    st.dataframe(styled, use_container_width=True)

    # =====================================================
    # PART 2 – Actual vs Predicted chart per product
    # =====================================================
    st.markdown("### 📈 Grafik Actual vs Predicted per Produk")

    tabs = st.tabs(PRODUCT_LIST)

    for i, product in enumerate(PRODUCT_LIST):
        with tabs[i]:
            st.subheader(f"Actual vs Predicted GAS_MMBTU – {product}")

            row = summary_df[summary_df["Product"] == product]
            if not row.empty:
                r2 = row["R²"].values[0]
                rmse = row["RMSE"].values[0]
                mae = row["MAE"].values[0]
                st.caption(f"R² = {r2:.3f} | RMSE = {rmse:.3f} | MAE = {mae:.3f}")

            fig = product_figs.get(product)
            if fig is not None:
                st.pyplot(fig, use_container_width=True)
            else:
                st.info("Tidak ada grafik untuk produk ini (mungkin data/model tidak tersedia).")

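# --- Illustrative sketch (assumption inferred from the usage above; never
# called). evaluate_models_for_dashboard (MonitoringModel) is consumed as if it
# returned (summary_df, product_figs): a frame with Product/R²/RMSE/MAE columns
# and a dict mapping each product to a matplotlib figure (or None). ---
def _example_evaluation_contract():
    summary_df = pd.DataFrame(
        [{"Product": "CKR BASE", "R²": 0.95, "RMSE": 0.012, "MAE": 0.009}]
    )
    product_figs = {"CKR BASE": None}  # a real run holds a matplotlib Figure here
    return summary_df, product_figs
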
def page_eda():
    st.subheader("4️⃣ Exploratory Data Analysis (EDA) – Data Konsumsi Gas Spray Dryer")

    st.markdown(
        """
        Halaman ini digunakan untuk melakukan **Exploratory Data Analysis (EDA)** terhadap
        dataset disagregasi spray dryer yang sama dengan halaman **Monitoring Model**.

        Kamu bisa:
        - Menggunakan **dataset default** dari path lokal, atau
        - Meng-upload **dataset baru (CSV)** untuk dianalisis.
        """
    )

    # ------------------------------------------------------
    # 1. Data-source configuration
    # ------------------------------------------------------
    st.markdown("#### ⚙️ Konfigurasi Sumber Data")

    col1, col2 = st.columns(2)
    with col1:
        data_path = st.text_input(
            "Path Data Disaggregated (default)",
            value=DATA_FILENAME,
            help="Dipakai jika tidak ada file yang di-upload."
        )
    with col2:
        st.caption(
            "Pastikan struktur kolom sama dengan dataset yang digunakan di halaman **Monitoring Model** "
            "(minimal kolom: `Date_time`, `Product`, parameter proses, dan kolom gas)."
        )

    st.markdown("#### 📂 Upload Dataset untuk EDA (Opsional)")
    uploaded_file = st.file_uploader(
        "Upload file CSV untuk dianalisis",
        type=["csv"],
        key="eda_upload",
    )

    run_btn = st.button(
        "🔁 Refresh / Jalankan EDA",
        type="primary",
        use_container_width=True,
        key="eda_run_btn",
    )

    if not run_btn:
        st.info(
            "• Upload dataset baru (opsional), kemudian klik **Refresh / Jalankan EDA**.\n\n"
            "• Jika tidak upload apa-apa, sistem akan menggunakan **dataset default** dari path di atas."
        )
        return

    # ------------------------------------------------------
    # 2. Load the data (an upload takes precedence over the default path)
    # ------------------------------------------------------
    if uploaded_file is not None:
        try:
            df_raw = pd.read_csv(uploaded_file)
            st.success("✅ Dataset baru berhasil dibaca dan akan digunakan untuk EDA.")
        except Exception as e:
            st.error(f"❌ Gagal membaca file CSV yang di-upload: {e}")
            return
    else:
        try:
            df_raw = pd.read_csv(data_path)
            st.warning("Tidak ada file yang di-upload. Menggunakan dataset default dari path di atas.")
        except Exception as e:
            st.error(f"❌ Gagal membaca dataset default dari `{data_path}`: {e}")
            return

    if df_raw.empty:
        st.error("Dataset kosong. Tidak ada yang bisa dianalisis.")
        return

    # Make sure the Date_time column exists and is converted to datetimes
    if "Date_time" in df_raw.columns:
        df_raw["Date_time"] = pd.to_datetime(df_raw["Date_time"], errors="coerce")
    else:
        st.warning("Kolom 'Date_time' tidak ditemukan. EDA tetap berjalan, tapi fitur berbasis waktu terbatas.")

    # ------------------------------------------------------
    # 3. Data summary (metric cards)
    # ------------------------------------------------------
    PROCESS_PARAMS = [
        "D101330TT",
        "D102260TIC_CV",
        "D102265TIC_PV",
        "D102265TIC_CV",
        "D102266TIC",
        "D101264FTSCL",
    ]

    summary = compute_eda_summary(df_raw, date_col="Date_time", product_col="Product")

    date_min = summary["date_min"]
    date_max = summary["date_max"]
    date_range_text = "-"
    if pd.notna(date_min) and pd.notna(date_max):
        date_range_text = f"{date_min:%Y-%m-%d %H:%M} → {date_max:%Y-%m-%d %H:%M}"

    st.markdown("### 📊 Data Summary")

    m1, m2, m3, m4, m5 = st.columns(5)
    m1.metric("Total Rows", f"{summary['total_rows']:,}")
    m2.metric("Total Columns", f"{summary['total_columns']}")
    m3.metric("Total Missing Values", f"{summary['total_missing']:,}")
    m4.metric("Jumlah Unique Product", f"{summary['unique_products']}")
    m5.metric("Data Duplikat", f"{summary['duplicate_rows']:,}" if "duplicate_rows" in summary else "-")

    with st.expander("📦 Detail Jumlah Data per Produk"):
        if summary["product_counts"]:
            product_count_df = (
                pd.DataFrame(list(summary["product_counts"].items()), columns=["Product", "Count"])
                .sort_values("Product")
            )
            st.dataframe(product_count_df, use_container_width=True)
        else:
            st.write("Tidak ada kolom 'Product' atau tidak ada data produk.")

    # ------------------------------------------------------
    # 3b. Data anomalies (validation errors / anomaly_df)
    # ------------------------------------------------------
    st.markdown("---")
    st.markdown("### ⚠️ Ringkasan Anomali Data")

    anomaly_df = compute_anomaly_table(df_raw, product_col="Product")
    if anomaly_df.empty:
        st.info("Tidak ditemukan anomali berdasarkan rule yang didefinisikan.")
    else:
        st.dataframe(anomaly_df, use_container_width=True)

    st.markdown("---")

    # ------------------------------------------------------
    # 4. Production segments (precomputed once)
    # ------------------------------------------------------
    segments_df = compute_production_segments(df_raw, product_col="Product", time_col="Date_time")

    # ------------------------------------------------------
    # 5. Tabs per product (All Data + one tab per product)
    # ------------------------------------------------------
    st.markdown("### 🔍 EDA per Produk")

    product_counts = summary["product_counts"]
    product_names = list(product_counts.keys()) if product_counts else []

    # Tab priority order as per the requirements
    PRIORITY_PRODUCTS = ["BMR BASE", "CKP BASE", "CKR BASE", "CMR BASE", "MORIGRO BASE"]

    ordered_products = (
        [p for p in PRIORITY_PRODUCTS if p in product_names]
        + [p for p in sorted(product_names) if p not in PRIORITY_PRODUCTS]
    )

    tab_labels = ["All Data"] + ordered_products
    tabs = st.tabs(tab_labels)

    def highlight_min_max_rows(row):
        if row.name == "min":
            return ["background-color: #f8d7da"] * len(row)  # light red
        elif row.name == "max":
            return ["background-color: #d4edda"] * len(row)  # light green
        return [""] * len(row)

    for idx, label in enumerate(tab_labels):
        with tabs[idx]:
            if label == "All Data":
                df_tab = df_raw.copy()
                title_suffix = "All Data"
            else:
                df_tab = df_raw[df_raw["Product"] == label].copy()
                title_suffix = label

            if df_tab.empty:
                st.warning(f"Tidak ada data untuk: **{label}**")
                continue

            # 1️⃣ Distribution of the main parameters (2x3 grid, matplotlib)
            st.markdown(f"#### 1️⃣ Distribusi Parameter Proses – {title_suffix}")
            fig_dist = create_line_plots(
                df_tab,
                params=PROCESS_PARAMS,
                product_label=title_suffix,
                time_col="Date_time",
            )
            st.pyplot(fig_dist, use_container_width=True)

            # 2️⃣ Outlier detection & visualisation (2x3 grid, matplotlib)
            st.markdown(f"#### 2️⃣ Outlier Detection & Visualisasi – {title_suffix}")
            fig_out, total_outliers, outlier_stats_df = identify_outliers(
                df_tab,
                PROCESS_PARAMS,
                product_label=title_suffix,
                time_col="Date_time",
            )
            st.pyplot(fig_out, use_container_width=True)

            st.caption(
                f"Total outliers terdeteksi: **{total_outliers}** data points "
                "(metode IQR per parameter)."
            )

            with st.expander("Klik untuk lihat detail outlier per parameter"):
                if not outlier_stats_df.empty:
                    st.dataframe(outlier_stats_df, use_container_width=True)
                else:
                    st.write("Tidak ada outlier terdeteksi untuk parameter yang dianalisis.")

            # 3️⃣ Statistical description table
            st.markdown(f"#### 3️⃣ Statistical Description – {title_suffix}")
            desc_df = compute_stats_table(df_tab, PROCESS_PARAMS, target_col=TARGET_COLUMN)

            if desc_df.empty:
                st.info("Tidak ada kolom numerik yang cukup untuk dihitung statistik deskriptif.")
            else:
                styled_desc = (
                    desc_df.style
                    .format("{:.3f}")
                    # axis=1 so each row is passed in and its index label
                    # ("min"/"max") can be matched; axis=0 would pass columns,
                    # whose names never equal "min"/"max".
                    .apply(highlight_min_max_rows, axis=1)
                )
                st.dataframe(styled_desc, use_container_width=True)

            # 4️⃣ Production segments per product
            st.markdown(f"#### 4️⃣ Segmen Produksi – {title_suffix}")

            if segments_df.empty or "Product" not in segments_df.columns:
                st.info("Segmen produksi tidak tersedia (kolom waktu/produk tidak lengkap).")
            else:
                if label == "All Data":
                    seg_to_show = segments_df.copy()
                else:
                    seg_to_show = segments_df[segments_df["Product"] == label].copy()

                if seg_to_show.empty:
                    st.info(f"Tidak ada segmen produksi untuk {title_suffix}.")
                else:
                    seg_to_show = seg_to_show.sort_values(["Product", "Start_Time"])
                    seg_to_show["Duration_Minutes"] = seg_to_show["Duration_Minutes"].round(1)
                    st.dataframe(
                        seg_to_show[["Product", "Start_Time", "End_Time", "Duration_Minutes", "Data_Points"]],
                        use_container_width=True,
                    )

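# --- Illustrative sketch (assumption; never called). identify_outliers lives in
# eda_functions; the caption above describes a per-parameter IQR method. This is
# a minimal version of that rule for one column. ---
def _example_iqr_outliers(series: pd.Series, k: float = 1.5) -> pd.Series:
    q1, q3 = series.quantile(0.25), series.quantile(0.75)
    iqr = q3 - q1
    # True where the value falls outside [Q1 - k*IQR, Q3 + k*IQR]
    return (series < q1 - k * iqr) | (series > q3 + k * iqr)
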
def disagregasi_data():
    st.subheader("5️⃣ Disagregasi Konsumsi Gas Spray Dryer")

    st.markdown(
        """
        Halaman ini digunakan untuk mengubah data konsumsi gas **per jam** menjadi
        data **per menit** menggunakan **disagregasi berbasis bobot indikator proses**
        (weight split proporsional).

        **Alur:**
        1. Upload file CSV (struktur sama dengan data training, minimal kolom:
           `Date_time`, `fixed_rounded_time`, `GAS_MMBTU`, dan 6 indikator proses).
        2. Klik tombol **Jalankan Proses Disagregasi Sekarang**.
        3. Sistem menjalankan pipeline disagregasi dan menampilkan ringkasan + hasil dalam bentuk CSV yang bisa diunduh.
        """
    )

    st.markdown("### 📂 Upload Data Sumber (Per Jam)")

    uploaded_file = st.file_uploader(
        "Unggah file CSV data konsumsi gas (per jam) untuk didisagregasi",
        type="csv",
        key="disagg_upload",
    )

    if uploaded_file is None:
        st.info(
            "Silakan unggah file CSV terlebih dahulu.\n\n"
            "Pastikan file memiliki kolom:\n"
            "- `Date_time`\n"
            "- `fixed_rounded_time` (timestamp jam, misal: 2025-03-18 01:00:00)\n"
            "- `GAS_MMBTU` (total gas per jam)\n"
            "- 6 indikator proses: `D101330TT`, `D102260TIC_CV`, `D102265TIC_PV`, "
            "`D102265TIC_CV`, `D102266TIC`, `D101264FTSCL`"
        )
        return

    # Main button
    run_btn = st.button(
        "🚀 Jalankan Proses Disagregasi Sekarang",
        type="primary",
        use_container_width=True,
        key="disagg_run_btn",
    )

    if not run_btn:
        return

    # Run the pipeline
    with st.spinner("Sedang melakukan disagregasi... Mohon tunggu 10–20 detik"):
        df_result, report = run_disagregasi_pipeline(uploaded_file, min_minutes_threshold=50)

    if df_result.empty:
        st.error("❌ Proses disagregasi gagal atau tidak ada jam yang memenuhi kriteria validasi.")
        reason = report.get("reason", "")
        if reason == "no_valid_hours":
            st.warning(
                "Tidak ada jam dengan jumlah menit ≥ ambang batas (default 50 menit). "
                "Silakan cek kembali data input."
            )
        return

    st.markdown("## ✅ SELESAI! Proses Disagregasi Berhasil")

    # ===============================
    # PIPELINE EXECUTION SUMMARY
    # ===============================
    st.markdown("### 📊 Ringkasan Eksekusi Pipeline")

    total_jam = report.get("total_jam_input", 0)
    jam_valid = report.get("jam_valid", 0)
    jam_tidak_valid = report.get("jam_tidak_valid", 0)
    total_baris_input = report.get("total_baris_input", 0)
    total_baris_diproses = report.get("total_baris_diproses", 0)
    persen_diproses = report.get("persentase_data_diproses", 0.0)
    total_selisih = report.get("total_selisih_disagregasi", 0.0)

    cols = st.columns(6)
    cols[0].metric("Total jam dalam dataset", f"{total_jam}")
    cols[1].metric("Jam valid (diproses)", f"{jam_valid}", "Success")
    cols[2].metric("Jam tidak valid (skip)", f"{jam_tidak_valid}", "Warning")
    cols[3].metric(
        "Total baris yang diproses",
        f"{total_baris_diproses:,} / {total_baris_input:,}"
    )
    cols[4].metric(
        "Persentase data yang digunakan",
        f"{persen_diproses:.2f}%",
        "Success" if persen_diproses >= 95 else ""
    )
    cols[5].metric(
        "Ak. disagregasi (total selisih)",
        f"{total_selisih:.10f}",
        "Success" if total_selisih < 1e-8 else "Periksa"
    )

    # ===============================
    # PROCESS STAGES
    # ===============================
    st.markdown("### 🧩 Tahap-Tahap Proses Disagregasi")

    # Stage 1: production-hour validation
    st.success("**Tahap 1: Validasi Jam Produksi**")
    st.write(
        f"- Total jam dalam dataset: **{total_jam} jam**\n"
        f"- Jam valid (diproses): **{jam_valid} jam**\n"
        f"- Jam tidak valid (di-skip karena durasi < 50 menit): **{jam_tidak_valid} jam**"
    )

    jam_tidak_valid_detail = report.get("jam_tidak_valid_detail")
    if jam_tidak_valid_detail is not None and not jam_tidak_valid_detail.empty:
        with st.expander("📋 Lihat Daftar Jam Tidak Valid"):
            jam_tidak_valid_detail_sorted = jam_tidak_valid_detail.sort_values(
                "Jumlah_Menit"
            )
            st.dataframe(jam_tidak_valid_detail_sorted, use_container_width=True)

    # Stages 2 & 3: weight computation & disaggregation
    st.info("**Tahap 2 & 3: Perhitungan Bobot & Disagregasi**")
    st.write(
        "- Indikator proses yang digunakan:\n"
        "  `D101330TT`, `D102260TIC_CV`, `D102265TIC_PV`, "
        "`D102265TIC_CV`, `D102266TIC`, `D101264FTSCL`"
    )
    weight_min = report.get("weight_min", None)
    weight_max = report.get("weight_max", None)
    if weight_min is not None and weight_max is not None:
        st.write(
            f"- Rentang bobot (w_m): **{weight_min:.4f} → {weight_max:.4f}**\n"
            "- Disagregasi selesai dilakukan untuk seluruh jam valid."
        )

    # Stage 4: accuracy validation
    st.success("**Tahap 4: Validasi Akurasi (Kunci Keberhasilan!)**")
    validation_df = report.get("validation_df")
    if validation_df is not None and not validation_df.empty:
        max_diff = validation_df["Difference"].abs().max()
        st.write(
            f"- Total jam divalidasi: **{len(validation_df)} jam**\n"
            f"- Total selisih (original vs hasil): **{total_selisih:.10f}**\n"
            f"- Selisih maksimum per jam: **{max_diff:.10f}**"
        )
        if total_selisih < 1e-8:
            st.success("→ **VALIDASI BERHASIL: Total gas terjaga SEMPURNA (akurasi ~100%)**")

        with st.expander("🔎 Lihat Laporan Validasi Lengkap per Jam"):
            st.dataframe(validation_df, use_container_width=True)

    # Stage 5: final output
    st.info("**Tahap 5: Output Final**")
    n_rows, n_cols = df_result.shape
    st.write(
        f"- Dimensi data hasil: **{n_rows:,} baris × {n_cols} kolom**\n"
        "- Kolom baru: `GAS_MMBTU_Disaggregated`\n"
        "- Semua kolom asli tetap dipertahankan."
    )

    # ===============================
    # Analysis of the disaggregation results
    # ===============================
    st.markdown("### 📈 Analisis Hasil Disagregasi")

    stats = report.get("gas_disagg_stats", None)
    if stats is not None:
        st.markdown("**Statistik Konsumsi Gas per Menit (Setelah Disagregasi)**")
        st.markdown(
            "```text\n"
            f"Mean : {stats['mean']:.3f} MMBTU/menit\n"
            f"Std  : {stats['std']:.3f}\n"
            f"Min  : {stats['min']:.3f}\n"
            f"25%  : {stats['25%']:.3f}\n"
            f"50%  : {stats['50%']:.3f}\n"
            f"75%  : {stats['75%']:.3f}\n"
            f"Max  : {stats['max']:.3f}\n"
            "```"
        )

    top_hours = report.get("top_hours", None)
    if top_hours is not None and not top_hours.empty:
        st.markdown("**10 Jam dengan Konsumsi Gas Tertinggi (Total per Jam)**")
        df_top = top_hours.reset_index()
        df_top.columns = ["fixed_rounded_time", "Total_GAS_MMBTU"]
        st.dataframe(df_top, use_container_width=True)

    zero_hours = report.get("zero_hours", None)
    if zero_hours is not None and not zero_hours.empty:
        st.markdown("**Jam dengan Konsumsi Gas = 0 (Kemungkinan Shutdown)**")
        df_zero = zero_hours.reset_index()
        df_zero.columns = ["fixed_rounded_time", "Total_GAS_MMBTU"]
        st.dataframe(df_zero, use_container_width=True)

    # ===============================
    # Action buttons
    # ===============================
    st.markdown("### 📥 Aksi Lanjutan")

    csv_bytes = df_result.to_csv(index=False).encode("utf-8")
    st.download_button(
        "📥 Download Data Hasil Disagregasi (CSV)",
        data=csv_bytes,
        file_name="hasil_disagregasi_spraydryer.csv",
        mime="text/csv",
        use_container_width=True,
    )

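# --- Illustrative sketch (assumption; never called). run_disagregasi_pipeline
# lives in Disagregasi_mmbtu; the page above describes a proportional weight
# split of each hour's GAS_MMBTU across its minutes. This is a minimal version
# of that idea, where the per-minute weight definition (row mean of the six
# indicators) is a hypothetical stand-in for the real pipeline's w_m. ---
def _example_weight_split(df: pd.DataFrame) -> pd.DataFrame:
    out = df.copy()
    # Hypothetical per-minute weight w_m
    out["_w"] = out[INPUT_FEATURES].mean(axis=1)
    hour_w = out.groupby("fixed_rounded_time")["_w"].transform("sum")
    hour_gas = out.groupby("fixed_rounded_time")["GAS_MMBTU"].transform("first")
    # Each minute gets its proportional share, so the hourly totals are preserved
    out["GAS_MMBTU_Disaggregated"] = hour_gas * out["_w"] / hour_w
    return out.drop(columns="_w")
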
# NOTE: this definition is shadowed by the second `filter_rule_engine` below;
# Python keeps only the later definition, so this one is never called.
def filter_rule_engine():
    st.title("Filter Data dengan Rule Engine")
    st.markdown("**Upload file hasil disagregasi → Otomatis deteksi & hapus anomali → Download data bersih**")

    uploaded_file = st.file_uploader("Upload file CSV hasil disagregasi", type="csv")

    if uploaded_file is not None:
        df = pd.read_csv(uploaded_file)
        st.success(f"File berhasil diupload: {len(df):,} baris")

        if st.button("Jalankan Rule Engine", type="primary", use_container_width=True):
            with st.spinner("Mendeteksi anomali..."):
                df_clean, df_anomalies, summary = apply_rule_engine(df)

            st.success("Rule Engine selesai!")
            st.balloons()

            # --- Summary ---
            col1, col2, col3, col4 = st.columns(4)
            col1.metric("Total Baris Awal", f"{summary['total_rows_initial']:,}")
            col2.metric("Baris Bersih", f"{summary['total_rows_clean']:,}")
            col3.metric("Anomali Dihapus", f"{summary['total_anomalies']:,}",
                        f"-{summary['anomaly_percentage']}%")
            col4.metric("Data Bersih", f"{100 - summary['anomaly_percentage']:.2f}%")

            # --- Anomaly details ---
            st.subheader("Detail Jenis Anomali yang Dihapus")
            breakdown_df = pd.DataFrame([
                {"Jenis Anomali": reason, "Jumlah": count, "Persentase": f"{count/len(df)*100:.2f}%"}
                for reason, count in summary['anomaly_breakdown'].items()
            ])
            st.dataframe(breakdown_df, use_container_width=True, hide_index=True)

            # --- Download ---
            csv_clean = df_clean.to_csv(index=False).encode()
            csv_anomalies = df_anomalies.to_csv(index=False).encode()

            col1, col2 = st.columns(2)
            with col1:
                st.download_button(
                    "Download Data Bersih (Siap Modelling)",
                    csv_clean,
                    "data_bersih_spray_dryer.csv",
                    "text/csv"
                )
            with col2:
                st.download_button(
                    "Download Data Anomali (untuk Review)",
                    csv_anomalies,
                    "data_anomali_dihapus.csv",
                    "text/csv"
                )

            with st.expander("Lihat contoh baris yang dihapus"):
                st.dataframe(df_anomalies[['Date_time', 'anomaly_reason']].head(20))

1469
+ def filter_rule_engine():
1470
+ st.subheader("6️⃣ Filter Data dengan Rule Engine – Deteksi & Penghapusan Anomali")
1471
+ st.caption("Memastikan data berkualitas yang digunakan untuk analisis dan modelling.")
1472
+
1473
+ st.markdown(
1474
+ """
1475
+ Halaman ini digunakan untuk melakukan **pembersihan data otomatis** menggunakan
1476
+ **Rule Engine** berbasis domain knowledge, dengan input berupa
1477
+ **file hasil disagregasi** (sudah memiliki kolom `GAS_MMBTU_Disaggregated`).
1478
+ """
1479
+ )
1480
+
1481
+ st.markdown("### 📂 Upload Data Hasil Disagregasi")
1482
+ uploaded_file = st.file_uploader(
1483
+ "Upload file CSV hasil disagregasi (per menit, sudah ada kolom GAS_MMBTU_Disaggregated)",
1484
+ type="csv",
1485
+ key="rule_engine_upload",
1486
+ )
1487
+
1488
+ if uploaded_file is None:
1489
+ st.info(
1490
+ "Silakan upload file CSV hasil disagregasi terlebih dahulu.\n\n"
1491
+ "Pastikan minimal ada kolom:\n"
1492
+ "- `Date_time`\n"
1493
+ "- `Product`\n"
1494
+ "- `GAS_MMBTU_Disaggregated`\n"
1495
+ "- Parameter proses utama: `D101330TT`, `D102260TIC_CV`, `D102265TIC_PV`, "
1496
+ "`D102265TIC_CV`, `D102266TIC`, `D101264FTSCL`, `D101463PIC_PV`"
1497
+ )
1498
+ return
1499
+
1500
+ run_btn = st.button(
1501
+ "🚦 Jalankan Rule Engine",
1502
+ type="primary",
1503
+ use_container_width=True,
1504
+ key="rule_engine_run_btn",
1505
+ )
1506
+
1507
+ if not run_btn:
1508
+ return
1509
+
1510
+ # ---------------------------------------------------
1511
+ # Jalankan Rule Engine
1512
+ # ---------------------------------------------------
1513
+ try:
1514
+ df_input = pd.read_csv(uploaded_file)
1515
+ except Exception as e:
1516
+ st.error(f"❌ Gagal membaca file CSV: {e}")
1517
+ return
1518
+
1519
+ with st.spinner("Mendeteksi anomali dan melakukan pembersihan data..."):
1520
+ df_clean, df_anomalies, summary = apply_rule_engine(df_input)
1521
+
1522
+ st.success("Rule Engine selesai dijalankan! Data siap digunakan untuk analisis & modelling.")
1523
+ st.balloons()
1524
+
1525
+ total_initial = summary.get("total_rows_initial", len(df_input))
1526
+ total_after = summary.get("total_rows_after_filter", len(df_clean))
1527
+ total_removed = summary.get("total_rows_removed", total_initial - total_after)
1528
+ percent_clean = summary.get("percent_clean", (total_after / total_initial * 100 if total_initial > 0 else 0.0))
1529
+ num_anomaly_types = summary.get(
+ "num_anomaly_types",
+ df_anomalies["anomaly_reason"].nunique() if "anomaly_reason" in df_anomalies.columns else 0,
+ )
1530
+ cip_removed = summary.get("cip_rows_removed", 0)
1531
+ rule_removed = summary.get("rule_rows_removed", total_removed - cip_removed)
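# Catatan editor: pemakaian .get() dengan nilai fallback di atas membuat halaman
# tetap berfungsi meskipun apply_rule_engine tidak mengembalikan semua kunci
# ringkasan (kunci yang hilang dihitung ulang dari nilai yang tersedia).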
1532
+
1533
+ # ---------------------------------------------------
1534
+ # Ringkasan Hasil Pembersihan Data (5 metrik)
1535
+ # ---------------------------------------------------
1536
+ st.markdown("### 📊 Ringkasan Hasil Pembersihan Data")
1537
+
1538
+ m1, m2, m3, m4, m5 = st.columns(5)
1539
+
1540
+ m1.metric("Total baris awal", f"{total_initial:,}")
1541
+ m2.metric("Total baris setelah filter", f"{total_after:,}")
1542
+ m3.metric("Total baris dihapus (termasuk CIP)", f"{total_removed:,}")
1543
+ m4.metric("Jumlah jenis anomali", f"{num_anomaly_types}")
1544
+ m5.metric(
1545
+ "Total baris dihapus tanpa CIP",
1546
+ f"{rule_removed:,}",
1547
+ delta=f"{cip_removed:,} baris CIP",
+ )
1548
+
1549
+
1550
+ # ---------------------------------------------------
1551
+ # Detail Anomali yang Terdeteksi
1552
+ # ---------------------------------------------------
1553
+ st.markdown("### 🧾 Detail Anomali yang Terdeteksi")
1554
+
1555
+ if df_anomalies.empty:
1556
+ st.info("Tidak ada baris yang dihapus oleh rule engine. Semua data dianggap bersih.")
1557
+ else:
1558
+ if "anomaly_reason" not in df_anomalies.columns:
1559
+ st.warning("Kolom 'anomaly_reason' tidak ditemukan di df_anomalies. Breakdown jenis anomali tidak dapat ditampilkan.")
1560
+ else:
1561
+ # Group by jenis anomali
1562
+ rows = []
1563
+ grouped = df_anomalies.groupby("anomaly_reason")
1564
+ for i, (reason, g) in enumerate(grouped, start=1):
1565
+ count = len(g)
1566
+ pct = (count / total_initial * 100) if total_initial > 0 else 0.0
1567
+
1568
+ # Contoh baris (tanggal/waktu) – ambil sampai 3
1569
+ if "Date_time" in g.columns:
1570
+ dt_series = pd.to_datetime(g["Date_time"], errors="coerce").dropna()
1571
+ examples = ", ".join(dt_series.astype(str).head(3).tolist())
1572
+ else:
1573
+ examples = "-"
1574
+
1575
+ rows.append({
1576
+ "No": i,
1577
+ "Jenis Anomali": reason,
1578
+ "Jumlah Baris": count,
1579
+ "Persentase": f"{pct:.2f}%",
1580
+ "Contoh Baris (Tanggal/Waktu)": examples,
1581
+ "Status": "Dihapus",
1582
+ })
1583
+
1584
+ detail_df = pd.DataFrame(rows)
1585
+
1586
+ st.dataframe(
1587
+ detail_df,
1588
+ use_container_width=True,
1589
+ hide_index=True,
1590
+ )
1591
+
1592
+ # ---------------------------------------------------
1593
+ # Tombol Aksi (Download)
1594
+ # ---------------------------------------------------
1595
+ st.markdown("### 📥 Aksi Lanjutan")
1596
+
1597
+ col_dl1, col_dl2 = st.columns(2)
1598
+ csv_clean = df_clean.to_csv(index=False).encode("utf-8")
1599
+ csv_anom = df_anomalies.to_csv(index=False).encode("utf-8")
1600
+
1601
+ with col_dl1:
1602
+ st.download_button(
1603
+ "📥 Download Data Bersih (Siap Modelling)",
1604
+ data=csv_clean,
1605
+ file_name="data_bersih_spray_dryer_rule_engine.csv",
1606
+ mime="text/csv",
1607
+ use_container_width=True,
1608
+ )
1609
+
1610
+ with col_dl2:
1611
+ st.download_button(
1612
+ "📥 Download Data Anomali yang Dihapus",
1613
+ data=csv_anom,
1614
+ file_name="data_anomali_spray_dryer_rule_engine.csv",
1615
+ mime="text/csv",
1616
+ use_container_width=True,
1617
+ )
1618
+
1619
+ # ---------------------------------------------------
1620
+ # Expander: Lihat Semua Baris yang Dihapus
1621
+ # ---------------------------------------------------
1622
+ with st.expander("🔍 Lihat Semua Baris yang Dihapus (Detail Anomali)"):
1623
+ if df_anomalies.empty:
1624
+ st.write("Tidak ada baris yang dihapus.")
1625
+ else:
1626
+ cols_to_show = ["Date_time", "Product", "anomaly_reason"]
1627
+ cols_existing = [c for c in cols_to_show if c in df_anomalies.columns]
1628
+ other_cols = [c for c in df_anomalies.columns if c not in cols_existing]
1629
+
1630
+ st.dataframe(
1631
+ df_anomalies[cols_existing + other_cols],
1632
+ use_container_width=True,
1633
+ )
1634
+
1635
+
1636
+ def main():
1637
+ # Judul besar sistem (selalu tampil di atas)
1638
+ st.title("Sistem Prediksi dan Rekomendasi Parameter Berdasarkan Input Gas Consumption (MMBTU)")
1639
+
1640
+ st.caption(
1641
+ "Platform internal untuk memprediksi konsumsi gas dan merekomendasikan parameter proses "
1642
+ "berdasarkan histori operasi spray dryer."
1643
+ )
1644
+
1645
+ # Sidebar navigasi utama
1646
+ with st.sidebar:
1647
+ st.header("📂 Menu Utama")
1648
+ menu = st.radio(
1649
+ "Pilih Halaman",
1650
+ [
1651
+ "1. Prediksi Gas dari 6 Parameter",
1652
+ "2. Prediksi Parameter dari Gas (MMBTU)",
1653
+ "3. Monitoring Model",
1654
+ "4. EDA",
1655
+ "5. Disagregasi Data",
1656
+ "6. Filter Rule Engine"
1657
+ ]
1658
+ )
1659
+
1660
+ # Routing: panggil fungsi halaman sesuai pilihan menu di sidebar
1661
+ if menu.startswith("1."):
1662
+ page_prediksi_gas_dari_6_parameter()
1663
+ elif menu.startswith("2."):
1664
+ page_prediksi_parameter_dari_gas()
1665
+ elif menu.startswith("3."):
1666
+ page_monitoring_model()
1667
+ elif menu.startswith("4."):
1668
+ page_eda()
1669
+ elif menu.startswith("5."):
1670
+ disagregasi_data()
1671
+ elif menu.startswith("6."):
1672
+ filter_rule_engine()
1673
+
1674
+ if __name__ == "__main__":
1675
+ main()
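Catatan editor: routing `menu.startswith(...)` pada `main()` dapat juga ditulis sebagai pemetaan dict. Sketsa minimal berikut hanyalah ilustrasi dan mengasumsikan fungsi-fungsi halaman yang sudah didefinisikan di Dashboard.py:

```
# Sketsa alternatif routing (ilustrasi, bukan bagian commit).
PAGES = {
    "1.": page_prediksi_gas_dari_6_parameter,
    "2.": page_prediksi_parameter_dari_gas,
    "3.": page_monitoring_model,
    "4.": page_eda,
    "5.": disagregasi_data,
    "6.": filter_rule_engine,
}

def route(menu: str) -> None:
    # Ambil prefix "N." dari label menu, lalu panggil fungsi halaman terkait
    PAGES[menu.split(" ")[0]]()
```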
Disagregasi_mmbtu.py ADDED
@@ -0,0 +1,321 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ from sklearn.preprocessing import MinMaxScaler
4
+ import warnings
5
+ warnings.filterwarnings('ignore')
6
+
7
+ def pipeline_disagregasi(Data_Terbaru, min_minutes_threshold=50):
8
+ """
9
+ Pipeline untuk disagregasi data dengan validasi jam produksi
10
+
11
+ Parameters:
12
+ -----------
13
+ Data_Terbaru : DataFrame
14
+ DataFrame input dengan kolom yang diperlukan
15
+ min_minutes_threshold : int
16
+ Minimum jumlah menit per jam untuk proses disagregasi (default: 50)
17
+
18
+ Returns:
19
+ --------
20
+ df_disaggregated_final : DataFrame
21
+ DataFrame hasil disagregasi
22
+ validation_report : dict
23
+ Laporan validasi dan statistik
24
+ """
25
+
26
+ print("="*60)
27
+ print("PIPELINE DISAGREGASI DATA DENGAN VALIDASI JAM PRODUKSI")
28
+ print("="*60)
29
+
30
+ # Simpan daftar kolom asli untuk memastikan semuanya dipertahankan
31
+ original_cols = list(Data_Terbaru.columns)
32
+
33
+ # ========================================
34
+ # TAHAP 1: VALIDASI JAM PRODUKSI
35
+ # ========================================
36
+ print("\n--- TAHAP 1: Validasi Jam Produksi ---")
37
+
38
+ # Hitung jumlah data (menit) per jam
39
+ jumlah_data_per_jam = Data_Terbaru.groupby('fixed_rounded_time').size()
40
+ jumlah_data_per_jam_df = jumlah_data_per_jam.reset_index(name='Jumlah_Menit')
41
+
42
+ # Identifikasi jam yang valid (>= min_minutes_threshold menit)
43
+ jam_valid = jumlah_data_per_jam_df[jumlah_data_per_jam_df['Jumlah_Menit'] >= min_minutes_threshold]['fixed_rounded_time'].tolist()
44
+ jam_tidak_valid = jumlah_data_per_jam_df[jumlah_data_per_jam_df['Jumlah_Menit'] < min_minutes_threshold]
45
+
46
+ print(f"Total jam dalam dataset: {len(jumlah_data_per_jam_df)}")
47
+ print(f"Jam valid (>= {min_minutes_threshold} menit): {len(jam_valid)}")
48
+ print(f"Jam tidak valid (< {min_minutes_threshold} menit): {len(jam_tidak_valid)}")
49
+
50
+ # Tampilkan detail jam yang tidak valid
51
+ if len(jam_tidak_valid) > 0:
52
+ print("\nDetail jam yang TIDAK akan diproses:")
53
+ print(jam_tidak_valid.sort_values('Jumlah_Menit')[['fixed_rounded_time', 'Jumlah_Menit']].to_string(index=False))
54
+ else:
55
+ print("\nSemua jam valid untuk diproses!")
56
+
57
+ # Filter data hanya untuk jam yang valid
58
+ df_work = Data_Terbaru[Data_Terbaru['fixed_rounded_time'].isin(jam_valid)].copy()
59
+
60
+ if df_work.empty:
61
+ print("\n⚠️ PERINGATAN: Tidak ada data yang memenuhi kriteria validasi!")
62
+ return pd.DataFrame(), {"status": "failed", "reason": "no_valid_hours"}
63
+
64
+ print(f"\nData yang akan diproses: {len(df_work)} baris dari {len(Data_Terbaru)} baris total")
65
+ print(f"Persentase data yang diproses: {len(df_work)/len(Data_Terbaru)*100:.2f}%")
66
+
67
+ # ========================================
68
+ # TAHAP 2: PERHITUNGAN BOBOT INDIKATOR
69
+ # ========================================
70
+ print("\n--- TAHAP 2: Perhitungan Bobot Indikator ---")
71
+
72
+ indicator_cols = [
73
+ "D101330TT", "D102260TIC_CV", "D102265TIC_PV",
74
+ "D102265TIC_CV", "D102266TIC", "D101264FTSCL"
75
+ ]
76
+
77
+ # Handle missing values
78
+ df_work[indicator_cols] = df_work[indicator_cols].fillna(0)
79
+
80
+ # Normalisasi menggunakan MinMaxScaler
81
+ scaled_cols = [col + '_scaled' for col in indicator_cols]
82
+ scaler = MinMaxScaler()
83
+ df_work[scaled_cols] = scaler.fit_transform(df_work[indicator_cols])
84
+
85
+ # Hitung bobot total per baris
86
+ df_work['w_m'] = df_work[scaled_cols].sum(axis=1)
87
+
88
+ print(f"Indikator yang digunakan: {', '.join(indicator_cols)}")
89
+ print(f"Range bobot (w_m): Min={df_work['w_m'].min():.4f}, Max={df_work['w_m'].max():.4f}")
90
+
91
+ # ========================================
92
+ # TAHAP 3: APLIKASI ALGORITMA DISAGREGASI
93
+ # ========================================
94
+ print("\n--- TAHAP 3: Aplikasi Algoritma Disagregasi ---")
95
+
96
+ # Hitung total bobot per jam
97
+ total_weight_per_block = df_work.groupby('fixed_rounded_time')['w_m'].transform('sum')
98
+ total_weight_per_block[total_weight_per_block == 0] = 1
99
+
100
+ # Hitung proporsi share
101
+ df_work['proportional_share'] = df_work['w_m'] / total_weight_per_block
102
+
103
+ # Ambil nilai GAS_MMBTU total per jam
104
+ gas_total_per_block = df_work.groupby('fixed_rounded_time')['GAS_MMBTU'].transform('first')
105
+
106
+ # Hitung GAS_MMBTU yang sudah didisagregasi
107
+ df_work['GAS_MMBTU_Disaggregated'] = gas_total_per_block * df_work['proportional_share']
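# Ilustrasi numerik (catatan editor, bukan bagian pipeline): jika satu jam berisi
# 3 menit dengan w_m = [1, 3, 0] dan GAS_MMBTU jam itu = 8, maka proportional_share
# = [0.25, 0.75, 0.0] sehingga hasil disagregasinya [2.0, 6.0, 0.0]; jumlah per jam
# tetap 8, artinya total gas terjaga.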
108
+
109
+ # Handle kasus khusus: jam dengan total bobot = 0
110
+ zero_weight_blocks = df_work.groupby('fixed_rounded_time')['w_m'].sum()
111
+ zero_weight_blocks = zero_weight_blocks[zero_weight_blocks == 0].index
112
+
113
+ if not zero_weight_blocks.empty:
114
+ print(f"⚠️ Ditemukan {len(zero_weight_blocks)} jam dengan total bobot = 0")
115
+ print(" Menggunakan distribusi merata untuk jam tersebut")
116
+
117
+ for block_time in zero_weight_blocks:
118
+ mask = df_work['fixed_rounded_time'] == block_time
119
+ gas_value = df_work.loc[mask, 'GAS_MMBTU'].iloc[0]
120
+ count_in_block = mask.sum()
121
+ df_work.loc[mask, 'GAS_MMBTU_Disaggregated'] = gas_value / count_in_block if count_in_block > 0 else 0
122
+
123
+ print("✓ Disagregasi selesai dilakukan")
124
+
125
+ # ========================================
126
+ # TAHAP 4: VALIDASI HASIL DISAGREGASI
127
+ # ========================================
128
+ print("\n--- TAHAP 4: Validasi Hasil Disagregasi ---")
129
+
130
+ # Bandingkan total per jam
131
+ original_total = df_work.groupby('fixed_rounded_time')['GAS_MMBTU'].first()
132
+ disaggregated_total = df_work.groupby('fixed_rounded_time')['GAS_MMBTU_Disaggregated'].sum()
133
+
134
+ validation_df = pd.DataFrame({
135
+ 'Original_Total': original_total,
136
+ 'Disaggregated_Total': disaggregated_total,
137
+ 'Difference': original_total - disaggregated_total
138
+ })
139
+
140
+ # Statistik validasi
141
+ max_diff = validation_df['Difference'].abs().max()
142
+ total_diff = validation_df['Difference'].abs().sum()
143
+
144
+ print(f"Jumlah jam yang divalidasi: {len(validation_df)}")
145
+ print(f"Total selisih absolut: {total_diff:.10f}")
146
+ print(f"Selisih maksimum: {max_diff:.10f}")
147
+
148
+ if total_diff < 1e-8:
149
+ print("✓ Validasi BERHASIL: Total gas terjaga dengan sempurna")
150
+ else:
151
+ print("⚠️ PERINGATAN: Terdapat selisih kecil dalam disagregasi")
152
+
153
+ # Tampilkan 5 jam dengan selisih terbesar
154
+ if max_diff > 1e-10:
155
+ print("\n5 Jam dengan selisih terbesar:")
156
+ top_diff = validation_df.nlargest(5, 'Difference')[['Original_Total', 'Disaggregated_Total', 'Difference']]
157
+ print(top_diff.to_string())
158
+
159
+ # ========================================
160
+ # TAHAP 5: PERSIAPAN OUTPUT FINAL
161
+ # ========================================
162
+ print("\n--- TAHAP 5: Persiapan Output Final ---")
163
+
164
+ # --- PERUBAHAN KUNCI ---
165
+ # Daripada menghapus kolom, kita secara eksplisit memilih semua kolom asli
166
+ # ditambah kolom hasil disagregasi yang baru. Ini memastikan semua
167
+ # kolom lain yang tidak terpakai tetap ada di hasil akhir.
168
+
169
+ # Tentukan daftar kolom final
170
+ final_cols = original_cols + ['GAS_MMBTU_Disaggregated']
171
+ # Hapus duplikat jika 'GAS_MMBTU_Disaggregated' sudah ada
172
+ final_cols = list(dict.fromkeys(final_cols))
173
+
174
+ # Buat dataframe final dengan memilih kolom yang relevan dari df_work
175
+ df_disaggregated_final = df_work[final_cols]
176
+
177
+ print(f"Dimensi data final: {df_disaggregated_final.shape}")
178
+ print("Kolom-kolom asli yang tidak digunakan dalam proses telah berhasil dipertahankan.")
179
+
180
+ # ========================================
181
+ # LAPORAN RINGKASAN
182
+ # ========================================
183
+ print("\n" + "="*60)
184
+ print("RINGKASAN PIPELINE")
185
+ print("="*60)
186
+
187
+ validation_report = {
188
+ "total_jam_input": len(jumlah_data_per_jam_df),
189
+ "jam_valid": len(jam_valid),
190
+ "jam_tidak_valid": len(jam_tidak_valid),
191
+ "total_baris_input": len(Data_Terbaru),
192
+ "total_baris_diproses": len(df_work),
193
+ "persentase_data_diproses": len(df_work)/len(Data_Terbaru)*100 if len(Data_Terbaru) > 0 else 0,
194
+ "total_selisih_disagregasi": total_diff,
195
+ "jam_dengan_bobot_nol": len(zero_weight_blocks) if not zero_weight_blocks.empty else 0,
196
+ "validation_df": validation_df,
197
+ "jam_tidak_valid_detail": jam_tidak_valid,
198
+ "weight_min": float(df_work["w_m"].min()),
199
+ "weight_max": float(df_work["w_m"].max())
200
+ }
201
+
202
+ print(f"• Total jam input: {validation_report['total_jam_input']}")
203
+ print(f"• Jam valid untuk disagregasi: {validation_report['jam_valid']}")
204
+ print(f"• Jam tidak valid (skip): {validation_report['jam_tidak_valid']}")
205
+ print(f"• Total baris yang diproses: {validation_report['total_baris_diproses']:,} dari {validation_report['total_baris_input']:,}")
206
+ print(f"• Persentase data diproses: {validation_report['persentase_data_diproses']:.2f}%")
207
+ print(f"• Akurasi disagregasi (total selisih): {validation_report['total_selisih_disagregasi']:.10f}")
208
+
209
+ print("\n✅ Pipeline selesai dijalankan!")
210
+
211
+ return df_disaggregated_final, validation_report
212
+
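Sketsa pemakaian minimal `pipeline_disagregasi` (catatan editor; data sintetis satu jam hanya untuk ilustrasi, bukan bagian modul):

```
import numpy as np
import pandas as pd

menit = pd.date_range("2025-01-01 00:00", periods=60, freq="min")
df_demo = pd.DataFrame({
    "fixed_rounded_time": menit.floor("h"),
    "GAS_MMBTU": 8.0,  # total gas jam tersebut, diulang di tiap baris menit
    "D101330TT": np.random.uniform(92, 99, 60),
    "D102260TIC_CV": np.random.uniform(35, 80, 60),
    "D102265TIC_PV": np.random.uniform(160, 195, 60),
    "D102265TIC_CV": np.random.uniform(10, 70, 60),
    "D102266TIC": np.random.uniform(15, 22, 60),
    "D101264FTSCL": np.random.uniform(3300, 4900, 60),
})
df_out, laporan = pipeline_disagregasi(df_demo, min_minutes_threshold=50)
# Total per jam harus sama dengan GAS_MMBTU aslinya
assert abs(df_out["GAS_MMBTU_Disaggregated"].sum() - 8.0) < 1e-6
```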
213
+
214
+ # ========================================
215
+ # FUNGSI UTILITAS TAMBAHAN
216
+ # ========================================
217
+
218
+ def analyze_disagregation_results(df_result, validation_report):
219
+ """
220
+ Analisis mendalam hasil disagregasi
221
+ """
222
+ print("\n" + "="*60)
223
+ print("ANALISIS HASIL DISAGREGASI")
224
+ print("="*60)
225
+
226
+ # Cek jika df_result kosong
227
+ if df_result.empty:
228
+ print("Tidak ada hasil untuk dianalisis.")
229
+ return None
230
+
231
+ # Statistik GAS_MMBTU sebelum dan sesudah
232
+ print("\n📊 Statistik GAS_MMBTU_Disaggregated:")
233
+ print(df_result['GAS_MMBTU_Disaggregated'].describe())
234
+
235
+ # Distribusi per jam
236
+ print("\n📊 Distribusi data per jam (top 10):")
237
+ hourly_stats = df_result.groupby('fixed_rounded_time').agg({
238
+ 'GAS_MMBTU_Disaggregated': ['sum', 'mean', 'std', 'count']
239
+ }).round(4)
240
+ print(hourly_stats.head(10))
241
+
242
+ # Jam dengan nilai ekstrem
243
+ print("\n⚠️ Jam dengan total GAS tertinggi:")
244
+ top_hours = df_result.groupby('fixed_rounded_time')['GAS_MMBTU_Disaggregated'].sum().nlargest(5)
245
+ print(top_hours)
246
+
247
+ print("\n⚠️ Jam dengan total GAS terendah:")
248
+ bottom_hours = df_result.groupby('fixed_rounded_time')['GAS_MMBTU_Disaggregated'].sum().nsmallest(5)
249
+ print(bottom_hours)
250
+
251
+ return hourly_stats
252
+
253
+ def run_disagregasi_pipeline(file_obj, min_minutes_threshold=50):
254
+ """
255
+ Wrapper yang dipakai dashboard Streamlit.
256
+ - file_obj bisa berupa path string atau UploadedFile dari Streamlit.
257
+ - Mengembalikan: (df_hasil_disagregasi, validation_report_diperluas)
258
+ """
259
+ # Baca CSV dari file_obj (path string atau UploadedFile)
260
+ # pd.read_csv menerima keduanya secara langsung, tanpa perlu percabangan
+ df_input = pd.read_csv(file_obj)
265
+
266
+ # Jalankan pipeline utama
267
+ df_hasil, laporan = pipeline_disagregasi(
268
+ Data_Terbaru=df_input,
269
+ min_minutes_threshold=min_minutes_threshold
270
+ )
271
+
272
+ # Jika hasil kosong → langsung kembalikan
273
+ if df_hasil.empty:
274
+ return df_hasil, laporan
275
+
276
+ # Tambah statistik GAS_MMBTU_Disaggregated untuk kebutuhan dashboard
277
+ if "GAS_MMBTU_Disaggregated" in df_hasil.columns:
278
+ stats = df_hasil["GAS_MMBTU_Disaggregated"].describe()
279
+ laporan["gas_disagg_stats"] = stats
280
+
281
+ # Total per jam
282
+ hourly_total = (
283
+ df_hasil
284
+ .groupby("fixed_rounded_time")["GAS_MMBTU_Disaggregated"]
285
+ .sum()
286
+ )
287
+
288
+ # 10 jam dengan konsumsi gas tertinggi
289
+ laporan["top_hours"] = hourly_total.sort_values(ascending=False).head(10)
290
+
291
+ # Jam dengan total GAS = 0 (kemungkinan shutdown)
292
+ laporan["zero_hours"] = hourly_total[hourly_total == 0.0]
293
+
294
+ return df_hasil, laporan
295
+
296
+
297
+
298
+ # ========================================
299
+ # CARA PENGGUNAAN
300
+ # ========================================
301
+
302
+ # Ganti dengan path file Anda yang sebenarnya
303
+ if __name__ == "__main__":
304
+ # Jalankan pipeline utama (mode CLI / testing)
305
+ df_hasil, laporan = pipeline_disagregasi(
306
+ Data_Terbaru=pd.read_csv("/work/Dataset 18 Mar - 19 Jun/Processed Data Pipeline EDA_10_17_2025.csv"),
307
+ min_minutes_threshold=50
308
+ )
309
+
310
+ # Analisis hasil (opsional)
311
+ stats = analyze_disagregation_results(df_hasil, laporan)
312
+
313
+ # Simpan hasil ke CSV (opsional)
314
+ df_hasil = df_hasil[~df_hasil['Product'].isin(['CIP', 'CIP CHAMBER'])]
315
+ df_hasil.to_csv('/work/Dataset 18 Mar - 19 Jun/disagregasi_data_spraydryer_terbaru_10_17_2025.csv', index=False)
316
+
317
+ # Akses detail validasi
318
+ print(laporan['validation_df'])
319
+ print(laporan['jam_tidak_valid_detail'])
320
+
321
+
Hasil_Inverse_Model.csv ADDED
@@ -0,0 +1,145 @@
1
+ Converged,D101264FTSCL,D101330TT,D102260TIC_CV,D102265TIC_CV,D102265TIC_PV,D102266TIC,Error,Error_Pct,Iterations,Level,Objective_Value,Predicted_MMBTU,Product,Soft_Violations,Target_MMBTU,prediction
2
+ ,3955.77,93.24,53.44,31.11,174.33,18.5,0.000392,,,,,,CKR BASE,,0.2667,0.267092
3
+ ,3952.31,93.32,60.19,31.64,174.19,18.53,0.000064,,,,,,CKR BASE,,0.27,0.269936
4
+ ,3970.4,93.0,60.64,32.82,174.33,18.53,0.003857,,,,,,CKR BASE,,0.2733,0.277157
5
+ ,4290.9,94.53,62.77,47.4,180.93,16.04,0.017178,,,,,,CKR BASE,,0.2767,0.293878
6
+ ,4431.08,94.49,60.99,46.49,181.08,16.44,0.003841,,,,,,CKR BASE,,0.28,0.283841
7
+ ,4457.67,94.32,60.39,46.73,181.46,16.4,0.000053,,,,,,CKR BASE,,0.2833,0.283353
8
+ ,4476.58,94.4,61.71,44.93,180.09,16.38,0.000087,,,,,,CKR BASE,,0.2867,0.286613
9
+ ,4221.75,94.28,60.94,45.09,185.49,16.22,0.000004,,,,,,CKR BASE,,0.29,0.290004
10
+ ,4156.33,95.47,65.84,45.4,184.37,16.13,0.000005,,,,,,CKR BASE,,0.2933,0.293295
11
+ ,4314.51,94.52,64.5,46.42,185.9,16.27,0.000061,,,,,,CKR BASE,,0.2967,0.296761
12
+ ,4195.16,94.31,67.84,52.73,180.15,16.11,0.000067,,,,,,CKR BASE,,0.3,0.299933
13
+ ,4140.37,97.51,67.45,51.3,187.75,17.61,0.000041,0.01,,,,,CKR BASE,,0.3033,0.303292
14
+ ,4360.28,96.65,66.48,50.23,187.79,17.35,0.000072,0.02,,,,,CKR BASE,,0.3067,0.306738
15
+ ,4408.62,93.81,62.23,59.33,189.59,17.48,0.000096,0.03,,,,,CKR BASE,,0.31,0.310096
16
+ ,4468.98,97.68,59.9,54.51,194.32,19.25,0.000009,0.00,,,,,CKR BASE,,0.3133,0.313342
17
+ ,4481.76,92.15,67.27,55.53,193.24,19.06,0.000001,0.00,,,,,CKR BASE,,0.3333,0.333332
18
+ ,4833.92,93.0,64.14,54.36,190.49,19.39,0.000010,0.00,,,,,CKR BASE,,0.3367,0.33671
19
+ ,4675.53,93.3,66.21,53.31,188.33,18.82,0.000035,0.01,,,,,CKR BASE,,0.34,0.340035
20
+ ,4587.25,92.91,66.09,53.97,189.73,18.78,0.000035,0.01,,,,,CKR BASE,,0.3467,0.346743
21
+ ,4589.9,92.78,65.33,55.22,189.54,19.54,0.000002,0.00,,,,,CKR BASE,,0.3433,0.343332
22
+ ,4615.33,92.4,65.76,54.24,190.33,18.83,0.004873,1.39,,,,,CKR BASE,,0.35,0.345127
23
+ ,4195.164127,94.31367329,67.83752807,52.72994004,180.1513437,16.11178086,6.67453E-05,,,,,,CKR BASE,,0.3,0.299933255
24
+ ,4345.077523,95.73564815,61.94377257,47.97807791,178.5670734,15.94367712,0.015211162,,,,,,CKR BASE,,0.278,0.293211162
25
+ ,3714.134992,94.12849457,50.19545981,24.45841829,176.6102062,17.71849347,0.041373372,,,,,,CKR BASE,,0.25,0.291373372
26
+ ,4329.033348,96.36362196,61.44665789,47.72381537,177.4725578,16.00013482,0.017786913,,,,,,CKR BASE,,0.276,0.293786913
27
+ ,4221.747978,94.27552004,60.94445271,45.09113873,185.4924565,16.22248105,4.49181E-06,,,,,,CKR BASE,,0.29,0.290004492
28
+ ,4492.864459,94.21253004,61.40316114,46.06813204,180.9288264,16.4082628,6.45216E-05,,,,,,CKR BASE,,0.287,0.286935478
29
+ ,4474.17151,94.69035643,62.42412584,46.69251985,185.2116553,16.41205744,5.86555E-05,,,,,,CKR BASE,,0.289,0.289058656
30
+ ,3941.755261,93.92589611,44.02782054,21.35222818,174.9003476,18.31076668,0.034599949,,,,,,CKR BASE,,0.23,0.264599949
31
+ ,3935.359007,93.82213382,44.06991256,21.19780621,174.4379727,18.34101723,0.030370471,,,,,,CKR BASE,,0.234,0.264370471
32
+ ,3934.465917,94.2948149,44.12207343,21.16965631,174.5110539,18.33528665,0.054408622,,,,,,CKR BASE,,0.212,0.266408622
33
+ ,4496.939745,94.48287836,60.40561786,46.98256551,179.586896,16.3735464,7.61817E-05,,,,,,CKR BASE,,0.2865,0.286576182
34
+ ,3939.045858,94.37142197,44.09776324,21.14782051,174.5390788,18.32985818,0.056263753,,,,,,CKR BASE,,0.21,0.266263753
35
+ ,3960.72,96.25,50.99,18.7,170.17,18.36,0.000049,,,,,,CMR BASE,,0.2667,0.266749
36
+ ,4022.34,97.5,50.49,18.54,176.04,16.45,0.000016,,,,,,CMR BASE,,0.27,0.269984
37
+ ,4112.43,98.52,55.82,21.49,169.98,16.67,0.000025,,,,,,CMR BASE,,0.2733,0.273275
38
+ ,3995.18,96.52,52.26,36.78,172.76,18.57,0.00008,,,,,,CMR BASE,,0.2767,0.27662
39
+ ,3920.17,95.81,55.88,38.57,172.86,18.57,0.000011,,,,,,CMR BASE,,0.28,0.279989
40
+ ,4212.96,98.78,53.91,24.09,173.13,19.01,0.000019,,,,,,CMR BASE,,0.2833,0.283281
41
+ ,4018.11,97.85,61.8,28.33,178.51,16.55,0.000051,,,,,,CMR BASE,,0.2867,0.286751
42
+ ,4234.78,98.53,61.17,23.86,175.44,16.48,0.000078,,,,,,CMR BASE,,0.29,0.290078
43
+ ,4074.51,98.44,63.57,26.17,183.23,18.89,0.000091,,,,,,CMR BASE,,0.2933,0.293391
44
+ ,4280.19,96.32,61.46,24.87,185.23,19.01,0.000091,,,,,,CMR BASE,,0.2967,0.296609
45
+ ,4275.9,93.46,60.26,29.42,184.92,18.92,0.000063,,,,,,CMR BASE,,0.3,0.300063
46
+ ,4460.72,94.73,61.74,31.95,184.81,17.66,0.000002,,,,,,CMR BASE,,0.3033,0.303298
47
+ ,4423.21,92.59,61.8,29.02,181.42,18.31,0.00004,,,,,,CMR BASE,,0.3067,0.30666
48
+ ,4105.81,95.59,56.25,30.73,177.18,17.26,0.000075,,,,,,CMR BASE,,0.31,0.310075
49
+ ,4511.25,98.45,53.97,27.77,185.56,17.09,0.000093,,,,,,CMR BASE,,0.3133,0.313393
50
+ ,4545.64,95.05,56.61,34.31,178.17,17.13,0.000099,,,,,,CMR BASE,,0.3167,0.316799
51
+ ,4387.78,95.76,51.89,27.58,181.26,17.22,0.00004,,,,,,CMR BASE,,0.32,0.32004
52
+ ,4247.24,95.56,50.73,36.75,182.35,17.12,0.000026,,,,,,CMR BASE,,0.3233,0.323326
53
+ ,4320.47,94.24,53.0,35.12,181.15,17.63,0.000008,,,,,,CMR BASE,,0.3267,0.326692
54
+ ,4280.32,97.17,51.85,37.78,180.91,17.3,0.000049,,,,,,CMR BASE,,0.33,0.330049
55
+ ,4292.95,92.68,54.28,34.86,181.28,17.64,0.002914,,,,,,CMR BASE,,0.3333,0.330386
56
+ ,4323.88,92.73,54.1,31.21,181.95,17.63,0.008765,,,,,,CMR BASE,,0.3367,0.327935
57
+ ,4371.36,93.22,54.18,31.47,182.42,17.58,0.012532,,,,,,CMR BASE,,0.34,0.327468
58
+ ,4404.97,92.77,54.08,31.66,183.11,17.58,0.01776,,,,,,CMR BASE,,0.3433,0.32554
59
+ ,4530.4,95.43,54.19,31.63,183.68,17.53,0.022474,,,,,,CMR BASE,,0.3467,0.324226
60
+ ,4511.7,95.23,54.68,31.56,184.31,17.54,0.028114,,,,,,CMR BASE,,0.35,0.321886
61
+ ,4232.09,92.9,55.2,40.6,173.35,17.42,0.012035,,,,,,CKP BASE,,0.2667,0.278735
62
+ ,4225.85,95.07,56.02,39.07,181.37,16.01,0.006521,,,,,,CKP BASE,,0.27,0.276521
63
+ ,4314.5,95.08,56.01,39.72,181.97,16.02,0.001893,,,,,,CKP BASE,,0.2733,0.275193
64
+ ,4320.76,94.88,63.23,41.57,173.55,17.38,0.000048,,,,,,CKP BASE,,0.2767,0.276652
65
+ ,4275.47,98.23,58.08,45.22,181.94,15.87,0.000073,,,,,,CKP BASE,,0.28,0.280073
66
+ ,4325.12,98.2,55.74,43.9,183.04,16.17,0.000049,,,,,,CKP BASE,,0.2833,0.283349
67
+ ,4336.85,94.77,57.4,39.75,177.04,16.14,0.000028,,,,,,CKP BASE,,0.2867,0.286672
68
+ ,4223.68,96.0,62.08,40.75,182.92,16.27,0.000075,,,,,,CKP BASE,,0.29,0.289925
69
+ ,4102.85,97.25,65.7,40.94,184.35,17.51,0.000013,,,,,,CKP BASE,,0.2933,0.293313
70
+ ,4180.6,93.34,61.73,40.57,177.51,17.58,0.000077,,,,,,CKP BASE,,0.2967,0.296623
71
+ ,4184.98,95.03,58.75,40.35,186.57,17.36,0.000099,,,,,,CKP BASE,,0.3,0.299901
72
+ ,4445.89,94.24,63.47,47.46,181.55,18.3,0.000063,,,,,,CKP BASE,,0.3033,0.303237
73
+ ,4277.34,93.85,64.3,43.57,182.0,17.39,0.000008,,,,,,CKP BASE,,0.3067,0.306708
74
+ ,4453.7,98.33,63.97,38.56,181.43,18.48,0.000086,,,,,,CKP BASE,,0.31,0.310086
75
+ ,4773.75,95.2,63.56,39.53,180.62,18.81,0.000038,,,,,,CKP BASE,,0.3133,0.313262
76
+ ,4535.66,97.4,62.11,45.25,179.82,17.31,0.000039,,,,,,CKP BASE,,0.3167,0.316739
77
+ ,4584.37,93.98,64.3,38.7,182.31,18.53,0.000034,,,,,,CKP BASE,,0.32,0.320034
78
+ ,4546.56,93.87,57.79,45.99,188.63,17.82,0.00006,,,,,,CKP BASE,,0.3233,0.32324
79
+ ,4870.81,97.41,55.3,37.72,182.27,18.6,0.000093,,,,,,CKP BASE,,0.3267,0.326607
80
+ ,4867.39,93.27,58.38,38.59,186.96,17.98,0.000099,,,,,,CKP BASE,,0.33,0.329901
81
+ ,4541.39,94.08,57.45,41.44,187.13,18.6,0.00005,,,,,,CKP BASE,,0.3333,0.33335
82
+ ,4410.88,93.19,59.46,44.06,183.72,18.67,0.000094,,,,,,CKP BASE,,0.3367,0.336794
83
+ ,4425.0,95.6,58.1,42.62,186.65,19.0,0.000053,,,,,,CKP BASE,,0.34,0.340053
84
+ ,4430.36,92.7,66.4,42.52,193.34,18.99,0.000018,,,,,,CKP BASE,,0.3433,0.343318
85
+ ,4532.0,93.35,59.67,39.58,186.57,18.67,0.000086,,,,,,CKP BASE,,0.3467,0.346614
86
+ ,4809.65,92.92,65.24,46.53,188.16,18.94,0.000047,,,,,,CKP BASE,,0.35,0.349953
87
+ ,4218.146647,95.08684851,56.00051072,39.59284696,181.3237826,16.0520592,0.007520789,,,,,,CKP BASE,,0.269,0.276520789
88
+ ,3795.55,98.05,55.61,28.68,181.44,18.16,0.000083,,,,,,MORIGRO BASE,,0.2667,0.266617
89
+ ,3694.83,92.72,56.65,29.32,181.89,18.19,0.000038,,,,,,MORIGRO BASE,,0.27,0.269962
90
+ ,3784.48,98.26,55.43,32.16,181.93,18.18,0.000071,,,,,,MORIGRO BASE,,0.2733,0.273229
91
+ ,3729.28,94.49,56.11,32.26,182.14,18.18,0.000015,,,,,,MORIGRO BASE,,0.2767,0.276685
92
+ ,3694.71,94.5,56.59,30.27,184.71,18.2,0.00003,,,,,,MORIGRO BASE,,0.28,0.28003
93
+ ,3741.95,94.61,63.73,39.42,182.76,18.28,0.000001,,,,,,MORIGRO BASE,,0.2833,0.283299
94
+ ,3770.44,92.65,59.31,31.7,183.89,18.16,0.00001,,,,,,MORIGRO BASE,,0.2867,0.28671
95
+ ,3860.63,94.59,63.26,38.77,183.82,18.29,0.000075,,,,,,MORIGRO BASE,,0.29,0.289925
96
+ ,3838.07,92.09,63.31,38.71,185.39,18.29,0.000069,,,,,,MORIGRO BASE,,0.2933,0.293231
97
+ ,3943.6,94.41,55.48,38.91,182.92,17.89,0.000009,,,,,,MORIGRO BASE,,0.2967,0.296709
98
+ ,3754.46,94.68,60.21,41.18,185.66,18.26,0.000057,,,,,,MORIGRO BASE,,0.3,0.300057
99
+ ,4006.48,93.89,60.87,43.27,188.25,18.27,0.00003,,,,,,MORIGRO BASE,,0.3033,0.30333
100
+ ,3887.14,92.38,59.76,42.61,185.05,18.28,0.000068,,,,,,MORIGRO BASE,,0.3067,0.306632
101
+ ,3803.81,92.58,57.93,44.17,187.97,17.76,0.000048,,,,,,MORIGRO BASE,,0.31,0.310048
102
+ ,3776.87,96.95,63.09,44.77,187.02,18.25,0.000063,,,,,,MORIGRO BASE,,0.3133,0.313237
103
+ ,3814.03,96.55,58.76,35.79,186.12,18.23,0.000011,,,,,,MORIGRO BASE,,0.3167,0.316711
104
+ ,3824.88,94.02,57.86,41.7,188.3,17.58,0.000003,,,,,,MORIGRO BASE,,0.32,0.320003
105
+ ,3864.91,94.68,60.99,44.18,188.13,17.39,0.000034,,,,,,MORIGRO BASE,,0.3233,0.323334
106
+ ,3860.49,97.0,61.18,44.95,186.93,18.15,0.000038,,,,,,MORIGRO BASE,,0.3267,0.326662
107
+ ,4014.13,92.26,61.19,43.86,188.73,17.72,0.000002,,,,,,MORIGRO BASE,,0.33,0.329998
108
+ ,4008.89,94.22,64.65,43.83,186.2,16.94,0.000038,,,,,,MORIGRO BASE,,0.3333,0.333262
109
+ ,4061.17,93.44,61.93,45.06,187.11,17.9,0.000015,,,,,,MORIGRO BASE,,0.3367,0.336715
110
+ ,3849.15,94.57,62.98,38.43,188.74,17.77,0.000036,,,,,,MORIGRO BASE,,0.34,0.339964
111
+ ,4053.06,93.45,58.44,40.78,186.31,17.88,0.000059,,,,,,MORIGRO BASE,,0.3433,0.343241
112
+ ,4049.82,93.53,60.48,41.4,186.96,17.48,0.000028,,,,,,MORIGRO BASE,,0.3467,0.346728
113
+ ,3942.5,93.26,60.25,44.66,186.89,17.59,0.000084,,,,,,MORIGRO BASE,,0.35,0.349916
114
+ ,3662.153746,92.87885955,56.44008445,37.16931959,182.1015547,18.23756242,1.50828E-05,,,,,,MORIGRO BASE,,0.278,0.277984917
115
+ ,3583.481721,94.61536745,47.72407145,33.93406928,184.1877979,18.15319544,9.45765E-05,,,,,,MORIGRO BASE,,0.22,0.220094576
116
+ ,4004.1,95.82,53.85,17.76,173.9,18.84,0.000001,,,,,,BMR BASE,,0.2667,0.266701
117
+ ,3889.04,92.86,56.08,16.61,176.49,18.39,0.000064,,,,,,BMR BASE,,0.27,0.270064
118
+ ,3977.01,94.89,50.45,17.26,172.84,17.88,0.000028,,,,,,BMR BASE,,0.2733,0.273328
119
+ ,3975.74,96.97,58.0,20.65,176.81,17.91,0.000046,,,,,,BMR BASE,,0.2767,0.276654
120
+ ,4118.13,97.06,59.87,21.46,179.1,17.95,0.000034,,,,,,BMR BASE,,0.28,0.279966
121
+ ,4036.57,94.61,57.52,22.4,175.41,17.96,0.000047,,,,,,BMR BASE,,0.2833,0.283253
122
+ ,3957.07,94.96,59.03,25.83,180.33,18.06,0.000019,,,,,,BMR BASE,,0.2867,0.286719
123
+ ,4012.08,95.13,55.6,23.87,173.89,18.25,0.000079,,,,,,BMR BASE,,0.29,0.290079
124
+ ,4006.01,97.24,59.59,26.92,182.22,18.13,0.000009,,,,,,BMR BASE,,0.2933,0.293291
125
+ ,4095.45,94.26,57.81,22.97,179.89,18.12,0.000078,,,,,,BMR BASE,,0.2967,0.296778
126
+ ,4118.56,95.79,53.04,24.09,182.91,17.94,0.000025,,,,,,BMR BASE,,0.3,0.299975
127
+ ,4173.75,93.33,46.73,25.32,180.23,17.22,0.000026,,,,,,BMR BASE,,0.3033,0.303326
128
+ ,4261.18,93.59,48.9,25.45,179.85,17.07,0.000032,,,,,,BMR BASE,,0.3067,0.306668
129
+ ,3859.52,94.76,51.81,23.39,181.66,17.29,0.000027,,,,,,BMR BASE,,0.31,0.310027
130
+ ,3980.3,94.28,51.82,24.51,180.67,17.15,0.000088,,,,,,BMR BASE,,0.3133,0.313212
131
+ ,4016.03,93.24,55.74,29.26,186.03,18.0,0.000033,,,,,,BMR BASE,,0.3167,0.316667
132
+ ,4130.35,93.03,52.02,28.41,180.54,17.21,0.000099,,,,,,BMR BASE,,0.32,0.319901
133
+ ,4060.71,95.76,53.21,31.61,190.97,17.24,0.000055,,,,,,BMR BASE,,0.3233,0.323355
134
+ ,4100.03,95.47,59.71,28.84,181.87,17.08,0.000005,,,,,,BMR BASE,,0.3267,0.326695
135
+ ,4076.16,98.89,58.05,26.81,185.79,17.13,0.000095,,,,,,BMR BASE,,0.33,0.329905
136
+ ,4218.2,94.07,53.28,29.11,187.83,17.04,0.00003,,,,,,BMR BASE,,0.3333,0.33333
137
+ ,4083.73,96.28,52.63,27.47,189.31,16.92,0.000009,,,,,,BMR BASE,,0.3367,0.336709
138
+ ,4131.45,92.68,52.91,29.2,187.46,17.19,0.000004,,,,,,BMR BASE,,0.34,0.339996
139
+ ,4273.64,92.39,52.52,32.09,188.33,17.57,0.000054,,,,,,BMR BASE,,0.3433,0.343354
140
+ ,4173.31,94.49,53.5,27.82,191.52,16.45,0.000034,,,,,,BMR BASE,,0.3467,0.346734
141
+ ,4037.2,97.81,57.66,30.44,186.81,17.41,0.000089,,,,,,BMR BASE,,0.35,0.350089
142
+ 0.0,3954.9311079077775,94.45976055285084,48.6254940047852,19.32750786639565,172.50786350143548,18.96226889703589,5.821013450624246e-05,0.0216394552067815,100.0,menengah,0.0,0.2689417898654938,BMR BASE,,0.269,
143
+ 1.0,4208.694512984114,92.90771191305322,56.013646682243845,28.699410878647587,190.6548727013951,16.751303269434544,1.3470649717906014e-07,3.741847143862782e-05,38.0,tinggi,0.0,0.3599998652935028,BMR BASE,,0.36,
144
+ 1.0,3905.2788959322415,94.51705193020004,44.83638640133726,20.8867494655602,174.5534716840486,18.29888208092669,0.07266210317611693,36.33105158805847,100.0,rendah,0.0052797812379766,0.2726621031761169,CKR BASE,,0.2,
145
+ 1.0,3958.483778501467,93.25064970149444,53.72320503170738,31.112422878232174,174.36778855806637,18.497168825666424,0.0020924067497253285,0.789587452726539,52.0,menengah,4.378166006296113e-06,0.26709240674972534,CKR BASE,,0.265,
Inverse_Model.py ADDED
@@ -0,0 +1,440 @@
1
+ # Inverse_Model.py
2
+ import numpy as np
3
+ import pandas as pd
4
+ import joblib
5
+ from scipy.optimize import differential_evolution
6
+
7
+ # =========================================================
8
+ # KONFIGURASI GLOBAL (TIDAK ADA STREAMLIT DI FILE INI)
9
+ # =========================================================
10
+
11
+ # List produk yang digunakan untuk inverse model dan dapat juga di-import ke Dashboard
12
+ AVAILABLE_PRODUCTS = ["BMR BASE", "CKP BASE", "CKR BASE", "CMR BASE", "MORIGRO BASE"]
13
+
14
+ # Batas global parameter (dipakai di perhitungan bounds)
15
+ PARAMS_BOUNDS = {
16
+ "D101330TT": (92, 99),
17
+ "D102260TIC_CV": (35, 80),
18
+ "D102265TIC_PV": (160, 195),
19
+ "D102265TIC_CV": (10, 70),
20
+ "D102266TIC": (15, 22),
21
+ "D101264FTSCL": (3300, 4900),
22
+ }
23
+
24
+ # Konfigurasi per-produk: GAS range, korelasi, dan BINNING_DATA
25
+ PRODUCT_CONFIG = {
26
+ "CKR BASE": {
27
+ "gas_min": 0.20,
28
+ "gas_max": 0.35,
29
+ "param_corr": {
30
+ "D101330TT": "negatif",
31
+ "D102260TIC_CV": "positif",
32
+ "D102265TIC_PV": "positif",
33
+ "D102265TIC_CV": "positif",
34
+ "D102266TIC": "netral",
35
+ "D101264FTSCL": "positif",
36
+ },
37
+ "binning": {
38
+ (0.20, 0.275): {
39
+ "D101330TT": (92.01, 95.14),
40
+ "D102260TIC_CV": (44.0, 70.0),
41
+ "D102265TIC_PV": (173.96, 193.58),
42
+ "D102265TIC_CV": (19.68, 52.23),
43
+ "D102266TIC": (17.42, 18.59),
44
+ "D101264FTSCL": (3710.76, 4690.91),
45
+ },
46
+ (0.275, 0.35): {
47
+ "D101330TT": (92.01, 98.31),
48
+ "D102260TIC_CV": (38.0, 68.0),
49
+ "D102265TIC_PV": (171.59, 194.97),
50
+ "D102265TIC_CV": (15.29, 63.62),
51
+ "D102266TIC": (15.94, 19.59),
52
+ "D101264FTSCL": (3496.96, 4888.82),
53
+ },
54
+ },
55
+ },
56
+ "BMR BASE": {
57
+ "gas_min": 0.20,
58
+ "gas_max": 0.375,
59
+ "param_corr": {
60
+ "D101330TT": "netral",
61
+ "D102260TIC_CV": "positif",
62
+ "D102265TIC_PV": "positif",
63
+ "D102265TIC_CV": "positif",
64
+ "D102266TIC": "netral",
65
+ "D101264FTSCL": "positif",
66
+ },
67
+ "binning": {
68
+ (0.20, 0.275): {
69
+ "D101330TT": (92.62, 97.05),
70
+ "D102260TIC_CV": (38, 62),
71
+ "D102265TIC_PV": (171.64, 190.05),
72
+ "D102265TIC_CV": (14.47, 24.46),
73
+ "D102266TIC": (17.01, 18.98),
74
+ "D101264FTSCL": (3633.08, 4125.52),
75
+ },
76
+ (0.275, 0.375): {
77
+ "D101330TT": (92.23, 98.96),
78
+ "D102260TIC_CV": (36.0, 60.0),
79
+ "D102265TIC_PV": (171.64, 192.53),
80
+ "D102265TIC_CV": (11.75, 33.91),
81
+ "D102266TIC": (16.16, 18.43),
82
+ "D101264FTSCL": (3535.08, 4283.65),
83
+ },
84
+ },
85
+ },
86
+ "CKP BASE": {
87
+ "gas_min": 0.18,
88
+ "gas_max": 0.375,
89
+ "param_corr": {
90
+ "D101330TT": "netral",
91
+ "D102260TIC_CV": "positif",
92
+ "D102265TIC_PV": "positif",
93
+ "D102265TIC_CV": "positif",
94
+ "D102266TIC": "netral",
95
+ "D101264FTSCL": "positif",
96
+ },
97
+ "binning": {
98
+ (0.18, 0.28): {
99
+ "D101330TT": (92.01, 98.83),
100
+ "D102260TIC_CV": (36, 68),
101
+ "D102265TIC_PV": (168.11, 194.97),
102
+ "D102265TIC_CV": (13.99, 49.36),
103
+ "D102266TIC": (15.83, 18.84),
104
+ "D101264FTSCL": (3632.62, 4890.58),
105
+ },
106
+ (0.28, 0.38): {
107
+ "D101330TT": (92.01, 99.00),
108
+ "D102260TIC_CV": (38, 68),
109
+ "D102265TIC_PV": (169.50, 194.97),
110
+ "D102265TIC_CV": (13.93, 49.36),
111
+ "D102266TIC": (15.86, 19.02),
112
+ "D101264FTSCL": (3658.91, 4890.58),
113
+ },
114
+ },
115
+ },
116
+ "CMR BASE": {
117
+ "gas_min": 0.19,
118
+ "gas_max": 0.375,
119
+ "param_corr": {
120
+ "D101330TT": "netral",
121
+ "D102260TIC_CV": "positif",
122
+ "D102265TIC_PV": "positif",
123
+ "D102265TIC_CV": "positif",
124
+ "D102266TIC": "netral",
125
+ "D101264FTSCL": "positif",
126
+ },
127
+ "binning": {
128
+ (0.19, 0.275): {
129
+ "D101264FTSCL": (3618.73, 4539.96),
130
+ "D101330TT": (92.1, 98.91),
131
+ "D102260TIC_CV": (38, 62),
132
+ "D102265TIC_CV": (15.3, 26.01),
133
+ "D102265TIC_PV": (163.14, 192.25),
134
+ "D102266TIC": (16.35, 19.55),
135
+ },
136
+ (0.275, 0.375): {
137
+ "D101264FTSCL": (3445.31, 4684.92),
138
+ "D101330TT": (92.06, 99.0),
139
+ "D102260TIC_CV": (36, 64),
140
+ "D102265TIC_CV": (14.75, 39.87),
141
+ "D102265TIC_PV": (162.09, 191.96),
142
+ "D102266TIC": (16.2, 19.55),
143
+ },
144
+ },
145
+ },
146
+ "MORIGRO BASE": {
147
+ "gas_min": 0.12,
148
+ "gas_max": 0.375,
149
+ "param_corr": {
150
+ "D101330TT": "netral",
151
+ "D102260TIC_CV": "positif",
152
+ "D102265TIC_PV": "positif",
153
+ "D102265TIC_CV": "positif",
154
+ "D102266TIC": "netral",
155
+ "D101264FTSCL": "positif",
156
+ },
157
+ "binning": {
158
+ (0.12, 0.28): {
159
+ "D101264FTSCL": (3437.81, 3922.18),
160
+ "D101330TT": (92.01, 98.78),
161
+ "D102260TIC_CV": (36, 70),
162
+ "D102265TIC_CV": (20.0, 42.95),
163
+ "D102265TIC_PV": (179.98, 194.97),
164
+ "D102266TIC": (17.3, 18.32),
165
+ },
166
+ (0.28, 0.375): {
167
+ "D101264FTSCL": (3389.88, 4072.64),
168
+ "D101330TT": (92.01, 97.27),
169
+ "D102260TIC_CV": (38, 66),
170
+ "D102265TIC_CV": (19.65, 45.87),
171
+ "D102265TIC_PV": (180.32, 189.0),
172
+ "D102266TIC": (16.91, 18.32),
173
+ },
174
+ },
175
+ },
176
+ }
177
+
178
+ # =========================================================
179
+ # FUNGSI MODEL & OPTIMISASI (BACKEND)
180
+ # =========================================================
181
+
182
+ def load_model(model_path: str):
183
+ """Load forward model XGBoost + poly_transformer dari checkpoint."""
184
+ deployment_bundle = joblib.load(model_path)
185
+ return (
186
+ deployment_bundle["model"],
187
+ deployment_bundle["poly_transformer"],
188
+ deployment_bundle["input_features"],
189
+ deployment_bundle["poly_feature_names"],
190
+ )
191
+
192
+ def predict_mmbtu(params_array, model, poly_transformer, input_features, poly_feature_names):
193
+ """Prediksi GAS_MMBTU dari array parameter."""
194
+ params_dict = dict(zip(input_features, params_array))
195
+ X = pd.DataFrame([params_dict])[input_features]
196
+ X_poly = poly_transformer.transform(X)
197
+ X_poly_df = pd.DataFrame(X_poly, columns=poly_feature_names)
198
+ return float(model.predict(X_poly_df)[0])
199
+
200
+ def get_operational_bounds(target_mmbtu: float, binning: dict):
201
+ """
202
+ Ambil bounds operasional dari BINNING_DATA terdekat.
203
+ - Jika target di dalam salah satu bin => pakai bin itu
204
+ - Jika di bawah minimum => pakai bin pertama
205
+ - Jika di atas maksimum => pakai bin terakhir
206
+ """
207
+ bins_sorted = sorted(binning.keys(), key=lambda x: x[0]) # sort by lower bound
208
+
209
+ for (lo, hi) in bins_sorted:
210
+ if lo <= target_mmbtu <= hi:
211
+ return binning[(lo, hi)]
212
+
213
+ # fallback
214
+ if target_mmbtu < bins_sorted[0][0]:
215
+ return binning[bins_sorted[0]]
216
+ else:
217
+ return binning[bins_sorted[-1]]
218
+
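Perilaku pemilihan bin dan fallback-nya dapat diperiksa lewat sketsa kecil berikut (catatan editor; nilai binning hanya contoh):

```
binning_demo = {
    (0.20, 0.275): {"D101330TT": (92.0, 95.1)},
    (0.275, 0.35): {"D101330TT": (92.0, 98.3)},
}
print(get_operational_bounds(0.25, binning_demo))  # di dalam bin pertama
print(get_operational_bounds(0.10, binning_demo))  # di bawah minimum -> bin pertama
print(get_operational_bounds(0.40, binning_demo))  # di atas maksimum -> bin terakhir
```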
219
+ def calculate_bounds(target_mmbtu, input_features, product_cfg):
220
+ """
221
+ Hitung hard_bounds & soft_bounds untuk satu produk dan satu target MMBTU.
222
+ """
223
+ gas_min = product_cfg["gas_min"]
224
+ gas_max = product_cfg["gas_max"]
225
+ binning = product_cfg["binning"]
226
+ param_corr = product_cfg["param_corr"]
227
+
228
+ # Step 1: SP_target dan level
229
+ gas_range = gas_max - gas_min
230
+ sp_target = (target_mmbtu - gas_min) / gas_range
231
+ sp_target = float(np.clip(sp_target, 0, 1))
232
+ sp_inverse = 1.0 - sp_target
233
+
234
+ if sp_target < 0.33:
235
+ level = "rendah"
236
+ elif sp_target < 0.67:
237
+ level = "menengah"
238
+ else:
239
+ level = "tinggi"
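# Contoh perhitungan (catatan editor): untuk CKR BASE (gas_min=0.20, gas_max=0.35)
# dengan target 0.30, sp_target = (0.30 - 0.20) / 0.15 ≈ 0.667, tepat di bawah
# ambang 0.67, sehingga level jatuh ke "menengah".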
240
+
241
+ # Step 2: hard bounds dari binning
242
+ operational_bounds = get_operational_bounds(target_mmbtu, binning)
243
+ hard_bounds = {}
244
+ soft_bounds = {}
245
+
246
+ # Step 3: soft bounds per parameter
247
+ for param in input_features:
248
+ keras_min, keras_max = operational_bounds[param]
249
+ keras_range = keras_max - keras_min
250
+ hard_bounds[param] = (keras_min, keras_max)
251
+
252
+ korelasi = param_corr.get(param, "netral")
253
+
254
+ # SP relevan tergantung korelasi
255
+ sp = sp_inverse if korelasi == "negatif" else sp_target
256
+
257
+ # Target ideal global dari global bounds
258
+ min_global, max_global = PARAMS_BOUNDS[param]
259
+ range_global = max_global - min_global
260
+ target_ideal_global = min_global + (sp * range_global)
261
+
262
+ # Jika target ideal global masih within bin => pakai
263
+ if keras_min <= target_ideal_global <= keras_max:
264
+ target_ideal = target_ideal_global
265
+ else:
266
+ target_ideal = keras_min + (sp * keras_range)
267
+
268
+ buffer = 0.2 * keras_range # 20% dari range
269
+
270
+ if korelasi == "netral":
271
+ ideal_min = keras_min
272
+ ideal_max = keras_max
273
+ elif level == "rendah":
274
+ if korelasi == "positif":
275
+ ideal_min = keras_min
276
+ ideal_max = target_ideal + buffer
277
+ else: # negatif
278
+ ideal_min = target_ideal - buffer
279
+ ideal_max = keras_max
280
+ elif level == "menengah":
281
+ ideal_min = target_ideal - buffer
282
+ ideal_max = target_ideal + buffer
283
+ else: # tinggi
284
+ if korelasi == "positif":
285
+ ideal_min = target_ideal - buffer
286
+ ideal_max = keras_max
287
+ else: # negatif
288
+ ideal_min = keras_min
289
+ ideal_max = target_ideal + buffer
290
+
291
+ ideal_min = max(ideal_min, keras_min)
292
+ ideal_max = min(ideal_max, keras_max)
293
+ soft_bounds[param] = (ideal_min, ideal_max)
294
+
295
+ return hard_bounds, soft_bounds, level
296
+
297
+ def objective_function(
298
+ params_array,
299
+ target_mmbtu,
300
+ model,
301
+ poly_transformer,
302
+ input_features,
303
+ poly_feature_names,
304
+ hard_bounds,
305
+ soft_bounds,
306
+ ):
307
+ """Fungsi objektif untuk Differential Evolution."""
308
+ prediction = predict_mmbtu(params_array, model, poly_transformer, input_features, poly_feature_names)
309
+ error_pred = (prediction - target_mmbtu) ** 2
310
+
311
+ total_penalty = 0.0
312
+ for i, param in enumerate(input_features):
313
+ value = params_array[i]
314
+ ideal_min, ideal_max = soft_bounds[param]
315
+ keras_min, keras_max = hard_bounds[param]
316
+ param_range = keras_max - keras_min
317
+
318
+ violation = 0.0
319
+ if value < ideal_min:
320
+ violation = ideal_min - value
321
+ elif value > ideal_max:
322
+ violation = value - ideal_max
323
+
324
+ if param_range > 0:
325
+ total_penalty += (violation / param_range)
326
+
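# Catatan editor: blok di bawah membuat fungsi objektif bekerja dua fase. Begitu
# prediksi sudah berada dalam toleransi target, optimizer hanya meminimalkan
# penalti soft bounds sehingga solusi ditarik ke rentang operasional ideal.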
327
+ ERROR_THRESHOLD = 1e-4
328
+ if abs(prediction - target_mmbtu) < ERROR_THRESHOLD:
329
+ return total_penalty
330
+ else:
331
+ return error_pred + total_penalty
332
+
333
+ def optimize_one_target(
334
+ target_mmbtu,
335
+ model,
336
+ poly_transformer,
337
+ input_features,
338
+ poly_feature_names,
339
+ product_cfg,
340
+ maxiter=100,
341
+ popsize=30,
342
+ ):
343
+ """Optimasi inverse model untuk satu nilai target MMBTU."""
344
+ hard_bounds, soft_bounds, level = calculate_bounds(target_mmbtu, input_features, product_cfg)
345
+ optimizer_bounds = [hard_bounds[param] for param in input_features]
346
+
347
+ def obj_wrapper(params_array):
348
+ return objective_function(
349
+ params_array,
350
+ target_mmbtu,
351
+ model,
352
+ poly_transformer,
353
+ input_features,
354
+ poly_feature_names,
355
+ hard_bounds,
356
+ soft_bounds,
357
+ )
358
+
359
+ result = differential_evolution(
360
+ func=obj_wrapper,
361
+ bounds=optimizer_bounds,
362
+ strategy="best1bin",
363
+ maxiter=maxiter,
364
+ popsize=popsize,
365
+ tol=1e-4,
366
+ mutation=(0.5, 1),
367
+ recombination=0.7,
368
+ seed=42,
369
+ polish=True,
370
+ atol=1e-6,
371
+ disp=False,
372
+ )
373
+
374
+ optimal_params = dict(zip(input_features, result.x))
375
+ final_pred = predict_mmbtu(result.x, model, poly_transformer, input_features, poly_feature_names)
376
+
377
+ violations = []
378
+ for param, value in optimal_params.items():
379
+ ideal_min, ideal_max = soft_bounds[param]
380
+ if not (ideal_min <= value <= ideal_max):
381
+ violations.append(param)
382
+
383
+ return {
384
+ "target": float(target_mmbtu),
385
+ "level": level,
386
+ "optimal_params": optimal_params,
387
+ "prediction": float(final_pred),
388
+ "error": abs(final_pred - target_mmbtu),
389
+ "error_pct": abs(final_pred - target_mmbtu) / target_mmbtu * 100.0,
390
+ "objective_value": float(result.fun),
391
+ "converged": bool(result.success),
392
+ "iterations": int(result.nit),
393
+ "soft_violations": violations,
394
+ "hard_bounds": hard_bounds,
395
+ "soft_bounds": soft_bounds,
396
+ }
397
+
398
+ def run_inverse_for_targets(model_path, product_name, targets):
399
+ """
400
+ Wrapper: load model dan jalankan optimasi untuk list target.
401
+ Dipanggil dari Dashboard.
402
+ """
403
+ product_cfg = PRODUCT_CONFIG[product_name]
404
+ model, poly_transformer, input_features, poly_feature_names = load_model(model_path)
405
+
406
+ results = []
407
+ for t in targets:
408
+ res = optimize_one_target(
409
+ target_mmbtu=t,
410
+ model=model,
411
+ poly_transformer=poly_transformer,
412
+ input_features=input_features,
413
+ poly_feature_names=poly_feature_names,
414
+ product_cfg=product_cfg,
415
+ maxiter=100,
416
+ popsize=30,
417
+ )
418
+ results.append(res)
419
+ return results
420
+
421
+ def results_to_dataframe(results, product_name):
422
+ """Convert list of result dicts menjadi DataFrame flat untuk ditampilkan / disimpan."""
423
+ rows = []
424
+ for r in results:
425
+ base = {
426
+ "Product": product_name,
427
+ "Target_MMBTU": r["target"],
428
+ "Level": r["level"],
429
+ "Predicted_MMBTU": r["prediction"],
430
+ "Error": r["error"],
431
+ "Error_Pct": r["error_pct"],
432
+ "Objective_Value": r["objective_value"],
433
+ "Converged": r["converged"],
434
+ "Iterations": r["iterations"],
435
+ "Soft_Violations": ", ".join(r["soft_violations"]) if r["soft_violations"] else "",
436
+ }
437
+ for param, value in r["optimal_params"].items():
438
+ base[param] = value
439
+ rows.append(base)
440
+ return pd.DataFrame(rows)
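Sketsa pemakaian ujung-ke-ujung (catatan editor; nama file model di bawah hanya asumsi yang mengikuti pola `model_checkpoint_xgb_<produk>.joblib` pada MonitoringModel.py):

```
# Ilustrasi (bukan bagian modul): optimasi dua target untuk satu produk.
results = run_inverse_for_targets(
    model_path="model_checkpoint_xgb_CKR BASE.joblib",  # asumsi nama file
    product_name="CKR BASE",
    targets=[0.28, 0.30],
)
df_out = results_to_dataframe(results, "CKR BASE")
print(df_out[["Target_MMBTU", "Predicted_MMBTU", "Error_Pct", "Converged"]])
```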
MonitoringModel.py ADDED
@@ -0,0 +1,245 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ import joblib
4
+ from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
5
+ import matplotlib.pyplot as plt
6
+ import seaborn as sns
7
+ import os
8
+
9
+ # =========================================================
10
+ # KONFIGURASI GLOBAL (tetap)
11
+ # =========================================================
12
+ DATA_FILENAME = r'C:\Dokumen\One To Many_17_10_2025\MMBTU\DASHBOARD\One To Many\disagregasi_data_spraydryer_terbaru_10_17_2025.csv'
13
+ MODEL_FOLDER = r'C:\Dokumen\One To Many_17_10_2025\MMBTU\DASHBOARD\One To Many\MODEL CHECKPOINT FOR INVERSE MODEL'
14
+ TARGET_COLUMN = 'GAS_MMBTU_Disaggregated'
15
+
16
+ PRODUCT_LIST = [
17
+ 'BMR BASE',
18
+ 'CKP BASE',
19
+ 'CKR BASE',
20
+ 'CMR BASE',
21
+ 'MORIGRO BASE'
22
+ ]
23
+
24
+ FEATURES = [
25
+ 'D101330TT',
26
+ 'D102260TIC_CV',
27
+ 'D102265TIC_PV',
28
+ 'D102265TIC_CV',
29
+ 'D102266TIC',
30
+ 'D101264FTSCL'
31
+ ]
32
+
33
+ PREDICTION_COLUMN = 'Prediksi_Gas'
34
+ MODEL_FILENAME_TEMPLATE = 'model_checkpoint_xgb_{}.joblib'
35
+
36
+
37
+ # =========================================================
38
+ # FUNGSI UTILITAS (tetap)
39
+ # =========================================================
40
+ def calculate_metrics(y_true, y_pred):
41
+ """Menghitung R2, RMSE, dan MAE."""
42
+ r2 = r2_score(y_true, y_pred)
43
+ rmse = np.sqrt(mean_squared_error(y_true, y_pred))
44
+ mae = mean_absolute_error(y_true, y_pred)
45
+ return r2, rmse, mae
46
+
47
+
48
+ def _load_model_for_product(model_dir, product):
49
+ """Load model XGBoost + poly_transformer untuk satu produk."""
50
+ model_path = os.path.join(model_dir, MODEL_FILENAME_TEMPLATE.format(product))
51
+ if not os.path.exists(model_path):
52
+ raise FileNotFoundError(f"File model tidak ditemukan: {model_path}")
53
+
54
+ deployment_bundle = joblib.load(model_path)
55
+
56
+ model = deployment_bundle.get('model')
57
+ poly_transformer = deployment_bundle.get('poly_transformer')
58
+ poly_feature_names = deployment_bundle.get('poly_feature_names')
59
+
60
+ if model is None or poly_transformer is None or poly_feature_names is None:
61
+ raise KeyError(
62
+ "Bundle model tidak lengkap. Pastikan berisi "
63
+ "'model', 'poly_transformer', dan 'poly_feature_names'."
64
+ )
65
+
66
+ return model, poly_transformer, poly_feature_names
67
+
68
+
69
+ # =========================================================
70
+ # FUNGSI UTAMA UNTUK DASHBOARD (PERBAIKAN)
71
+ # =========================================================
72
+ def evaluate_models_for_dashboard(
73
+ data_path: str = DATA_FILENAME,
74
+ model_dir: str = MODEL_FOLDER,
75
+ products: list = None,
76
+ features: list = None,
77
+ target_col: str = TARGET_COLUMN,
78
+ data_df=None, # <--- NEW: bisa kirim DataFrame langsung dari Streamlit
79
+ ):
80
+ """
81
+ Fungsi utama yang melakukan evaluasi performa.
82
+ Mengembalikan:
83
+ - summary_df: DataFrame berisi [Product, R², RMSE, MAE]
84
+ - product_figs: dict {product_name: matplotlib.figure.Figure}
85
+
86
+ Prioritas data:
87
+ 1) Jika data_df tidak None -> gunakan data_df (upload dari Streamlit)
88
+ 2) Jika data_df None -> baca dari data_path (CSV default)
89
+ """
90
+ if products is None:
91
+ products = PRODUCT_LIST
92
+ if features is None:
93
+ features = FEATURES
94
+
95
+ # --- 1. Load data ---
96
+ if data_df is not None:
97
+ # Pakai dataset yang di-upload user (sudah dalam bentuk DataFrame)
98
+ df = data_df.copy()
99
+ else:
100
+ # Fallback: baca dari CSV path seperti sebelumnya
101
+ try:
102
+ df = pd.read_csv(data_path)
103
+ except FileNotFoundError:
104
+ print(f"[ERROR] Data file tidak ditemukan di: {data_path}")
105
+ return pd.DataFrame(columns=['Product', 'R²', 'RMSE', 'MAE']), {}
106
+ except Exception as e:
107
+ print(f"[ERROR] Gagal memuat data: {e}")
108
+ return pd.DataFrame(columns=['Product', 'R²', 'RMSE', 'MAE']), {}
109
+
110
+ # Pastikan Date_time ada dan dalam bentuk datetime (kalau mau pakai time-series)
111
+ if 'Date_time' in df.columns:
112
+ df['Date_time'] = pd.to_datetime(df['Date_time'], errors='coerce')
113
+
114
+ summary_results = []
115
+ plot_data_list = []
116
+
117
+ # --- 2. Loop per produk ---
118
+ for product in products:
119
+ df_prod = df[df['Product'] == product].copy()
120
+
121
+ if df_prod.empty or len(df_prod) < 2:
122
+ continue
123
+
124
+ missing_features = [f for f in features if f not in df_prod.columns]
125
+ if missing_features:
126
+ print(f"[WARN] Fitur hilang untuk {product}: {missing_features}")
127
+ continue
128
+
129
+ if 'Date_time' in df_prod.columns:
130
+ df_prod = df_prod.sort_values('Date_time')
131
+
132
+ X_raw = df_prod[features]
133
+ y_true = df_prod[target_col]
134
+
135
+ # --- 2a. Load model produk ---
136
+ try:
137
+ model, poly_transformer, poly_feature_names = _load_model_for_product(model_dir, product)
138
+ except Exception as e:
139
+ print(f"[WARN] Gagal load model untuk {product}: {e}")
140
+ continue
141
+
142
+ # --- 2b. Transformasi dan prediksi ---
143
+ try:
144
+ X_transformed_np = poly_transformer.transform(X_raw)
145
+ X_transformed_df = pd.DataFrame(
146
+ X_transformed_np,
147
+ columns=poly_feature_names,
148
+ index=X_raw.index
149
+ )
150
+ y_pred = model.predict(X_transformed_df)
151
+ except Exception as e:
152
+ print(f"[WARN] Gagal transform/predict untuk {product}: {e}")
153
+ continue
154
+
155
+ # --- 2c. Hitung metrik ---
156
+ r2, rmse, mae = calculate_metrics(y_true, y_pred)
157
+ summary_results.append({
158
+ 'Product': product,
159
+ 'R²': r2,
160
+ 'RMSE': rmse,
161
+ 'MAE': mae
162
+ })
163
+
164
+ # --- 2d. Siapkan data untuk plot ---
165
+ plot_df = pd.DataFrame({
166
+ 'Actual': y_true.values,
167
+ 'Predicted': y_pred,
168
+ 'Product': product
169
+ })
170
+ plot_data_list.append(plot_df)
171
+
172
+ # --- 3. Buat summary_df ---
173
+ if summary_results:
174
+ summary_df = pd.DataFrame(summary_results)
175
+ summary_df['Product'] = pd.Categorical(summary_df['Product'], categories=products, ordered=True)
176
+ summary_df = summary_df.sort_values('Product').reset_index(drop=True)
177
+ else:
178
+ summary_df = pd.DataFrame(columns=['Product', 'R²', 'RMSE', 'MAE'])
179
+ return summary_df, {}
180
+
181
+ product_figs = {}
182
+
183
+ # --- 4. Generate Figures (per produk, untuk Streamlit) ---
184
+ if plot_data_list:
185
+ all_plot_data = pd.concat(plot_data_list)
186
+ products_evaluated = summary_df['Product'].tolist()
187
+
188
+ sns.set_style("whitegrid")
189
+
190
+ for product in products_evaluated:
191
+ product_data = all_plot_data[all_plot_data['Product'] == product].dropna()
192
+ if product_data.empty:
193
+ continue
194
+
195
+ metrics = summary_df[summary_df['Product'] == product].iloc[0]
196
+ title = (f'{product}\n'
197
+ f'$R^2$: {metrics["R²"]:.3f}, '
198
+ f'RMSE: {metrics["RMSE"]:.3f}, '
199
+ f'MAE: {metrics["MAE"]:.3f}')
200
+
201
+ min_val = min(product_data['Actual'].min(), product_data['Predicted'].min())
202
+ max_val = max(product_data['Actual'].max(), product_data['Predicted'].max())
203
+ margin = (max_val - min_val) * 0.05
204
+ plot_range = [min_val - margin, max_val + margin]
205
+
206
+ # Figure tunggal per produk
207
+ fig_single = plt.figure(figsize=(8, 6))
208
+ ax_single = fig_single.add_subplot(111)
209
+ sns.scatterplot(
210
+ x='Actual',
211
+ y='Predicted',
212
+ data=product_data,
213
+ ax=ax_single,
214
+ alpha=0.6
215
+ )
216
+ ax_single.plot(plot_range, plot_range, 'r--', label='Ideal (Actual = Predicted)')
217
+ ax_single.set_xlim(plot_range)
218
+ ax_single.set_ylim(plot_range)
219
+ ax_single.set_title(title)
220
+ ax_single.set_xlabel(f'Actual {target_col}')
221
+ ax_single.set_ylabel(f'Predicted {target_col}')
222
+ ax_single.legend()
223
+
224
+ product_figs[product] = fig_single
225
+ plt.close(fig_single)
226
+
227
+ return summary_df, product_figs
228
+
229
+ # =========================================================
230
+ # OPTIONAL: CLI MODE (kept as-is)
231
+ # =========================================================
232
+ if __name__ == "__main__":
233
+
234
+ print("Memulai Evaluasi Performa Model Inverse...")
235
+
236
+ summary_df, figs = evaluate_models_for_dashboard()
237
+
238
+ print("\n" + "="*40)
239
+ print("=== Ringkasan Performa Model ===")
240
+ print("="*40)
241
+
242
+ if not summary_df.empty:
243
+ print(summary_df.to_markdown(index=False, floatfmt=".4f"))
244
+ else:
245
+ print("Gagal memproses data atau model. Periksa pesan error di atas.")
README.md ADDED
@@ -0,0 +1,143 @@
+ # XGBoost and Inverse Model Prediction Dashboard for the Spray Dryer
+
+ This dashboard is a Streamlit application built to predict gas consumption (MMBTU) in the spray dryer process using an XGBoost model and inverse modelling. It also provides EDA (Exploratory Data Analysis), data disaggregation, rule-engine filtering, and model performance monitoring. The app supports prediction from process parameters, inverse simulation to find optimal parameters for a target gas value, and data cleaning to ensure input quality.
+
+ ## Installing and Running the Dashboard Locally
+ Follow these steps to run the dashboard:
+
+ 1. **Open a Terminal or Command Prompt**
+ If you use VS Code, open the integrated terminal with ``Ctrl + ` `` (backtick).
+
+ 2. **Change to the Project Directory**
+ Use the following command to move into the folder where the project is stored:
+ ```
+ cd "path/to/project/folder"
+ ```
+ Replace `"path/to/project/folder"` with your folder location (example: `cd "C:\Users\UserName\Documents\DashboardSprayDryer"`).
+
+ 3. **Create a New Virtual Environment**
+ Create a virtual environment to isolate the dependencies:
+ ```
+ python -m venv env_name
+ ```
+ Replace `env_name` with any name you like (example: `python -m venv spraydryer_env`).
+
+ 4. **Activate the Virtual Environment**
+ Activate the environment:
+ - On Windows:
+ ```
+ env_name\Scripts\activate
+ ```
+ - On macOS/Linux:
+ ```
+ source env_name/bin/activate
+ ```
+ Once activated, your terminal prompt will show the environment name (for example: `(env_name)`).
+
+ 5. **Install Dependencies**
+ Install all required packages from `requirements.txt`:
+ ```
+ pip install -r requirements.txt
+ ```
+ Make sure `requirements.txt` is in the project directory (example contents: `streamlit`, `pandas`, `numpy`, `xgboost`, `scikit-learn`, etc.).
+
+ 6. **Run the Dashboard**
+ Start the Streamlit app with:
+ ```
+ streamlit run Dashboard.py
+ ```
+ (Note: if your main file has a different name, adjust the command accordingly.)
+
+ 7. **Open the Dashboard**
+ Once it is running, Streamlit prints a message like:
+ ```
+ You can now view your Streamlit app in your browser.
+ Local URL: http://localhost:8501
+ Network URL: http://192.168.x.x:8501
+ ```
+ Click one of the URLs to open the dashboard in your browser. If that fails, open `http://localhost:8501` manually.
+
+ If you run into errors, check:
+ - That you are running a supported Python version.
+ - That all dependencies are installed (run `pip list` to verify).
+ - That the model files (e.g., XGBoost) and historical data are in the expected directories.
+
+ ## Dashboard Features and Pages
+ The dashboard has 6 main pages, each with a specific role in spray dryer prediction, analysis, and data cleaning. Full descriptions follow:
+
+ ### 1. Gas Prediction from 6 Parameters
+ This page uses an XGBoost model with time-based feature engineering to predict gas consumption (MMBTU) from the 6 main process parameters:
+ - `D101330TT` (Outlet Temperature)
+ - `D102260TIC_CV` (HP Steam Damper CV)
+ - `D102265TIC_PV` (Inlet Temperature PV)
+ - `D102265TIC_CV` (LP Steam Damper CV)
+ - `D102266TIC` (Dehumidifier Temperature)
+ - `D101264FTSCL` (Flow Feed)
+
+ **How to Use:**
+ - Enter a value range for each parameter.
+ - Click the **"Prediksi Konsumsi Gas (MMBTU)"** button to run the prediction.
+ - Optional: upload a CSV file for batch prediction.
+ This feature needs historical data context to compute the time-based features.
+
+ ### 2. Parameter Prediction from Gas (MMBTU)
+ This page has 3 main parts: Inverse Model, Model Validation, and Prediction Simulation. The model is XGBoost with Polynomial Features for inverse modelling (predicting the 6 parameters from a target gas value).
+
+ **Workflow:**
+ 1. **Inverse Model:**
+ - Select a product.
+ - Enter the desired target gas consumption (MMBTU).
+ - Click **"Optimasi Parameter"** to search for a recommended set of the 6 parameters.
+ - Search methods:
+   - Look up the historical CSV file (if available).
+   - Or run the Differential Evolution algorithm for optimization.
+ - The recommendations are saved to an Excel file.
+
+ 2. **Model Validation – XGBoost Many-to-One:**
+ - Use the inverse-model parameters to validate the forward model.
+ - Show performance metrics such as MAE, RMSE, R², and the predicted gas (MMBTU).
+
+ 3. **Gas Consumption Prediction Simulation (Forward Modelling):**
+ - Manually enter the 6 process parameters.
+ - Get a real-time gas consumption prediction from the same forward model.
+
+ ### 3. Model Monitoring
+ This page monitors model performance in real time.
+
+ **How to Use:**
+ - Upload a CSV evaluation file that includes the `GAS_MMBTU_Disaggregated` target.
+ - The system predicts the gas value for each product, computes the gap between predictions and actuals, and shows per-product performance metrics (such as MAE, RMSE, R²).
+ This is useful for evaluating the model on new data.
+
+ ### 4. Exploratory Data Analysis (EDA)
+ This page provides exploratory analysis of the spray dryer data.
+
+ **Key Features:**
+ - **Data Summary:** metrics such as total rows, total columns, production date range, total missing values, number of unique products, duplicate rows, and an anomaly summary.
+ - **Per-Product EDA:** tabs for "All Data" and each unique product (e.g., BMR BASE, CKP BASE, CKR BASE, CMR BASE, MORIGRO BASE, etc.). In each tab:
+   - Process parameter distributions (boxplot/violin plots for the 6 main parameters).
+   - Outlier detection (boxplots with highlighting, plus outlier counts per parameter).
+   - Descriptive statistics table (`df.describe()` for the 6 parameters + GAS_MMBTU_Disaggregated).
+   - Production segments per product (table with Start_Time, End_Time, Duration_Minutes, Data_Points).
+ - A **"Refresh EDA"** button to pick up new data.
+
+ ### 5. Data Disaggregation
+ This page converts gas consumption data from an hourly to a per-minute scale using a disaggregation algorithm driven by process-indicator weights (proportional weight splitting); a brief sketch of the idea follows below.
+
+ **Workflow:**
+ - Upload an hourly CSV file.
+ - Click **"Jalankan Proses Disagregasi"**.
+ - A full report is shown: production-hour validation, weight computation, accuracy validation (zero difference), pipeline summary, and result analysis (statistics, highest/lowest hours).
+ - Download the disaggregated output file (with the new `GAS_MMBTU_Disaggregated` column).
+ The core processing is done by functions from `disagregasi_data.py`.
+
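+ As an illustration, proportional weight splitting distributes each hourly gas
+ total across that hour's minutes in proportion to a process-indicator value.
+ The helper below is a minimal sketch of the idea only; the function name and
+ the assumption that minute rows carry a `fixed_rounded_time` hour bucket are
+ illustrative, not the actual logic of `disagregasi_data.py`:
+ ```
+ import pandas as pd
+
+ def disaggregate_hourly(minute_df, hourly_gas, weight_col):
+     # Weight each minute by its indicator value within its hour, then split
+     # the hourly total proportionally; hourly_gas is a Series indexed by the
+     # hour bucket. Fall back to equal weights when an hour's indicator sums to 0.
+     out = minute_df.copy()
+     w = out.groupby('fixed_rounded_time')[weight_col].transform(
+         lambda s: s / s.sum() if s.sum() else 1.0 / len(s)
+     )
+     out['GAS_MMBTU_Disaggregated'] = out['fixed_rounded_time'].map(hourly_gas) * w
+     return out
+ ```
+ Summing `GAS_MMBTU_Disaggregated` back per hour reproduces the hourly totals,
+ which is what the page's "zero difference" accuracy check verifies.
+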
+ ### 6. Filter Rule Engine
+ This page cleans the disaggregated data using a rule engine with 4 anomaly rules.
+
+ **Workflow:**
+ - Upload the disaggregated CSV file.
+ - Click **"Jalankan Rule Engine"**.
+ - A summary is shown: initial and clean row counts, number of anomaly rows removed, and the percentage of clean data.
+ - Anomaly details: a table with anomaly type, row count, percentage, and example timestamps.
+ - Download the clean data file and the anomaly file (with an `anomaly_reason` column).
disagregasi_data_spraydryer_terbaru_10_17_2025.csv ADDED
The diff for this file is too large to render. See raw diff
 
eda_functions.py ADDED
@@ -0,0 +1,1111 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+ from datetime import datetime, timedelta
6
+ import warnings
7
+ warnings.filterwarnings('ignore')
8
+
9
+ class SprayDryerEDAPipeline:
10
+ """
11
+ Pipeline for Exploratory Data Analysis (EDA) and preprocessing of spray dryer data
12
+ """
13
+
14
+ def __init__(self, data_path=None, dataframe=None):
15
+ """
16
+ Initialize the pipeline
17
+
18
+ Parameters:
19
+ -----------
20
+ data_path : str, optional
21
+ Path to the data file (CSV, Excel, etc.)
22
+ dataframe : pd.DataFrame, optional
23
+ A DataFrame that is already loaded
24
+ """
25
+ if dataframe is not None:
26
+ self.df_original = dataframe.copy()
27
+ elif data_path:
28
+ self.df_original = self.load_data(data_path)
29
+ else:
30
+ raise ValueError("Harus memberikan data_path atau dataframe")
31
+
32
+ self.df = self.df_original.copy()
33
+ self.product_dataframes = {}
34
+ self.setup_visualization()
35
+
36
+ def setup_visualization(self):
37
+ """Setup parameter visualisasi"""
38
+ plt.style.use('default')
39
+ sns.set_palette("husl")
40
+
41
+ def load_data(self, path):
42
+ """Load data dari file"""
43
+ if path.endswith('.csv'):
44
+ return pd.read_csv(path)
45
+ elif path.endswith(('.xlsx', '.xls')):
46
+ return pd.read_excel(path)
47
+ else:
48
+ raise ValueError("Format file tidak didukung")
49
+
50
+ # ============= STEP 1: PEMERIKSAAN KOLOM AWAL =============
51
+ def check_and_fix_columns(self):
52
+ """
53
+ Step 1: Pemeriksaan dan perbaikan nama kolom
54
+ """
55
+ print("="*80)
56
+ print("STEP 1: PEMERIKSAAN KOLOM AWAL")
57
+ print("="*80)
58
+
59
+ # Daftar kolom standar
60
+ standard_columns = [
61
+ 'Date_time', 'Drier_On_Product', 'D101330TT', 'D102260TIC_CV',
62
+ 'D102265TIC_PV', 'D102265TIC_CV', 'D102266TIC', 'D101264FTSCL',
63
+ 'Product', 'GAS_MMBTU', 'fixed_rounded_time'
64
+ ]
65
+
66
+ print(f"Kolom yang ada di dataframe: {list(self.df.columns)}")
67
+ print(f"\nKolom standar yang diharapkan: {standard_columns}")
68
+ standard_lookup = {col.lower(): col for col in standard_columns}
69
+ column_mapping = {}
70
+ unmatched_column = []
71
+ for actual_col in self.df.columns:
72
+ actual_col_lower = actual_col.lower()
73
+ if actual_col_lower in standard_lookup:
74
+ standard_name = standard_lookup[actual_col_lower]
75
+ if actual_col != standard_name:
76
+ column_mapping[actual_col] = standard_name
77
+ else:
78
+ unmatched_column.append(actual_col)
79
+
80
+ # Rename kolom
81
+ self.df.rename(columns=column_mapping, inplace=True)
82
+
83
+ # Hapus kolom yang tidak ada dalam daftar standar
84
+ cols_to_keep = [col for col in self.df.columns if col in standard_columns]
85
+ cols_removed = [col for col in self.df.columns if col not in standard_columns]
86
+
87
+ if cols_removed:
88
+ print(f"\nKolom yang dihapus: {cols_removed}")
89
+
90
+ self.df = self.df[cols_to_keep]
91
+
92
+ # Cek apakah fixed_rounded_time ada
93
+ if 'fixed_rounded_time' not in self.df.columns:
94
+ print("\nKolom 'fixed_rounded_time' tidak ditemukan. Akan dibuat nanti.")
95
+
96
+ print(f"\nKolom final: {list(self.df.columns)}")
97
+ print(f"Shape dataframe: {self.df.shape}")
98
+
99
+ # ============= STEP 2: VALIDASI KOLOM PRODUCT =============
100
+ def validate_product_names(self):
101
+ """
102
+ Step 2: Validasi dan standardisasi nama produk secara otomatis.
103
+ """
104
+ print("\n" + "="*80)
105
+ print("STEP 2: VALIDASI DAN STANDARDISASI KOLOM PRODUCT")
106
+ print("="*80)
107
+
108
+ # Pastikan kolom 'Product' ada
109
+ if 'Product' not in self.df.columns:
110
+ print("PERINGATAN: Kolom 'Product' tidak ditemukan. Melewati langkah ini.")
111
+ print("="*80 + "\n")
112
+ return self.df
113
+
114
+ # 1. Daftar nama produk standar (sumber kebenaran)
115
+ standard_products = [
116
+ 'CKP BASE', 'CMP BASE', 'BMP BASE', 'MORIGRO BASE', 'CKH BASE',
117
+ 'CMH BASE', 'BMH BASE', 'CKR BASE', 'CMR BASE', 'BMR BASE',
118
+ 'CGI BASE', 'NL33 BASE POWDER', 'CKS BASE', 'CHIL SCHOOL',
119
+ 'CHIL MIL SOYA', 'CIP', 'CIP CHAMBER'
120
+ ]
121
+
122
+ # 2. Mapping HANYA untuk kasus-kasus khusus/salah ketik yang tidak bisa ditebak
123
+ # Contoh: ada kata 'BASE' ganda, atau singkatan yang tidak standar.
124
+ special_product_mapping = {
125
+ 'CMR BASE BASE': 'CMR BASE',
126
+ 'CGI 6-12 BASE' : 'CGI BASE',
127
+ 'CMH BASE': 'CMH BASE',
128
+ 'BMH BASE': 'BMH BASE'
129
+ }
130
+
131
+ print(f"Produk unik sebelum standardisasi: {self.df['Product'].unique()}")
132
+
133
+ # 3. Buat kamus pencocokan (lookup map) utama secara otomatis
134
+ # Kunci: nama produk dalam format UPPERCASE dan tanpa spasi berlebih
135
+ # Nilai: nama produk standar yang benar
136
+
137
+ # Mulai dengan standard products
138
+ product_lookup = {prod.upper().strip(): prod for prod in standard_products}
139
+
140
+ # Timpa/tambahkan dengan special mapping. Ini memastikan kasus khusus diutamakan.
141
+ for key, value in special_product_mapping.items():
142
+ product_lookup[key.upper().strip()] = value
143
+
144
+ # 4. Gunakan metode .map() dari Pandas untuk efisiensi tinggi
145
+ # Ini jauh lebih cepat daripada .apply() untuk data besar
146
+
147
+ # Simpan kolom produk asli untuk perbandingan
148
+ original_products = self.df['Product'].copy()
149
+
150
+ # Buat series baru dengan nilai yang sudah dinormalisasi (uppercase, strip)
151
+ normalized_products = self.df['Product'].astype(str).str.upper().str.strip()
152
+
153
+ # Gunakan .map() untuk mengganti nilai. Nilai yang tidak ada di `product_lookup` akan menjadi NaN
154
+ self.df['Product'] = normalized_products.map(product_lookup)
155
+
156
+ # Refill values that became NaN with their original entries.
157
+ # This ensures unrecognized products are not dropped or altered.
158
+ self.df['Product'] = self.df['Product'].fillna(original_products)
159
+
160
+ print(f"\nProduk unik setelah standardisasi: {self.df['Product'].unique()}")
161
+ print(f"\nJumlah setiap produk:\n{self.df['Product'].value_counts()}")
162
+
163
+ # 5. (Opsional tapi sangat direkomendasikan) Laporkan produk yang tidak berhasil distandardisasi
164
+ final_products_set = set(self.df['Product'].unique())
165
+ standard_products_set = set(standard_products)
166
+
167
+ unstandardized = final_products_set - standard_products_set
168
+ # Hapus None atau NaN jika ada dalam hasil
169
+ unstandardized = {item for item in unstandardized if pd.notna(item)}
170
+
171
+ if unstandardized:
172
+ print("\n" + "-"*40)
173
+ print(f"PERINGATAN: Ditemukan {len(unstandardized)} produk yang tidak sesuai standar:")
174
+ for item in unstandardized:
175
+ print(f" - '{item}'")
176
+ print("Pertimbangkan untuk menambahkannya ke `standard_products` atau `special_product_mapping`.")
177
+ print("-"*40)
178
+
179
+ print("\n" + "="*80)
180
+ print("STEP 2 SELESAI")
181
+ print("="*80 + "\n")
182
+ return self.df
183
+
184
+ # ============= STEP 3: PEMISAHAN DATA PER PRODUK =============
185
+ def separate_data_by_product(self):
186
+ """
187
+ Step 3: Pemisahan data berdasarkan produk
188
+ """
189
+ print("\n" + "="*80)
190
+ print("STEP 3: PEMISAHAN DATA PER PRODUK")
191
+ print("="*80)
192
+
193
+ unique_products = self.df['Product'].unique()
194
+ print(f"Memisahkan data untuk {len(unique_products)} produk...")
195
+
196
+ for product in unique_products:
197
+ self.product_dataframes[product] = self.df[self.df['Product'] == product].copy()
198
+ print(f"\n{product}: {len(self.product_dataframes[product])} baris")
199
+
200
+ # Tampilkan statistik deskriptif
201
+ print("\n" + "-"*50)
202
+ print("STATISTIK DESKRIPTIF - DATA KESELURUHAN")
203
+ print("-"*50)
204
+ print(self.df.describe())
205
+
206
+ print("\n" + "-"*50)
207
+ print("INFO DATA KESELURUHAN")
208
+ print("-"*50)
209
+ print(self.df.info())
210
+
211
+ # Statistik per produk
212
+ for product, df_product in self.product_dataframes.items():
213
+ print("\n" + "-"*50)
214
+ print(f"STATISTIK DESKRIPTIF - {product}")
215
+ print("-"*50)
216
+ print(df_product.describe())
217
+
218
+ print(f"\nINFO - {product}")
219
+ print(df_product.info())
220
+
221
+ # ============= STEP 4: IDENTIFIKASI ANOMALI DATA =============
222
+ def identify_anomalies(self):
223
+ """
224
+ Step 4: Identifikasi anomali berdasarkan aturan teknis
225
+ """
226
+ print("\n" + "="*80)
227
+ print("STEP 4: IDENTIFIKASI ANOMALI DATA")
228
+ print("="*80)
229
+
230
+ anomaly_rules = {
231
+ 'D101330TT': {'min': 20, 'max': 130, 'zero_anomaly': True},
232
+ 'D102265TIC_PV': {'min': 20, 'zero_anomaly': True},
233
+ 'D102265TIC_CV': {'zero_allowed_products': ['CIP', 'CIP CHAMBER']},
234
+ 'D102266TIC': {'zero_anomaly': True}
235
+ }
236
+
237
+ anomalies = []
238
+
239
+ for product, df_product in self.product_dataframes.items():
240
+ print(f"\nMemeriksa anomali untuk produk: {product}")
241
+
242
+ for column, rules in anomaly_rules.items():
243
+ if column not in df_product.columns:
244
+ continue
245
+
246
+ # Cek nilai 0
247
+ if 'zero_anomaly' in rules and rules['zero_anomaly']:
248
+ zero_count = (df_product[column] == 0).sum()
249
+ if zero_count > 0:
250
+ anomalies.append({
251
+ 'Product': product,
252
+ 'Column': column,
253
+ 'Anomaly': 'Nilai 0',
254
+ 'Count': zero_count
255
+ })
256
+ print(f" - {column}: Ditemukan {zero_count} nilai 0 (anomali)")
257
+
258
+ # Cek nilai 0 untuk D102265TIC_CV
259
+ if 'zero_allowed_products' in rules:
260
+ if product not in rules['zero_allowed_products']:
261
+ zero_count = (df_product[column] == 0).sum()
262
+ if zero_count > 0:
263
+ anomalies.append({
264
+ 'Product': product,
265
+ 'Column': column,
266
+ 'Anomaly': 'Nilai 0 (tidak diizinkan untuk produk ini)',
267
+ 'Count': zero_count
268
+ })
269
+ print(f" - {column}: Ditemukan {zero_count} nilai 0 (anomali untuk produk non-CIP)")
270
+
271
+ # Cek nilai minimum
272
+ if 'min' in rules:
273
+ below_min = (df_product[column] < rules['min']).sum()
274
+ if below_min > 0:
275
+ anomalies.append({
276
+ 'Product': product,
277
+ 'Column': column,
278
+ 'Anomaly': f'Nilai < {rules["min"]}',
279
+ 'Count': below_min
280
+ })
281
+ print(f" - {column}: Ditemukan {below_min} nilai < {rules['min']}")
282
+
283
+ # Cek nilai maksimum
284
+ if 'max' in rules:
285
+ above_max = (df_product[column] > rules['max']).sum()
286
+ if above_max > 0:
287
+ anomalies.append({
288
+ 'Product': product,
289
+ 'Column': column,
290
+ 'Anomaly': f'Nilai > {rules["max"]}',
291
+ 'Count': above_max
292
+ })
293
+ print(f" - {column}: Ditemukan {above_max} nilai > {rules['max']}")
294
+
295
+ if anomalies:
296
+ anomaly_df = pd.DataFrame(anomalies)
297
+ print("\n" + "-"*50)
298
+ print("RINGKASAN ANOMALI")
299
+ print("-"*50)
300
+ print(anomaly_df.to_string())
301
+ else:
302
+ print("\nTidak ditemukan anomali berdasarkan aturan yang ditetapkan.")
303
+
304
+ # ============= STEP 5: VALIDASI KOLOM DRIER_ON_PRODUCT =============
305
+ def validate_drier_on_product(self):
306
+ """
307
+ Step 5: Validasi kolom Drier_On_Product
308
+ """
309
+ print("\n" + "="*80)
310
+ print("STEP 5: VALIDASI KOLOM DRIER_ON_PRODUCT")
311
+ print("="*80)
312
+
313
+ production_products = [
314
+ 'CKP BASE', 'CMP BASE', 'BMP BASE', 'MORIGRO BASE', 'CKH BASE',
315
+ 'CMH BASE', 'BMH BASE', 'CKR BASE', 'CMR BASE', 'BMR BASE',
316
+ 'CGI BASE', 'NL33 BASE POWDER', 'CKS BASE', 'CHIL SCHOOL',
317
+ 'CHIL MIL SOYA'
318
+ ]
319
+
320
+ cip_products = ['CIP', 'CIP CHAMBER']
321
+
322
+ validation_errors = []
323
+
324
+ for product in self.df['Product'].unique():
325
+ df_product = self.df[self.df['Product'] == product]
326
+
327
+ if product in production_products:
328
+ # Harus 1
329
+ wrong_values = df_product[df_product['Drier_On_Product'] != 1]
330
+ if len(wrong_values) > 0:
331
+ validation_errors.append({
332
+ 'Product': product,
333
+ 'Expected': 1,
334
+ 'Wrong_Count': len(wrong_values)
335
+ })
336
+ print(f"ERROR: {product} memiliki {len(wrong_values)} baris dengan Drier_On_Product != 1")
337
+
338
+ elif product in cip_products:
339
+ # Harus 0
340
+ wrong_values = df_product[df_product['Drier_On_Product'] != 0]
341
+ if len(wrong_values) > 0:
342
+ validation_errors.append({
343
+ 'Product': product,
344
+ 'Expected': 0,
345
+ 'Wrong_Count': len(wrong_values)
346
+ })
347
+ print(f"ERROR: {product} memiliki {len(wrong_values)} baris dengan Drier_On_Product != 0")
348
+
349
+ if not validation_errors:
350
+ print("✓ Semua nilai Drier_On_Product sesuai dengan ketentuan")
351
+ else:
352
+ error_df = pd.DataFrame(validation_errors)
353
+ print("\nRingkasan Error Validasi:")
354
+ print(error_df)
355
+
356
+ # ============= STEP 6: CEK MISSING VALUES DAN DUPLIKASI =============
357
+ def check_missing_and_duplicates(self):
358
+ """
359
+ Step 6: Periksa missing values dan hapus duplikasi
360
+ """
361
+ print("\n" + "="*80)
362
+ print("STEP 6: CEK MISSING VALUES DAN DUPLIKASI")
363
+ print("="*80)
364
+
365
+ # Cek missing values
366
+ print("Missing Values per Kolom:")
367
+ missing_counts = self.df.isnull().sum()
368
+ print(missing_counts[missing_counts > 0] if any(missing_counts > 0) else "Tidak ada missing values")
369
+
370
+ # Cek duplikasi berdasarkan Date_time
371
+ duplicates = self.df[self.df.duplicated(subset=['Date_time'], keep=False)]
372
+ print(f"\nJumlah baris duplikat berdasarkan Date_time: {len(duplicates)}")
373
+
374
+ if len(duplicates) > 0:
375
+ print("Menghapus duplikasi...")
376
+ self.df = self.df.drop_duplicates(subset=['Date_time'], keep='first')
377
+ print(f"Shape setelah menghapus duplikasi: {self.df.shape}")
378
+
379
+ # Update product dataframes
380
+ for product in self.product_dataframes.keys():
381
+ self.product_dataframes[product] = self.df[self.df['Product'] == product].copy()
382
+
383
+ # ============= STEP 7: PERHITUNGAN DURASI PRODUKSI =============
384
+ def calculate_production_duration(self):
385
+ """
386
+ Step 7: Hitung durasi produksi untuk setiap produk berdasarkan segmen produksi yang berkelanjutan.
387
+
388
+ Logika:
389
+ 1. Data diurutkan berdasarkan waktu.
390
+ 2. Sebuah "segmen" produksi diidentifikasi sebagai blok baris yang berurutan
391
+ di mana nama produknya sama.
392
+ 3. Jika nama produk pada baris saat ini berbeda dari baris sebelumnya,
393
+ maka itu dianggap sebagai awal dari segmen baru.
394
+ 4. Durasi dihitung untuk setiap segmen (end_time - start_time).
395
+ 5. Total durasi untuk satu produk adalah jumlah dari semua durasi segmennya.
396
+ """
397
+ print("\n" + "="*80)
398
+ print("STEP 7: PERHITUNGAN DURASI PRODUKSI (METODE SEGMENTASI)")
399
+ print("="*80)
400
+
401
+ # Pastikan tipe data dan urutan sudah benar
402
+ try:
403
+ self.df['Date_time'] = pd.to_datetime(self.df['Date_time'])
404
+ except Exception as e:
405
+ print(f"Error saat konversi 'Date_time': {e}")
406
+ return
407
+
408
+ if self.df.empty:
409
+ print("DataFrame kosong, tidak ada durasi untuk dihitung.")
410
+ return
411
+
412
+ # Sort the dataframe by time; this is crucial for the segmentation logic
413
+ df_sorted = self.df.sort_values('Date_time').copy()
414
+
415
+ # --- Core logic: identify production segments ---
416
+ # Create a 'segment_id' column that is unique for each contiguous production block.
417
+ # .shift() compares the product on the current row with the one on the previous row.
418
+ # .cumsum() accumulates the booleans (True=1, False=0), yielding a unique ID per segment.
419
+ df_sorted['segment_id'] = (df_sorted['Product'] != df_sorted['Product'].shift()).cumsum()
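+ # Editor's note: a tiny worked example of the shift/cumsum trick (toy values):
+ #   Product:    A     A     B     B     A
+ #   != shift(): True  False True  False True   (first row compares against NaN -> True)
+ #   cumsum():   1     1     2     2     3      -> three contiguous segments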
420
+
421
+ # Kelompokkan berdasarkan Produk dan ID Segmen untuk mendapatkan start dan end time setiap segmen
422
+ production_segments = df_sorted.groupby(['Product', 'segment_id']).agg(
423
+ Start_Time=('Date_time', 'min'),
424
+ End_Time=('Date_time', 'max'),
425
+ Data_Points=('Date_time', 'count')
426
+ ).reset_index()
427
+
428
+ # Hitung durasi untuk setiap segmen
429
+ production_segments['Duration'] = production_segments['End_Time'] - production_segments['Start_Time']
430
+
431
+ # Filter hanya untuk produk produksi (bukan CIP)
432
+ production_segments_filtered = production_segments[
433
+ ~production_segments['Product'].isin(['CIP', 'CIP CHAMBER'])
434
+ ].copy()
435
+
436
+ if production_segments_filtered.empty:
437
+ print("Tidak ada data produksi (non-CIP) untuk dihitung durasinya.")
438
+ return
439
+
440
+ # Hitung total durasi dengan menjumlahkan durasi dari semua segmen per produk
441
+ total_durations = production_segments_filtered.groupby('Product')['Duration'].sum().reset_index()
442
+
443
+ # Konversi total durasi ke jam
444
+ total_durations['Total_Duration_Hours'] = round(total_durations['Duration'].dt.total_seconds() / 3600, 2)
445
+
446
+ # Gabungkan dengan jumlah data points
447
+ total_data_points = production_segments_filtered.groupby('Product')['Data_Points'].sum().reset_index()
448
+ summary_df = pd.merge(total_durations, total_data_points, on='Product')
449
+
450
+ print("--- RINGKASAN TOTAL DURASI PRODUKSI PER PRODUK ---")
451
+ print(summary_df[['Product', 'Total_Duration_Hours', 'Data_Points']].to_string(index=False))
452
+
453
+ print("\n" + "-"*80)
454
+ print("--- DETAIL SEGMEN PRODUKSI ---")
455
+ # Tampilkan detail setiap segmen untuk setiap produk
456
+ for product in summary_df['Product'].unique():
457
+ print(f"\nProduk: {product}")
458
+ product_segment_details = production_segments_filtered[production_segments_filtered['Product'] == product].copy()
459
+
460
+ # Konversi durasi segmen ke menit untuk keterbacaan
461
+ product_segment_details['Duration_Minutes'] = round(product_segment_details['Duration'].dt.total_seconds() / 60, 2)
462
+
463
+ print(product_segment_details[[
464
+ 'Start_Time',
465
+ 'End_Time',
466
+ 'Duration_Minutes',
467
+ 'Data_Points'
468
+ ]].to_string(index=False))
469
+
470
+ # ============= STEP 8: PEMBUATAN KOLOM FIXED_ROUNDED_TIME =============
471
+ def create_fixed_rounded_time(self):
472
+ """
473
+ Step 8: Buat kolom fixed_rounded_time jika belum ada
474
+ """
475
+ print("\n" + "="*80)
476
+ print("STEP 8: PEMBUATAN KOLOM FIXED_ROUNDED_TIME")
477
+ print("="*80)
478
+
479
+ if 'fixed_rounded_time' not in self.df.columns:
480
+ print("Membuat kolom fixed_rounded_time...")
481
+ self.df['Date_time'] = pd.to_datetime(self.df['Date_time'])
482
+ self.df['fixed_rounded_time'] = (self.df['Date_time'] + pd.Timedelta(hours=1)).dt.floor('h')  # lowercase 'h': the uppercase alias is deprecated in recent pandas
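+ # Editor's note (worked example): 10:07 and 10:59 both map to 11:00, i.e. the
+ # bucket labeled 11:00 covers the interval (10:00, 11:00].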
483
+
484
+ print("Sample hasil:")
485
+ print(self.df[['Date_time', 'fixed_rounded_time']].head(10))
486
+
487
+ # Update product dataframes
488
+ for product in self.product_dataframes.keys():
489
+ self.product_dataframes[product] = self.df[self.df['Product'] == product].copy()
490
+ else:
491
+ print("Kolom fixed_rounded_time sudah ada")
492
+
493
+ # ============= STEP 9: PERHITUNGAN JUMLAH MENIT =============
494
+ def calculate_minutes_per_hour(self):
495
+ """
496
+ Step 9: Hitung jumlah data per jam berdasarkan fixed_rounded_time
497
+ """
498
+ print("\n" + "="*80)
499
+ print("STEP 9: PERHITUNGAN JUMLAH DATA PER JAM")
500
+ print("="*80)
501
+
502
+ if 'fixed_rounded_time' not in self.df.columns:
503
+ print("ERROR: Kolom fixed_rounded_time tidak ditemukan!")
504
+ return
505
+
506
+ # Hitung jumlah data per jam
507
+ jumlah_data_per_jam = self.df.groupby('fixed_rounded_time').size()
508
+ jumlah_data_per_jam_df = jumlah_data_per_jam.reset_index(name='Jumlah Data Per Jam')
509
+
510
+ # Keep hours with 60 or fewer rows (an incomplete hour has fewer than 60 minute-rows)
511
+ jumlah_data_kurang_60 = jumlah_data_per_jam_df[jumlah_data_per_jam_df['Jumlah Data Per Jam'] <= 60]
512
+
513
+ # Sort ascending by row count
514
+ jumlah_data_kurang_60 = jumlah_data_kurang_60.sort_values(by='Jumlah Data Per Jam', ascending=True)
515
+
516
+ print(f"Jam dengan data < 60 menit ({len(jumlah_data_kurang_60)} jam):")
517
+ pd.set_option("display.max_rows", None)
518
+ print(jumlah_data_kurang_60.to_string())
519
+ pd.set_option("display.max_rows", 10)
520
+
521
+ # ============= STEP 10: VISUALISASI DATA =============
522
+ def create_line_plots(self, show_all_products=True, show_overall=True):
523
+ """
524
+ Step 10: Buat line plot untuk visualisasi data
525
+ """
526
+ print("\n" + "="*80)
527
+ print("STEP 10: VISUALISASI DATA (LINE PLOTS)")
528
+ print("="*80)
529
+
530
+ numeric_columns = self.df.select_dtypes(include=[np.number]).columns
531
+ numeric_columns = [col for col in numeric_columns if col not in ['Drier_On_Product']]
532
+
533
+ # Plot untuk keseluruhan data
534
+ if show_overall:
535
+ print("\nMembuat plot untuk keseluruhan produk...")
536
+ df_plot = self.df[self.df['Drier_On_Product'] == 1].copy()
537
+
538
+ if len(df_plot) > 0:
539
+ df_plot = df_plot.sort_values('Date_time')
540
+
541
+ for column in numeric_columns:
542
+ if column in df_plot.columns:
543
+ plt.figure(figsize=(30, 5))
544
+ plt.plot(df_plot['Date_time'], df_plot[column], marker='o', markersize=2, label=column)
545
+ plt.title(f'Line Plot of {column} Over Time - All Products', fontsize=14)
546
+ plt.xlabel('Date_time')
547
+ plt.ylabel(column)
548
+ plt.xticks(rotation=45)
549
+ plt.legend()
550
+ plt.grid(True, alpha=0.3)
551
+ plt.tight_layout()
552
+ plt.show()
553
+
554
+ # Plot untuk setiap produk
555
+ if show_all_products:
556
+ for product, df_product in self.product_dataframes.items():
557
+ if product not in ['CIP', 'CIP CHAMBER']:
558
+ print(f"\nMembuat plot untuk produk: {product}")
559
+ df_plot = df_product[df_product['Drier_On_Product'] == 1].copy()
560
+
561
+ if len(df_plot) > 0:
562
+ df_plot = df_plot.sort_values('Date_time')
563
+
564
+ for column in numeric_columns:
565
+ if column in df_plot.columns:
566
+ plt.figure(figsize=(20, 4))
567
+ plt.plot(df_plot['Date_time'], df_plot[column], marker='o', markersize=3, label=column)
568
+ plt.title(f'{product} - {column} Over Time', fontsize=12)
569
+ plt.xlabel('Date_time')
570
+ plt.ylabel(column)
571
+ plt.xticks(rotation=45)
572
+ plt.legend()
573
+ plt.grid(True, alpha=0.3)
574
+ plt.tight_layout()
575
+ plt.show()
576
+
577
+ # ============= STEP 11: IDENTIFIKASI OUTLIERS =============
578
+ def identify_outliers(self, show_plots=True):
579
+ """
580
+ Step 11: Identifikasi outliers menggunakan metode IQR
581
+ """
582
+ print("\n" + "="*80)
583
+ print("STEP 11: IDENTIFIKASI OUTLIERS")
584
+ print("="*80)
585
+
586
+ def analyze_outliers(dataframe, product_name="Overall"):
587
+ """Analisis outliers untuk dataframe tertentu"""
588
+
589
+ df_copy = dataframe.copy()
590
+ df_copy['Date_time'] = pd.to_datetime(df_copy['Date_time'])
591
+ drier_on_data = df_copy[df_copy['Drier_On_Product'] == 1].copy()
592
+
593
+ if drier_on_data.empty:
594
+ print(f"Tidak ada data dengan Drier_On_Product == 1 untuk {product_name}")
595
+ return None, None
596
+
597
+ print(f"\n{'='*60}")
598
+ print(f"Analisis Outliers - {product_name}")
599
+ print(f"Total data yang dianalisis: {len(drier_on_data)} baris")
600
+ print(f"{'='*60}")
601
+
602
+ all_stats_data = []
603
+ list_of_outliers = []
604
+
605
+ numeric_columns = drier_on_data.select_dtypes(include=np.number).columns.drop('Drier_On_Product', errors='ignore')
606
+
607
+ for column in numeric_columns:
608
+ if column in drier_on_data.columns:
609
+ param_data = drier_on_data[column].dropna()
610
+
611
+ if len(param_data) > 0:
612
+ Q1 = param_data.quantile(0.25)
613
+ Q3 = param_data.quantile(0.75)
614
+ IQR = Q3 - Q1
615
+ lower_bound = Q1 - 1.5 * IQR
616
+ upper_bound = Q3 + 1.5 * IQR
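+ # Editor's note (worked example): Q1=80, Q3=100 -> IQR=20, so the fences sit
+ # at 80 - 30 = 50 and 100 + 30 = 130; values outside [50, 130] are outliers.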
617
+
618
+ outliers = param_data[(param_data < lower_bound) | (param_data > upper_bound)]
619
+ has_outliers = not outliers.empty
620
+
621
+ mean_val = param_data.mean()
622
+ median_val = param_data.median()
623
+ std_val = param_data.std()
624
+ chosen_val = median_val if has_outliers else mean_val
625
+
626
+ all_stats_data.append({
627
+ 'Parameter': column,
628
+ 'Mean': mean_val,
629
+ 'Median': median_val,
630
+ 'Std_Dev': std_val,
631
+ 'Batas_Bawah': lower_bound,
632
+ 'Batas_Atas': upper_bound,
633
+ 'Has_Outliers': has_outliers,
634
+ 'Outliers_Count': len(outliers),
635
+ 'Chosen_Value': chosen_val
636
+ })
637
+
638
+ if has_outliers:
639
+ outlier_mask = (drier_on_data[column] < lower_bound) | (drier_on_data[column] > upper_bound)
640
+ outlier_rows = drier_on_data[outlier_mask]
641
+
642
+ for index, row in outlier_rows.iterrows():
643
+ list_of_outliers.append({
644
+ 'Tanggal dan Jam': row['Date_time'],
645
+ 'Kolom Outliers': column,
646
+ 'Nilai Outliers': row[column],
647
+ 'Produk': row['Product'] if 'Product' in row else product_name
648
+ })
649
+
650
+ # Visualisasi jika diminta
651
+ if show_plots and has_outliers:
652
+ fig, axes = plt.subplots(1, 2, figsize=(18, 5))
653
+
654
+ # Histogram
655
+ axes[0].hist(param_data, bins=30, edgecolor='black', alpha=0.7)
656
+ axes[0].axvline(mean_val, color='green', linestyle='--', label=f'Mean: {mean_val:.2f}')
657
+ axes[0].axvline(median_val, color='red', linestyle='--', label=f'Median: {median_val:.2f}')
658
+ axes[0].set_title(f'Distribution - {column}')
659
+ axes[0].legend()
660
+
661
+ # Time series with outliers
662
+ axes[1].plot(range(len(param_data)), param_data.values, 'b-', alpha=0.5)
663
+ axes[1].axhline(upper_bound, color='purple', linestyle='--', label=f'Upper: {upper_bound:.2f}')
664
+ axes[1].axhline(lower_bound, color='orange', linestyle='--', label=f'Lower: {lower_bound:.2f}')
665
+
666
+ if has_outliers:
667
+ outlier_indices = []
668
+ outlier_values = []
669
+ for i, (idx, val) in enumerate(param_data.items()):
670
+ if val < lower_bound or val > upper_bound:
671
+ outlier_indices.append(i)
672
+ outlier_values.append(val)
673
+ axes[1].scatter(outlier_indices, outlier_values, color='red', s=50, zorder=5, label='Outliers')
674
+
675
+ axes[1].set_title(f'Time Series - {column}')
676
+ axes[1].legend()
677
+
678
+ plt.suptitle(f'{product_name}: {column}', fontsize=14)
679
+ plt.tight_layout()
680
+ plt.show()
681
+
682
+ result_df = pd.DataFrame(all_stats_data) if all_stats_data else None
683
+ outliers_df = pd.DataFrame(list_of_outliers) if list_of_outliers else None
684
+
685
+ if outliers_df is not None and not outliers_df.empty:
686
+ outliers_df = outliers_df.sort_values(by='Tanggal dan Jam').reset_index(drop=True)
687
+
688
+ return result_df, outliers_df
689
+
690
+ # Analisis untuk keseluruhan data
691
+ print("\n" + "="*70)
692
+ print("ANALISIS OUTLIERS - KESELURUHAN DATA")
693
+ print("="*70)
694
+ overall_stats, overall_outliers = analyze_outliers(self.df, "OVERALL")
695
+
696
+ if overall_stats is not None:
697
+ print("\nRingkasan Statistik - Keseluruhan:")
698
+ print(overall_stats.to_string())
699
+
700
+ if overall_outliers is not None and not overall_outliers.empty:
701
+ print(f"\nTotal Outliers Keseluruhan: {len(overall_outliers)}")
702
+ print("\nSample Outliers (10 pertama):")
703
+ print(overall_outliers.head(10).to_string())
704
+
705
+ # Analisis untuk setiap produk
706
+ for product, df_product in self.product_dataframes.items():
707
+ if product not in ['CIP', 'CIP CHAMBER']:
708
+ stats, outliers = analyze_outliers(df_product, product)
709
+
710
+ if stats is not None:
711
+ print(f"\n{'='*50}")
712
+ print(f"Ringkasan Statistik - {product}:")
713
+ print(stats.to_string())
714
+
715
+ if outliers is not None and not outliers.empty:
716
+ print(f"\nTotal Outliers {product}: {len(outliers)}")
717
+
718
+ # ============= MAIN PIPELINE EXECUTION =============
719
+ def run_full_pipeline(self, show_visualizations=True):
720
+ """
721
+ Menjalankan seluruh pipeline EDA
722
+ """
723
+ print("\n" + "="*80)
724
+ print(" " * 20 + "SPRAY DRYER EDA PIPELINE")
725
+ print(" " * 25 + "STARTING ANALYSIS")
726
+ print("="*80)
727
+
728
+ try:
729
+ # Step 1: Pemeriksaan kolom
730
+ self.check_and_fix_columns()
731
+
732
+ # Step 2: Validasi nama produk
733
+ self.validate_product_names()
734
+
735
+ # Step 3: Pemisahan data per produk
736
+ self.separate_data_by_product()
737
+
738
+ # Step 4: Identifikasi anomali
739
+ self.identify_anomalies()
740
+
741
+ # Step 5: Validasi Drier_On_Product
742
+ self.validate_drier_on_product()
743
+
744
+ # Step 6: Cek missing values dan duplikasi
745
+ self.check_missing_and_duplicates()
746
+
747
+ # Step 7: Hitung durasi produksi
748
+ self.calculate_production_duration()
749
+
750
+ # Step 8: Buat kolom fixed_rounded_time
751
+ self.create_fixed_rounded_time()
752
+
753
+ # Step 9: Hitung jumlah menit per jam
754
+ self.calculate_minutes_per_hour()
755
+
756
+ # Step 10: Visualisasi (optional)
757
+ if show_visualizations:
758
+ self.create_line_plots(show_all_products=False, show_overall=True)
759
+
760
+ # Step 11: Identifikasi outliers
761
+ self.identify_outliers(show_plots=show_visualizations)
762
+
763
+ print("\n" + "="*80)
764
+ print(" " * 25 + "PIPELINE COMPLETED SUCCESSFULLY")
765
+ print("="*80)
766
+
767
+ return self.df, self.product_dataframes
768
+
769
+ except Exception as e:
770
+ print(f"\nERROR dalam pipeline: {str(e)}")
771
+ raise
772
+
773
+ def get_summary(self):
774
+ """
775
+ Mendapatkan ringkasan hasil analisis
776
+ """
777
+ summary = {
778
+ 'total_rows': len(self.df),
779
+ 'total_columns': len(self.df.columns),
780
+ 'unique_products': self.df['Product'].nunique(),
781
+ 'date_range': {
782
+ 'start': self.df['Date_time'].min(),
783
+ 'end': self.df['Date_time'].max()
784
+ },
785
+ 'missing_values': self.df.isnull().sum().to_dict(),
786
+ 'product_counts': self.df['Product'].value_counts().to_dict()
787
+ }
788
+
789
+ print("\n" + "="*50)
790
+ print("📊 DATA SUMMARY")
791
+ print("="*50)
792
+ print(f"{'Total rows':20}: {summary['total_rows']:,}")
793
+ print(f"{'Total columns':20}: {summary['total_columns']}")
794
+ print(f"{'Unique products':20}: {summary['unique_products']}")
795
+ print(f"{'Date range':20}: {summary['date_range']['start']} → {summary['date_range']['end']}")
796
+
797
+ print("\n🔍 Missing values per column")
798
+ print("-"*50)
799
+ for col, val in summary['missing_values'].items():
800
+ print(f"{col:25} : {val}")
801
+
802
+ print("\n📦 Product counts")
803
+ print("-"*50)
804
+ for prod, count in summary['product_counts'].items():
805
+ print(f"{prod:25} : {count:,}")
806
+
807
+ return summary
808
+
809
+ # ======================================================================
810
+ # HELPER FUNCTIONS UNTUK DASHBOARD STREAMLIT (EDA)
811
+ # ======================================================================
812
+
813
+ def compute_eda_summary(df: pd.DataFrame,
814
+ date_col: str = "Date_time",
815
+ product_col: str = "Product") -> dict:
816
+ """Ringkasan umum dataset untuk metric/cards di dashboard."""
817
+ df = df.copy()
818
+
819
+ # Dates
820
+ if date_col in df.columns:
821
+ df[date_col] = pd.to_datetime(df[date_col], errors="coerce")
822
+ date_min = df[date_col].min()
823
+ date_max = df[date_col].max()
824
+ else:
825
+ date_min, date_max = pd.NaT, pd.NaT
826
+
827
+ total_rows = len(df)
828
+ total_columns = df.shape[1]
829
+ total_missing = int(df.isna().sum().sum())
830
+ duplicate_rows = int(df.duplicated().sum())
831
+
832
+ if product_col in df.columns:
833
+ product_counts = df[product_col].value_counts().to_dict()
834
+ unique_products = int(df[product_col].nunique())
835
+ else:
836
+ product_counts = {}
837
+ unique_products = 0
838
+
839
+ summary = {
840
+ "total_rows": total_rows,
841
+ "total_columns": total_columns,
842
+ "date_min": date_min,
843
+ "date_max": date_max,
844
+ "total_missing": total_missing,
845
+ "duplicate_rows": duplicate_rows,
846
+ "unique_products": unique_products,
847
+ "product_counts": product_counts,
848
+ }
849
+ return summary
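+
+ # Editor's note: illustrative Streamlit wiring for this helper (names assumed):
+ #   s = compute_eda_summary(df)
+ #   st.metric("Total rows", f"{s['total_rows']:,}")
+ #   st.metric("Missing values", s['total_missing'])
+ #   st.metric("Unique products", s['unique_products'])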
850
+
851
+
852
+ def compute_anomaly_table(df: pd.DataFrame,
853
+ product_col: str = "Product") -> pd.DataFrame:
854
+ """
855
+ Hitung anomali berbasis rule teknis (error validasi),
856
+ dengan struktur kolom: Product, Column, Anomaly, Count.
857
+ """
858
+ if product_col not in df.columns:
859
+ return pd.DataFrame(columns=["Product", "Column", "Anomaly", "Count"])
860
+
861
+ anomaly_rules = {
862
+ 'D101330TT': {'min': 20, 'max': 130, 'zero_anomaly': True},
863
+ 'D102265TIC_PV': {'min': 20, 'zero_anomaly': True},
864
+ 'D102265TIC_CV': {'zero_allowed_products': ['CIP', 'CIP CHAMBER']},
865
+ 'D102266TIC': {'zero_anomaly': True}
866
+ }
867
+
868
+ anomalies = []
869
+
870
+ for product, df_product in df.groupby(product_col):
871
+ for column, rules in anomaly_rules.items():
872
+ if column not in df_product.columns:
873
+ continue
874
+
875
+ # Zero values treated as anomalies
876
+ if rules.get("zero_anomaly", False):
877
+ zero_count = (df_product[column] == 0).sum()
878
+ if zero_count > 0:
879
+ anomalies.append({
880
+ "Product": product,
881
+ "Column": column,
882
+ "Anomaly": "Nilai 0",
883
+ "Count": int(zero_count),
884
+ })
885
+
886
+ # Zero values in columns that may only be 0 for certain products
887
+ if "zero_allowed_products" in rules:
888
+ if product not in rules["zero_allowed_products"]:
889
+ zero_count = (df_product[column] == 0).sum()
890
+ if zero_count > 0:
891
+ anomalies.append({
892
+ "Product": product,
893
+ "Column": column,
894
+ "Anomaly": "Nilai 0 (tidak diizinkan untuk produk ini)",
895
+ "Count": int(zero_count),
896
+ })
897
+
898
+ # Values < min
899
+ if "min" in rules:
900
+ below_min = (df_product[column] < rules["min"]).sum()
901
+ if below_min > 0:
902
+ anomalies.append({
903
+ "Product": product,
904
+ "Column": column,
905
+ "Anomaly": f"Nilai < {rules['min']}",
906
+ "Count": int(below_min),
907
+ })
908
+
909
+ # Values > max
910
+ if "max" in rules:
911
+ above_max = (df_product[column] > rules["max"]).sum()
912
+ if above_max > 0:
913
+ anomalies.append({
914
+ "Product": product,
915
+ "Column": column,
916
+ "Anomaly": f"Nilai > {rules['max']}",
917
+ "Count": int(above_max),
918
+ })
919
+
920
+ if not anomalies:
921
+ return pd.DataFrame(columns=["Product", "Column", "Anomaly", "Count"])
922
+
923
+ anomaly_df = pd.DataFrame(anomalies)
924
+ anomaly_df = anomaly_df.groupby(
925
+ ["Product", "Column", "Anomaly"], as_index=False
926
+ )["Count"].sum()
927
+
928
+ return anomaly_df
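+
+ # Editor's note: minimal usage sketch (illustrative only):
+ #   anomaly_df = compute_anomaly_table(df)
+ #   # -> one row per (Product, Column, Anomaly) with the summed Count,
+ #   # ready for st.dataframe(anomaly_df) on the EDA page.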
929
+
930
+
931
+ def compute_production_segments(df: pd.DataFrame,
932
+ product_col: str = "Product",
933
+ time_col: str = "Date_time") -> pd.DataFrame:
934
+ """
935
+ Compute contiguous production segments per product.
936
+
937
+ Output columns:
938
+ Product | Start_Time | End_Time | Duration_Minutes | Data_Points
939
+ """
940
+ if product_col not in df.columns or time_col not in df.columns:
941
+ return pd.DataFrame(columns=["Product", "Start_Time", "End_Time", "Duration_Minutes", "Data_Points"])
942
+
943
+ df_seg = df[[product_col, time_col]].copy()
944
+ df_seg[time_col] = pd.to_datetime(df_seg[time_col], errors="coerce")
945
+ df_seg = df_seg.dropna(subset=[time_col]).sort_values(time_col)
946
+
947
+ # Segment id: changes whenever Product changes
948
+ df_seg["segment_id"] = (df_seg[product_col] != df_seg[product_col].shift()).cumsum()
949
+
950
+ grouped = df_seg.groupby([product_col, "segment_id"]).agg(
951
+ Start_Time=(time_col, "min"),
952
+ End_Time=(time_col, "max"),
953
+ Data_Points=(time_col, "count"),
954
+ ).reset_index()
955
+
956
+ grouped["Duration"] = grouped["End_Time"] - grouped["Start_Time"]
957
+ # Drop CIP rows so only production segments remain (optional)
958
+ grouped = grouped[~grouped[product_col].isin(["CIP", "CIP CHAMBER"])]
959
+
960
+ if grouped.empty:
961
+ return pd.DataFrame(columns=["Product", "Start_Time", "End_Time", "Duration_Minutes", "Data_Points"])
962
+
963
+ grouped["Duration_Minutes"] = grouped["Duration"].dt.total_seconds() / 60.0
964
+
965
+ result = grouped[[product_col, "Start_Time", "End_Time", "Duration_Minutes", "Data_Points"]].copy()
966
+ result.rename(columns={product_col: "Product"}, inplace=True)
967
+
968
+ return result
969
+
970
+
971
+ def create_line_plots(df: pd.DataFrame,
972
+ params: list,
973
+ product_label: str = "All Data",
974
+ time_col: str = "Date_time"):
975
+ """
976
+ Builds 6 process-parameter distribution plots (2x3 grid) against time.
977
+ Returns a single matplotlib Figure to display in Streamlit.
978
+ """
979
+ df_plot = df.copy()
980
+ if time_col in df_plot.columns:
981
+ df_plot[time_col] = pd.to_datetime(df_plot[time_col], errors="coerce")
982
+ df_plot = df_plot.dropna(subset=[time_col]).sort_values(time_col)
983
+
984
+ # Prepare a 2x3 figure
985
+ fig, axes = plt.subplots(2, 3, figsize=(18, 8), sharex=True)
986
+ axes = axes.flatten()
987
+
988
+ for i, param in enumerate(params):
989
+ ax = axes[i]
990
+ if param in df_plot.columns:
991
+ ax.plot(df_plot[time_col], df_plot[param], marker=".", linewidth=0.7)
992
+ ax.set_title(param)
993
+ ax.grid(True, alpha=0.3)
994
+ else:
995
+ ax.set_title(f"{param} (not found)")
996
+ ax.axis("off")
997
+
998
+ # If fewer than 6 params, turn off the empty axes
999
+ for j in range(len(params), 6):
1000
+ axes[j].axis("off")
1001
+
1002
+ fig.suptitle(f"Distribusi Parameter Proses – {product_label}", fontsize=14)
1003
+ fig.tight_layout(rect=[0, 0.03, 1, 0.95])
1004
+ return fig
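+
+ # Editor's note: in Streamlit this is typically rendered as (FEATURES assumed):
+ #   fig = create_line_plots(df, params=FEATURES, product_label="All Data")
+ #   st.pyplot(fig)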
1005
+
1006
+
1007
+ def identify_outliers(df: pd.DataFrame,
1008
+ params: list,
1009
+ product_label: str = "All Data",
1010
+ time_col: str = "Date_time"):
1011
+ """
1012
+ Deteksi outlier dengan metode IQR untuk setiap parameter dalam `params`.
1013
+ Mengembalikan:
1014
+ - fig_out : Figure 2x3 dengan plot time series + highlight outlier
1015
+ - total_outliers: total jumlah outlier semua parameter
1016
+ - outlier_stats_df: tabel ringkasan per parameter
1017
+ """
1018
+ df_proc = df.copy()
1019
+ if time_col in df_proc.columns:
1020
+ df_proc[time_col] = pd.to_datetime(df_proc[time_col], errors="coerce")
1021
+ df_proc = df_proc.dropna(subset=[time_col]).sort_values(time_col)
1022
+
1023
+ stats_rows = []
1024
+ total_outliers = 0
1025
+
1026
+ fig, axes = plt.subplots(2, 3, figsize=(18, 8), sharex=True)
1027
+ axes = axes.flatten()
1028
+
1029
+ for i, param in enumerate(params):
1030
+ ax = axes[i]
1031
+ if param not in df_proc.columns:
1032
+ ax.set_title(f"{param} (not found)")
1033
+ ax.axis("off")
1034
+ continue
1035
+
1036
+ series = df_proc[param].astype(float)
1037
+ series_no_na = series.dropna()
1038
+
1039
+ if series_no_na.empty:
1040
+ ax.set_title(f"{param} (no data)")
1041
+ ax.axis("off")
1042
+ continue
1043
+
1044
+ Q1 = series_no_na.quantile(0.25)
1045
+ Q3 = series_no_na.quantile(0.75)
1046
+ IQR = Q3 - Q1
1047
+ lower = Q1 - 1.5 * IQR
1048
+ upper = Q3 + 1.5 * IQR
1049
+
1050
+ outlier_mask = (series < lower) | (series > upper)
1051
+ outlier_idx = df_proc.index[outlier_mask]
1052
+ outlier_vals = series[outlier_mask]
1053
+
1054
+ count_out = int(outlier_mask.sum())
1055
+ total_outliers += count_out
1056
+
1057
+ stats_rows.append({
1058
+ "Parameter": param,
1059
+ "Q1": Q1,
1060
+ "Q3": Q3,
1061
+ "IQR": IQR,
1062
+ "Lower_Bound": lower,
1063
+ "Upper_Bound": upper,
1064
+ "Outliers_Count": count_out,
1065
+ })
1066
+
1067
+ # Plot time series + highlight outliers
1068
+ ax.plot(df_proc[time_col], series, linewidth=0.7)
1069
+ if count_out > 0:
1070
+ ax.scatter(df_proc.loc[outlier_idx, time_col], outlier_vals, s=15)
1071
+ ax.axhline(lower, linestyle="--")
1072
+ ax.axhline(upper, linestyle="--")
1073
+ ax.set_title(f"{param} (outliers: {count_out})")
1074
+ ax.grid(True, alpha=0.3)
1075
+
1076
+ # Turn off empty axes if fewer than 6 params
1077
+ for j in range(len(params), 6):
1078
+ axes[j].axis("off")
1079
+
1080
+ fig.suptitle(f"Outlier Detection – {product_label}", fontsize=14)
1081
+ fig.tight_layout(rect=[0, 0.03, 1, 0.95])
1082
+
1083
+ outlier_stats_df = pd.DataFrame(stats_rows)
1084
+ return fig, total_outliers, outlier_stats_df
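+
+ # Editor's note: minimal usage sketch (illustrative; FEATURES assumed):
+ #   fig, n_out, stats = identify_outliers(df, params=FEATURES)
+ #   st.pyplot(fig)
+ #   st.write(f"Total outliers: {n_out}")
+ #   st.dataframe(stats)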
1085
+
1086
+
1087
+ def compute_stats_table(df: pd.DataFrame,
1088
+ params: list,
1089
+ target_col: str = None) -> pd.DataFrame:
1090
+ """
1091
+ Tabel statistik deskriptif untuk parameter proses + kolom target gas (jika ada).
1092
+ """
1093
+ cols = [c for c in params if c in df.columns]
1094
+ if target_col and target_col in df.columns:
1095
+ cols.append(target_col)
1096
+
1097
+ if not cols:
1098
+ return pd.DataFrame()
1099
+
1100
+ desc = df[cols].describe().T # index = parameter
1101
+ return desc
1102
+
1103
+
1104
+ # ============= USAGE =============
1105
+ if __name__ == "__main__":
1106
+ df = pd.read_csv(r"C:\Dokumen\One To Many_17_10_2025\MMBTU\DASHBOARD\One To Many\disagregasi_data_spraydryer_terbaru_10_17_2025.csv")
1107
+ pipeline = SprayDryerEDAPipeline(dataframe=df)
1108
+ processed_df, product_dfs = pipeline.run_full_pipeline(show_visualizations=True)
1109
+ processed_df.to_csv(r"Processed Data Pipeline EDA_21_10_2025.csv", index=False)
1110
+ summary = pipeline.get_summary()
1111
+ print(summary)
filter_rule_engine.py ADDED
@@ -0,0 +1,361 @@
1
+ import pandas as pd
2
+
3
+
4
+ # =========================
5
+ # RULE 1 (detect_anomaly_new_rules)
6
+ # =========================
7
+ def _detect_anomaly_rule_1(df: pd.DataFrame) -> pd.Series:
8
+ """
9
+ ORIGINAL LOGIC (unchanged)
10
+ """
11
+ NOZZLE_PRESSURE = 'D101463PIC_PV' # Nozzle pressure
12
+ TIC_PV = 'D102265TIC_PV' # Inlet Temperature
13
+ TOC = 'D101330TT' # Outlet Temperature
14
+ HP_CV = 'D102260TIC_CV' # Steam Damper CV (High Pressure)
15
+ LP_CV = 'D102265TIC_CV' # Steam Damper CV (Low Pressure)
16
+
17
+ required_cols = [NOZZLE_PRESSURE, TIC_PV, TOC, HP_CV, LP_CV, 'Date_time']
18
+ for col in required_cols:
19
+ if col not in df.columns:
20
+ raise ValueError(f"❌ Kolom yang dibutuhkan '{col}' tidak ada di DataFrame.")
21
+
22
+ tmp = df.copy()
23
+ tmp['Date_time'] = pd.to_datetime(tmp['Date_time'])
24
+ tmp = tmp.sort_values('Date_time').reset_index(drop=True)
25
+
26
+ # Values from 1 minute earlier
27
+ rename_prev = {c: f"prev_{c}" for c in [TIC_PV, TOC, HP_CV, LP_CV]}
28
+ df_prev = tmp[['Date_time'] + list(rename_prev.keys())].rename(columns=rename_prev)
29
+ df_prev['Date_time'] = df_prev['Date_time'] + pd.Timedelta(minutes=1)
30
+
31
+ tmp = tmp.merge(df_prev, on='Date_time', how='left')
32
+
33
+ # Delta
34
+ tmp['delta_TIC'] = tmp[TIC_PV] - tmp[f'prev_{TIC_PV}']
35
+ tmp['delta_TOC'] = tmp[TOC] - tmp[f'prev_{TOC}']
36
+ tmp['delta_HP'] = tmp[HP_CV] - tmp[f'prev_{HP_CV}']
37
+ tmp['delta_LP'] = tmp[LP_CV] - tmp[f'prev_{LP_CV}']
38
+
39
+ # Base condition: machine is running
40
+ is_running = tmp[NOZZLE_PRESSURE] >= 135
41
+
42
+ # Case A: inlet temperature FALLS
43
+ is_tic_delta_eq_neg_3 = (tmp['delta_TIC'] == -3)
44
+ is_toc_up = (tmp['delta_TOC'] > 0)
45
+ anomaly_case_A1 = is_running & is_tic_delta_eq_neg_3 & is_toc_up
46
+
47
+ is_tic_delta_lt_neg_3 = (tmp['delta_TIC'] < -3)
48
+ is_damper_up = (tmp['delta_HP'] > 0) | (tmp['delta_LP'] > 0)
49
+ anomaly_case_A2 = is_running & is_tic_delta_lt_neg_3 & is_damper_up
50
+
51
+ # Case B: inlet temperature RISES
52
+ is_tic_delta_eq_pos_3 = (tmp['delta_TIC'] == 3)
53
+ is_toc_down = (tmp['delta_TOC'] < 0)
54
+ anomaly_case_B1 = is_running & is_tic_delta_eq_pos_3 & is_toc_down
55
+
56
+ is_tic_delta_gt_pos_3 = (tmp['delta_TIC'] > 3)
57
+ is_damper_down = (tmp['delta_HP'] < 0) | (tmp['delta_LP'] < 0)
58
+ anomaly_case_B2 = is_running & is_tic_delta_gt_pos_3 & is_damper_down
59
+
60
+ anomaly_flags = anomaly_case_A1 | anomaly_case_A2 | anomaly_case_B1 | anomaly_case_B2
61
+ anomaly_flags.name = "anomaly_flag"
62
+
63
+ print(f"✅ Jumlah anomali (Rule 1 - new_rules) terdeteksi: {anomaly_flags.sum()} dari {len(tmp)} baris data")
64
+ return anomaly_flags
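+
+ # Editor's note: minimal usage sketch (illustrative). The returned flags align
+ # positionally with the time-sorted, index-reset frame the function builds:
+ #   df['Date_time'] = pd.to_datetime(df['Date_time'])
+ #   tmp = df.sort_values('Date_time').reset_index(drop=True)
+ #   flags = _detect_anomaly_rule_1(tmp)
+ #   clean = tmp[~flags]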
65
+
66
+
67
+ # =========================
68
+ # RULE 2 (detect_anomaly_rule_2_revised)
69
+ # =========================
70
+ def _detect_anomaly_rule_2(df: pd.DataFrame) -> pd.Series:
71
+ """
72
+ ORIGINAL LOGIC (unchanged)
73
+ """
74
+ NOZZLE_PRESSURE = 'D101463PIC_PV'
75
+ FF = 'D101264FTSCL' # Flow Feed
76
+ TOC = 'D101330TT' # Outlet Temperature
77
+
78
+ required_cols = [NOZZLE_PRESSURE, FF, TOC, 'Date_time']
79
+ for col in required_cols:
80
+ if col not in df.columns:
81
+ raise ValueError(f"❌ Kolom '{col}' tidak ada di DataFrame.")
82
+
83
+ tmp = df.copy()
84
+ tmp['Date_time'] = pd.to_datetime(tmp['Date_time'])
85
+ tmp = tmp.sort_values('Date_time').reset_index(drop=True)
86
+
87
+ # Values from 1 minute earlier
88
+ rename_prev = {c: f'prev_{c}' for c in [FF, TOC]}
89
+ df_prev = tmp[['Date_time'] + list(rename_prev.keys())].rename(columns=rename_prev)
90
+ df_prev['Date_time'] = df_prev['Date_time'] + pd.Timedelta(minutes=1)
91
+ tmp = tmp.merge(df_prev, on='Date_time', how='left')
92
+
93
+ # Delta
94
+ tmp['delta_FF'] = tmp[FF] - tmp[f'prev_{FF}']
95
+ tmp['delta_TOC'] = tmp[TOC] - tmp[f'prev_{TOC}']
96
+
97
+ # Base condition: machine is running
98
+ is_running = tmp[NOZZLE_PRESSURE] >= 135
99
+
100
+ # Condition 1: FF falls & TOC drops significantly
101
+ is_ff_down = tmp['delta_FF'] < 0
102
+ is_toc_drop_significant = tmp['delta_TOC'] <= -1.0
103
+ anomaly_case_1 = is_running & is_ff_down & is_toc_drop_significant
104
+
105
+ # Kondisi 2: FF naik & TOC naik 1 s/d 12.59
106
+ is_ff_up = tmp['delta_FF'] > 0
107
+ is_toc_rise_in_range = (tmp['delta_TOC'] >= 1.0) & (tmp['delta_TOC'] <= 12.59)
108
+ anomaly_case_2 = is_running & is_ff_up & is_toc_rise_in_range
109
+
110
+ anomaly_flags = anomaly_case_1 | anomaly_case_2
111
+ anomaly_flags.name = "anomaly_flag_2"
112
+
113
+ print(f"✅ Jumlah anomali (Rule 2 Rev) terdeteksi: {anomaly_flags.sum()} dari {len(tmp)} baris data")
114
+ return anomaly_flags
115
+
116
+
+ # =========================
+ # RULE 3 (detect_anomaly_rule_3_revised)
+ # =========================
+ def _detect_anomaly_rule_3(df: pd.DataFrame) -> pd.Series:
+     """
+     ORIGINAL LOGIC (unchanged)
+     """
+     NOZZLE_PRESSURE = 'D101463PIC_PV'
+     FF = 'D101264FTSCL'
+     TIC_PV = 'D102265TIC_PV'
+     GAS_MMBTU = 'GAS_MMBTU_Disaggregated'
+
+     required_cols = [NOZZLE_PRESSURE, FF, TIC_PV, GAS_MMBTU, 'Date_time']
+     for col in required_cols:
+         if col not in df.columns:
+             raise ValueError(f"❌ Column '{col}' is missing from the DataFrame.")
+
+     tmp = df.copy()
+     tmp['Date_time'] = pd.to_datetime(tmp['Date_time'])
+     tmp = tmp.sort_values('Date_time').reset_index(drop=True)
+
+     # Values from 1 minute earlier
+     rename_prev = {c: f'prev_{c}' for c in [FF, TIC_PV, GAS_MMBTU]}
+     df_prev = tmp[['Date_time'] + list(rename_prev.keys())].rename(columns=rename_prev)
+     df_prev['Date_time'] = df_prev['Date_time'] + pd.Timedelta(minutes=1)
+     tmp = tmp.merge(df_prev, on='Date_time', how='left')
+
+     # Minute-over-minute deltas
+     tmp['delta_FF'] = tmp[FF] - tmp[f'prev_{FF}']
+     tmp['delta_TIC'] = tmp[TIC_PV] - tmp[f'prev_{TIC_PV}']
+     tmp['delta_MMBTU'] = tmp[GAS_MMBTU] - tmp[f'prev_{GAS_MMBTU}']
+
+     # Base condition: machine running
+     is_running = tmp[NOZZLE_PRESSURE] >= 135
+
+     # Case 1: FF rises, TIC rises, but GAS falls
+     is_ff_up = tmp['delta_FF'] > 0
+     is_tic_up = tmp['delta_TIC'] > 0
+     is_mmbtu_down = tmp['delta_MMBTU'] < 0
+     anomaly_case_1 = is_running & is_ff_up & is_tic_up & is_mmbtu_down
+
+     # Case 2: FF falls, TIC falls, but GAS rises
+     is_ff_down = tmp['delta_FF'] < 0
+     is_tic_down = tmp['delta_TIC'] < 0
+     is_mmbtu_up = tmp['delta_MMBTU'] > 0
+     anomaly_case_2 = is_running & is_ff_down & is_tic_down & is_mmbtu_up
+
+     anomaly_flags = anomaly_case_1 | anomaly_case_2
+     anomaly_flags.name = "anomaly_flag_3"
+
+     print(f"✅ Anomalies detected (Rule 3 Rev): {anomaly_flags.sum()} of {len(tmp)} rows")
+     return anomaly_flags
+
+
+ # =========================
+ # RULE 4 (detect_spray_dryer_anomalies)
+ # =========================
+ def _detect_anomaly_rule_4(df: pd.DataFrame) -> pd.Series:
+     """
+     ORIGINAL LOGIC (unchanged)
+     """
+     NOZZLE_PRESSURE = 'D101463PIC_PV'
+     HP_CV = 'D102260TIC_CV'
+     LP_CV = 'D102265TIC_CV'
+     TIC = 'D102265TIC_PV'
+     TOC = 'D101330TT'
+
+     required_cols = [NOZZLE_PRESSURE, HP_CV, LP_CV, TIC, TOC, 'Date_time']
+     for col in required_cols:
+         if col not in df.columns:
+             raise ValueError(f"❌ Column '{col}' is missing from the DataFrame.")
+
+     tmp = df.copy()
+     tmp['Date_time'] = pd.to_datetime(tmp['Date_time'])
+     tmp = tmp.sort_values('Date_time').reset_index(drop=True)
+
+     # Values from 1 minute earlier for HP & LP
+     rename_prev = {c: f'prev_{c}' for c in [HP_CV, LP_CV]}
+     df_prev = tmp[['Date_time'] + list(rename_prev.keys())].rename(columns=rename_prev)
+     df_prev['Date_time'] = df_prev['Date_time'] + pd.Timedelta(minutes=1)
+     tmp = tmp.merge(df_prev, on='Date_time', how='left')
+
+     # Minute-over-minute deltas
+     tmp['delta_HP'] = tmp[HP_CV] - tmp[f'prev_{HP_CV}']
+     tmp['delta_LP'] = tmp[LP_CV] - tmp[f'prev_{LP_CV}']
+
+     # Base condition: machine running
+     is_running = tmp[NOZZLE_PRESSURE] >= 135
+
+     # Rule 4a: HP rises while LP falls by more than 1
+     is_hp_up = tmp['delta_HP'] > 0
+     is_lp_down_significant = tmp['delta_LP'] < -1.0
+     anomaly_1 = is_hp_up & is_lp_down_significant
+
+     # Rule 4b: HP falls while LP rises by more than 1
+     is_hp_down = tmp['delta_HP'] < 0
+     is_lp_up_significant = tmp['delta_LP'] > 1.0
+     anomaly_2 = is_hp_down & is_lp_up_significant
+
+     # Rule 4c: TOC exceeds TIC
+     anomaly_3 = tmp[TOC] > tmp[TIC]
+
+     final_anomaly_flags = is_running & (anomaly_1 | anomaly_2 | anomaly_3)
+     final_anomaly_flags.name = "anomaly_flag_4"
+
+     print(f"✅ Anomalies detected (Rule 4): {final_anomaly_flags.sum()} of {len(tmp)} rows")
+     return final_anomaly_flags
+
+
+ # =========================
+ # MAIN PIPELINE
+ # =========================
+ def apply_rule_engine(df: pd.DataFrame):
+     """
+     Integrated pipeline with a CIP / CIP CHAMBER pre-filter.
+
+     Returns:
+     - df_clean : cleaned data (no CIP rows, no rule-engine anomalies)
+     - df_anomalies : every removed row, plus a reason column ('anomaly_reason')
+     - summary_report: summary dict (totals, percentages, CIP breakdown, etc.)
+     """
+     df_original = df.copy()
+     total_rows_initial = len(df_original)
+
+     # --- 1) Remove CIP & CIP CHAMBER rows first ---
+     if "Product" in df_original.columns:
+         cip_mask = df_original["Product"].isin(["CIP", "CIP CHAMBER"])
+     else:
+         cip_mask = pd.Series(False, index=df_original.index)
+
+     df_cip = df_original[cip_mask].copy()
+     df_non_cip = df_original[~cip_mask].copy()
+     cip_removed = int(cip_mask.sum())
+
+     # Initialise flag columns on the CIP rows
+     rule_flag_cols = ["anomaly_flag", "anomaly_flag_2", "anomaly_flag_3", "anomaly_flag_4"]
+     if not df_cip.empty:
+         for c in rule_flag_cols + ["anomaly_any"]:
+             df_cip[c] = False
+         df_cip["anomaly_reason"] = "CIP / CIP CHAMBER product (removed before the rule engine)"
+
+     # --- 2) Run the 4 rules on the non-CIP data ---
+     if len(df_non_cip) > 0:
+         rule1_flags = _detect_anomaly_rule_1(df_non_cip)
+         rule2_flags = _detect_anomaly_rule_2(df_non_cip)
+         rule3_flags = _detect_anomaly_rule_3(df_non_cip)
+         rule4_flags = _detect_anomaly_rule_4(df_non_cip)
+
+         df_with_flags = df_non_cip.copy()
+         df_with_flags["anomaly_flag"] = rule1_flags.values
+         df_with_flags["anomaly_flag_2"] = rule2_flags.values
+         df_with_flags["anomaly_flag_3"] = rule3_flags.values
+         df_with_flags["anomaly_flag_4"] = rule4_flags.values
+
+         df_with_flags["anomaly_any"] = df_with_flags[rule_flag_cols].any(axis=1)
+
+         df_clean_rules = df_with_flags[~df_with_flags["anomaly_any"]].reset_index(drop=True)
+         df_anomaly_rules = df_with_flags[df_with_flags["anomaly_any"]].reset_index(drop=True)
+
+         # Build a reason string for each flagged row
+         def _build_reasons(row):
+             reasons = []
+             if row["anomaly_flag"]:
+                 reasons.append("Rule 1 – TIC/TOC vs Damper dynamics")
+             if row["anomaly_flag_2"]:
+                 reasons.append("Rule 2 – Flow Feed vs TOC")
+             if row["anomaly_flag_3"]:
+                 reasons.append("Rule 3 – Flow & TIC vs GAS MMBTU")
+             if row["anomaly_flag_4"]:
+                 reasons.append("Rule 4 – HP/LP damper & TOC>TIC")
+             return "; ".join(reasons)
+
+         if not df_anomaly_rules.empty:
+             df_anomaly_rules["anomaly_reason"] = df_anomaly_rules.apply(_build_reasons, axis=1)
+     else:
+         # No non-CIP data
+         df_clean_rules = df_non_cip.copy()
+         df_anomaly_rules = df_non_cip.iloc[0:0].copy()
+         df_with_flags = df_non_cip.copy()
+
+     # --- 3) Combine all anomalous rows: CIP + rule engine ---
+     if not df_cip.empty and not df_anomaly_rules.empty:
+         df_anomalies = pd.concat([df_cip, df_anomaly_rules], ignore_index=True, sort=False)
+     elif not df_cip.empty:
+         df_anomalies = df_cip.copy()
+     else:
+         df_anomalies = df_anomaly_rules.copy()
+
+     # Final clean data = clean_rules (non-CIP & passed every rule)
+     df_clean = df_clean_rules.copy()
+
+     # --- 4) Summary figures ---
+     total_rows_after_filter = len(df_clean)
+     total_rows_removed = total_rows_initial - total_rows_after_filter
+     percent_clean = float(total_rows_after_filter / total_rows_initial * 100) if total_rows_initial > 0 else 0.0
+
+     # Breakdown: CIP vs rule engine
+     rule_rows_removed = len(df_anomaly_rules)
+     rule_percent = float(rule_rows_removed / total_rows_initial * 100) if total_rows_initial > 0 else 0.0
+     cip_percent = float(cip_removed / total_rows_initial * 100) if total_rows_initial > 0 else 0.0
+
+     # Number of distinct anomaly reasons
+     if not df_anomalies.empty and "anomaly_reason" in df_anomalies.columns:
+         num_anomaly_types = int(df_anomalies["anomaly_reason"].nunique())
+     else:
+         num_anomaly_types = 0
+
+     summary_report = {
+         "total_rows_initial": total_rows_initial,
+         "total_rows_after_filter": total_rows_after_filter,
+         "total_rows_removed": total_rows_removed,
+         "percent_clean": percent_clean,
+         "cip_rows_removed": cip_removed,
+         "cip_percent": cip_percent,
+         "rule_rows_removed": rule_rows_removed,
+         "rule_percent": rule_percent,
+         "num_anomaly_types": num_anomaly_types,
+     }
+
+     print(f"✅ Initial rows : {total_rows_initial}")
+     print(f"✅ Clean rows : {total_rows_after_filter}")
+     print(f"🗑️ Rows removed (CIP + rule anomalies): {total_rows_removed}")
+
+     return df_clean, df_anomalies, summary_report
+
+
+ # Alias kept for backward compatibility
+ def apply_spray_dryer_rule_engine(df: pd.DataFrame):
+     return apply_rule_engine(df)
+
+
+ if __name__ == "__main__":
+     # Manual usage example
+     path_csv = "/work/Dataset 18 Mar - 19 Jun/disagregasi_data_spraydryer_terbaru_10_17_2025.csv"
+     try:
+         df_raw = pd.read_csv(path_csv)
+         df_clean, df_anom, summary = apply_rule_engine(df_raw)
+
+         print("\n--- SUMMARY ---")
+         for k, v in summary.items():
+             print(f"{k}: {v}")
+         print("Sample anomalies:")
+         print(df_anom.head())
+     except FileNotFoundError:
+         print(f"Sample file not found: {path_csv}")
inverse_model_forward.py ADDED
@@ -0,0 +1,119 @@
+ import streamlit as st
+ import pandas as pd
+ import numpy as np
+ import joblib
+ import os
+
+ # --- PATH CONFIGURATION ---
+ BASE_MODEL_FOLDER = r"C:\Dokumen\One To Many_17_10_2025\MMBTU\DASHBOARD\One To Many\MODEL CHECKPOINT FOR INVERSE MODEL"
+ AVAILABLE_PRODUCTS = ["BMR BASE", "CKP BASE", "CKR BASE", "CMR BASE", "MORIGRO BASE"]
+ INPUT_FEATURES = [
+     "D101330TT", "D102260TIC_CV", "D102265TIC_PV",
+     "D102265TIC_CV", "D102266TIC", "D101264FTSCL"
+ ]
+
+ @st.cache_resource(show_spinner="Loading model...")
+ def load_model_artifacts(product_name, base_folder):
+     file_name = f"model_checkpoint_xgb_{product_name}.joblib"
+     model_path = os.path.join(base_folder, file_name)
+     if not os.path.exists(model_path):
+         st.error(f"Model file not found for **{product_name}** at: **{model_path}**")
+         return None, None, None, None
+
+     try:
+         deployment_bundle = joblib.load(model_path)
+
+         # Pull the artifacts out of the bundle, following the original checkpoint structure
+         model = deployment_bundle.get('model')
+         poly_transformer = deployment_bundle.get('poly_transformer')
+         input_features = deployment_bundle.get('input_features')
+         poly_feature_names = deployment_bundle.get('poly_feature_names')
+
+         # Simple validation
+         if model is None or poly_transformer is None or input_features is None or poly_feature_names is None:
+             st.error(f"One of the artifacts (model, poly_transformer, input_features, poly_feature_names) is missing from the joblib file for **{product_name}**.")
+             return None, None, None, None
+
+         return model, poly_transformer, input_features, poly_feature_names
+
+     except Exception as e:
+         st.error(f"Failed to load or read the joblib file. Error: {e}")
+         return None, None, None, None
+
+ # --- STREAMLIT UI ---
+ st.set_page_config(page_title="GAS MMBTU Prediction Dashboard", layout="wide")
+ with st.sidebar:
+     st.header("⚙️ Model Configuration")
+     # Product selection dropdown
+     selected_product = st.selectbox(
+         "Select Target Product:",
+         AVAILABLE_PRODUCTS,
+         index=AVAILABLE_PRODUCTS.index("CKR BASE") if "CKR BASE" in AVAILABLE_PRODUCTS else 0
+     )
+     st.markdown(f"**Model Folder:** `{BASE_MODEL_FOLDER}`")
+
+ st.title(f"🔥 GAS MMBTU Prediction Dashboard: **{selected_product}**")
+ st.markdown("This dashboard uses an **XGBoost** model with a **polynomial** feature transformation.")
+ st.markdown("---")
+
+ model, poly_transformer, input_features_loaded, poly_feature_names = load_model_artifacts(
+     selected_product, BASE_MODEL_FOLDER
+ )
+
+ if model is None:
+     st.stop()  # Stop if the model failed to load
+
+ # Check that the input features are consistent
+ if set(input_features_loaded) != set(INPUT_FEATURES):
+     st.warning("The input features loaded from the joblib file differ from the default list. Using the features from the joblib file.")
+     INPUT_FEATURES = input_features_loaded
+
+ st.subheader("🧪 Enter Raw Input Values")
+ st.markdown("Please enter numeric values for the 6 features below:")
+
+ # Create an input column for each of the 6 features
+ cols = st.columns(len(INPUT_FEATURES))
+ user_raw_data = {}
+ for i, feature in enumerate(INPUT_FEATURES):
+     # Example default values
+     default_value = 0.0
+     if feature == "D101330TT": default_value = 95.0
+     elif feature == "D102260TIC_CV": default_value = 45.0
+     elif feature == "D102265TIC_PV": default_value = 185.0
+     elif feature == "D102265TIC_CV": default_value = 17.0
+     elif feature == "D102266TIC": default_value = 16.0
+     elif feature == "D101264FTSCL": default_value = 3800.0
+
+     # One numeric input per feature
+     user_raw_data[feature] = cols[i].number_input(
+         feature,
+         value=default_value,
+         format="%.4f",
+         key=f"input_{feature}"
+     )
+
+ st.markdown("---")
+
+ # Predict button
+ if st.button("🔮 Predict GAS MMBTU Now", type="primary", use_container_width=True):
+
+     # Build a DataFrame from the raw input (in input-feature order)
+     sim_input_df = pd.DataFrame([user_raw_data])[INPUT_FEATURES]
+
+     st.subheader("⚙️ Transformation & Prediction")
+
+     # 1. Polynomial transformation
+     with st.spinner("1. Applying polynomial transformation..."):
+         transformed_input_np = poly_transformer.transform(sim_input_df)
+         transformed_input_df = pd.DataFrame(
+             transformed_input_np,
+             columns=poly_feature_names,
+             index=sim_input_df.index
+         )
+
+     # 2. Prediction
+     with st.spinner("2. Running the model prediction..."):
+         predictions = model.predict(transformed_input_df)
+         prediksi_final = predictions[0]
+
+     st.markdown("### ✅ Prediction Result")
+     st.metric(
+         f"Predicted **GAS MMBTU** demand for {selected_product}",
+         f"{prediksi_final:.6f} MMBTU"
+     )
+     st.markdown("---")
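For reference, a checkpoint bundle with the four keys this loader expects could be produced along these lines (a sketch, not the original training code: the CSV path, degree-2 polynomial, and XGBoost settings are assumptions):

import joblib
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from xgboost import XGBRegressor

features = ["D101330TT", "D102260TIC_CV", "D102265TIC_PV",
            "D102265TIC_CV", "D102266TIC", "D101264FTSCL"]
train = pd.read_csv("training_data.csv")        # hypothetical training set
X, y = train[features], train["GAS_MMBTU_Disaggregated"]

# Fit the polynomial expansion, then the forward model on the expanded features
poly = PolynomialFeatures(degree=2, include_bias=False).fit(X)
model = XGBRegressor(n_estimators=300).fit(poly.transform(X), y)

joblib.dump({
    "model": model,
    "poly_transformer": poly,
    "input_features": features,
    "poly_feature_names": list(poly.get_feature_names_out(features)),
}, "model_checkpoint_xgb_CKR BASE.joblib")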
prediksi_model_inverse.py ADDED
@@ -0,0 +1,64 @@
+ import os
+ import streamlit as st
+ import pandas as pd
+ import numpy as np
+ import joblib
+ from datetime import datetime
+ from sklearn.preprocessing import MinMaxScaler
+ import openpyxl
+ from scipy.optimize import differential_evolution
+
+ def predict_forward_from_params(product_name: str, params_dict: dict, model_folder: str) -> float:
+     """
+     Run the many-to-one XGBoost forward model for one row of input parameters.
+     Used for both Validation and Simulation so the two stay consistent.
+     """
+     model_filename = f"model_checkpoint_xgb_{product_name}.joblib"
+     model_path = os.path.join(model_folder, model_filename)
+
+     if not os.path.exists(model_path):
+         raise FileNotFoundError(f"Model file for product {product_name} not found: {model_path}")
+
+     artifacts = joblib.load(model_path)
+
+     # Bundle structure: dict with keys 'model', 'poly_transformer', 'input_features', 'poly_feature_names'
+     if isinstance(artifacts, dict):
+         fwd_model = artifacts.get("model", artifacts)
+         poly_transformer = artifacts.get("poly_transformer", None)
+         input_features = artifacts.get("input_features", list(params_dict.keys()))
+         poly_feature_names = artifacts.get("poly_feature_names", None)
+     else:
+         # Fallback: if the artifact is not a dict, treat it as the model itself
+         fwd_model = artifacts
+         poly_transformer = None
+         input_features = list(params_dict.keys())
+         poly_feature_names = None
+
+     # Assemble a one-row DataFrame
+     X_base = pd.DataFrame([params_dict])
+
+     # Make sure every required feature is present
+     missing = [f for f in input_features if f not in X_base.columns]
+     if missing:
+         raise ValueError(
+             "The model requires the following features, which are missing from the input: "
+             + ", ".join(missing)
+         )
+
+     # Order the columns as during training
+     X_base = X_base[input_features]
+
+     # Polynomial features, if a transformer was bundled
+     if poly_transformer is not None:
+         X_poly = poly_transformer.transform(X_base)
+         if (poly_feature_names is not None) and (len(poly_feature_names) == X_poly.shape[1]):
+             X_final = pd.DataFrame(X_poly, columns=poly_feature_names)
+         else:
+             X_final = pd.DataFrame(X_poly)
+     else:
+         X_final = X_base
+
+     # Predict
+     y_pred = fwd_model.predict(X_final)[0]
+     return float(y_pred)
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ streamlit
+ pandas
+ numpy
+ joblib
+ scikit-learn
+ openpyxl
+ scipy
+ matplotlib
+ seaborn
+ xgboost