Spaces:
Configuration error
Configuration error
delima1234-Sunbright committed on
Commit ·
5e0490f
0
Parent(s):
KMI Dashboard
Browse files- Dashboard.py +1675 -0
- Disagregasi_mmbtu.py +321 -0
- Hasil_Inverse_Model.csv +145 -0
- Inverse_Model.py +440 -0
- MonitoringModel.py +245 -0
- README.md +143 -0
- disagregasi_data_spraydryer_terbaru_10_17_2025.csv +0 -0
- eda_functions.py +1111 -0
- filter_rule_engine.py +361 -0
- inverse_model_forward.py +119 -0
- prediksi_model_inverse.py +64 -0
- requirements.txt +10 -0
Dashboard.py
ADDED
|
@@ -0,0 +1,1675 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
+
import streamlit as st
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import numpy as np
|
| 5 |
+
import joblib
|
| 6 |
+
import os
|
| 7 |
+
from datetime import datetime
|
| 8 |
+
from sklearn.preprocessing import MinMaxScaler
|
| 9 |
+
import openpyxl
|
| 10 |
+
from scipy.optimize import differential_evolution
|
| 11 |
+
from MonitoringModel import (
|
| 12 |
+
evaluate_models_for_dashboard,
|
| 13 |
+
DATA_FILENAME,
|
| 14 |
+
MODEL_FOLDER,
|
| 15 |
+
PRODUCT_LIST,
|
| 16 |
+
FEATURES,
|
| 17 |
+
TARGET_COLUMN,
|
| 18 |
+
)
|
| 19 |
+
from eda_functions import (
|
| 20 |
+
compute_eda_summary,
|
| 21 |
+
create_line_plots,
|
| 22 |
+
identify_outliers,
|
| 23 |
+
compute_stats_table,
|
| 24 |
+
compute_anomaly_table,
|
| 25 |
+
compute_production_segments,
|
| 26 |
+
)
|
| 27 |
+
|
| 28 |
+
from Inverse_Model import (
|
| 29 |
+
AVAILABLE_PRODUCTS,
|
| 30 |
+
run_inverse_for_targets,
|
| 31 |
+
results_to_dataframe,
|
| 32 |
+
)
|
| 33 |
+
from Disagregasi_mmbtu import run_disagregasi_pipeline
|
| 34 |
+
from filter_rule_engine import apply_rule_engine
|
| 35 |
+
from prediksi_model_inverse import predict_forward_from_params
|
| 36 |
+
|
| 37 |
+
# Streamlit page setup — must be the first st.* call in the app.
st.set_page_config(
    page_title="Sistem Prediksi & Rekomendasi Parameter Gas (MMBTU)",
    layout="wide"
)

# NOTE(review): this re-assignment shadows AVAILABLE_PRODUCTS imported from
# Inverse_Model above — confirm the two lists are meant to stay in sync.
AVAILABLE_PRODUCTS = ["BMR BASE", "CKP BASE", "CKR BASE", "CMR BASE", "MORIGRO BASE"]

# Feature configuration is identical for every product model.
INPUT_FEATURES = [
    "D101330TT", "D102260TIC_CV", "D102265TIC_PV",
    "D102265TIC_CV", "D102266TIC", "D101264FTSCL"
]
# Lag steps (in rows) and rolling-window sizes consumed by
# create_temporal_features below.
LAG_STEPS = [1, 2, 3, 6, 12, 24]
ROLL_WINDOWS = [3, 6, 12, 24]
# Longest history span required to compute every lag/rolling feature.
CONTEXT_WINDOW = max(max(LAG_STEPS), max(ROLL_WINDOWS))  # 24

# =========================
# UTILITY FUNCTIONS
# =========================
+
def create_temporal_features(df, lag_cols, rolling_cols):
    """Derive time-based features: calendar fields, lags, and rolling stats.

    Parameters
    ----------
    df : pd.DataFrame
        Source frame; may contain a "Date_time" column.
    lag_cols, rolling_cols : list[str]
        Columns to derive lag / rolling features from; names that are
        absent from *df* are skipped silently.

    Returns a copy of *df* with the extra columns; the input is untouched.
    """
    out = df.copy()

    # Calendar features — only when a timestamp column is present.
    if "Date_time" in out.columns:
        stamps = pd.to_datetime(out["Date_time"], errors="coerce")
        out["minute"] = stamps.dt.minute
        out["hour"] = stamps.dt.hour
        out["day_of_week"] = stamps.dt.dayofweek
        out["month"] = stamps.dt.month
        out["day_of_month"] = stamps.dt.day

    # Lag features for each requested column that actually exists.
    for name in lag_cols:
        if name not in out.columns:
            continue
        for step in LAG_STEPS:
            out[f"{name}_lag_{step}"] = out[name].shift(step)

    # Rolling statistics (mean/std/min/max) for every window size.
    # min_periods equals the window, so the first (window - 1) rows are NaN.
    for name in rolling_cols:
        if name not in out.columns:
            continue
        series = out[name]
        for win in ROLL_WINDOWS:
            window = series.rolling(window=win, min_periods=win)
            out[f"{name}_rolling_mean_{win}"] = window.mean()
            out[f"{name}_rolling_std_{win}"] = window.std()
            out[f"{name}_rolling_min_{win}"] = window.min()
            out[f"{name}_rolling_max_{win}"] = window.max()

    return out
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def select_context_history(df_product_history, user_row_dict, input_cols, context_window=CONTEXT_WINDOW):
    """Locate the most similar historical row within ONE product's data.

    The history is min-max scaled, the user's input vector is projected
    into the same scale, and the row with the smallest Euclidean distance
    is chosen. Returns ``(context_df, best_index, min_distance)`` where
    context_df is the window of up to *context_window* rows ending at the
    best match.
    """
    hist = df_product_history.copy()
    hist = hist.sort_values("Date_time").reset_index(drop=True)

    # Not enough rows to fit a scaler — return the history untouched.
    if len(hist) < 2:
        return hist, 0, 0.0

    scaler = MinMaxScaler()
    scaled_hist = scaler.fit_transform(hist[input_cols])

    query = np.array([[user_row_dict[col] for col in input_cols]], dtype=float)
    scaled_query = scaler.transform(query)

    # Per-row Euclidean distance between history and the user's vector.
    dists = np.linalg.norm(scaled_hist - scaled_query, axis=1)

    best_idx = int(np.argmin(dists))
    window_start = max(0, best_idx - (context_window - 1))
    context_df = hist.iloc[window_start:best_idx + 1].copy()

    return context_df, best_idx, float(dists[best_idx])
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def load_artifacts_and_history(model_folder, data_source, selected_product):
    """Load the product-specific model and its filtered historical data.

    Parameters
    ----------
    model_folder : str
        Folder containing ``{product}_checkpoint.pkl`` files.
    data_source : str | UploadedFile
        Either a CSV path or a Streamlit ``st.file_uploader`` object;
        ``pd.read_csv`` accepts both directly.
    selected_product : str
        Value matched against the "Product" column of the CSV.

    Returns ``(model, feature_columns, df_product)`` or ``(None, None, None)``
    after showing a Streamlit error message.
    """
    # Model filename is derived from the product, e.g. ckr_base_checkpoint.pkl
    product_file_name = f"{selected_product.lower().replace(' ', '_')}_checkpoint.pkl"
    model_path = os.path.join(model_folder, product_file_name)

    if not os.path.exists(model_path):
        st.error(f"❌ File model tidak ditemukan di: {model_path}")
        return None, None, None

    # Load model artifacts (the checkpoint bundles the estimator and the
    # exact feature-column order it was trained on).
    artifacts = joblib.load(model_path)
    model = artifacts["model"]
    feature_columns = artifacts["features"]

    # Load historical data. pd.read_csv handles both a path string and a
    # Streamlit UploadedFile, so no isinstance branching is needed.
    try:
        df_raw = pd.read_csv(data_source)
    except Exception as e:
        st.error(f"❌ Gagal membaca file CSV historis. Error: {e}")
        return None, None, None

    if "Product" not in df_raw.columns:
        st.error("Kolom 'Product' tidak ditemukan di data historis.")
        return None, None, None

    df_product = df_raw[df_raw["Product"] == selected_product].copy()
    if df_product.empty:
        st.error(f"Tidak ada data historis untuk produk '{selected_product}'.")
        return None, None, None

    if "Date_time" not in df_product.columns:
        st.error("Kolom 'Date_time' tidak ditemukan di data historis.")
        return None, None, None

    # Coerce timestamps, drop unparseable rows, and return chronologically
    # sorted history with a clean index.
    df_product["Date_time"] = pd.to_datetime(df_product["Date_time"], errors="coerce")
    df_product = df_product.dropna(subset=["Date_time"]).sort_values("Date_time").reset_index(drop=True)

    return model, feature_columns, df_product
|
| 165 |
+
|
| 166 |
+
# =========================
|
| 167 |
+
# HALAMAN 1: Prediksi Gas dari 6 Parameter
|
| 168 |
+
# =========================
|
| 169 |
+
def page_prediksi_gas_dari_6_parameter():
    """Page 1: predict gas consumption (MMBTU) from 6 process parameters.

    Flow: pick a product and data source -> load model + filtered history
    -> enter the 6 parameter values -> find the most similar historical
    context window -> build temporal features -> run the forward model.
    """
    st.subheader("1️⃣ Dashboard Prediksi Konsumsi Gas (MMBTU)")

    st.markdown(
        """
        Halaman ini digunakan untuk memprediksi **konsumsi gas (MMBTU)**
        berdasarkan **6 parameter proses** pada spray dryer.
        """
    )

    # ---------- Model & data configuration (ON MAIN PAGE, NOT SIDEBAR) ----------
    st.markdown("### 🔧 Konfigurasi Data & Model")

    config_col1, config_col2 = st.columns(2)

    with config_col1:
        selected_product = st.selectbox(
            "Pilih Produk",
            AVAILABLE_PRODUCTS,
            index=2  # default CKR BASE
        )

        model_folder = st.text_input(
            "Folder Model Checkpoints",
            value="MODEL CHECKPOINT MANY TO ONE",
            help="Folder tempat file-file model *.pkl disimpan."
        )

    with config_col2:
        data_source_option = st.radio(
            "Sumber Data Historis",
            ["Path File CSV", "Upload File CSV"],
            horizontal=True
        )

        # data_source ends up as either a path string or an UploadedFile;
        # both are accepted by the loader.
        data_source = None
        if data_source_option == "Path File CSV":
            data_file_path = st.text_input(
                "Path File Data CSV (historis)",
                value=r"disagregasi_data_spraydryer_terbaru_10_17_2025.csv"
            )
            if data_file_path:
                data_source = data_file_path
        else:
            uploaded_file = st.file_uploader(
                "Upload File CSV Historis",
                type=["csv"]
            )
            if uploaded_file is not None:
                data_source = uploaded_file

    st.markdown("---")

    # Without a data source there is nothing to predict from — stop here.
    if data_source is None:
        st.info("ℹ️ Silakan pilih sumber data historis (path atau upload CSV) untuk melanjutkan.")
        return

    # ---------- Load model & historical data ----------
    with st.spinner("📦 Memuat model & data historis..."):
        model, feature_columns, df_history = load_artifacts_and_history(
            model_folder=model_folder,
            data_source=data_source,
            selected_product=selected_product
        )

    if (model is None) or (df_history is None):
        return  # the loader already displayed the error message

    # Need at least CONTEXT_WINDOW rows so every lag/rolling feature is defined.
    if len(df_history) < CONTEXT_WINDOW:
        st.error(
            f"Data historis untuk '{selected_product}' kurang dari {CONTEXT_WINDOW} baris "
            f"(hanya {len(df_history)}). Prediksi mungkin tidak akurat."
        )
        return

    # ---------- User parameter input ----------
    st.markdown("### 🧪 Masukkan 6 Parameter Input (Data Baru)")

    c1, c2, c3 = st.columns(3)

    with c1:
        v_D101330TT = st.number_input("Temperature Outlet Chamber (D101330TT)", value=95.0, format="%.4f")
        v_D102265TIC_PV = st.number_input("Temperature Inlet Chamber (D102265TIC_PV)", value=185.0, format="%.4f")

    with c2:
        v_D102260TIC_CV = st.number_input("High Pressure Steam Damper (D102260TIC_CV)", value=45.0, format="%.4f")
        v_D102265TIC_CV = st.number_input("Low Pressure Steam Damper (D102265TIC_CV)", value=17.0, format="%.4f")

    with c3:
        v_D102266TIC = st.number_input("Dehumidifier Temperature (D102266TIC)", value=16.0, format="%.4f")
        v_D101264FTSCL = st.number_input("Flow Feed Dryer (D101264FTSCL)", value=3800.0, format="%.4f")

    st.markdown("---")

    # ---------- Predict button ----------
    if st.button("🔮 Prediksi Konsumsi Gas (MMBTU)", type="primary", use_container_width=True):
        # Assemble the user's input row (timestamped "now" so calendar
        # features can be derived for it).
        user_row = {
            "Date_time": pd.to_datetime(datetime.now()),
            "Product": selected_product,
            "D101330TT": v_D101330TT,
            "D102260TIC_CV": v_D102260TIC_CV,
            "D102265TIC_PV": v_D102265TIC_PV,
            "D102265TIC_CV": v_D102265TIC_CV,
            "D102266TIC": v_D102266TIC,
            "D101264FTSCL": v_D101264FTSCL,
        }

        with st.spinner(f"🔎 Mencari konteks historis paling mirip di data '{selected_product}'..."):
            ctx_df, best_idx, best_dist = select_context_history(
                df_history,
                user_row,
                INPUT_FEATURES,
                context_window=CONTEXT_WINDOW
            )

        #st.info(
        #f"Konteks historis paling mirip ditemukan pada index ke-**{best_idx}** "
        #f"(jarak: **{best_dist:.6f}**) dengan timestamp: "
        #f"**{ctx_df.iloc[-1]['Date_time']}**"
        #)

        with st.spinner("🧩 Membentuk fitur temporal & melakukan prediksi..."):
            # Append the user's row after its historical context so that
            # lag/rolling features for it come from the context window.
            df_new = pd.DataFrame([user_row])
            df_combined = pd.concat([ctx_df, df_new], ignore_index=True)

            # Build temporal features
            df_featured = create_temporal_features(
                df_combined,
                lag_cols=INPUT_FEATURES,
                rolling_cols=INPUT_FEATURES
            )

            # The last row is the fully-featured user input.
            final_input_row = df_featured.tail(1)

            # Verify every feature the model was trained on is present.
            missing = [c for c in feature_columns if c not in final_input_row.columns]
            if missing:
                st.error(f"⚠️ Beberapa fitur yang dibutuhkan model tidak tersedia: {missing}")
                return

            # NaN check — happens when the context is shorter than the
            # largest lag/rolling window or the history has gaps.
            if final_input_row[feature_columns].isnull().values.any():
                st.warning(
                    "Input akhir mengandung nilai NaN. "
                    "Ini bisa terjadi jika konteks historis tidak cukup panjang "
                    "atau data historis memiliki gap."
                )
                st.dataframe(final_input_row[feature_columns].T)
                return

            # Run the forward model.
            X_pred = final_input_row[feature_columns]
            y_pred = model.predict(X_pred)

            st.metric(
                f"✅ Hasil Prediksi Konsumsi GAS MMBTU untuk {selected_product}",
                f"{float(y_pred[0]):.6f} MMBTU"
            )

            with st.expander("🔍 Lihat Input Fitur Final yang Digunakan untuk Prediksi"):
                st.dataframe(X_pred)

            with st.expander("📈 Lihat Konteks Historis yang Dipakai"):
                st.dataframe(ctx_df.tail(CONTEXT_WINDOW))
| 337 |
+
|
| 338 |
+
def load_inverse_from_csv(csv_path: str):
    """Load the global inverse-model results CSV (all products).

    Returns a DataFrame, or ``None`` when the file is missing, empty,
    or unreadable (a Streamlit warning is shown in the unreadable case).
    """
    if not os.path.exists(csv_path):
        return None

    try:
        result = pd.read_csv(csv_path)
    except Exception as e:
        st.warning(f"Gagal membaca CSV hasil inverse: {e}")
        return None

    # Treat a header-only file the same as a missing one.
    return None if result.empty else result
|
| 354 |
+
|
| 355 |
+
|
| 356 |
+
def append_inverse_to_csv(df_new: pd.DataFrame, csv_path: str):
    """Append *df_new* to the inverse-results CSV at *csv_path*.

    Behaviour:
    - file missing            -> created from df_new
    - file present            -> read, columns aligned on the sorted union,
                                 concatenated, and rewritten
    - file present but broken -> warning shown, file overwritten with df_new
    No-op when df_new is None or empty.
    """
    if df_new is None or df_new.empty:
        return

    if not os.path.exists(csv_path):
        # No previous results yet — start a fresh file.
        df_new.to_csv(csv_path, index=False)
        return

    try:
        df_existing = pd.read_csv(csv_path)
    except Exception as e:
        st.warning(
            f"Gagal membaca CSV lama, akan overwrite dengan hasil baru. Error: {e}"
        )
        df_new.to_csv(csv_path, index=False)
        return

    # Align both frames on the union of their columns so columns that are
    # new in df_new get added to the old rows automatically.
    merged_cols = sorted(set(df_existing.columns).union(df_new.columns))
    frames = [
        df_existing.reindex(columns=merged_cols),
        df_new.reindex(columns=merged_cols),
    ]
    pd.concat(frames, ignore_index=True).to_csv(csv_path, index=False)
| 388 |
+
|
| 389 |
+
# =========================
|
| 390 |
+
# HALAMAN 2–6 (STUB SEMENTARA)
|
| 391 |
+
# =========================
|
| 392 |
+
def page_prediksi_parameter_dari_gas():
    """Page 2: inverse-model parameter recommendation + forward-model validation.

    Left column: look up (or optimize via Differential Evolution) the six
    process parameters matching a target gas consumption (MMBTU), cached in a
    global CSV. Right column: re-run the forward XGBoost model on the
    recommended parameters to validate them. A manual forward-simulation
    section follows.

    NOTE(review): relies on module-level names (AVAILABLE_PRODUCTS,
    load_inverse_from_csv, run_inverse_for_targets, results_to_dataframe,
    append_inverse_to_csv, predict_forward_from_params) defined elsewhere in
    this file — confirm availability when refactoring.
    """
    st.subheader("2️⃣ Prediksi Parameter dari Gas (MMBTU)")
    st.markdown(
        """
        Halaman ini digunakan untuk **mencari kombinasi 6 parameter proses** yang paling optimal
        untuk mencapai **target konsumsi Gas (MMBTU)** tertentu, kemudian
        memvalidasi hasilnya menggunakan **forward model XGBoost many-to-one**.
        """
    )

    # ====== Main two-column layout ======
    col_left, col_right = st.columns([2, 1])

    # -----------------------------
    # LEFT SIDE – INVERSE MODEL
    # -----------------------------
    with col_left:
        st.markdown("### 🔁 Inverse Model – Parameter Recommendation")

        # 1. Product selection (defaults to "CKR BASE" when available)
        selected_product = st.selectbox(
            "Pilih Produk",
            AVAILABLE_PRODUCTS,
            index=AVAILABLE_PRODUCTS.index("CKR BASE") if "CKR BASE" in AVAILABLE_PRODUCTS else 0,
            key="inv_product_select"
        )

        # 2. Target gas consumption input
        target_mmbtu = st.number_input(
            "Target Gas Consumption (MMBTU)",
            min_value=0.10,
            max_value=0.50,
            value=0.29,
            step=0.0001,
            format="%.4f",
            key="inv_target_mmbtu"
        )

        # 3. Model checkpoint folder & results-CSV configuration
        st.markdown("#### ⚙️ Konfigurasi Model & Database Hasil")

        default_model_folder = r"MODEL CHECKPOINT FOR INVERSE MODEL"
        model_folder = st.text_input(
            "Folder Model Checkpoint XGBoost (Many-to-One)",
            value=default_model_folder,
            help="Folder berisi file model_checkpoint_xgb_{PRODUCT}.joblib"
        )

        default_csv_path = r"Hasil_Inverse_Model.csv"
        csv_path = st.text_input(
            "File CSV Hasil Inverse Model",
            value=default_csv_path,
            help="Semua produk disimpan dalam satu file CSV dengan kolom 'Product' dan 'Target_MMBTU'."
        )

        # Absolute tolerance used when matching cached targets in the CSV.
        tol = st.number_input(
            "Toleransi pencarian target di CSV (±)",
            min_value=0.0,
            max_value=0.01,
            value=0.0005,
            step=0.0001,
            format="%.4f",
            help="Misal 0.0005 → akan mencari baris dengan |Target_MMBTU - target| ≤ 0.0005"
        )

        # Main action button
        run_btn = st.button(
            "🔍 Cari / Optimasi Parameter",
            type="primary",
            use_container_width=True
        )

        # Result row to be shared with the right-hand column
        last_result_row = None

        if run_btn:
            # ------- Step 1: try a cache lookup in the CSV first -------
            df_cache = load_inverse_from_csv(csv_path)
            found_from_cache = False
            last_result_row = None

            if df_cache is not None:
                # The 'Product' column is required for a per-product lookup
                if "Product" not in df_cache.columns:
                    st.info(
                        "Kolom 'Product' tidak ditemukan di CSV hasil inverse. "
                        "Akan menjalankan optimasi baru."
                    )
                else:
                    df_prod = df_cache[df_cache["Product"] == selected_product].copy()
                    if df_prod.empty:
                        st.info(
                            f"Tidak ada histori inverse untuk produk '{selected_product}' "
                            f"di CSV. Akan menjalankan optimasi baru."
                        )
                    else:
                        # Locate the target column (primary: 'Target_MMBTU',
                        # with a few legacy fallback names).
                        target_col = None
                        for c in df_prod.columns:
                            if c.lower() in [
                                "target_mmbtu",
                                "target",
                                "target_gas",
                                "target_gas_mmbtu",
                                "target_input",
                            ]:
                                target_col = c
                                break

                        if target_col is not None:
                            # Accept any cached row whose target is within ±tol,
                            # preferring the closest one.
                            diffs = (df_prod[target_col] - target_mmbtu).abs()
                            mask = diffs <= tol
                            if mask.any():
                                df_match = df_prod.loc[mask].copy()
                                df_match["__diff__"] = (df_match[target_col] - target_mmbtu).abs()
                                df_match = df_match.sort_values("__diff__")
                                row = df_match.iloc[0].drop(labels="__diff__")
                                last_result_row = row
                                found_from_cache = True
                                st.success(
                                    "✅ Rekomendasi parameter ditemukan di database CSV "
                                    "(tanpa perlu menjalankan Differential Evolution)."
                                )
                            else:
                                st.info(
                                    "ℹ️ Tidak ditemukan target yang mendekati di CSV. "
                                    "Akan menjalankan optimasi baru."
                                )
                        else:
                            st.info(
                                "Kolom target tidak ditemukan di CSV hasil inverse. "
                                "Akan menjalankan optimasi baru."
                            )
            else:
                st.info(
                    "File CSV hasil inverse belum ada. Akan dibuat setelah optimasi pertama."
                )

            # ------- Step 2: no cache hit → run real-time optimization -------
            if not found_from_cache:
                model_filename = f"model_checkpoint_xgb_{selected_product}.joblib"
                model_path = os.path.join(model_folder, model_filename)

                if not os.path.exists(model_path):
                    st.error(f"❌ File model tidak ditemukan: {model_path}")
                    return

                st.info(
                    "Sedang mencari kombinasi parameter paling optimal untuk "
                    "mencapai target Gas Consumption Anda."
                )
                with st.spinner(
                    "Menjalankan Differential Evolution untuk inverse model..."
                ):
                    # Run the inverse optimization for this single target.
                    results = run_inverse_for_targets(
                        model_path, selected_product, [target_mmbtu]
                    )
                    df_new = results_to_dataframe(results, selected_product)

                    # --- Ensure the minimal column structure for the global CSV ---
                    # 1) Add the Product column
                    df_new["Product"] = selected_product

                    # 2) Normalize the target column name to 'Target_MMBTU'
                    target_col = None
                    for c in df_new.columns:
                        if c.lower() in [
                            "target_mmbtu",
                            "target",
                            "target_gas",
                            "target_gas_mmbtu",
                            "target_input",
                        ]:
                            target_col = c
                            break

                    if target_col is None:
                        df_new["Target_MMBTU"] = float(target_mmbtu)
                    else:
                        if target_col != "Target_MMBTU":
                            df_new["Target_MMBTU"] = df_new[target_col]

                    # 3) Add any missing bookkeeping columns as NaN
                    required_cols = [
                        "Level",
                        "Predicted_MMBTU",
                        "Error",
                        "Error_Pct",
                        "Objective_Value",
                        "Converged",
                        "Iterations",
                        "Soft_Violations",
                    ]
                    for col in required_cols:
                        if col not in df_new.columns:
                            df_new[col] = np.nan

                    # Take the first row as the latest result
                    last_result_row = df_new.iloc[0]

                    # 4) Persist to the CSV (append)
                    try:
                        append_inverse_to_csv(df_new, csv_path)
                        st.success(
                            "✅ Hasil optimasi baru berhasil disimpan ke CSV "
                            "(Hasil_Inverse_Model)."
                        )
                    except Exception as e:
                        st.error(f"Gagal menyimpan hasil ke CSV: {e}")

            # ------- Step 3: display the result as a table -------
            if last_result_row is not None:
                # Stash in session_state so the right column can read it
                st.session_state["last_inverse_result"] = {
                    "product": selected_product,
                    "target": float(target_mmbtu),
                    "row": last_result_row.to_dict()
                }

                # Extract the parameter values
                row_dict = last_result_row.to_dict()

                # NOTE: the stored prediction/error are no longer used by the
                # forward pass; they are shown as historical information only.
                pred_col = None
                for c in row_dict.keys():
                    if c.lower() in ["predicted_mmbtu", "prediction", "prediction_mmbtu"]:
                        pred_col = c
                        break

                prediction_val = row_dict.get(pred_col, None)
                error_val = row_dict.get("Error", None)

                display_row = {
                    "D101330TT": row_dict.get("D101330TT", np.nan),
                    "D102260TIC_CV": row_dict.get("D102260TIC_CV", np.nan),
                    "D102265TIC_CV": row_dict.get("D102265TIC_CV", np.nan),
                    "D102265TIC_PV": row_dict.get("D102265TIC_PV", np.nan),
                    "D102266TIC": row_dict.get("D102266TIC", np.nan),
                    "D101264FTSCL": row_dict.get("D101264FTSCL", np.nan),
                    "Prediction (MMBTU) [Excel/Inverse]": prediction_val,
                    "Error (MMBTU) [Excel/Inverse]": error_val,
                    "Target Input": float(target_mmbtu),
                }

                st.markdown("#### 📊 Hasil Rekomendasi Parameter")
                st.dataframe(pd.DataFrame([display_row]), use_container_width=True)

                with st.expander("🔍 Detail Lengkap Hasil Inverse Model (Raw)"):
                    st.json(row_dict)
            else:
                st.warning("Tidak ada hasil yang bisa ditampilkan.")

    # ---------------------------------------------
    # RIGHT SIDE – FORWARD MODELLING (VALIDATION)
    # ---------------------------------------------
    with col_right:
        st.markdown("### 📈 Forward Modelling – Validasi XGBoost Many-to-One")

        info_box = st.empty()

        # Without a previous inverse result there is nothing to validate;
        # note this `return` exits the whole page function.
        if "last_inverse_result" not in st.session_state:
            info_box.info(
                "Belum ada hasil inverse model.\n\n"
                "Silakan jalankan **Cari / Optimasi Parameter** di sisi kiri terlebih dahulu."
            )
            return

        # Pull the latest inverse-model result
        last_res = st.session_state["last_inverse_result"]
        product_name = last_res["product"]
        target_input = last_res["target"]
        row_dict = last_res["row"]

        info_box.success(f"Validasi forward model untuk **{product_name}** (Target: {target_input:.4f} MMBTU)")

        # Show the parameters fed into the forward model
        st.markdown("#### Parameter Input ke Forward Model")
        param_df = pd.DataFrame([{
            "D101330TT": row_dict.get("D101330TT", np.nan),
            "D102260TIC_CV": row_dict.get("D102260TIC_CV", np.nan),
            "D102265TIC_PV": row_dict.get("D102265TIC_PV", np.nan),
            "D102265TIC_CV": row_dict.get("D102265TIC_CV", np.nan),
            "D102266TIC": row_dict.get("D102266TIC", np.nan),
            "D101264FTSCL": row_dict.get("D101264FTSCL", np.nan),
        }])
        st.dataframe(param_df, use_container_width=True)

        # Actually re-run the forward model (not read from the stored file)
        st.markdown("#### ✅ Hasil Prediksi Forward Model (Recomputed)")

        try:
            forward_input = {
                "D101330TT": float(row_dict.get("D101330TT", np.nan)),
                "D102260TIC_CV": float(row_dict.get("D102260TIC_CV", np.nan)),
                "D102265TIC_PV": float(row_dict.get("D102265TIC_PV", np.nan)),
                "D102265TIC_CV": float(row_dict.get("D102265TIC_CV", np.nan)),
                "D102266TIC": float(row_dict.get("D102266TIC", np.nan)),
                "D101264FTSCL": float(row_dict.get("D101264FTSCL", np.nan)),
            }

            pred_val = predict_forward_from_params(product_name, forward_input, model_folder)
            err_val = float(pred_val) - float(target_input)

            m1, m2 = st.columns(2)
            with m1:
                st.metric("Prediksi GAS (MMBTU)", f"{pred_val:.6f}")
            with m2:
                st.metric("Error terhadap Target", f"{err_val:+.6f}")

        except Exception as e:
            st.error(f"Terjadi error saat menghitung ulang prediksi forward: {e}")

    # --------------------------------------------------
    # SECTION: PREDICTION SIMULATION (FORWARD MODELLING)
    # Only reached when an inverse result exists (see the early return above).
    # --------------------------------------------------
    st.markdown("---")
    st.markdown("### 🧪 Simulasi Prediksi Konsumsi Gas (Forward Modelling)")

    st.caption(
        "Pilih produk dan masukkan nilai 6 parameter proses secara manual untuk mensimulasikan "
        "prediksi konsumsi gas (MMBTU) menggunakan model XGBoost Many-to-One."
    )

    # Product for the simulation – defaults to the last inverse product
    sim_product = st.selectbox(
        "Produk untuk Simulasi Forward",
        AVAILABLE_PRODUCTS,
        index=AVAILABLE_PRODUCTS.index(product_name) if product_name in AVAILABLE_PRODUCTS else 0,
        key="sim_product_select"
    )

    # Default values: reuse the inverse result when the product matches,
    # otherwise fall back to generic setpoints.
    if sim_product == product_name:
        default_vals = {
            "D101330TT": float(row_dict.get("D101330TT", 95.0)),
            "D102260TIC_CV": float(row_dict.get("D102260TIC_CV", 45.0)),
            "D102265TIC_PV": float(row_dict.get("D102265TIC_PV", 185.0)),
            "D102265TIC_CV": float(row_dict.get("D102265TIC_CV", 17.0)),
            "D102266TIC": float(row_dict.get("D102266TIC", 16.0)),
            "D101264FTSCL": float(row_dict.get("D101264FTSCL", 3800.0)),
        }
    else:
        default_vals = {
            "D101330TT": 95.0,
            "D102260TIC_CV": 45.0,
            "D102265TIC_PV": 185.0,
            "D102265TIC_CV": 17.0,
            "D102266TIC": 16.0,
            "D101264FTSCL": 3800.0,
        }

    # Manual inputs for the six process parameters, laid out in 3 columns
    s1, s2, s3 = st.columns(3)
    with s1:
        sim_D101330TT = st.number_input(
            "D101330TT",
            value=default_vals["D101330TT"],
            format="%.4f",
            key="sim_D101330TT"
        )
        sim_D102265TIC_PV = st.number_input(
            "D102265TIC_PV",
            value=default_vals["D102265TIC_PV"],
            format="%.4f",
            key="sim_D102265TIC_PV"
        )
    with s2:
        sim_D102260TIC_CV = st.number_input(
            "D102260TIC_CV",
            value=default_vals["D102260TIC_CV"],
            format="%.4f",
            key="sim_D102260TIC_CV"
        )
        sim_D102265TIC_CV = st.number_input(
            "D102265TIC_CV",
            value=default_vals["D102265TIC_CV"],
            format="%.4f",
            key="sim_D102265TIC_CV"
        )
    with s3:
        sim_D102266TIC = st.number_input(
            "D102266TIC",
            value=default_vals["D102266TIC"],
            format="%.4f",
            key="sim_D102266TIC"
        )
        sim_D101264FTSCL = st.number_input(
            "D101264FTSCL",
            value=default_vals["D101264FTSCL"],
            format="%.4f",
            key="sim_D101264FTSCL"
        )

    sim_btn = st.button(
        "▶️ Jalankan Simulasi Prediksi GAS (MMBTU)",
        type="primary",
        use_container_width=True,
        key="sim_forward_btn"
    )

    if sim_btn:
        sim_input = {
            "D101330TT": sim_D101330TT,
            "D102260TIC_CV": sim_D102260TIC_CV,
            "D102265TIC_PV": sim_D102265TIC_PV,
            "D102265TIC_CV": sim_D102265TIC_CV,
            "D102266TIC": sim_D102266TIC,
            "D101264FTSCL": sim_D101264FTSCL,
        }

        try:
            y_sim = predict_forward_from_params(sim_product, sim_input, model_folder)

            # The delta vs. the inverse target only makes sense for the
            # same product as the last inverse run.
            diff_from_target = None
            if (sim_product == product_name) and (target_input is not None):
                diff_from_target = float(y_sim) - float(target_input)

            st.success(f"✅ Simulasi prediksi konsumsi GAS untuk produk **{sim_product}** berhasil.")
            c_res1, c_res2 = st.columns(2)
            with c_res1:
                st.metric(
                    "Prediksi Konsumsi GAS (MMBTU)",
                    f"{float(y_sim):.6f}"
                )
            with c_res2:
                if diff_from_target is not None:
                    st.metric(
                        "Selisih terhadap Target Inverse",
                        f"{diff_from_target:+.6f}"
                    )
                else:
                    st.caption(
                        "Selisih terhadap target hanya dihitung jika produk simulasi sama dengan produk inverse terakhir."
                    )

        except FileNotFoundError as e:
            st.error(str(e))
        except Exception as e:
            st.error(f"Terjadi error saat menjalankan simulasi forward modelling: {e}")
|
| 837 |
+
def page_monitoring_model():
    """Page 3: per-product performance evaluation of the forward XGBoost models.

    Evaluates each product model (from `model_dir`) against either an uploaded
    CSV or the default dataset path, then renders a styled metrics table
    (R², RMSE, MAE) and per-product Actual-vs-Predicted figures.

    NOTE(review): relies on module-level names (DATA_FILENAME, MODEL_FOLDER,
    PRODUCT_LIST, FEATURES, TARGET_COLUMN, evaluate_models_for_dashboard)
    defined elsewhere in this file.
    """
    st.subheader("3️⃣ Evaluasi Performa Model Prediksi Gas (MMBTU) per Produk")

    st.markdown(
        """
        Halaman ini menampilkan **ringkasan metrik performa model XGBoost** untuk setiap produk,
        serta **grafik perbandingan Actual vs Predicted GAS_MMBTU**.

        Kamu bisa:
        - Menggunakan **dataset default** dari path lokal, atau
        - Meng-upload **dataset terbaru (CSV)** untuk dievaluasi dengan model yang sama.
        """
    )

    # --- Data-source & model configuration ---
    st.markdown("#### ⚙️ Konfigurasi Sumber Data & Model")
    col1, col2 = st.columns(2)
    with col1:
        data_path = st.text_input(
            "Path Data Disaggregated (default)",
            value=DATA_FILENAME,
            help="Dipakai jika tidak ada file yang di-upload."
        )
    with col2:
        model_dir = st.text_input(
            "Folder Model Checkpoint",
            value=MODEL_FOLDER,
            help="Folder berisi file model_checkpoint_xgb_{PRODUCT}.joblib"
        )

    st.markdown("#### 📂 Upload Dataset Terbaru (Opsional)")
    uploaded_file = st.file_uploader(
        "Upload file CSV baru (struktur kolom harus sama dengan dataset sebelumnya)",
        type=["csv"]
    )

    run_btn = st.button("🔎 Run Evaluation", type="primary", use_container_width=True)

    # Show usage hints until the user triggers an evaluation.
    if not run_btn:
        st.info(
            "• Upload dataset baru (opsional), lalu klik **Run Evaluation**\n\n"
            "• Jika tidak upload apa-apa, sistem akan menggunakan **dataset default**"
        )
        return

    # --- Prepare data_df (only when a file was uploaded) ---
    data_df = None
    if uploaded_file is not None:
        try:
            data_df = pd.read_csv(uploaded_file)
            st.success("✅ Dataset baru berhasil dibaca dan akan digunakan untuk evaluasi.")
        except Exception as e:
            st.error(f"❌ Gagal membaca file CSV yang di-upload: {e}")
            return
    else:
        st.warning("Tidak ada file yang di-upload. Sistem akan menggunakan dataset default dari path di atas.")

    # --- Run the evaluation ---
    with st.spinner("Menghitung metrik performa dan menyiapkan grafik..."):
        summary_df, product_figs = evaluate_models_for_dashboard(
            data_path=data_path,  # still passed as fallback when data_df is None
            model_dir=model_dir,
            products=PRODUCT_LIST,
            features=FEATURES,
            target_col=TARGET_COLUMN,
            data_df=data_df  # if None → the function loads from data_path
        )

    if summary_df.empty:
        st.warning("Tidak ada hasil evaluasi yang dapat ditampilkan. Periksa kembali data dan model.")
        return

    # =====================================================
    # PART 1 – Performance summary table
    # =====================================================
    st.markdown("### 📊 Ringkasan Performa Model")

    df_display = summary_df.copy()
    df_display["R²"] = df_display["R²"].round(3)
    df_display["RMSE"] = df_display["RMSE"].round(3)
    df_display["MAE"] = df_display["MAE"].round(3)

    def color_r2(val):
        """Return a CSS style string colour-grading an R² cell (green/blue/yellow)."""
        try:
            v = float(val)
        except Exception:
            return ""
        if v >= 0.90:
            return "background-color: #d4edda; color: #155724;"  # green
        elif v >= 0.80:
            return "background-color: #cce5ff; color: #004085;"  # light blue
        else:
            return "background-color: #fff3cd; color: #856404;"  # yellow
    styled = (
        df_display.style
        .applymap(color_r2, subset=["R²"])
        .format({"R²": "{:.3f}", "RMSE": "{:.3f}", "MAE": "{:.3f}"})
    )

    st.dataframe(styled, use_container_width=True)

    # =====================================================
    # PART 2 – Actual vs Predicted charts per product
    # =====================================================
    st.markdown("### 📈 Grafik Actual vs Predicted per Produk")

    tabs = st.tabs(PRODUCT_LIST)

    for i, product in enumerate(PRODUCT_LIST):
        with tabs[i]:
            st.subheader(f"Actual vs Predicted GAS_MMBTU – {product}")

            # Caption with the product's metrics, when available
            row = summary_df[summary_df["Product"] == product]
            if not row.empty:
                r2 = row["R²"].values[0]
                rmse = row["RMSE"].values[0]
                mae = row["MAE"].values[0]
                st.caption(f"R² = {r2:.3f} | RMSE = {rmse:.3f} | MAE = {mae:.3f}")

            fig = product_figs.get(product)
            if fig is not None:
                st.pyplot(fig, use_container_width=True)
            else:
                st.info("Tidak ada grafik untuk produk ini (mungkin data/model tidak tersedia).")
|
| 965 |
+
def page_eda():
    """Page 4: exploratory data analysis of the spray-dryer gas dataset.

    Loads either an uploaded CSV or the default dataset, shows summary
    metrics, an anomaly table, and per-product tabs with distribution plots,
    IQR outlier detection, descriptive statistics and production segments.

    NOTE(review): relies on module-level names (DATA_FILENAME, TARGET_COLUMN,
    compute_eda_summary, compute_anomaly_table, compute_production_segments,
    create_line_plots, identify_outliers, compute_stats_table) defined
    elsewhere in this file.
    """
    st.subheader("4️⃣ Exploratory Data Analysis (EDA) – Data Konsumsi Gas Spray Dryer")

    st.markdown(
        """
        Halaman ini digunakan untuk melakukan **Exploratory Data Analysis (EDA)** terhadap
        dataset disagregasi spray dryer yang sama dengan halaman **Monitoring Model**.

        Kamu bisa:
        - Menggunakan **dataset default** dari path lokal, atau
        - Meng-upload **dataset baru (CSV)** untuk dianalisis.
        """
    )

    # ------------------------------------------------------
    # 1. Data-source configuration
    # ------------------------------------------------------
    st.markdown("#### ⚙️ Konfigurasi Sumber Data")

    col1, col2 = st.columns(2)
    with col1:
        data_path = st.text_input(
            "Path Data Disaggregated (default)",
            value=DATA_FILENAME,
            help="Dipakai jika tidak ada file yang di-upload."
        )
    with col2:
        st.caption(
            "Pastikan struktur kolom sama dengan dataset yang digunakan di halaman **Monitoring Model** "
            "(minimal kolom: `Date_time`, `Product`, parameter proses, dan kolom gas)."
        )

    st.markdown("#### 📂 Upload Dataset untuk EDA (Opsional)")
    uploaded_file = st.file_uploader(
        "Upload file CSV untuk dianalisis",
        type=["csv"],
        key="eda_upload",
    )

    run_btn = st.button(
        "🔁 Refresh / Jalankan EDA",
        type="primary",
        use_container_width=True,
        key="eda_run_btn",
    )

    # Show usage hints until the user triggers the analysis.
    if not run_btn:
        st.info(
            "• Upload dataset baru (opsional), kemudian klik **Refresh / Jalankan EDA**.\n\n"
            "• Jika tidak upload apa-apa, sistem akan menggunakan **dataset default** dari path di atas."
        )
        return

    # ------------------------------------------------------
    # 2. Load the data (upload takes precedence over the default path)
    # ------------------------------------------------------
    if uploaded_file is not None:
        try:
            df_raw = pd.read_csv(uploaded_file)
            st.success("✅ Dataset baru berhasil dibaca dan akan digunakan untuk EDA.")
        except Exception as e:
            st.error(f"❌ Gagal membaca file CSV yang di-upload: {e}")
            return
    else:
        try:
            df_raw = pd.read_csv(data_path)
            st.warning("Tidak ada file yang di-upload. Menggunakan dataset default dari path di atas.")
        except Exception as e:
            st.error(f"❌ Gagal membaca dataset default dari `{data_path}`: {e}")
            return

    if df_raw.empty:
        st.error("Dataset kosong. Tidak ada yang bisa dianalisis.")
        return

    # Ensure 'Date_time' exists and is parsed; invalid values become NaT.
    if "Date_time" in df_raw.columns:
        df_raw["Date_time"] = pd.to_datetime(df_raw["Date_time"], errors="coerce")
    else:
        st.warning("Kolom 'Date_time' tidak ditemukan. EDA tetap berjalan, tapi fitur berbasis waktu terbatas.")

    # ------------------------------------------------------
    # 3. Data summary (metric cards)
    # ------------------------------------------------------
    # The six process parameters analysed throughout this page.
    PROCESS_PARAMS = [
        "D101330TT",
        "D102260TIC_CV",
        "D102265TIC_PV",
        "D102265TIC_CV",
        "D102266TIC",
        "D101264FTSCL",
    ]

    summary = compute_eda_summary(df_raw, date_col="Date_time", product_col="Product")

    date_min = summary["date_min"]
    date_max = summary["date_max"]
    date_range_text = "-"
    if pd.notna(date_min) and pd.notna(date_max):
        date_range_text = f"{date_min:%Y-%m-%d %H:%M} → {date_max:%Y-%m-%d %H:%M}"

    st.markdown("### 📊 Data Summary")

    m1, m2, m3, m4, m5 = st.columns(5)
    m1.metric("Total Rows", f"{summary['total_rows']:,}")
    m2.metric("Total Columns", f"{summary['total_columns']}")
    m3.metric("Total Missing Values", f"{summary['total_missing']:,}")
    m4.metric("Jumlah Unique Product", f"{summary['unique_products']}")
    m5.metric("Data Duplikat", f"{summary['duplicate_rows']:,}" if "duplicate_rows" in summary else "-")

    with st.expander("📦 Detail Jumlah Data per Produk"):
        if summary["product_counts"]:
            product_count_df = (
                pd.DataFrame(list(summary["product_counts"].items()), columns=["Product", "Count"])
                .sort_values("Product")
            )
            st.dataframe(product_count_df, use_container_width=True)
        else:
            st.write("Tidak ada kolom 'Product' atau tidak ada data produk.")

    # ------------------------------------------------------
    # 3b. Data anomalies (validation errors / anomaly_df)
    # ------------------------------------------------------
    st.markdown("---")
    st.markdown("### ⚠️ Ringkasan Anomali Data")

    anomaly_df = compute_anomaly_table(df_raw, product_col="Product")
    if anomaly_df.empty:
        st.info("Tidak ditemukan anomali berdasarkan rule yang didefinisikan.")
    else:
        st.dataframe(anomaly_df, use_container_width=True)

    st.markdown("---")

    # ------------------------------------------------------
    # 4. Production segments (precomputed once, reused by every tab)
    # ------------------------------------------------------
    segments_df = compute_production_segments(df_raw, product_col="Product", time_col="Date_time")

    # ------------------------------------------------------
    # 5. Tabs per product (All Data + one tab per product)
    # ------------------------------------------------------
    st.markdown("### 🔍 EDA per Produk")

    product_counts = summary["product_counts"]
    product_names = list(product_counts.keys()) if product_counts else []

    # Preferred tab ordering per the requirements; the rest sorted alphabetically.
    PRIORITY_PRODUCTS = ["BMR BASE", "CKP BASE", "CKR BASE", "CMR BASE", "MORIGRO BASE"]

    ordered_products = (
        [p for p in PRIORITY_PRODUCTS if p in product_names]
        + [p for p in sorted(product_names) if p not in PRIORITY_PRODUCTS]
    )

    tab_labels = ["All Data"] + ordered_products
    tabs = st.tabs(tab_labels)

    def highlight_min_max_rows(row):
        """Style the 'min' row light red and the 'max' row light green."""
        if row.name == "min":
            return ["background-color: #f8d7da"] * len(row)  # light red
        elif row.name == "max":
            return ["background-color: #d4edda"] * len(row)  # light green
        return [""] * len(row)

    for idx, label in enumerate(tab_labels):
        with tabs[idx]:
            # Select the tab's slice: whole dataset or one product.
            if label == "All Data":
                df_tab = df_raw.copy()
                title_suffix = "All Data"
            else:
                df_tab = df_raw[df_raw["Product"] == label].copy()
                title_suffix = label

            if df_tab.empty:
                st.warning(f"Tidak ada data untuk: **{label}**")
                continue

            # 1️⃣ Distribution of the main parameters (2x3 grid, matplotlib)
            st.markdown(f"#### 1️⃣ Distribusi Parameter Proses – {title_suffix}")
            fig_dist = create_line_plots(
                df_tab,
                params=PROCESS_PARAMS,
                product_label=title_suffix,
                time_col="Date_time",
            )
            st.pyplot(fig_dist, use_container_width=True)

            # 2️⃣ Outlier detection & visualisation (2x3 grid, matplotlib)
            st.markdown(f"#### 2️⃣ Outlier Detection & Visualisasi – {title_suffix}")
            fig_out, total_outliers, outlier_stats_df = identify_outliers(
                df_tab,
                PROCESS_PARAMS,
                product_label=title_suffix,
                time_col="Date_time",
            )
            st.pyplot(fig_out, use_container_width=True)

            st.caption(
                f"Total outliers terdeteksi: **{total_outliers}** data points "
                "(metode IQR per parameter)."
            )

            with st.expander("Klik untuk lihat detail outlier per parameter"):
                if not outlier_stats_df.empty:
                    st.dataframe(outlier_stats_df, use_container_width=True)
                else:
                    st.write("Tidak ada outlier terdeteksi untuk parameter yang dianalisis.")

            # 3️⃣ Statistical description table
            st.markdown(f"#### 3️⃣ Statistical Description – {title_suffix}")
            desc_df = compute_stats_table(df_tab, PROCESS_PARAMS, target_col=TARGET_COLUMN)

            if desc_df.empty:
                st.info("Tidak ada kolom numerik yang cukup untuk dihitung statistik deskriptif.")
            else:
                styled_desc = (
                    desc_df.style
                    .format("{:.3f}")
                    .apply(highlight_min_max_rows, axis=0)
                )
                st.dataframe(styled_desc, use_container_width=True)

            # 4️⃣ Production segments for this tab
            st.markdown(f"#### 4️⃣ Segmen Produksi – {title_suffix}")

            if segments_df.empty or "Product" not in segments_df.columns:
                st.info("Segmen produksi tidak tersedia (kolom waktu/produk tidak lengkap).")
            else:
                if label == "All Data":
                    seg_to_show = segments_df.copy()
                else:
                    seg_to_show = segments_df[segments_df["Product"] == label].copy()

                if seg_to_show.empty:
                    st.info(f"Tidak ada segmen produksi untuk {title_suffix}.")
                else:
                    seg_to_show = seg_to_show.sort_values(["Product", "Start_Time"])
                    seg_to_show["Duration_Minutes"] = seg_to_show["Duration_Minutes"].round(1)
                    st.dataframe(
                        seg_to_show[["Product", "Start_Time", "End_Time", "Duration_Minutes", "Data_Points"]],
                        use_container_width=True,
                    )
| 1208 |
+
def disagregasi_data():
    """Streamlit page: disaggregate hourly gas consumption into per-minute values.

    Flow: the user uploads an hourly CSV, presses the run button, and the page
    calls ``run_disagregasi_pipeline``; it then renders the returned report
    (summary metrics, per-stage notes, result statistics) and offers the
    disaggregated data as a CSV download.
    """
    st.subheader("5️⃣ Disagregasi Konsumsi Gas Spray Dryer")

    # Page intro: explains the hourly -> per-minute weight-split workflow.
    st.markdown(
        """
        Halaman ini digunakan untuk mengubah data konsumsi gas **per jam** menjadi
        data **per menit** menggunakan **disagregasi berbasis bobot indikator proses**
        (weight split proporsional).

        **Alur:**
        1. Upload file CSV (struktur sama dengan data training, minimal kolom:
        `Date_time`, `fixed_rounded_time`, `GAS_MMBTU`, dan 6 indikator proses).
        2. Klik tombol **Jalankan Proses Disagregasi Sekarang**.
        3. Sistem menjalankan pipeline disagregasi dan menampilkan ringkasan + hasil dalam bentuk CSV yang bisa diunduh.
        """
    )

    st.markdown("### 📂 Upload Data Sumber (Per Jam)")

    uploaded_file = st.file_uploader(
        "Unggah file CSV data konsumsi gas (per jam) untuk didisagregasi",
        type="csv",
        key="disagg_upload",
    )

    # No file yet: show the expected schema and stop rendering this page.
    if uploaded_file is None:
        st.info(
            "Silakan unggah file CSV terlebih dahulu.\n\n"
            "Pastikan file memiliki kolom:\n"
            "- `Date_time`\n"
            "- `fixed_rounded_time` (timestamp jam, misal: 2025-03-18 01:00:00)\n"
            "- `GAS_MMBTU` (total gas per jam)\n"
            "- 6 indikator proses: `D101330TT`, `D102260TIC_CV`, `D102265TIC_PV`, "
            "`D102265TIC_CV`, `D102266TIC`, `D101264FTSCL`"
        )
        return

    # Main action button
    run_btn = st.button(
        "🚀 Jalankan Proses Disagregasi Sekarang",
        type="primary",
        use_container_width=True,
        key="disagg_run_btn",
    )

    # Nothing to do until the user explicitly triggers the pipeline.
    if not run_btn:
        return

    # Run the disaggregation pipeline (hours with < 50 minutes of data are skipped).
    with st.spinner("Sedang melakukan disagregasi... Mohon tunggu 10–20 detik"):
        df_result, report = run_disagregasi_pipeline(uploaded_file, min_minutes_threshold=50)

    # Empty result means the pipeline failed or no hour passed validation.
    if df_result.empty:
        st.error("❌ Proses disagregasi gagal atau tidak ada jam yang memenuhi kriteria validasi.")
        reason = report.get("reason", "")
        if reason == "no_valid_hours":
            st.warning(
                "Tidak ada jam dengan jumlah menit ≥ ambang batas (default 50 menit). "
                "Silakan cek kembali data input."
            )
        return

    st.markdown("## ✅ SELESAI! Proses Disagregasi Berhasil")

    # ===============================
    # PIPELINE EXECUTION SUMMARY
    # ===============================
    st.markdown("### 📊 Ringkasan Eksekusi Pipeline")

    # Pull headline numbers out of the pipeline's validation report.
    total_jam = report.get("total_jam_input", 0)
    jam_valid = report.get("jam_valid", 0)
    jam_tidak_valid = report.get("jam_tidak_valid", 0)
    total_baris_input = report.get("total_baris_input", 0)
    total_baris_diproses = report.get("total_baris_diproses", 0)
    persen_diproses = report.get("persentase_data_diproses", 0.0)
    total_selisih = report.get("total_selisih_disagregasi", 0.0)

    cols = st.columns(6)
    cols[0].metric("Total jam dalam dataset", f"{total_jam}")
    cols[1].metric("Jam valid (diproses)", f"{jam_valid}", "Success")
    cols[2].metric("Jam tidak valid (skip)", f"{jam_tidak_valid}", "Warning")
    cols[3].metric(
        "Total baris yang diproses",
        f"{total_baris_diproses:,} / {total_baris_input:,}"
    )
    cols[4].metric(
        "Persentase data yang digunakan",
        f"{persen_diproses:.2f}%",
        "Success" if persen_diproses >= 95 else ""
    )
    # A near-zero total difference means the per-hour gas totals were preserved.
    cols[5].metric(
        "Ak. disagregasi (total selisih)",
        f"{total_selisih:.10f}",
        "Success" if total_selisih < 1e-8 else "Periksa"
    )

    # ===============================
    # PIPELINE STAGES
    # ===============================
    st.markdown("### 🧩 Tahap-Tahap Proses Disagregasi")

    # Stage 1: production-hour validation.
    st.success("**Tahap 1: Validasi Jam Produksi**")
    st.write(
        f"- Total jam dalam dataset: **{total_jam} jam**\n"
        f"- Jam valid (diproses): **{jam_valid} jam**\n"
        f"- Jam tidak valid (di-skip karena durasi < 50 menit): **{jam_tidak_valid} jam**"
    )

    jam_tidak_valid_detail = report.get("jam_tidak_valid_detail")
    if jam_tidak_valid_detail is not None and not jam_tidak_valid_detail.empty:
        with st.expander("📋 Lihat Daftar Jam Tidak Valid"):
            jam_tidak_valid_detail_sorted = jam_tidak_valid_detail.sort_values(
                "Jumlah_Menit"
            )
            st.dataframe(jam_tidak_valid_detail_sorted, use_container_width=True)

    # Stages 2 & 3: weight computation and proportional split.
    st.info("**Tahap 2 & 3: Perhitungan Bobot & Disagregasi**")
    st.write(
        "- Indikator proses yang digunakan:\n"
        "  `D101330TT`, `D102260TIC_CV`, `D102265TIC_PV`, "
        "`D102265TIC_CV`, `D102266TIC`, `D101264FTSCL`"
    )
    weight_min = report.get("weight_min", None)
    weight_max = report.get("weight_max", None)
    if weight_min is not None and weight_max is not None:
        st.write(
            f"- Rentang bobot (w_m): **{weight_min:.4f} → {weight_max:.4f}**\n"
            "- Disagregasi selesai dilakukan untuk seluruh jam valid."
        )

    # Stage 4: accuracy validation (per-hour totals must be conserved).
    st.success("**Tahap 4: Validasi Akurasi (Kunci Keberhasilan!)**")
    validation_df = report.get("validation_df")
    if validation_df is not None and not validation_df.empty:
        max_diff = validation_df["Difference"].abs().max()
        st.write(
            f"- Total jam divalidasi: **{len(validation_df)} jam**\n"
            f"- Total selisih (original vs hasil): **{total_selisih:.10f}**\n"
            f"- Selisih maksimum per jam: **{max_diff:.10f}**"
        )
        if total_selisih < 1e-8:
            st.success("→ **VALIDASI BERHASIL: Total gas terjaga SEMPURNA (akurasi ~100%)**")

        with st.expander("🔎 Lihat Laporan Validasi Lengkap per Jam"):
            st.dataframe(validation_df, use_container_width=True)

    # Stage 5: final output shape.
    st.info("**Tahap 5: Output Final**")
    n_rows, n_cols = df_result.shape
    st.write(
        f"- Dimensi data hasil: **{n_rows:,} baris × {n_cols} kolom**\n"
        "- Kolom baru: `GAS_MMBTU_Disaggregated`\n"
        "- Semua kolom asli tetap dipertahankan."
    )

    # ===============================
    # Result analysis
    # ===============================
    st.markdown("### 📈 Analisis Hasil Disagregasi")

    # NOTE(review): 'gas_disagg_stats', 'top_hours' and 'zero_hours' are read
    # from the report, but the visible pipeline_disagregasi does not populate
    # them — these sections may silently never render; confirm against the
    # actual run_disagregasi_pipeline wrapper.
    stats = report.get("gas_disagg_stats", None)
    if stats is not None:
        st.markdown("**Statistik Konsumsi Gas per Menit (Setelah Disagregasi)**")
        st.markdown(
            "```text\n"
            f"Mean : {stats['mean']:.3f} MMBTU/menit\n"
            f"Std : {stats['std']:.3f}\n"
            f"Min : {stats['min']:.3f}\n"
            f"25% : {stats['25%']:.3f}\n"
            f"50% : {stats['50%']:.3f}\n"
            f"75% : {stats['75%']:.3f}\n"
            f"Max : {stats['max']:.3f}\n"
            "```"
        )

    top_hours = report.get("top_hours", None)
    if top_hours is not None and not top_hours.empty:
        st.markdown("**10 Jam dengan Konsumsi Gas Tertinggi (Total per Jam)**")
        df_top = top_hours.reset_index()
        df_top.columns = ["fixed_rounded_time", "Total_GAS_MMBTU"]
        st.dataframe(df_top, use_container_width=True)

    zero_hours = report.get("zero_hours", None)
    if zero_hours is not None and not zero_hours.empty:
        st.markdown("**Jam dengan Konsumsi Gas = 0 (Kemungkinan Shutdown)**")
        df_zero = zero_hours.reset_index()
        df_zero.columns = ["fixed_rounded_time", "Total_GAS_MMBTU"]
        st.dataframe(df_zero, use_container_width=True)

    # ===============================
    # Follow-up actions (download)
    # ===============================
    st.markdown("### 📥 Aksi Lanjutan")

    csv_bytes = df_result.to_csv(index=False).encode("utf-8")
    st.download_button(
        "📥 Download Data Hasil Disagregasi (CSV)",
        data=csv_bytes,
        file_name="hasil_disagregasi_spraydryer.csv",
        mime="text/csv",
        use_container_width=True,
    )
def filter_rule_engine():
    """Streamlit page: upload a disaggregated CSV, run the rule engine, and
    offer the cleaned data and the removed anomalies as downloads.

    NOTE(review): this function is shadowed by a second ``filter_rule_engine``
    definition later in this module, so this version is dead code at import
    time — keep the two in sync or delete one of them.
    """
    st.title("Filter Data dengan Rule Engine")
    st.markdown("**Upload file hasil disagregasi → Otomatis deteksi & hapus anomali → Download data bersih**")

    uploaded_file = st.file_uploader("Upload file CSV hasil disagregasi", type="csv")

    if uploaded_file is not None:
        df = pd.read_csv(uploaded_file)
        st.success(f"File berhasil diupload: {len(df):,} baris")

        if st.button("Jalankan Rule Engine", type="primary", use_container_width=True):
            with st.spinner("Mendeteksi anomali..."):
                df_clean, df_anomalies, summary = apply_rule_engine(df)

            st.success("Rule Engine selesai!")
            st.balloons()

            # --- Summary metrics ---
            col1, col2, col3, col4 = st.columns(4)
            # BUG FIX: the key was written as 'total_rows_initial relator'
            # (stray pasted text inside the string literal), which raised a
            # KeyError as soon as the metrics rendered.
            col1.metric("Total Baris Awal", f"{summary['total_rows_initial']:,}")
            col2.metric("Baris Bersih", f"{summary['total_rows_clean']:,}")
            col3.metric("Anomali Dihapus", f"{summary['total_anomalies']:,}",
                        f"-{summary['anomaly_percentage']}%")
            col4.metric("Data Bersih", f"{100 - summary['anomaly_percentage']:.2f}%")

            # --- Anomaly breakdown ---
            st.subheader("Detail Jenis Anomali yang Dihapus")
            breakdown_df = pd.DataFrame([
                {"Jenis Anomali": reason, "Jumlah": count, "Persentase": f"{count/len(df)*100:.2f}%"}
                for reason, count in summary['anomaly_breakdown'].items()
            ])
            st.dataframe(breakdown_df, use_container_width=True, hide_index=True)

            # --- Downloads (clean data for modelling, anomalies for review) ---
            csv_clean = df_clean.to_csv(index=False).encode()
            csv_anomalies = df_anomalies.to_csv(index=False).encode()

            col1, col2 = st.columns(2)
            with col1:
                st.download_button(
                    "Download Data Bersih (Siap Modelling)",
                    csv_clean,
                    "data_bersih_spray_dryer.csv",
                    "text/csv"
                )
            with col2:
                st.download_button(
                    "Download Data Anomali (untuk Review)",
                    csv_anomalies,
                    "data_anomali_dihapus.csv",
                    "text/csv"
                )

            with st.expander("Lihat contoh baris yang dihapus"):
                st.dataframe(df_anomalies[['Date_time', 'anomaly_reason']].head(20))
def filter_rule_engine():
    """Streamlit page: automatic data cleaning via a domain-knowledge rule engine.

    Takes a per-minute disaggregated CSV (must contain
    ``GAS_MMBTU_Disaggregated``), runs ``apply_rule_engine`` on it, shows
    summary metrics and a per-anomaly-type breakdown, and offers both the
    clean data and the removed rows as CSV downloads.

    NOTE(review): a function with this same name is defined earlier in the
    module; this later definition wins at runtime.
    """
    st.subheader("6️⃣ Filter Data dengan Rule Engine – Deteksi & Penghapusan Anomali")
    st.caption("Memastikan data berkualitas yang digunakan untuk analisis dan modelling.")

    st.markdown(
        """
        Halaman ini digunakan untuk melakukan **pembersihan data otomatis** menggunakan
        **Rule Engine** berbasis domain knowledge, dengan input berupa
        **file hasil disagregasi** (sudah memiliki kolom `GAS_MMBTU_Disaggregated`).
        """
    )

    st.markdown("### 📂 Upload Data Hasil Disagregasi")
    uploaded_file = st.file_uploader(
        "Upload file CSV hasil disagregasi (per menit, sudah ada kolom GAS_MMBTU_Disaggregated)",
        type="csv",
        key="rule_engine_upload",
    )

    # No file yet: describe the minimum expected schema and stop.
    if uploaded_file is None:
        st.info(
            "Silakan upload file CSV hasil disagregasi terlebih dahulu.\n\n"
            "Pastikan minimal ada kolom:\n"
            "- `Date_time`\n"
            "- `Product`\n"
            "- `GAS_MMBTU_Disaggregated`\n"
            "- Parameter proses utama: `D101330TT`, `D102260TIC_CV`, `D102265TIC_PV`, "
            "`D102265TIC_CV`, `D102266TIC`, `D101264FTSCL`, `D101463PIC_PV`"
        )
        return

    run_btn = st.button(
        "🚦 Jalankan Rule Engine",
        type="primary",
        use_container_width=True,
        key="rule_engine_run_btn",
    )

    # Wait for an explicit user trigger before doing any work.
    if not run_btn:
        return

    # ---------------------------------------------------
    # Run the rule engine
    # ---------------------------------------------------
    try:
        df_input = pd.read_csv(uploaded_file)
    except Exception as e:
        st.error(f"❌ Gagal membaca file CSV: {e}")
        return

    with st.spinner("Mendeteksi anomali dan melakukan pembersihan data..."):
        df_clean, df_anomalies, summary = apply_rule_engine(df_input)

    st.success("Rule Engine selesai dijalankan! Data siap digunakan untuk analisis & modelling.")
    st.balloons()

    # Read summary values defensively: every key has a fallback computed from
    # the returned frames so the page still renders if the summary dict changes.
    total_initial = summary.get("total_rows_initial", len(df_input))
    total_after = summary.get("total_rows_after_filter", len(df_clean))
    total_removed = summary.get("total_rows_removed", total_initial - total_after)
    # NOTE(review): percent_clean is computed but never displayed below.
    percent_clean = summary.get("percent_clean", (total_after / total_initial * 100 if total_initial > 0 else 0.0))
    num_anomaly_types = summary.get("num_anomaly_types", 0)
    cip_removed = summary.get("cip_rows_removed", 0)
    rule_removed = summary.get("rule_rows_removed", total_removed - cip_removed)

    # ---------------------------------------------------
    # Cleaning summary (metric row)
    # ---------------------------------------------------
    st.markdown("### 📊 Ringkasan Hasil Pembersihan Data")

    m1, m2, m3, m4, m5 = st.columns(5)

    m1.metric("Total baris awal", f"{total_initial:,}")
    m2.metric("Total baris setelah filter", f"{total_after:,}")
    m3.metric("Total baris dihapus (termasuk CIP)", f"{total_removed:,}")
    m4.metric("Jumlah jenis anomali", f"{num_anomaly_types}")
    m5.metric(
        "Total baris dihapus tanpa CIP",
        f"{rule_removed:,}",
        delta=f"{cip_removed:,} baris CIP",)

    # ---------------------------------------------------
    # Per-anomaly-type breakdown
    # ---------------------------------------------------
    st.markdown("### 🧾 Detail Anomali yang Terdeteksi")

    if df_anomalies.empty:
        st.info("Tidak ada baris yang dihapus oleh rule engine. Semua data dianggap bersih.")
    else:
        if "anomaly_reason" not in df_anomalies.columns:
            st.warning("Kolom 'anomaly_reason' tidak ditemukan di df_anomalies. Breakdown jenis anomali tidak dapat ditampilkan.")
        else:
            # One summary row per anomaly reason: count, share of the input,
            # and up to three example timestamps.
            rows = []
            grouped = df_anomalies.groupby("anomaly_reason")
            for i, (reason, g) in enumerate(grouped, start=1):
                count = len(g)
                pct = (count / total_initial * 100) if total_initial > 0 else 0.0

                # Example rows (timestamps) — take at most 3.
                if "Date_time" in g.columns:
                    dt_series = pd.to_datetime(g["Date_time"], errors="coerce").dropna()
                    examples = ", ".join(dt_series.astype(str).head(3).tolist())
                else:
                    examples = "-"

                rows.append({
                    "No": i,
                    "Jenis Anomali": reason,
                    "Jumlah Baris": count,
                    "Persentase": f"{pct:.2f}%",
                    "Contoh Baris (Tanggal/Waktu)": examples,
                    "Status": "Dihapus",
                })

            detail_df = pd.DataFrame(rows)

            st.dataframe(
                detail_df,
                use_container_width=True,
                hide_index=True,
            )

    # ---------------------------------------------------
    # Download actions
    # ---------------------------------------------------
    st.markdown("### 📥 Aksi Lanjutan")

    col_dl1, col_dl2 = st.columns(2)
    csv_clean = df_clean.to_csv(index=False).encode("utf-8")
    csv_anom = df_anomalies.to_csv(index=False).encode("utf-8")

    with col_dl1:
        st.download_button(
            "📥 Download Data Bersih (Siap Modelling)",
            data=csv_clean,
            file_name="data_bersih_spray_dryer_rule_engine.csv",
            mime="text/csv",
            use_container_width=True,
        )

    with col_dl2:
        st.download_button(
            "📥 Download Data Anomali yang Dihapus",
            data=csv_anom,
            file_name="data_anomali_spray_dryer_rule_engine.csv",
            mime="text/csv",
            use_container_width=True,
        )

    # ---------------------------------------------------
    # Expander: full table of removed rows
    # ---------------------------------------------------
    with st.expander("🔍 Lihat Semua Baris yang Dihapus (Detail Anomali)"):
        if df_anomalies.empty:
            st.write("Tidak ada baris yang dihapus.")
        else:
            # Put the key identification columns first, then everything else.
            cols_to_show = ["Date_time", "Product", "anomaly_reason"]
            cols_existing = [c for c in cols_to_show if c in df_anomalies.columns]
            other_cols = [c for c in df_anomalies.columns if c not in cols_existing]

            st.dataframe(
                df_anomalies[cols_existing + other_cols],
                use_container_width=True,
            )
def main():
    """Application entry point: render the global header, then route to the
    dashboard page chosen in the sidebar radio menu."""
    # System-wide title (always shown at the top of every page).
    st.title("Sistem Prediksi dan Rekomendasi Parameter Berdasarkan Input Gas Consumption (MMBTU)")

    st.caption(
        "Platform internal untuk memprediksi konsumsi gas dan merekomendasikan parameter proses "
        "berdasarkan histori operasi spray dryer."
    )

    # Main navigation sidebar.
    with st.sidebar:
        st.header("📂 Menu Utama")
        menu = st.radio(
            "Pilih Halaman",
            [
                "1. Prediksi Gas dari 6 Parameter",
                "2. Prediksi Parameter dari Gas (MMBTU)",
                "3. Monitoring Model",
                "4. EDA",
                "5. Disagregasi Data",
                "6. Filter Rule Engine"
            ]
        )

    # Dispatch on the numeric prefix so the label text can evolve freely.
    page_handlers = {
        "1.": page_prediksi_gas_dari_6_parameter,
        "2.": page_prediksi_parameter_dari_gas,
        "3.": page_monitoring_model,
        "4.": page_eda,
        "5.": disagregasi_data,
        "6.": filter_rule_engine,
    }
    for prefix, render_page in page_handlers.items():
        if menu.startswith(prefix):
            render_page()
            break
# Run the dashboard only when this file is executed directly (not on import).
if __name__ == "__main__":
    main()
|
Disagregasi_mmbtu.py
ADDED
|
@@ -0,0 +1,321 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
from sklearn.preprocessing import MinMaxScaler
|
| 4 |
+
import warnings
|
| 5 |
+
warnings.filterwarnings('ignore')
|
| 6 |
+
|
| 7 |
+
def pipeline_disagregasi(Data_Terbaru, min_minutes_threshold=50):
    """Disaggregate hourly ``GAS_MMBTU`` totals into per-minute values.

    Each per-minute row receives a share of its hour's gas total proportional
    to a weight built from six min-max-scaled process indicators. Hours with
    fewer than ``min_minutes_threshold`` minute-rows are skipped entirely.
    Progress and validation details are printed to stdout.

    Parameters
    ----------
    Data_Terbaru : pandas.DataFrame
        Per-minute input; must contain ``fixed_rounded_time`` (hour bucket),
        ``GAS_MMBTU`` (hourly total repeated on every row of the hour —
        assumed constant within an hour) and the six indicator columns.
    min_minutes_threshold : int
        Minimum number of minute-rows an hour needs to be processed (default 50).

    Returns
    -------
    (pandas.DataFrame, dict)
        The input rows of valid hours with ``GAS_MMBTU_Disaggregated`` added
        (all original columns preserved), and a validation/statistics report.
        On failure returns ``(empty DataFrame, {"status": "failed", ...})``.
    """

    print("="*60)
    print("PIPELINE DISAGREGASI DATA DENGAN VALIDASI JAM PRODUKSI")
    print("="*60)

    # Remember the original column order so the output keeps every input column.
    original_cols = list(Data_Terbaru.columns)

    # ========================================
    # STAGE 1: PRODUCTION-HOUR VALIDATION
    # ========================================
    print("\n--- TAHAP 1: Validasi Jam Produksi ---")

    # Count minute-rows per hour bucket.
    jumlah_data_per_jam = Data_Terbaru.groupby('fixed_rounded_time').size()
    jumlah_data_per_jam_df = jumlah_data_per_jam.reset_index(name='Jumlah_Menit')

    # Split hours into valid (>= threshold minutes) and invalid.
    jam_valid = jumlah_data_per_jam_df[jumlah_data_per_jam_df['Jumlah_Menit'] >= min_minutes_threshold]['fixed_rounded_time'].tolist()
    jam_tidak_valid = jumlah_data_per_jam_df[jumlah_data_per_jam_df['Jumlah_Menit'] < min_minutes_threshold]

    print(f"Total jam dalam dataset: {len(jumlah_data_per_jam_df)}")
    print(f"Jam valid (>= {min_minutes_threshold} menit): {len(jam_valid)}")
    print(f"Jam tidak valid (< {min_minutes_threshold} menit): {len(jam_tidak_valid)}")

    # Log which hours will be skipped.
    if len(jam_tidak_valid) > 0:
        print("\nDetail jam yang TIDAK akan diproses:")
        print(jam_tidak_valid.sort_values('Jumlah_Menit')[['fixed_rounded_time', 'Jumlah_Menit']].to_string(index=False))
    else:
        print("\nSemua jam valid untuk diproses!")

    # Keep only rows belonging to valid hours (copy: we add columns below).
    df_work = Data_Terbaru[Data_Terbaru['fixed_rounded_time'].isin(jam_valid)].copy()

    if df_work.empty:
        print("\n⚠️ PERINGATAN: Tidak ada data yang memenuhi kriteria validasi!")
        return pd.DataFrame(), {"status": "failed", "reason": "no_valid_hours"}

    print(f"\nData yang akan diproses: {len(df_work)} baris dari {len(Data_Terbaru)} baris total")
    print(f"Persentase data yang diproses: {len(df_work)/len(Data_Terbaru)*100:.2f}%")

    # ========================================
    # STAGE 2: INDICATOR WEIGHT COMPUTATION
    # ========================================
    print("\n--- TAHAP 2: Perhitungan Bobot Indikator ---")

    indicator_cols = [
        "D101330TT", "D102260TIC_CV", "D102265TIC_PV",
        "D102265TIC_CV", "D102266TIC", "D101264FTSCL"
    ]

    # Missing indicator readings contribute zero weight.
    df_work[indicator_cols] = df_work[indicator_cols].fillna(0)

    # Min-max scale each indicator to [0, 1] so units don't dominate the weight.
    scaled_cols = [col + '_scaled' for col in indicator_cols]
    scaler = MinMaxScaler()
    df_work[scaled_cols] = scaler.fit_transform(df_work[indicator_cols])

    # Per-row weight = sum of the six scaled indicators.
    df_work['w_m'] = df_work[scaled_cols].sum(axis=1)

    print(f"Indikator yang digunakan: {', '.join(indicator_cols)}")
    print(f"Range bobot (w_m): Min={df_work['w_m'].min():.4f}, Max={df_work['w_m'].max():.4f}")

    # ========================================
    # STAGE 3: DISAGGREGATION ALGORITHM
    # ========================================
    print("\n--- TAHAP 3: Aplikasi Algoritma Disagregasi ---")

    # Total weight per hour; zero totals are replaced by 1 here only to avoid
    # division by zero (those hours get an even split in the fix-up below).
    total_weight_per_block = df_work.groupby('fixed_rounded_time')['w_m'].transform('sum')
    total_weight_per_block[total_weight_per_block == 0] = 1

    # Each row's share of its hour's weight.
    df_work['proportional_share'] = df_work['w_m'] / total_weight_per_block

    # Hourly gas total ('first' — GAS_MMBTU is assumed constant within an hour).
    gas_total_per_block = df_work.groupby('fixed_rounded_time')['GAS_MMBTU'].transform('first')

    # Per-minute disaggregated gas value.
    df_work['GAS_MMBTU_Disaggregated'] = gas_total_per_block * df_work['proportional_share']

    # Special case: hours whose total weight is exactly 0 get an even split.
    zero_weight_blocks = df_work.groupby('fixed_rounded_time')['w_m'].sum()
    zero_weight_blocks = zero_weight_blocks[zero_weight_blocks == 0].index

    if not zero_weight_blocks.empty:
        print(f"⚠️ Ditemukan {len(zero_weight_blocks)} jam dengan total bobot = 0")
        print("   Menggunakan distribusi merata untuk jam tersebut")

        for block_time in zero_weight_blocks:
            mask = df_work['fixed_rounded_time'] == block_time
            gas_value = df_work.loc[mask, 'GAS_MMBTU'].iloc[0]
            count_in_block = mask.sum()
            df_work.loc[mask, 'GAS_MMBTU_Disaggregated'] = gas_value / count_in_block if count_in_block > 0 else 0

    print("✓ Disagregasi selesai dilakukan")

    # ========================================
    # STAGE 4: RESULT VALIDATION
    # ========================================
    print("\n--- TAHAP 4: Validasi Hasil Disagregasi ---")

    # Compare the original hourly total against the sum of the minute shares.
    original_total = df_work.groupby('fixed_rounded_time')['GAS_MMBTU'].first()
    disaggregated_total = df_work.groupby('fixed_rounded_time')['GAS_MMBTU_Disaggregated'].sum()

    validation_df = pd.DataFrame({
        'Original_Total': original_total,
        'Disaggregated_Total': disaggregated_total,
        'Difference': original_total - disaggregated_total
    })

    # Validation statistics (both should be ~0 up to float rounding).
    max_diff = validation_df['Difference'].abs().max()
    total_diff = validation_df['Difference'].abs().sum()

    print(f"Jumlah jam yang divalidasi: {len(validation_df)}")
    print(f"Total selisih absolut: {total_diff:.10f}")
    print(f"Selisih maksimum: {max_diff:.10f}")

    if total_diff < 1e-8:
        print("✓ Validasi BERHASIL: Total gas terjaga dengan sempurna")
    else:
        print("⚠️ PERINGATAN: Terdapat selisih kecil dalam disagregasi")

    # Show the 5 hours with the largest discrepancy, if any is above noise level.
    if max_diff > 1e-10:
        print("\n5 Jam dengan selisih terbesar:")
        top_diff = validation_df.nlargest(5, 'Difference')[['Original_Total', 'Disaggregated_Total', 'Difference']]
        print(top_diff.to_string())

    # ========================================
    # STAGE 5: FINAL OUTPUT PREPARATION
    # ========================================
    print("\n--- TAHAP 5: Persiapan Output Final ---")

    # --- KEY POINT ---
    # Instead of dropping helper columns, explicitly select all original
    # columns plus the new disaggregated one, so every unused input column
    # survives into the final output.

    # Final column list.
    final_cols = original_cols + ['GAS_MMBTU_Disaggregated']
    # Deduplicate in case 'GAS_MMBTU_Disaggregated' already existed in the input.
    final_cols = list(dict.fromkeys(final_cols))

    # Build the final frame by selecting the relevant columns from df_work.
    df_disaggregated_final = df_work[final_cols]

    print(f"Dimensi data final: {df_disaggregated_final.shape}")
    print("Kolom-kolom asli yang tidak digunakan dalam proses telah berhasil dipertahankan.")

    # ========================================
    # SUMMARY REPORT
    # ========================================
    print("\n" + "="*60)
    print("RINGKASAN PIPELINE")
    print("="*60)

    validation_report = {
        "total_jam_input": len(jumlah_data_per_jam_df),
        "jam_valid": len(jam_valid),
        "jam_tidak_valid": len(jam_tidak_valid),
        "total_baris_input": len(Data_Terbaru),
        "total_baris_diproses": len(df_work),
        "persentase_data_diproses": len(df_work)/len(Data_Terbaru)*100 if len(Data_Terbaru) > 0 else 0,
        "total_selisih_disagregasi": total_diff,
        "jam_dengan_bobot_nol": len(zero_weight_blocks) if not zero_weight_blocks.empty else 0,
        "validation_df": validation_df,
        "jam_tidak_valid_detail": jam_tidak_valid,
        "weight_min": float(df_work["w_m"].min()),
        "weight_max": float(df_work["w_m"].max())
    }

    print(f"• Total jam input: {validation_report['total_jam_input']}")
    print(f"• Jam valid untuk disagregasi: {validation_report['jam_valid']}")
    print(f"• Jam tidak valid (skip): {validation_report['jam_tidak_valid']}")
    print(f"• Total baris yang diproses: {validation_report['total_baris_diproses']:,} dari {validation_report['total_baris_input']:,}")
    print(f"• Persentase data diproses: {validation_report['persentase_data_diproses']:.2f}%")
    print(f"• Akurasi disagregasi (total selisih): {validation_report['total_selisih_disagregasi']:.10f}")

    print("\n✅ Pipeline selesai dijalankan!")

    return df_disaggregated_final, validation_report
+
# ========================================
|
| 215 |
+
# FUNGSI UTILITAS TAMBAHAN
|
| 216 |
+
# ========================================
|
| 217 |
+
|
| 218 |
+
def analyze_disagregation_results(df_result, validation_report):
    """Print a diagnostic summary of the disaggregation output.

    Parameters
    ----------
    df_result : pandas.DataFrame
        Disaggregated rows; expected to contain the columns
        'fixed_rounded_time' and 'GAS_MMBTU_Disaggregated'.
    validation_report : dict
        Pipeline validation report. Accepted for API compatibility;
        not used by this analysis.

    Returns
    -------
    pandas.DataFrame or None
        Per-hour sum/mean/std/count statistics (rounded to 4 decimals),
        or None when ``df_result`` is empty.
    """
    print("\n" + "="*60)
    print("ANALISIS HASIL DISAGREGASI")
    print("="*60)

    # Nothing to analyse on an empty result set.
    if df_result.empty:
        print("Tidak ada hasil untuk dianalisis.")
        return None

    gas_col = 'GAS_MMBTU_Disaggregated'

    # Overall distribution of the disaggregated gas values.
    print("\n📊 Statistik GAS_MMBTU_Disaggregated:")
    print(df_result[gas_col].describe())

    # Per-hour aggregate statistics (first 10 hours shown).
    print("\n📊 Distribusi data per jam (top 10):")
    per_hour = df_result.groupby('fixed_rounded_time')
    hourly_stats = per_hour.agg({gas_col: ['sum', 'mean', 'std', 'count']}).round(4)
    print(hourly_stats.head(10))

    # Hourly totals are reused for both extreme-value views below.
    hourly_totals = per_hour[gas_col].sum()

    print("\n⚠️ Jam dengan total GAS tertinggi:")
    print(hourly_totals.nlargest(5))

    print("\n⚠️ Jam dengan total GAS terendah:")
    print(hourly_totals.nsmallest(5))

    return hourly_stats
|
| 252 |
+
|
| 253 |
+
def run_disagregasi_pipeline(file_obj, min_minutes_threshold=50):
    """
    Wrapper used by the Streamlit dashboard.

    Parameters
    ----------
    file_obj : str or file-like
        CSV path string, or an UploadedFile from ``st.file_uploader``.
        Both are accepted directly by ``pandas.read_csv``, so no type
        branching is needed (the original if/else had identical bodies).
    min_minutes_threshold : int, default 50
        Minimum minutes of data an hour must have to be disaggregated
        (forwarded to ``pipeline_disagregasi``).

    Returns
    -------
    tuple(pandas.DataFrame, dict)
        The disaggregated data and the validation report, extended with
        ``gas_disagg_stats``, ``top_hours`` and ``zero_hours`` when the
        disaggregated gas column is present.
    """
    # pandas.read_csv handles both a path string and a file-like object.
    df_input = pd.read_csv(file_obj)

    # Run the main pipeline
    df_hasil, laporan = pipeline_disagregasi(
        Data_Terbaru=df_input,
        min_minutes_threshold=min_minutes_threshold
    )

    # Empty result -> return immediately, nothing to enrich.
    if df_hasil.empty:
        return df_hasil, laporan

    # Dashboard statistics are only meaningful when the disaggregated
    # column exists; guarding everything here avoids a KeyError on
    # malformed pipeline output.
    if "GAS_MMBTU_Disaggregated" in df_hasil.columns:
        laporan["gas_disagg_stats"] = df_hasil["GAS_MMBTU_Disaggregated"].describe()

        # Hourly totals
        hourly_total = (
            df_hasil
            .groupby("fixed_rounded_time")["GAS_MMBTU_Disaggregated"]
            .sum()
        )

        # 10 hours with the highest gas consumption
        laporan["top_hours"] = hourly_total.sort_values(ascending=False).head(10)

        # Hours whose total GAS is 0 (likely shutdown)
        laporan["zero_hours"] = hourly_total[hourly_total == 0.0]

    return df_hasil, laporan
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
|
| 298 |
+
# ========================================
# USAGE EXAMPLE (CLI / manual testing)
# ========================================

# Replace with your actual file paths before running.
if __name__ == "__main__":
    # Run the main pipeline (CLI / testing mode)
    df_hasil, laporan = pipeline_disagregasi(
        Data_Terbaru=pd.read_csv("/work/Dataset 18 Mar - 19 Jun/Processed Data Pipeline EDA_10_17_2025.csv"),
        min_minutes_threshold=50
    )

    # Analyse the results (optional)
    stats = analyze_disagregation_results(df_hasil, laporan)

    # Drop 'CIP' / 'CIP CHAMBER' product rows before saving
    # (presumably cleaning cycles, not production — TODO confirm)
    df_hasil = df_hasil[~df_hasil['Product'].isin(['CIP', 'CIP CHAMBER'])]
    df_hasil.to_csv('/work/Dataset 18 Mar - 19 Jun/disagregasi_data_spraydryer_terbaru_10_17_2025.csv', index=False)

    # Inspect validation details
    print(laporan['validation_df'])
    print(laporan['jam_tidak_valid_detail'])
|
| 320 |
+
|
| 321 |
+
|
Hasil_Inverse_Model.csv
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Converged,D101264FTSCL,D101330TT,D102260TIC_CV,D102265TIC_CV,D102265TIC_PV,D102266TIC,Error,Error_Pct,Iterations,Level,Objective_Value,Predicted_MMBTU,Product,Soft_Violations,Target_MMBTU,prediction
|
| 2 |
+
,3955.77,93.24,53.44,31.11,174.33,18.5,0.000392,,,,,,CKR BASE,,0.2667,0.267092
|
| 3 |
+
,3952.31,93.32,60.19,31.64,174.19,18.53,0.000064,,,,,,CKR BASE,,0.27,0.269936
|
| 4 |
+
,3970.4,93.0,60.64,32.82,174.33,18.53,0.003857,,,,,,CKR BASE,,0.2733,0.277157
|
| 5 |
+
,4290.9,94.53,62.77,47.4,180.93,16.04,0.017178,,,,,,CKR BASE,,0.2767,0.293878
|
| 6 |
+
,4431.08,94.49,60.99,46.49,181.08,16.44,0.003841,,,,,,CKR BASE,,0.28,0.283841
|
| 7 |
+
,4457.67,94.32,60.39,46.73,181.46,16.4,0.000053,,,,,,CKR BASE,,0.2833,0.283353
|
| 8 |
+
,4476.58,94.4,61.71,44.93,180.09,16.38,0.000087,,,,,,CKR BASE,,0.2867,0.286613
|
| 9 |
+
,4221.75,94.28,60.94,45.09,185.49,16.22,0.000004,,,,,,CKR BASE,,0.29,0.290004
|
| 10 |
+
,4156.33,95.47,65.84,45.4,184.37,16.13,0.000005,,,,,,CKR BASE,,0.2933,0.293295
|
| 11 |
+
,4314.51,94.52,64.5,46.42,185.9,16.27,0.000061,,,,,,CKR BASE,,0.2967,0.296761
|
| 12 |
+
,4195.16,94.31,67.84,52.73,180.15,16.11,0.000067,,,,,,CKR BASE,,0.3,0.299933
|
| 13 |
+
,4140.37,97.51,67.45,51.3,187.75,17.61,0.000041 (0.01%),,,,,,CKR BASE,,0.3033,0.303292
|
| 14 |
+
,4360.28,96.65,66.48,50.23,187.79,17.35,0.000072 (0.02%),,,,,,CKR BASE,,0.3067,0.306738
|
| 15 |
+
,4408.62,93.81,62.23,59.33,189.59,17.48,0.000096 (0.03%),,,,,,CKR BASE,,0.31,0.310096
|
| 16 |
+
,4468.98,97.68,59.9,54.51,194.32,19.25,0.000009 (0.00%),,,,,,CKR BASE,,0.3133,0.313342
|
| 17 |
+
,4481.76,92.15,67.27,55.53,193.24,19.06,0.000001 (0.00%),,,,,,CKR BASE,,0.3333,0.333332
|
| 18 |
+
,4833.92,93.0,64.14,54.36,190.49,19.39,0.000010 (0.00%),,,,,,CKR BASE,,0.3367,0.33671
|
| 19 |
+
,4675.53,93.3,66.21,53.31,188.33,18.82,0.000035 (0.01%),,,,,,CKR BASE,,0.34,0.340035
|
| 20 |
+
,4587.25,92.91,66.09,53.97,189.73,18.78,0.000035 (0.01%),,,,,,CKR BASE,,0.3467,0.346743
|
| 21 |
+
,4589.9,92.78,65.33,55.22,189.54,19.54,0.000002 (0.00%),,,,,,CKR BASE,,0.3433,0.343332
|
| 22 |
+
,4615.33,92.4,65.76,54.24,190.33,18.83,0.004873 (1.39%),,,,,,CKR BASE,,0.35,0.345127
|
| 23 |
+
,4195.164127,94.31367329,67.83752807,52.72994004,180.1513437,16.11178086,6.67453E-05,,,,,,CKR BASE,,0.3,0.299933255
|
| 24 |
+
,4345.077523,95.73564815,61.94377257,47.97807791,178.5670734,15.94367712,0.015211162,,,,,,CKR BASE,,0.278,0.293211162
|
| 25 |
+
,3714.134992,94.12849457,50.19545981,24.45841829,176.6102062,17.71849347,0.041373372,,,,,,CKR BASE,,0.25,0.291373372
|
| 26 |
+
,4329.033348,96.36362196,61.44665789,47.72381537,177.4725578,16.00013482,0.017786913,,,,,,CKR BASE,,0.276,0.293786913
|
| 27 |
+
,4221.747978,94.27552004,60.94445271,45.09113873,185.4924565,16.22248105,4.49181E-06,,,,,,CKR BASE,,0.29,0.290004492
|
| 28 |
+
,4492.864459,94.21253004,61.40316114,46.06813204,180.9288264,16.4082628,6.45216E-05,,,,,,CKR BASE,,0.287,0.286935478
|
| 29 |
+
,4474.17151,94.69035643,62.42412584,46.69251985,185.2116553,16.41205744,5.86555E-05,,,,,,CKR BASE,,0.289,0.289058656
|
| 30 |
+
,3941.755261,93.92589611,44.02782054,21.35222818,174.9003476,18.31076668,0.034599949,,,,,,CKR BASE,,0.23,0.264599949
|
| 31 |
+
,3935.359007,93.82213382,44.06991256,21.19780621,174.4379727,18.34101723,0.030370471,,,,,,CKR BASE,,0.234,0.264370471
|
| 32 |
+
,3934.465917,94.2948149,44.12207343,21.16965631,174.5110539,18.33528665,0.054408622,,,,,,CKR BASE,,0.212,0.266408622
|
| 33 |
+
,4496.939745,94.48287836,60.40561786,46.98256551,179.586896,16.3735464,7.61817E-05,,,,,,CKR BASE,,0.2865,0.286576182
|
| 34 |
+
,3939.045858,94.37142197,44.09776324,21.14782051,174.5390788,18.32985818,0.056263753,,,,,,CKR BASE,,0.21,0.266263753
|
| 35 |
+
,3960.72,96.25,50.99,18.7,170.17,18.36,0.000049,,,,,,CMR BASE,,0.2667,0.266749
|
| 36 |
+
,4022.34,97.5,50.49,18.54,176.04,16.45,0.000016,,,,,,CMR BASE,,0.27,0.269984
|
| 37 |
+
,4112.43,98.52,55.82,21.49,169.98,16.67,0.000025,,,,,,CMR BASE,,0.2733,0.273275
|
| 38 |
+
,3995.18,96.52,52.26,36.78,172.76,18.57,0.00008,,,,,,CMR BASE,,0.2767,0.27662
|
| 39 |
+
,3920.17,95.81,55.88,38.57,172.86,18.57,0.000011,,,,,,CMR BASE,,0.28,0.279989
|
| 40 |
+
,4212.96,98.78,53.91,24.09,173.13,19.01,0.000019,,,,,,CMR BASE,,0.2833,0.283281
|
| 41 |
+
,4018.11,97.85,61.8,28.33,178.51,16.55,0.000051,,,,,,CMR BASE,,0.2867,0.286751
|
| 42 |
+
,4234.78,98.53,61.17,23.86,175.44,16.48,0.000078,,,,,,CMR BASE,,0.29,0.290078
|
| 43 |
+
,4074.51,98.44,63.57,26.17,183.23,18.89,0.000091,,,,,,CMR BASE,,0.2933,0.293391
|
| 44 |
+
,4280.19,96.32,61.46,24.87,185.23,19.01,0.000091,,,,,,CMR BASE,,0.2967,0.296609
|
| 45 |
+
,4275.9,93.46,60.26,29.42,184.92,18.92,0.000063,,,,,,CMR BASE,,0.3,0.300063
|
| 46 |
+
,4460.72,94.73,61.74,31.95,184.81,17.66,0.000002,,,,,,CMR BASE,,0.3033,0.303298
|
| 47 |
+
,4423.21,92.59,61.8,29.02,181.42,18.31,0.00004,,,,,,CMR BASE,,0.3067,0.30666
|
| 48 |
+
,4105.81,95.59,56.25,30.73,177.18,17.26,0.000075,,,,,,CMR BASE,,0.31,0.310075
|
| 49 |
+
,4511.25,98.45,53.97,27.77,185.56,17.09,0.000093,,,,,,CMR BASE,,0.3133,0.313393
|
| 50 |
+
,4545.64,95.05,56.61,34.31,178.17,17.13,0.000099,,,,,,CMR BASE,,0.3167,0.316799
|
| 51 |
+
,4387.78,95.76,51.89,27.58,181.26,17.22,0.00004,,,,,,CMR BASE,,0.32,0.32004
|
| 52 |
+
,4247.24,95.56,50.73,36.75,182.35,17.12,0.000026,,,,,,CMR BASE,,0.3233,0.323326
|
| 53 |
+
,4320.47,94.24,53.0,35.12,181.15,17.63,0.000008,,,,,,CMR BASE,,0.3267,0.326692
|
| 54 |
+
,4280.32,97.17,51.85,37.78,180.91,17.3,0.000049,,,,,,CMR BASE,,0.33,0.330049
|
| 55 |
+
,4292.95,92.68,54.28,34.86,181.28,17.64,0.002914,,,,,,CMR BASE,,0.3333,0.330386
|
| 56 |
+
,4323.88,92.73,54.1,31.21,181.95,17.63,0.008765,,,,,,CMR BASE,,0.3367,0.327935
|
| 57 |
+
,4371.36,93.22,54.18,31.47,182.42,17.58,0.012532,,,,,,CMR BASE,,0.34,0.327468
|
| 58 |
+
,4404.97,92.77,54.08,31.66,183.11,17.58,0.01776,,,,,,CMR BASE,,0.3433,0.32554
|
| 59 |
+
,4530.4,95.43,54.19,31.63,183.68,17.53,0.022474,,,,,,CMR BASE,,0.3467,0.324226
|
| 60 |
+
,4511.7,95.23,54.68,31.56,184.31,17.54,0.028114,,,,,,CMR BASE,,0.35,0.321886
|
| 61 |
+
,4232.09,92.9,55.2,40.6,173.35,17.42,0.012035,,,,,,CKP BASE,,0.2667,0.278735
|
| 62 |
+
,4225.85,95.07,56.02,39.07,181.37,16.01,0.006521,,,,,,CKP BASE,,0.27,0.276521
|
| 63 |
+
,4314.5,95.08,56.01,39.72,181.97,16.02,0.001893,,,,,,CKP BASE,,0.2733,0.275193
|
| 64 |
+
,4320.76,94.88,63.23,41.57,173.55,17.38,0.000048,,,,,,CKP BASE,,0.2767,0.276652
|
| 65 |
+
,4275.47,98.23,58.08,45.22,181.94,15.87,0.000073,,,,,,CKP BASE,,0.28,0.280073
|
| 66 |
+
,4325.12,98.2,55.74,43.9,183.04,16.17,0.000049,,,,,,CKP BASE,,0.2833,0.283349
|
| 67 |
+
,4336.85,94.77,57.4,39.75,177.04,16.14,0.000028,,,,,,CKP BASE,,0.2867,0.286672
|
| 68 |
+
,4223.68,96.0,62.08,40.75,182.92,16.27,0.000075,,,,,,CKP BASE,,0.29,0.289925
|
| 69 |
+
,4102.85,97.25,65.7,40.94,184.35,17.51,0.000013,,,,,,CKP BASE,,0.2933,0.293313
|
| 70 |
+
,4180.6,93.34,61.73,40.57,177.51,17.58,0.000077,,,,,,CKP BASE,,0.2967,0.296623
|
| 71 |
+
,4184.98,95.03,58.75,40.35,186.57,17.36,0.000099,,,,,,CKP BASE,,0.3,0.299901
|
| 72 |
+
,4445.89,94.24,63.47,47.46,181.55,18.3,0.000063,,,,,,CKP BASE,,0.3033,0.303237
|
| 73 |
+
,4277.34,93.85,64.3,43.57,182.0,17.39,0.000008,,,,,,CKP BASE,,0.3067,0.306708
|
| 74 |
+
,4453.7,98.33,63.97,38.56,181.43,18.48,0.000086,,,,,,CKP BASE,,0.31,0.310086
|
| 75 |
+
,4773.75,95.2,63.56,39.53,180.62,18.81,0.000038,,,,,,CKP BASE,,0.3133,0.313262
|
| 76 |
+
,4535.66,97.4,62.11,45.25,179.82,17.31,0.000039,,,,,,CKP BASE,,0.3167,0.316739
|
| 77 |
+
,4584.37,93.98,64.3,38.7,182.31,18.53,0.000034,,,,,,CKP BASE,,0.32,0.320034
|
| 78 |
+
,4546.56,93.87,57.79,45.99,188.63,17.82,0.00006,,,,,,CKP BASE,,0.3233,0.32324
|
| 79 |
+
,4870.81,97.41,55.3,37.72,182.27,18.6,0.000093,,,,,,CKP BASE,,0.3267,0.326607
|
| 80 |
+
,4867.39,93.27,58.38,38.59,186.96,17.98,0.000099,,,,,,CKP BASE,,0.33,0.329901
|
| 81 |
+
,4541.39,94.08,57.45,41.44,187.13,18.6,0.00005,,,,,,CKP BASE,,0.3333,0.33335
|
| 82 |
+
,4410.88,93.19,59.46,44.06,183.72,18.67,0.000094,,,,,,CKP BASE,,0.3367,0.336794
|
| 83 |
+
,4425.0,95.6,58.1,42.62,186.65,19.0,0.000053,,,,,,CKP BASE,,0.34,0.340053
|
| 84 |
+
,4430.36,92.7,66.4,42.52,193.34,18.99,0.000018,,,,,,CKP BASE,,0.3433,0.343318
|
| 85 |
+
,4532.0,93.35,59.67,39.58,186.57,18.67,0.000086,,,,,,CKP BASE,,0.3467,0.346614
|
| 86 |
+
,4809.65,92.92,65.24,46.53,188.16,18.94,0.000047,,,,,,CKP BASE,,0.35,0.349953
|
| 87 |
+
,4218.146647,95.08684851,56.00051072,39.59284696,181.3237826,16.0520592,0.007520789,,,,,,CKP BASE,,0.269,0.276520789
|
| 88 |
+
,3795.55,98.05,55.61,28.68,181.44,18.16,0.000083,,,,,,MORIGRO BASE,,0.2667,0.266617
|
| 89 |
+
,3694.83,92.72,56.65,29.32,181.89,18.19,0.000038,,,,,,MORIGRO BASE,,0.27,0.269962
|
| 90 |
+
,3784.48,98.26,55.43,32.16,181.93,18.18,0.000071,,,,,,MORIGRO BASE,,0.2733,0.273229
|
| 91 |
+
,3729.28,94.49,56.11,32.26,182.14,18.18,0.000015,,,,,,MORIGRO BASE,,0.2767,0.276685
|
| 92 |
+
,3694.71,94.5,56.59,30.27,184.71,18.2,0.00003,,,,,,MORIGRO BASE,,0.28,0.28003
|
| 93 |
+
,3741.95,94.61,63.73,39.42,182.76,18.28,0.000001,,,,,,MORIGRO BASE,,0.2833,0.283299
|
| 94 |
+
,3770.44,92.65,59.31,31.7,183.89,18.16,0.00001,,,,,,MORIGRO BASE,,0.2867,0.28671
|
| 95 |
+
,3860.63,94.59,63.26,38.77,183.82,18.29,0.000075,,,,,,MORIGRO BASE,,0.29,0.289925
|
| 96 |
+
,3838.07,92.09,63.31,38.71,185.39,18.29,0.000069,,,,,,MORIGRO BASE,,0.2933,0.293231
|
| 97 |
+
,3943.6,94.41,55.48,38.91,182.92,17.89,0.000009,,,,,,MORIGRO BASE,,0.2967,0.296709
|
| 98 |
+
,3754.46,94.68,60.21,41.18,185.66,18.26,0.000057,,,,,,MORIGRO BASE,,0.3,0.300057
|
| 99 |
+
,4006.48,93.89,60.87,43.27,188.25,18.27,0.00003,,,,,,MORIGRO BASE,,0.3033,0.30333
|
| 100 |
+
,3887.14,92.38,59.76,42.61,185.05,18.28,0.000068,,,,,,MORIGRO BASE,,0.3067,0.306632
|
| 101 |
+
,3803.81,92.58,57.93,44.17,187.97,17.76,0.000048,,,,,,MORIGRO BASE,,0.31,0.310048
|
| 102 |
+
,3776.87,96.95,63.09,44.77,187.02,18.25,0.000063,,,,,,MORIGRO BASE,,0.3133,0.313237
|
| 103 |
+
,3814.03,96.55,58.76,35.79,186.12,18.23,0.000011,,,,,,MORIGRO BASE,,0.3167,0.316711
|
| 104 |
+
,3824.88,94.02,57.86,41.7,188.3,17.58,0.000003,,,,,,MORIGRO BASE,,0.32,0.320003
|
| 105 |
+
,3864.91,94.68,60.99,44.18,188.13,17.39,0.000034,,,,,,MORIGRO BASE,,0.3233,0.323334
|
| 106 |
+
,3860.49,97.0,61.18,44.95,186.93,18.15,0.000038,,,,,,MORIGRO BASE,,0.3267,0.326662
|
| 107 |
+
,4014.13,92.26,61.19,43.86,188.73,17.72,0.000002,,,,,,MORIGRO BASE,,0.33,0.329998
|
| 108 |
+
,4008.89,94.22,64.65,43.83,186.2,16.94,0.000038,,,,,,MORIGRO BASE,,0.3333,0.333262
|
| 109 |
+
,4061.17,93.44,61.93,45.06,187.11,17.9,0.000015,,,,,,MORIGRO BASE,,0.3367,0.336715
|
| 110 |
+
,3849.15,94.57,62.98,38.43,188.74,17.77,0.000036,,,,,,MORIGRO BASE,,0.34,0.339964
|
| 111 |
+
,4053.06,93.45,58.44,40.78,186.31,17.88,0.000059,,,,,,MORIGRO BASE,,0.3433,0.343241
|
| 112 |
+
,4049.82,93.53,60.48,41.4,186.96,17.48,0.000028,,,,,,MORIGRO BASE,,0.3467,0.346728
|
| 113 |
+
,3942.5,93.26,60.25,44.66,186.89,17.59,0.000084,,,,,,MORIGRO BASE,,0.35,0.349916
|
| 114 |
+
,3662.153746,92.87885955,56.44008445,37.16931959,182.1015547,18.23756242,1.50828E-05,,,,,,MORIGRO BASE,,0.278,0.277984917
|
| 115 |
+
,3583.481721,94.61536745,47.72407145,33.93406928,184.1877979,18.15319544,9.45765E-05,,,,,,MORIGRO BASE,,0.22,0.220094576
|
| 116 |
+
,4004.1,95.82,53.85,17.76,173.9,18.84,0.000001,,,,,,BMR BASE,,0.2667,0.266701
|
| 117 |
+
,3889.04,92.86,56.08,16.61,176.49,18.39,0.000064,,,,,,BMR BASE,,0.27,0.270064
|
| 118 |
+
,3977.01,94.89,50.45,17.26,172.84,17.88,0.000028,,,,,,BMR BASE,,0.2733,0.273328
|
| 119 |
+
,3975.74,96.97,58.0,20.65,176.81,17.91,0.000046,,,,,,BMR BASE,,0.2767,0.276654
|
| 120 |
+
,4118.13,97.06,59.87,21.46,179.1,17.95,0.000034,,,,,,BMR BASE,,0.28,0.279966
|
| 121 |
+
,4036.57,94.61,57.52,22.4,175.41,17.96,0.000047,,,,,,BMR BASE,,0.2833,0.283253
|
| 122 |
+
,3957.07,94.96,59.03,25.83,180.33,18.06,0.000019,,,,,,BMR BASE,,0.2867,0.286719
|
| 123 |
+
,4012.08,95.13,55.6,23.87,173.89,18.25,0.000079,,,,,,BMR BASE,,0.29,0.290079
|
| 124 |
+
,4006.01,97.24,59.59,26.92,182.22,18.13,0.000009,,,,,,BMR BASE,,0.2933,0.293291
|
| 125 |
+
,4095.45,94.26,57.81,22.97,179.89,18.12,0.000078,,,,,,BMR BASE,,0.2967,0.296778
|
| 126 |
+
,4118.56,95.79,53.04,24.09,182.91,17.94,0.000025,,,,,,BMR BASE,,0.3,0.299975
|
| 127 |
+
,4173.75,93.33,46.73,25.32,180.23,17.22,0.000026,,,,,,BMR BASE,,0.3033,0.303326
|
| 128 |
+
,4261.18,93.59,48.9,25.45,179.85,17.07,0.000032,,,,,,BMR BASE,,0.3067,0.306668
|
| 129 |
+
,3859.52,94.76,51.81,23.39,181.66,17.29,0.000027,,,,,,BMR BASE,,0.31,0.310027
|
| 130 |
+
,3980.3,94.28,51.82,24.51,180.67,17.15,0.000088,,,,,,BMR BASE,,0.3133,0.313212
|
| 131 |
+
,4016.03,93.24,55.74,29.26,186.03,18.0,0.000033,,,,,,BMR BASE,,0.3167,0.316667
|
| 132 |
+
,4130.35,93.03,52.02,28.41,180.54,17.21,0.000099,,,,,,BMR BASE,,0.32,0.319901
|
| 133 |
+
,4060.71,95.76,53.21,31.61,190.97,17.24,0.000055,,,,,,BMR BASE,,0.3233,0.323355
|
| 134 |
+
,4100.03,95.47,59.71,28.84,181.87,17.08,0.000005,,,,,,BMR BASE,,0.3267,0.326695
|
| 135 |
+
,4076.16,98.89,58.05,26.81,185.79,17.13,0.000095,,,,,,BMR BASE,,0.33,0.329905
|
| 136 |
+
,4218.2,94.07,53.28,29.11,187.83,17.04,0.00003,,,,,,BMR BASE,,0.3333,0.33333
|
| 137 |
+
,4083.73,96.28,52.63,27.47,189.31,16.92,0.000009,,,,,,BMR BASE,,0.3367,0.336709
|
| 138 |
+
,4131.45,92.68,52.91,29.2,187.46,17.19,0.000004,,,,,,BMR BASE,,0.34,0.339996
|
| 139 |
+
,4273.64,92.39,52.52,32.09,188.33,17.57,0.000054,,,,,,BMR BASE,,0.3433,0.343354
|
| 140 |
+
,4173.31,94.49,53.5,27.82,191.52,16.45,0.000034,,,,,,BMR BASE,,0.3467,0.346734
|
| 141 |
+
,4037.2,97.81,57.66,30.44,186.81,17.41,0.000089,,,,,,BMR BASE,,0.35,0.350089
|
| 142 |
+
0.0,3954.9311079077775,94.45976055285084,48.6254940047852,19.32750786639565,172.50786350143548,18.96226889703589,5.821013450624246e-05,0.0216394552067815,100.0,menengah,0.0,0.2689417898654938,BMR BASE,,0.269,
|
| 143 |
+
1.0,4208.694512984114,92.90771191305322,56.013646682243845,28.699410878647587,190.6548727013951,16.751303269434544,1.3470649717906014e-07,3.741847143862782e-05,38.0,tinggi,0.0,0.3599998652935028,BMR BASE,,0.36,
|
| 144 |
+
1.0,3905.2788959322415,94.51705193020004,44.83638640133726,20.8867494655602,174.5534716840486,18.29888208092669,0.07266210317611693,36.33105158805847,100.0,rendah,0.0052797812379766,0.2726621031761169,CKR BASE,,0.2,
|
| 145 |
+
1.0,3958.483778501467,93.25064970149444,53.72320503170738,31.112422878232174,174.36778855806637,18.497168825666424,0.0020924067497253285,0.789587452726539,52.0,menengah,4.378166006296113e-06,0.26709240674972534,CKR BASE,,0.265,
|
Inverse_Model.py
ADDED
|
@@ -0,0 +1,440 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Inverse_Model.py
|
| 2 |
+
import numpy as np
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import joblib
|
| 5 |
+
from scipy.optimize import differential_evolution
|
| 6 |
+
|
| 7 |
+
# =========================================================
# GLOBAL CONFIGURATION (NO STREAMLIT IN THIS FILE)
# =========================================================

# Products supported by the inverse model; may also be imported by the Dashboard.
AVAILABLE_PRODUCTS = ["BMR BASE", "CKP BASE", "CKR BASE", "CMR BASE", "MORIGRO BASE"]

# Global parameter limits (used when computing ideal targets in calculate_bounds).
PARAMS_BOUNDS = {
    "D101330TT": (92, 99),
    "D102260TIC_CV": (35, 80),
    "D102265TIC_PV": (160, 195),
    "D102265TIC_CV": (10, 70),
    "D102266TIC": (15, 22),
    "D101264FTSCL": (3300, 4900),
}

# Per-product configuration: GAS range, correlations, and BINNING_DATA.
# Structure per product:
#   gas_min / gas_max : valid GAS_MMBTU range for the product
#   param_corr        : correlation sign of each parameter vs GAS_MMBTU
#                       ("positif" / "negatif" / "netral" — runtime values,
#                       matched literally in calculate_bounds)
#   binning           : (gas_lo, gas_hi) -> per-parameter (min, max)
#                       hard operating bounds for that gas range
PRODUCT_CONFIG = {
    "CKR BASE": {
        "gas_min": 0.20,
        "gas_max": 0.35,
        "param_corr": {
            "D101330TT": "negatif",
            "D102260TIC_CV": "positif",
            "D102265TIC_PV": "positif",
            "D102265TIC_CV": "positif",
            "D102266TIC": "netral",
            "D101264FTSCL": "positif",
        },
        "binning": {
            (0.20, 0.275): {
                "D101330TT": (92.01, 95.14),
                "D102260TIC_CV": (44.0, 70.0),
                "D102265TIC_PV": (173.96, 193.58),
                "D102265TIC_CV": (19.68, 52.23),
                "D102266TIC": (17.42, 18.59),
                "D101264FTSCL": (3710.76, 4690.91),
            },
            (0.275, 0.35): {
                "D101330TT": (92.01, 98.31),
                "D102260TIC_CV": (38.0, 68.0),
                "D102265TIC_PV": (171.59, 194.97),
                "D102265TIC_CV": (15.29, 63.62),
                "D102266TIC": (15.94, 19.59),
                "D101264FTSCL": (3496.96, 4888.82),
            },
        },
    },
    "BMR BASE": {
        "gas_min": 0.20,
        "gas_max": 0.375,
        "param_corr": {
            "D101330TT": "netral",
            "D102260TIC_CV": "positif",
            "D102265TIC_PV": "positif",
            "D102265TIC_CV": "positif",
            "D102266TIC": "netral",
            "D101264FTSCL": "positif",
        },
        "binning": {
            (0.20, 0.275): {
                "D101330TT": (92.62, 97.05),
                "D102260TIC_CV": (38, 62),
                "D102265TIC_PV": (171.64, 190.05),
                "D102265TIC_CV": (14.47, 24.46),
                "D102266TIC": (17.01, 18.98),
                "D101264FTSCL": (3633.08, 4125.52),
            },
            (0.275, 0.375): {
                "D101330TT": (92.23, 98.96),
                "D102260TIC_CV": (36.0, 60.0),
                "D102265TIC_PV": (171.64, 192.53),
                "D102265TIC_CV": (11.75, 33.91),
                "D102266TIC": (16.16, 18.43),
                "D101264FTSCL": (3535.08, 4283.65),
            },
        },
    },
    "CKP BASE": {
        "gas_min": 0.18,
        "gas_max": 0.375,
        "param_corr": {
            "D101330TT": "netral",
            "D102260TIC_CV": "positif",
            "D102265TIC_PV": "positif",
            "D102265TIC_CV": "positif",
            "D102266TIC": "netral",
            "D101264FTSCL": "positif",
        },
        "binning": {
            (0.18, 0.28): {
                "D101330TT": (92.01, 98.83),
                "D102260TIC_CV": (36, 68),
                "D102265TIC_PV": (168.11, 194.97),
                "D102265TIC_CV": (13.99, 49.36),
                "D102266TIC": (15.83, 18.84),
                "D101264FTSCL": (3632.62, 4890.58),
            },
            # NOTE(review): upper bin edge (0.38) exceeds gas_max (0.375);
            # looks intentional as a catch-all — confirm.
            (0.28, 0.38): {
                "D101330TT": (92.01, 99.00),
                "D102260TIC_CV": (38, 68),
                "D102265TIC_PV": (169.50, 194.97),
                "D102265TIC_CV": (13.93, 49.36),
                "D102266TIC": (15.86, 19.02),
                "D101264FTSCL": (3658.91, 4890.58),
            },
        },
    },
    "CMR BASE": {
        "gas_min": 0.19,
        "gas_max": 0.375,
        "param_corr": {
            "D101330TT": "netral",
            "D102260TIC_CV": "positif",
            "D102265TIC_PV": "positif",
            "D102265TIC_CV": "positif",
            "D102266TIC": "netral",
            "D101264FTSCL": "positif",
        },
        "binning": {
            (0.19, 0.275): {
                "D101264FTSCL": (3618.73, 4539.96),
                "D101330TT": (92.1, 98.91),
                "D102260TIC_CV": (38, 62),
                "D102265TIC_CV": (15.3, 26.01),
                "D102265TIC_PV": (163.14, 192.25),
                "D102266TIC": (16.35, 19.55),
            },
            (0.275, 0.375): {
                "D101264FTSCL": (3445.31, 4684.92),
                "D101330TT": (92.06, 99.0),
                "D102260TIC_CV": (36, 64),
                "D102265TIC_CV": (14.75, 39.87),
                "D102265TIC_PV": (162.09, 191.96),
                "D102266TIC": (16.2, 19.55),
            },
        },
    },
    "MORIGRO BASE": {
        "gas_min": 0.12,
        "gas_max": 0.375,
        "param_corr": {
            "D101330TT": "netral",
            "D102260TIC_CV": "positif",
            "D102265TIC_PV": "positif",
            "D102265TIC_CV": "positif",
            "D102266TIC": "netral",
            "D101264FTSCL": "positif",
        },
        "binning": {
            (0.12, 0.28): {
                "D101264FTSCL": (3437.81, 3922.18),
                "D101330TT": (92.01, 98.78),
                "D102260TIC_CV": (36, 70),
                "D102265TIC_CV": (20.0, 42.95),
                "D102265TIC_PV": (179.98, 194.97),
                "D102266TIC": (17.3, 18.32),
            },
            (0.28, 0.375): {
                "D101264FTSCL": (3389.88, 4072.64),
                "D101330TT": (92.01, 97.27),
                "D102260TIC_CV": (38, 66),
                "D102265TIC_CV": (19.65, 45.87),
                "D102265TIC_PV": (180.32, 189.0),
                "D102266TIC": (16.91, 18.32),
            },
        },
    },
}
|
| 177 |
+
|
| 178 |
+
# =========================================================
|
| 179 |
+
# FUNGSI MODEL & OPTIMISASI (BACKEND)
|
| 180 |
+
# =========================================================
|
| 181 |
+
|
| 182 |
+
def load_model(model_path: str):
    """Load the forward XGBoost model bundle from a joblib checkpoint.

    Returns a 4-tuple:
    ``(model, poly_transformer, input_features, poly_feature_names)``.
    """
    bundle = joblib.load(model_path)
    wanted = ("model", "poly_transformer", "input_features", "poly_feature_names")
    return tuple(bundle[key] for key in wanted)
|
| 191 |
+
|
| 192 |
+
def predict_mmbtu(params_array, model, poly_transformer, input_features, poly_feature_names):
    """Predict GAS_MMBTU for one parameter vector.

    ``params_array`` is ordered like ``input_features``; the single-row
    frame is expanded with ``poly_transformer`` before being fed to
    ``model``. Returns the prediction as a plain float.
    """
    row = pd.DataFrame(
        [dict(zip(input_features, params_array))],
        columns=input_features,
    )
    expanded = pd.DataFrame(
        poly_transformer.transform(row),
        columns=poly_feature_names,
    )
    prediction = model.predict(expanded)
    return float(prediction[0])
|
| 199 |
+
|
| 200 |
+
def get_operational_bounds(target_mmbtu: float, binning: dict):
    """
    Return the operational bounds of the bin closest to ``target_mmbtu``.

    Selection rules:
    - target inside a bin           -> that bin's bounds
    - target below all bins         -> first (lowest) bin
    - target above all bins         -> last (highest) bin
    - target in a gap between bins  -> the bin whose edge is nearest
      (the previous implementation always fell through to the last bin,
      even when a lower bin was closer)

    Parameters
    ----------
    target_mmbtu : float
        Target gas consumption (MMBTU).
    binning : dict
        Mapping of (low, high) gas-range tuples to per-parameter bounds.

    Returns
    -------
    dict
        The selected bin's per-parameter (min, max) bounds.
    """
    bins_sorted = sorted(binning.keys(), key=lambda b: b[0])  # sort by lower edge

    # Exact containment wins.
    for lo, hi in bins_sorted:
        if lo <= target_mmbtu <= hi:
            return binning[(lo, hi)]

    # Fallback: pick the bin with the smallest distance to the target.
    # Ties resolve to the lower bin (stable min over the sorted list).
    def distance(b):
        lo, hi = b
        return lo - target_mmbtu if target_mmbtu < lo else target_mmbtu - hi

    return binning[min(bins_sorted, key=distance)]
|
| 218 |
+
|
| 219 |
+
def calculate_bounds(target_mmbtu, input_features, product_cfg):
    """
    Compute hard_bounds & soft_bounds for one product and one target MMBTU.

    Parameters
    ----------
    target_mmbtu : float
        Desired gas consumption (MMBTU) to invert for.
    input_features : list[str]
        Parameter names, in model input order.
    product_cfg : dict
        One entry of PRODUCT_CONFIG (gas_min/gas_max, binning, param_corr).

    Returns
    -------
    (hard_bounds, soft_bounds, level)
        hard_bounds : dict param -> (min, max), taken from the binning table
        soft_bounds : dict param -> (ideal_min, ideal_max), clamped inside
                      the hard bounds
        level       : "rendah" / "menengah" / "tinggi" — position of the
                      target inside the product's gas range
    """
    gas_min = product_cfg["gas_min"]
    gas_max = product_cfg["gas_max"]
    binning = product_cfg["binning"]
    param_corr = product_cfg["param_corr"]

    # Step 1: normalized set-point (0..1) of the target within the product's
    # gas range, plus its qualitative level.
    gas_range = gas_max - gas_min
    sp_target = (target_mmbtu - gas_min) / gas_range
    sp_target = float(np.clip(sp_target, 0, 1))
    sp_inverse = 1.0 - sp_target

    if sp_target < 0.33:
        level = "rendah"
    elif sp_target < 0.67:
        level = "menengah"
    else:
        level = "tinggi"

    # Step 2: hard bounds from the binning table (bin containing the target)
    operational_bounds = get_operational_bounds(target_mmbtu, binning)
    hard_bounds = {}
    soft_bounds = {}

    # Step 3: soft (ideal) bounds per parameter
    for param in input_features:
        keras_min, keras_max = operational_bounds[param]
        keras_range = keras_max - keras_min
        hard_bounds[param] = (keras_min, keras_max)

        korelasi = param_corr.get(param, "netral")

        # The relevant set-point depends on the correlation sign:
        # negatively correlated parameters use the mirrored set-point.
        sp = sp_inverse if korelasi == "negatif" else sp_target

        # Ideal target derived from the global parameter bounds
        min_global, max_global = PARAMS_BOUNDS[param]
        range_global = max_global - min_global
        target_ideal_global = min_global + (sp * range_global)

        # Prefer the global ideal when it falls inside the bin's hard bounds;
        # otherwise re-scale the set-point within the bin itself.
        if keras_min <= target_ideal_global <= keras_max:
            target_ideal = target_ideal_global
        else:
            target_ideal = keras_min + (sp * keras_range)

        buffer = 0.2 * keras_range  # 20% of the hard range

        # Shape the soft window around the ideal target, depending on the
        # correlation sign and the target level.
        if korelasi == "netral":
            ideal_min = keras_min
            ideal_max = keras_max
        elif level == "rendah":
            if korelasi == "positif":
                ideal_min = keras_min
                ideal_max = target_ideal + buffer
            else:  # negative correlation
                ideal_min = target_ideal - buffer
                ideal_max = keras_max
        elif level == "menengah":
            ideal_min = target_ideal - buffer
            ideal_max = target_ideal + buffer
        else:  # tinggi
            if korelasi == "positif":
                ideal_min = target_ideal - buffer
                ideal_max = keras_max
            else:  # negative correlation
                ideal_min = keras_min
                ideal_max = target_ideal + buffer

        # Clamp the soft window into the hard bounds.
        ideal_min = max(ideal_min, keras_min)
        ideal_max = min(ideal_max, keras_max)
        soft_bounds[param] = (ideal_min, ideal_max)

    return hard_bounds, soft_bounds, level
|
| 296 |
+
|
| 297 |
+
def objective_function(
    params_array,
    target_mmbtu,
    model,
    poly_transformer,
    input_features,
    poly_feature_names,
    hard_bounds,
    soft_bounds,
):
    """Differential Evolution objective: squared prediction error plus a
    range-normalized penalty for parameters outside their soft window.

    Once the prediction lies within a small tolerance of the target, only
    the penalty is returned so the search focuses on soft-bound compliance.
    """
    pred = predict_mmbtu(params_array, model, poly_transformer, input_features, poly_feature_names)
    squared_error = (pred - target_mmbtu) ** 2

    penalty = 0.0
    for idx, feature in enumerate(input_features):
        value = params_array[idx]
        soft_lo, soft_hi = soft_bounds[feature]
        hard_lo, hard_hi = hard_bounds[feature]
        span = hard_hi - hard_lo

        # Distance outside the ideal (soft) window; zero when inside it.
        if value < soft_lo:
            excess = soft_lo - value
        elif value > soft_hi:
            excess = value - soft_hi
        else:
            excess = 0.0

        # Normalize by the hard-bound span so every feature contributes on
        # a comparable scale (skip degenerate zero-width bins).
        if span > 0:
            penalty += excess / span

    tolerance = 1e-4  # prediction considered "on target" below this error
    if abs(pred - target_mmbtu) < tolerance:
        return penalty
    return squared_error + penalty
|
| 332 |
+
|
| 333 |
+
def optimize_one_target(
    target_mmbtu,
    model,
    poly_transformer,
    input_features,
    poly_feature_names,
    product_cfg,
    maxiter=100,
    popsize=30,
):
    """Run the inverse optimization (Differential Evolution) for a single
    MMBTU target and return a summary dict describing the best parameter set.
    """
    # Hard bounds constrain the search space; soft bounds only add penalty.
    hard_bounds, soft_bounds, level = calculate_bounds(target_mmbtu, input_features, product_cfg)
    search_space = [hard_bounds[feature] for feature in input_features]

    def evaluate(candidate):
        # Closure over the fixed optimization context for this target.
        return objective_function(
            candidate,
            target_mmbtu,
            model,
            poly_transformer,
            input_features,
            poly_feature_names,
            hard_bounds,
            soft_bounds,
        )

    de_result = differential_evolution(
        func=evaluate,
        bounds=search_space,
        strategy="best1bin",
        maxiter=maxiter,
        popsize=popsize,
        tol=1e-4,
        mutation=(0.5, 1),
        recombination=0.7,
        seed=42,  # fixed seed -> reproducible recommendations
        polish=True,
        atol=1e-6,
        disp=False,
    )

    best_params = dict(zip(input_features, de_result.x))
    best_prediction = predict_mmbtu(de_result.x, model, poly_transformer, input_features, poly_feature_names)
    abs_error = abs(best_prediction - target_mmbtu)

    # Features whose optimum ended up outside the ideal (soft) window.
    soft_violations = [
        feature
        for feature, value in best_params.items()
        if not (soft_bounds[feature][0] <= value <= soft_bounds[feature][1])
    ]

    return {
        "target": float(target_mmbtu),
        "level": level,
        "optimal_params": best_params,
        "prediction": float(best_prediction),
        "error": abs_error,
        "error_pct": abs_error / target_mmbtu * 100.0,
        "objective_value": float(de_result.fun),
        "converged": bool(de_result.success),
        "iterations": int(de_result.nit),
        "soft_violations": soft_violations,
        "hard_bounds": hard_bounds,
        "soft_bounds": soft_bounds,
    }
|
| 397 |
+
|
| 398 |
+
def run_inverse_for_targets(model_path, product_name, targets):
    """
    Load the forward model once, then run the inverse optimization for
    every requested MMBTU target. Called from the Dashboard.
    """
    product_cfg = PRODUCT_CONFIG[product_name]
    model, poly_transformer, input_features, poly_feature_names = load_model(model_path)

    return [
        optimize_one_target(
            target_mmbtu=target,
            model=model,
            poly_transformer=poly_transformer,
            input_features=input_features,
            poly_feature_names=poly_feature_names,
            product_cfg=product_cfg,
            maxiter=100,
            popsize=30,
        )
        for target in targets
    ]
|
| 420 |
+
|
| 421 |
+
def results_to_dataframe(results, product_name):
    """Flatten a list of optimization result dicts into a DataFrame for
    display or export; one row per target, optimal params as extra columns."""
    records = []
    for result in results:
        record = {
            "Product": product_name,
            "Target_MMBTU": result["target"],
            "Level": result["level"],
            "Predicted_MMBTU": result["prediction"],
            "Error": result["error"],
            "Error_Pct": result["error_pct"],
            "Objective_Value": result["objective_value"],
            "Converged": result["converged"],
            "Iterations": result["iterations"],
            # join() on an empty list already yields "", matching the original output.
            "Soft_Violations": ", ".join(result["soft_violations"]),
        }
        record.update(result["optimal_params"])
        records.append(record)
    return pd.DataFrame(records)
|
MonitoringModel.py
ADDED
|
@@ -0,0 +1,245 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
import joblib
|
| 4 |
+
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
|
| 5 |
+
import matplotlib.pyplot as plt
|
| 6 |
+
import seaborn as sns
|
| 7 |
+
import os
|
| 8 |
+
|
| 9 |
+
# =========================================================
# GLOBAL CONFIGURATION (fixed)
# =========================================================
# NOTE(review): absolute Windows paths — these only resolve on the original
# author's machine; callers should override them via function arguments
# (see evaluate_models_for_dashboard) when deploying elsewhere.
DATA_FILENAME = r'C:\Dokumen\One To Many_17_10_2025\MMBTU\DASHBOARD\One To Many\disagregasi_data_spraydryer_terbaru_10_17_2025.csv'
MODEL_FOLDER = r'C:\Dokumen\One To Many_17_10_2025\MMBTU\DASHBOARD\One To Many\MODEL CHECKPOINT FOR INVERSE MODEL'
# Target column: per-minute disaggregated gas consumption.
TARGET_COLUMN = 'GAS_MMBTU_Disaggregated'

# Products evaluated by the dashboard; one model checkpoint exists per product.
PRODUCT_LIST = [
    'BMR BASE',
    'CKP BASE',
    'CKR BASE',
    'CMR BASE',
    'MORIGRO BASE'
]

# The six process parameters (sensor tag names) used as model inputs.
FEATURES = [
    'D101330TT',
    'D102260TIC_CV',
    'D102265TIC_PV',
    'D102265TIC_CV',
    'D102266TIC',
    'D101264FTSCL'
]

# Name of the column that holds model predictions.
PREDICTION_COLUMN = 'Prediksi_Gas'
# Checkpoint filename pattern; formatted with the product name.
MODEL_FILENAME_TEMPLATE = 'model_checkpoint_xgb_{}.joblib'
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
# =========================================================
|
| 38 |
+
# FUNGSI UTILITAS (tetap)
|
| 39 |
+
# =========================================================
|
| 40 |
+
def calculate_metrics(y_true, y_pred):
    """Compute the regression metric triple (R², RMSE, MAE) for the given
    actual/predicted value series."""
    return (
        r2_score(y_true, y_pred),
        np.sqrt(mean_squared_error(y_true, y_pred)),
        mean_absolute_error(y_true, y_pred),
    )
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def _load_model_for_product(model_dir, product):
    """Load the per-product deployment bundle (XGBoost model, polynomial
    transformer, and transformed feature names) from *model_dir*.

    Raises FileNotFoundError if the checkpoint file is missing and KeyError
    if the bundle lacks any of the three required entries.
    """
    bundle_path = os.path.join(model_dir, MODEL_FILENAME_TEMPLATE.format(product))
    if not os.path.exists(bundle_path):
        raise FileNotFoundError(f"File model tidak ditemukan: {bundle_path}")

    bundle = joblib.load(bundle_path)

    # Pull the three mandatory pieces out of the serialized bundle.
    parts = tuple(
        bundle.get(key)
        for key in ('model', 'poly_transformer', 'poly_feature_names')
    )

    if any(part is None for part in parts):
        raise KeyError(
            "Bundle model tidak lengkap. Pastikan berisi "
            "'model', 'poly_transformer', dan 'poly_feature_names'."
        )

    return parts
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
# =========================================================
|
| 70 |
+
# FUNGSI UTAMA UNTUK DASHBOARD (PERBAIKAN)
|
| 71 |
+
# =========================================================
|
| 72 |
+
def evaluate_models_for_dashboard(
    data_path: str = DATA_FILENAME,
    model_dir: str = MODEL_FOLDER,
    products: list = None,
    features: list = None,
    target_col: str = TARGET_COLUMN,
    data_df=None,  # <--- NEW: a DataFrame can be passed in directly from Streamlit
):
    """
    Evaluate per-product model performance for the dashboard.

    Returns:
        summary_df: DataFrame with columns [Product, R², RMSE, MAE]
        product_figs: dict {product_name: matplotlib.figure.Figure}

    Data priority:
        1) If data_df is not None -> use data_df (uploaded via Streamlit)
        2) If data_df is None -> read the CSV at data_path (default file)

    Products that fail at any stage (missing features, model load error,
    transform/predict error) are skipped with a console warning rather
    than aborting the whole evaluation.
    """
    if products is None:
        products = PRODUCT_LIST
    if features is None:
        features = FEATURES

    # --- 1. Load data ---
    if data_df is not None:
        # Use the user-uploaded dataset (already a DataFrame).
        df = data_df.copy()
    else:
        # Fallback: read from the CSV path as before. Errors return an
        # empty summary plus an empty figure dict so the caller can render
        # a graceful "no data" state.
        try:
            df = pd.read_csv(data_path)
        except FileNotFoundError:
            print(f"[ERROR] Data file tidak ditemukan di: {data_path}")
            return pd.DataFrame(columns=['Product', 'R²', 'RMSE', 'MAE']), {}
        except Exception as e:
            print(f"[ERROR] Gagal memuat data: {e}")
            return pd.DataFrame(columns=['Product', 'R²', 'RMSE', 'MAE']), {}

    # Coerce Date_time to datetime when present (invalid values become NaT),
    # so the per-product time-series sort below behaves correctly.
    if 'Date_time' in df.columns:
        df['Date_time'] = pd.to_datetime(df['Date_time'], errors='coerce')

    summary_results = []
    plot_data_list = []

    # --- 2. Per-product loop ---
    for product in products:
        df_prod = df[df['Product'] == product].copy()

        # Need at least 2 rows for meaningful metrics.
        if df_prod.empty or len(df_prod) < 2:
            continue

        missing_features = [f for f in features if f not in df_prod.columns]
        if missing_features:
            print(f"[WARN] Fitur hilang untuk {product}: {missing_features}")
            continue

        if 'Date_time' in df_prod.columns:
            df_prod = df_prod.sort_values('Date_time')

        X_raw = df_prod[features]
        y_true = df_prod[target_col]

        # --- 2a. Load this product's model bundle ---
        try:
            model, poly_transformer, poly_feature_names = _load_model_for_product(model_dir, product)
        except Exception as e:
            print(f"[WARN] Gagal load model untuk {product}: {e}")
            continue

        # --- 2b. Polynomial transform + prediction ---
        # The transform output is re-wrapped in a DataFrame with the saved
        # feature names so the model sees the column layout it was fit on.
        try:
            X_transformed_np = poly_transformer.transform(X_raw)
            X_transformed_df = pd.DataFrame(
                X_transformed_np,
                columns=poly_feature_names,
                index=X_raw.index
            )
            y_pred = model.predict(X_transformed_df)
        except Exception as e:
            print(f"[WARN] Gagal transform/predict untuk {product}: {e}")
            continue

        # --- 2c. Compute metrics ---
        r2, rmse, mae = calculate_metrics(y_true, y_pred)
        summary_results.append({
            'Product': product,
            'R²': r2,
            'RMSE': rmse,
            'MAE': mae
        })

        # --- 2d. Stage data for the scatter plots ---
        plot_df = pd.DataFrame({
            'Actual': y_true.values,
            'Predicted': y_pred,
            'Product': product
        })
        plot_data_list.append(plot_df)

    # --- 3. Build summary_df (ordered by the requested product list) ---
    if summary_results:
        summary_df = pd.DataFrame(summary_results)
        summary_df['Product'] = pd.Categorical(summary_df['Product'], categories=products, ordered=True)
        summary_df = summary_df.sort_values('Product').reset_index(drop=True)
    else:
        # Nothing was evaluated: return the empty frame immediately.
        summary_df = pd.DataFrame(columns=['Product', 'R²', 'RMSE', 'MAE'])
        return summary_df, {}

    product_figs = {}

    # --- 4. Generate one Actual-vs-Predicted figure per product (for Streamlit) ---
    if plot_data_list:
        all_plot_data = pd.concat(plot_data_list)
        products_evaluated = summary_df['Product'].tolist()

        sns.set_style("whitegrid")

        for product in products_evaluated:
            product_data = all_plot_data[all_plot_data['Product'] == product].dropna()
            if product_data.empty:
                continue

            metrics = summary_df[summary_df['Product'] == product].iloc[0]
            title = (f'{product}\n'
                     f'$R^2$: {metrics["R²"]:.3f}, '
                     f'RMSE: {metrics["RMSE"]:.3f}, '
                     f'MAE: {metrics["MAE"]:.3f}')

            # Square plot range with a 5% margin so the ideal diagonal spans
            # corner to corner.
            min_val = min(product_data['Actual'].min(), product_data['Predicted'].min())
            max_val = max(product_data['Actual'].max(), product_data['Predicted'].max())
            margin = (max_val - min_val) * 0.05
            plot_range = [min_val - margin, max_val + margin]

            # One standalone figure per product.
            fig_single = plt.figure(figsize=(8, 6))
            ax_single = fig_single.add_subplot(111)
            sns.scatterplot(
                x='Actual',
                y='Predicted',
                data=product_data,
                ax=ax_single,
                alpha=0.6
            )
            ax_single.plot(plot_range, plot_range, 'r--', label='Ideal (Actual = Predicted)')
            ax_single.set_xlim(plot_range)
            ax_single.set_ylim(plot_range)
            ax_single.set_title(title)
            ax_single.set_xlabel(f'Actual {target_col}')
            ax_single.set_ylabel(f'Predicted {target_col}')
            ax_single.legend()

            product_figs[product] = fig_single
            # Close to stop pyplot from keeping (and re-rendering) the figure;
            # the Figure object itself remains usable by Streamlit.
            plt.close(fig_single)

    return summary_df, product_figs
|
| 228 |
+
|
| 229 |
+
# =========================================================
|
| 230 |
+
# OPSIONAL: MODE CLI (tetap)
|
| 231 |
+
# =========================================================
|
| 232 |
+
if __name__ == "__main__":
    # CLI entry point: run the evaluation with the default paths and print
    # a per-product performance summary table.
    print("Memulai Evaluasi Performa Model Inverse...")

    summary_df, figs = evaluate_models_for_dashboard()

    banner = "=" * 40
    print("\n" + banner)
    print("=== Ringkasan Performa Model ===")
    print(banner)

    if summary_df.empty:
        print("Gagal memproses data atau model. Periksa pesan error di atas.")
    else:
        print(summary_df.to_markdown(index=False, floatfmt=".4f"))
|
README.md
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dashboard Prediksi Model XGBoost dan Inverse Model untuk Spray Dryer
|
| 2 |
+
|
| 3 |
+
Dashboard ini adalah aplikasi Streamlit yang dirancang untuk melakukan prediksi konsumsi gas (MMBTU) pada proses spray dryer menggunakan model XGBoost dan inverse modelling. Dashboard ini juga menyertakan fitur EDA (Exploratory Data Analysis), disagregasi data, filtering dengan rule engine, serta monitoring performa model. Aplikasi ini mendukung prediksi berdasarkan parameter proses, simulasi inverse untuk menemukan parameter optimal dari target gas, serta cleaning data untuk memastikan kualitas input.
|
| 4 |
+
|
| 5 |
+
## Cara Instalasi dan Menjalankan Dashboard Secara Lokal
|
| 6 |
+
Ikuti langkah-langkah berikut untuk menjalankan dashboard :
|
| 7 |
+
|
| 8 |
+
1. **Buka Terminal atau Command Prompt**
|
| 9 |
+
   Jika menggunakan VS Code, buka terminal terintegrasi dengan ``Ctrl + ` `` (backtick).
|
| 10 |
+
|
| 11 |
+
2. **Beralih ke Direktori Proyek**
|
| 12 |
+
Gunakan perintah berikut untuk pindah ke folder tempat proyek disimpan:
|
| 13 |
+
```
|
| 14 |
+
cd "path/ke/folder/proyek"
|
| 15 |
+
```
|
| 16 |
+
Ganti `"path/ke/folder/proyek"` dengan lokasi folder Anda (contoh: `cd "C:\Users\NamaUser\Documents\DashboardSprayDryer"`).
|
| 17 |
+
|
| 18 |
+
3. **Buat Environment Virtual Baru**
|
| 19 |
+
Buat environment virtual untuk mengisolasi dependensi:
|
| 20 |
+
```
|
| 21 |
+
python -m venv nama_env
|
| 22 |
+
```
|
| 23 |
+
Ganti `nama_env` dengan nama yang diinginkan (contoh: `python -m venv spraydryer_env`).
|
| 24 |
+
|
| 25 |
+
4. **Aktifkan Environment Virtual**
|
| 26 |
+
Aktifkan environment:
|
| 27 |
+
- Pada Windows:
|
| 28 |
+
```
|
| 29 |
+
nama_env\Scripts\activate
|
| 30 |
+
```
|
| 31 |
+
- Pada macOS/Linux:
|
| 32 |
+
```
|
| 33 |
+
source nama_env/bin/activate
|
| 34 |
+
```
|
| 35 |
+
Setelah diaktifkan, prompt terminal Anda akan menampilkan nama environment (misalnya: `(nama_env)`).
|
| 36 |
+
|
| 37 |
+
5. **Instal Dependensi**
|
| 38 |
+
Instal semua package yang diperlukan dari file `requirements.txt`:
|
| 39 |
+
```
|
| 40 |
+
pip install -r requirements.txt
|
| 41 |
+
```
|
| 42 |
+
Pastikan file `requirements.txt` ada di direktori proyek (contoh isi: `streamlit`, `pandas`, `numpy`, `xgboost`, `scikit-learn`, dll.).
|
| 43 |
+
|
| 44 |
+
6. **Jalankan Dashboard**
|
| 45 |
+
Jalankan aplikasi Streamlit dengan perintah:
|
| 46 |
+
```
|
| 47 |
+
streamlit run Dashboard.py
|
| 48 |
+
```
|
| 49 |
+
(Catatan: Jika nama file utama berbeda, sesuaikan dengan nama file Anda.)
|
| 50 |
+
|
| 51 |
+
7. **Akses Dashboard**
|
| 52 |
+
Setelah dijalankan, Streamlit akan menampilkan pesan seperti:
|
| 53 |
+
```
|
| 54 |
+
You can now view your Streamlit app in your browser.
|
| 55 |
+
Local URL: http://localhost:8501
|
| 56 |
+
Network URL: http://192.168.x.x:8501
|
| 57 |
+
```
|
| 58 |
+
Klik salah satu URL untuk membuka dashboard di browser. Jika gagal, coba akses `http://localhost:8501` secara manual.
|
| 59 |
+
|
| 60 |
+
Jika mengalami error, periksa:
|
| 61 |
+
- Versi Python yang benar.
|
| 62 |
+
- Semua dependensi terinstal (jalankan `pip list` untuk verifikasi).
|
| 63 |
+
- File model (misalnya XGBoost) dan data historis tersedia di direktori yang tepat.
|
| 64 |
+
|
| 65 |
+
## Fitur dan Halaman Dashboard
|
| 66 |
+
Dashboard ini memiliki 6 halaman utama, masing-masing dengan fungsi spesifik untuk mendukung prediksi, analisis, dan pembersihan data spray dryer. Berikut deskripsi lengkapnya:
|
| 67 |
+
|
| 68 |
+
### 1. Prediksi Gas dari 6 Parameter
|
| 69 |
+
Halaman ini menggunakan model XGBoost dengan feature engineering berbasis waktu untuk memprediksi konsumsi gas (MMBTU) dari 6 parameter proses utama:
|
| 70 |
+
- `D101330TT` (Outlet Temperature)
|
| 71 |
+
- `D102260TIC_CV` (HP Steam Damper CV)
|
| 72 |
+
- `D102265TIC_PV` (Inlet Temperature PV)
|
| 73 |
+
- `D102265TIC_CV` (LP Steam Damper CV)
|
| 74 |
+
- `D102266TIC` (Dehumidifier Temperature)
|
| 75 |
+
- `D101264FTSCL` (Flow Feed)
|
| 76 |
+
|
| 77 |
+
**Cara Penggunaan:**
|
| 78 |
+
- Input rentang nilai untuk masing-masing parameter.
|
| 79 |
+
- Klik tombol **"Prediksi Konsumsi Gas (MMBTU)"** untuk menjalankan prediksi.
|
| 80 |
+
- Opsi tambahan: Unggah file CSV untuk prediksi batch.
|
| 81 |
+
Fitur ini membutuhkan konteks data historis untuk menghitung fitur waktu.
|
| 82 |
+
|
| 83 |
+
### 2. Prediksi Parameter dari Gas (MMBTU)
|
| 84 |
+
Halaman ini terdiri dari 3 bagian utama: Inverse Model, Validasi Model, dan Simulasi Prediksi. Model yang digunakan adalah XGBoost dengan Polynomial Features untuk inverse modelling (memprediksi 6 parameter dari target gas).
|
| 85 |
+
|
| 86 |
+
**Alur Penggunaan:**
|
| 87 |
+
1. **Inverse Model:**
|
| 88 |
+
- Pilih produk
|
| 89 |
+
- Input target konsumsi gas (MMBTU) yang diinginkan.
|
| 90 |
+
- Klik **"Optimasi Parameter"** untuk mencari rekomendasi 6 parameter.
|
| 91 |
+
- Metode pencarian:
|
| 92 |
+
- Cari di file csv historis (jika tersedia).
|
| 93 |
+
- Atau jalankan algoritma Differential Evolution untuk optimasi.
|
| 94 |
+
- Hasil rekomendasi akan disimpan ke file Excel.
|
| 95 |
+
|
| 96 |
+
2. **Validasi Model – XGBoost Many-to-One:**
|
| 97 |
+
- Gunakan parameter hasil inverse untuk validasi forward modelling.
|
| 98 |
+
- Tampilkan metrik performa seperti MAE, RMSE, R², dan Prediksi GAS (MMBTU).
|
| 99 |
+
|
| 100 |
+
3. **Simulasi Prediksi Konsumsi Gas (Forward Modelling):**
|
| 101 |
+
- Input manual 6 parameter proses.
|
| 102 |
+
- Dapatkan prediksi konsumsi gas secara real-time menggunakan model forward yang sama.
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
### 3. Monitoring Model
|
| 106 |
+
Halaman ini untuk memantau performa model secara real-time.
|
| 107 |
+
|
| 108 |
+
**Cara Penggunaan:**
|
| 109 |
+
- Unggah file CSV data evaluasi yang mencakup target `GAS_MMBTU_Disaggregated`.
|
| 110 |
+
- Sistem akan memprediksi nilai gas untuk setiap produk, menghitung selisih antara prediksi dan data aktual, serta menampilkan metrik performa per produk (seperti MAE, RMSE, R²).
|
| 111 |
+
Fitur ini berguna untuk evaluasi model pada data baru.
|
| 112 |
+
|
| 113 |
+
### 4. Exploratory Data Analysis (EDA)
|
| 114 |
+
Halaman ini menyediakan analisis eksploratif data spray dryer.
|
| 115 |
+
|
| 116 |
+
**Fitur Utama:**
|
| 117 |
+
- **Ringkasan Data (Data Summary):** Tampilkan metric seperti total baris, total kolom, rentang tanggal produksi, total missing values, jumlah produk unik, jumlah data duplikat, dan ringkasan anomali.
|
| 118 |
+
- **EDA per Kategori Produk:** Gunakan tab untuk "All Data" dan setiap produk unik (misalnya: BMR BASE, CKP BASE, CKR BASE, CMR BASE, MORIGRO BASE, dll.). Di setiap tab:
|
| 119 |
+
- Distribusi Parameter Proses (boxplot/violin plot untuk 6 parameter utama).
|
| 120 |
+
- Deteksi Outlier (boxplot dengan highlight, plus jumlah outlier per parameter).
|
| 121 |
+
- Tabel Statistik Deskriptif (`df.describe()` untuk 6 parameter + GAS_MMBTU_Disaggregated).
|
| 122 |
+
- Segmen Produksi per Produk (tabel dengan Start_Time, End_Time, Duration_Minutes, Data_Points).
|
| 123 |
+
- Tombol **"Refresh EDA"** untuk update data baru.
|
| 124 |
+
|
| 125 |
+
### 5. Disagregasi Data
|
| 126 |
+
Halaman ini untuk mengubah data konsumsi gas dari skala per jam menjadi per menit menggunakan algoritma disagregasi berbasis bobot indikator proses (proportional weight splitting).
|
| 127 |
+
|
| 128 |
+
**Alur Penggunaan:**
|
| 129 |
+
- Unggah file CSV data per jam.
|
| 130 |
+
- Klik **"Jalankan Proses Disagregasi"**.
|
| 131 |
+
- Tampilkan laporan lengkap: validasi jam produksi, perhitungan bobot, validasi akurasi (selisih nol), ringkasan pipeline, analisis hasil (statistik, jam tertinggi/terendah).
|
| 132 |
+
- Unduh file hasil disagregasi (dengan kolom baru `GAS_MMBTU_Disaggregated`).
|
| 133 |
+
Panggil fungsi dari `Disagregasi_mmbtu.py` untuk proses inti.
|
| 134 |
+
|
| 135 |
+
### 6. Filter Rule Engine
|
| 136 |
+
Halaman ini untuk membersihkan data hasil disagregasi menggunakan rule engine dengan 4 aturan anomali.
|
| 137 |
+
|
| 138 |
+
**Alur Penggunaan:**
|
| 139 |
+
- Unggah file CSV hasil disagregasi.
|
| 140 |
+
- Klik **"Jalankan Rule Engine"**.
|
| 141 |
+
- Tampilkan ringkasan: total baris awal/bersih, jumlah anomali dihapus, persentase data bersih.
|
| 142 |
+
- Detail anomali: tabel dengan jenis anomali, jumlah baris, persentase, contoh waktu.
|
| 143 |
+
- Unduh file data bersih dan file anomali (dengan kolom `anomaly_reason`).
|
disagregasi_data_spraydryer_terbaru_10_17_2025.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
eda_functions.py
ADDED
|
@@ -0,0 +1,1111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
import matplotlib.pyplot as plt
|
| 4 |
+
import seaborn as sns
|
| 5 |
+
from datetime import datetime, timedelta
|
| 6 |
+
import warnings
|
| 7 |
+
warnings.filterwarnings('ignore')
|
| 8 |
+
|
| 9 |
+
class SprayDryerEDAPipeline:
|
| 10 |
+
"""
|
| 11 |
+
Pipeline untuk Exploratory Data Analysis (EDA) dan preprocessing data spray dryer
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
def __init__(self, data_path=None, dataframe=None):
|
| 15 |
+
"""
|
| 16 |
+
Inisialisasi pipeline
|
| 17 |
+
|
| 18 |
+
Parameters:
|
| 19 |
+
-----------
|
| 20 |
+
data_path : str, optional
|
| 21 |
+
Path ke file data (CSV, Excel, dll)
|
| 22 |
+
dataframe : pd.DataFrame, optional
|
| 23 |
+
DataFrame yang sudah dimuat
|
| 24 |
+
"""
|
| 25 |
+
if dataframe is not None:
|
| 26 |
+
self.df_original = dataframe.copy()
|
| 27 |
+
elif data_path:
|
| 28 |
+
self.df_original = self.load_data(data_path)
|
| 29 |
+
else:
|
| 30 |
+
raise ValueError("Harus memberikan data_path atau dataframe")
|
| 31 |
+
|
| 32 |
+
self.df = self.df_original.copy()
|
| 33 |
+
self.product_dataframes = {}
|
| 34 |
+
self.setup_visualization()
|
| 35 |
+
|
| 36 |
+
def setup_visualization(self):
|
| 37 |
+
"""Setup parameter visualisasi"""
|
| 38 |
+
plt.style.use('default')
|
| 39 |
+
sns.set_palette("husl")
|
| 40 |
+
|
| 41 |
+
def load_data(self, path):
|
| 42 |
+
"""Load data dari file"""
|
| 43 |
+
if path.endswith('.csv'):
|
| 44 |
+
return pd.read_csv(path)
|
| 45 |
+
elif path.endswith(('.xlsx', '.xls')):
|
| 46 |
+
return pd.read_excel(path)
|
| 47 |
+
else:
|
| 48 |
+
raise ValueError("Format file tidak didukung")
|
| 49 |
+
|
| 50 |
+
# ============= STEP 1: PEMERIKSAAN KOLOM AWAL =============
|
| 51 |
+
def check_and_fix_columns(self):
|
| 52 |
+
"""
|
| 53 |
+
Step 1: Pemeriksaan dan perbaikan nama kolom
|
| 54 |
+
"""
|
| 55 |
+
print("="*80)
|
| 56 |
+
print("STEP 1: PEMERIKSAAN KOLOM AWAL")
|
| 57 |
+
print("="*80)
|
| 58 |
+
|
| 59 |
+
# Daftar kolom standar
|
| 60 |
+
standard_columns = [
|
| 61 |
+
'Date_time', 'Drier_On_Product', 'D101330TT', 'D102260TIC_CV',
|
| 62 |
+
'D102265TIC_PV', 'D102265TIC_CV', 'D102266TIC', 'D101264FTSCL',
|
| 63 |
+
'Product', 'GAS_MMBTU', 'fixed_rounded_time'
|
| 64 |
+
]
|
| 65 |
+
|
| 66 |
+
print(f"Kolom yang ada di dataframe: {list(self.df.columns)}")
|
| 67 |
+
print(f"\nKolom standar yang diharapkan: {standard_columns}")
|
| 68 |
+
standard_lookup = {col.lower(): col for col in standard_columns}
|
| 69 |
+
column_mapping = {}
|
| 70 |
+
unmatched_column = []
|
| 71 |
+
for actual_col in self.df.columns:
|
| 72 |
+
actual_col_lower = actual_col.lower()
|
| 73 |
+
if actual_col_lower in standard_lookup:
|
| 74 |
+
standard_name = standard_lookup[actual_col_lower]
|
| 75 |
+
if actual_col != standard_name:
|
| 76 |
+
column_mapping[actual_col] = standard_name
|
| 77 |
+
else:
|
| 78 |
+
unmatched_column.append(actual_col)
|
| 79 |
+
|
| 80 |
+
# Rename kolom
|
| 81 |
+
self.df.rename(columns=column_mapping, inplace=True)
|
| 82 |
+
|
| 83 |
+
# Hapus kolom yang tidak ada dalam daftar standar
|
| 84 |
+
cols_to_keep = [col for col in self.df.columns if col in standard_columns]
|
| 85 |
+
cols_removed = [col for col in self.df.columns if col not in standard_columns]
|
| 86 |
+
|
| 87 |
+
if cols_removed:
|
| 88 |
+
print(f"\nKolom yang dihapus: {cols_removed}")
|
| 89 |
+
|
| 90 |
+
self.df = self.df[cols_to_keep]
|
| 91 |
+
|
| 92 |
+
# Cek apakah fixed_rounded_time ada
|
| 93 |
+
if 'fixed_rounded_time' not in self.df.columns:
|
| 94 |
+
print("\nKolom 'fixed_rounded_time' tidak ditemukan. Akan dibuat nanti.")
|
| 95 |
+
|
| 96 |
+
print(f"\nKolom final: {list(self.df.columns)}")
|
| 97 |
+
print(f"Shape dataframe: {self.df.shape}")
|
| 98 |
+
|
| 99 |
+
# ============= STEP 2: VALIDASI KOLOM PRODUCT =============
|
| 100 |
+
def validate_product_names(self):
|
| 101 |
+
"""
|
| 102 |
+
Step 2: Validasi dan standardisasi nama produk secara otomatis.
|
| 103 |
+
"""
|
| 104 |
+
print("\n" + "="*80)
|
| 105 |
+
print("STEP 2: VALIDASI DAN STANDARDISASI KOLOM PRODUCT")
|
| 106 |
+
print("="*80)
|
| 107 |
+
|
| 108 |
+
# Pastikan kolom 'Product' ada
|
| 109 |
+
if 'Product' not in self.df.columns:
|
| 110 |
+
print("PERINGATAN: Kolom 'Product' tidak ditemukan. Melewati langkah ini.")
|
| 111 |
+
print("="*80 + "\n")
|
| 112 |
+
return self.df
|
| 113 |
+
|
| 114 |
+
# 1. Daftar nama produk standar (sumber kebenaran)
|
| 115 |
+
standard_products = [
|
| 116 |
+
'CKP BASE', 'CMP BASE', 'BMP BASE', 'MORIGRO BASE', 'CKH BASE',
|
| 117 |
+
'CMH BASE', 'BMH BASE', 'CKR BASE', 'CMR BASE', 'BMR BASE',
|
| 118 |
+
'CGI BASE', 'NL33 BASE POWDER', 'CKS BASE', 'CHIL SCHOOL',
|
| 119 |
+
'CHIL MIL SOYA', 'CIP', 'CIP CHAMBER'
|
| 120 |
+
]
|
| 121 |
+
|
| 122 |
+
# 2. Mapping HANYA untuk kasus-kasus khusus/salah ketik yang tidak bisa ditebak
|
| 123 |
+
# Contoh: ada kata 'BASE' ganda, atau singkatan yang tidak standar.
|
| 124 |
+
special_product_mapping = {
|
| 125 |
+
'CMR BASE BASE': 'CMR BASE',
|
| 126 |
+
'CGI 6-12 BASE' : 'CGI BASE',
|
| 127 |
+
'CMH BASE': 'CMH BASE',
|
| 128 |
+
'BMH BASE': 'BMH BASE'
|
| 129 |
+
}
|
| 130 |
+
|
| 131 |
+
print(f"Produk unik sebelum standardisasi: {self.df['Product'].unique()}")
|
| 132 |
+
|
| 133 |
+
# 3. Buat kamus pencocokan (lookup map) utama secara otomatis
|
| 134 |
+
# Kunci: nama produk dalam format UPPERCASE dan tanpa spasi berlebih
|
| 135 |
+
# Nilai: nama produk standar yang benar
|
| 136 |
+
|
| 137 |
+
# Mulai dengan standard products
|
| 138 |
+
product_lookup = {prod.upper().strip(): prod for prod in standard_products}
|
| 139 |
+
|
| 140 |
+
# Timpa/tambahkan dengan special mapping. Ini memastikan kasus khusus diutamakan.
|
| 141 |
+
for key, value in special_product_mapping.items():
|
| 142 |
+
product_lookup[key.upper().strip()] = value
|
| 143 |
+
|
| 144 |
+
# 4. Gunakan metode .map() dari Pandas untuk efisiensi tinggi
|
| 145 |
+
# Ini jauh lebih cepat daripada .apply() untuk data besar
|
| 146 |
+
|
| 147 |
+
# Simpan kolom produk asli untuk perbandingan
|
| 148 |
+
original_products = self.df['Product'].copy()
|
| 149 |
+
|
| 150 |
+
# Buat series baru dengan nilai yang sudah dinormalisasi (uppercase, strip)
|
| 151 |
+
normalized_products = self.df['Product'].astype(str).str.upper().str.strip()
|
| 152 |
+
|
| 153 |
+
# Gunakan .map() untuk mengganti nilai. Nilai yang tidak ada di `product_lookup` akan menjadi NaN
|
| 154 |
+
self.df['Product'] = normalized_products.map(product_lookup)
|
| 155 |
+
|
| 156 |
+
# Isi kembali nilai yang menjadi NaN dengan nilai aslinya.
|
| 157 |
+
# Ini memastikan produk yang tidak dikenali tidak akan hilang/diubah.
|
| 158 |
+
self.df['Product'].fillna(original_products, inplace=True)
|
| 159 |
+
|
| 160 |
+
print(f"\nProduk unik setelah standardisasi: {self.df['Product'].unique()}")
|
| 161 |
+
print(f"\nJumlah setiap produk:\n{self.df['Product'].value_counts()}")
|
| 162 |
+
|
| 163 |
+
# 5. (Opsional tapi sangat direkomendasikan) Laporkan produk yang tidak berhasil distandardisasi
|
| 164 |
+
final_products_set = set(self.df['Product'].unique())
|
| 165 |
+
standard_products_set = set(standard_products)
|
| 166 |
+
|
| 167 |
+
unstandardized = final_products_set - standard_products_set
|
| 168 |
+
# Hapus None atau NaN jika ada dalam hasil
|
| 169 |
+
unstandardized = {item for item in unstandardized if pd.notna(item)}
|
| 170 |
+
|
| 171 |
+
if unstandardized:
|
| 172 |
+
print("\n" + "-"*40)
|
| 173 |
+
print(f"PERINGATAN: Ditemukan {len(unstandardized)} produk yang tidak sesuai standar:")
|
| 174 |
+
for item in unstandardized:
|
| 175 |
+
print(f" - '{item}'")
|
| 176 |
+
print("Pertimbangkan untuk menambahkannya ke `standard_products` atau `special_product_mapping`.")
|
| 177 |
+
print("-"*40)
|
| 178 |
+
|
| 179 |
+
print("\n" + "="*80)
|
| 180 |
+
print("STEP 2 SELESAI")
|
| 181 |
+
print("="*80 + "\n")
|
| 182 |
+
return self.df
|
| 183 |
+
|
| 184 |
+
# ============= STEP 3: PEMISAHAN DATA PER PRODUK =============
|
| 185 |
+
def separate_data_by_product(self):
|
| 186 |
+
"""
|
| 187 |
+
Step 3: Pemisahan data berdasarkan produk
|
| 188 |
+
"""
|
| 189 |
+
print("\n" + "="*80)
|
| 190 |
+
print("STEP 3: PEMISAHAN DATA PER PRODUK")
|
| 191 |
+
print("="*80)
|
| 192 |
+
|
| 193 |
+
unique_products = self.df['Product'].unique()
|
| 194 |
+
print(f"Memisahkan data untuk {len(unique_products)} produk...")
|
| 195 |
+
|
| 196 |
+
for product in unique_products:
|
| 197 |
+
self.product_dataframes[product] = self.df[self.df['Product'] == product].copy()
|
| 198 |
+
print(f"\n{product}: {len(self.product_dataframes[product])} baris")
|
| 199 |
+
|
| 200 |
+
# Tampilkan statistik deskriptif
|
| 201 |
+
print("\n" + "-"*50)
|
| 202 |
+
print("STATISTIK DESKRIPTIF - DATA KESELURUHAN")
|
| 203 |
+
print("-"*50)
|
| 204 |
+
print(self.df.describe())
|
| 205 |
+
|
| 206 |
+
print("\n" + "-"*50)
|
| 207 |
+
print("INFO DATA KESELURUHAN")
|
| 208 |
+
print("-"*50)
|
| 209 |
+
print(self.df.info())
|
| 210 |
+
|
| 211 |
+
# Statistik per produk
|
| 212 |
+
for product, df_product in self.product_dataframes.items():
|
| 213 |
+
print("\n" + "-"*50)
|
| 214 |
+
print(f"STATISTIK DESKRIPTIF - {product}")
|
| 215 |
+
print("-"*50)
|
| 216 |
+
print(df_product.describe())
|
| 217 |
+
|
| 218 |
+
print(f"\nINFO - {product}")
|
| 219 |
+
print(df_product.info())
|
| 220 |
+
|
| 221 |
+
# ============= STEP 4: IDENTIFIKASI ANOMALI DATA =============
|
| 222 |
+
def identify_anomalies(self):
|
| 223 |
+
"""
|
| 224 |
+
Step 4: Identifikasi anomali berdasarkan aturan teknis
|
| 225 |
+
"""
|
| 226 |
+
print("\n" + "="*80)
|
| 227 |
+
print("STEP 4: IDENTIFIKASI ANOMALI DATA")
|
| 228 |
+
print("="*80)
|
| 229 |
+
|
| 230 |
+
anomaly_rules = {
|
| 231 |
+
'D101330TT': {'min': 20, 'max': 130, 'zero_anomaly': True},
|
| 232 |
+
'D102265TIC_PV': {'min': 20, 'zero_anomaly': True},
|
| 233 |
+
'D102265TIC_CV': {'zero_allowed_products': ['CIP', 'CIP CHAMBER']},
|
| 234 |
+
'D102266TIC': {'zero_anomaly': True}
|
| 235 |
+
}
|
| 236 |
+
|
| 237 |
+
anomalies = []
|
| 238 |
+
|
| 239 |
+
for product, df_product in self.product_dataframes.items():
|
| 240 |
+
print(f"\nMemeriksa anomali untuk produk: {product}")
|
| 241 |
+
|
| 242 |
+
for column, rules in anomaly_rules.items():
|
| 243 |
+
if column not in df_product.columns:
|
| 244 |
+
continue
|
| 245 |
+
|
| 246 |
+
# Cek nilai 0
|
| 247 |
+
if 'zero_anomaly' in rules and rules['zero_anomaly']:
|
| 248 |
+
zero_count = (df_product[column] == 0).sum()
|
| 249 |
+
if zero_count > 0:
|
| 250 |
+
anomalies.append({
|
| 251 |
+
'Product': product,
|
| 252 |
+
'Column': column,
|
| 253 |
+
'Anomaly': 'Nilai 0',
|
| 254 |
+
'Count': zero_count
|
| 255 |
+
})
|
| 256 |
+
print(f" - {column}: Ditemukan {zero_count} nilai 0 (anomali)")
|
| 257 |
+
|
| 258 |
+
# Cek nilai 0 untuk D102265TIC_CV
|
| 259 |
+
if 'zero_allowed_products' in rules:
|
| 260 |
+
if product not in rules['zero_allowed_products']:
|
| 261 |
+
zero_count = (df_product[column] == 0).sum()
|
| 262 |
+
if zero_count > 0:
|
| 263 |
+
anomalies.append({
|
| 264 |
+
'Product': product,
|
| 265 |
+
'Column': column,
|
| 266 |
+
'Anomaly': 'Nilai 0 (tidak diizinkan untuk produk ini)',
|
| 267 |
+
'Count': zero_count
|
| 268 |
+
})
|
| 269 |
+
print(f" - {column}: Ditemukan {zero_count} nilai 0 (anomali untuk produk non-CIP)")
|
| 270 |
+
|
| 271 |
+
# Cek nilai minimum
|
| 272 |
+
if 'min' in rules:
|
| 273 |
+
below_min = (df_product[column] < rules['min']).sum()
|
| 274 |
+
if below_min > 0:
|
| 275 |
+
anomalies.append({
|
| 276 |
+
'Product': product,
|
| 277 |
+
'Column': column,
|
| 278 |
+
'Anomaly': f'Nilai < {rules["min"]}',
|
| 279 |
+
'Count': below_min
|
| 280 |
+
})
|
| 281 |
+
print(f" - {column}: Ditemukan {below_min} nilai < {rules['min']}")
|
| 282 |
+
|
| 283 |
+
# Cek nilai maksimum
|
| 284 |
+
if 'max' in rules:
|
| 285 |
+
above_max = (df_product[column] > rules['max']).sum()
|
| 286 |
+
if above_max > 0:
|
| 287 |
+
anomalies.append({
|
| 288 |
+
'Product': product,
|
| 289 |
+
'Column': column,
|
| 290 |
+
'Anomaly': f'Nilai > {rules["max"]}',
|
| 291 |
+
'Count': above_max
|
| 292 |
+
})
|
| 293 |
+
print(f" - {column}: Ditemukan {above_max} nilai > {rules['max']}")
|
| 294 |
+
|
| 295 |
+
if anomalies:
|
| 296 |
+
anomaly_df = pd.DataFrame(anomalies)
|
| 297 |
+
print("\n" + "-"*50)
|
| 298 |
+
print("RINGKASAN ANOMALI")
|
| 299 |
+
print("-"*50)
|
| 300 |
+
print(anomaly_df.to_string())
|
| 301 |
+
else:
|
| 302 |
+
print("\nTidak ditemukan anomali berdasarkan aturan yang ditetapkan.")
|
| 303 |
+
|
| 304 |
+
# ============= STEP 5: VALIDASI KOLOM DRIER_ON_PRODUCT =============
|
| 305 |
+
def validate_drier_on_product(self):
|
| 306 |
+
"""
|
| 307 |
+
Step 5: Validasi kolom Drier_On_Product
|
| 308 |
+
"""
|
| 309 |
+
print("\n" + "="*80)
|
| 310 |
+
print("STEP 5: VALIDASI KOLOM DRIER_ON_PRODUCT")
|
| 311 |
+
print("="*80)
|
| 312 |
+
|
| 313 |
+
production_products = [
|
| 314 |
+
'CKP BASE', 'CMP BASE', 'BMP BASE', 'MORIGRO BASE', 'CKH BASE',
|
| 315 |
+
'CMH BASE', 'BMH BASE', 'CKR BASE', 'CMR BASE', 'BMR BASE',
|
| 316 |
+
'CGI BASE', 'NL33 BASE POWDER', 'CKS BASE', 'CHIL SCHOOL',
|
| 317 |
+
'CHIL MIL SOYA'
|
| 318 |
+
]
|
| 319 |
+
|
| 320 |
+
cip_products = ['CIP', 'CIP CHAMBER']
|
| 321 |
+
|
| 322 |
+
validation_errors = []
|
| 323 |
+
|
| 324 |
+
for product in self.df['Product'].unique():
|
| 325 |
+
df_product = self.df[self.df['Product'] == product]
|
| 326 |
+
|
| 327 |
+
if product in production_products:
|
| 328 |
+
# Harus 1
|
| 329 |
+
wrong_values = df_product[df_product['Drier_On_Product'] != 1]
|
| 330 |
+
if len(wrong_values) > 0:
|
| 331 |
+
validation_errors.append({
|
| 332 |
+
'Product': product,
|
| 333 |
+
'Expected': 1,
|
| 334 |
+
'Wrong_Count': len(wrong_values)
|
| 335 |
+
})
|
| 336 |
+
print(f"ERROR: {product} memiliki {len(wrong_values)} baris dengan Drier_On_Product != 1")
|
| 337 |
+
|
| 338 |
+
elif product in cip_products:
|
| 339 |
+
# Harus 0
|
| 340 |
+
wrong_values = df_product[df_product['Drier_On_Product'] != 0]
|
| 341 |
+
if len(wrong_values) > 0:
|
| 342 |
+
validation_errors.append({
|
| 343 |
+
'Product': product,
|
| 344 |
+
'Expected': 0,
|
| 345 |
+
'Wrong_Count': len(wrong_values)
|
| 346 |
+
})
|
| 347 |
+
print(f"ERROR: {product} memiliki {len(wrong_values)} baris dengan Drier_On_Product != 0")
|
| 348 |
+
|
| 349 |
+
if not validation_errors:
|
| 350 |
+
print("✓ Semua nilai Drier_On_Product sesuai dengan ketentuan")
|
| 351 |
+
else:
|
| 352 |
+
error_df = pd.DataFrame(validation_errors)
|
| 353 |
+
print("\nRingkasan Error Validasi:")
|
| 354 |
+
print(error_df)
|
| 355 |
+
|
| 356 |
+
# ============= STEP 6: CEK MISSING VALUES DAN DUPLIKASI =============
|
| 357 |
+
def check_missing_and_duplicates(self):
|
| 358 |
+
"""
|
| 359 |
+
Step 6: Periksa missing values dan hapus duplikasi
|
| 360 |
+
"""
|
| 361 |
+
print("\n" + "="*80)
|
| 362 |
+
print("STEP 6: CEK MISSING VALUES DAN DUPLIKASI")
|
| 363 |
+
print("="*80)
|
| 364 |
+
|
| 365 |
+
# Cek missing values
|
| 366 |
+
print("Missing Values per Kolom:")
|
| 367 |
+
missing_counts = self.df.isnull().sum()
|
| 368 |
+
print(missing_counts[missing_counts > 0] if any(missing_counts > 0) else "Tidak ada missing values")
|
| 369 |
+
|
| 370 |
+
# Cek duplikasi berdasarkan Date_time
|
| 371 |
+
duplicates = self.df[self.df.duplicated(subset=['Date_time'], keep=False)]
|
| 372 |
+
print(f"\nJumlah baris duplikat berdasarkan Date_time: {len(duplicates)}")
|
| 373 |
+
|
| 374 |
+
if len(duplicates) > 0:
|
| 375 |
+
print("Menghapus duplikasi...")
|
| 376 |
+
self.df = self.df.drop_duplicates(subset=['Date_time'], keep='first')
|
| 377 |
+
print(f"Shape setelah menghapus duplikasi: {self.df.shape}")
|
| 378 |
+
|
| 379 |
+
# Update product dataframes
|
| 380 |
+
for product in self.product_dataframes.keys():
|
| 381 |
+
self.product_dataframes[product] = self.df[self.df['Product'] == product].copy()
|
| 382 |
+
|
| 383 |
+
# ============= STEP 7: PERHITUNGAN DURASI PRODUKSI =============
|
| 384 |
+
def calculate_production_duration(self):
|
| 385 |
+
"""
|
| 386 |
+
Step 7: Hitung durasi produksi untuk setiap produk berdasarkan segmen produksi yang berkelanjutan.
|
| 387 |
+
|
| 388 |
+
Logika:
|
| 389 |
+
1. Data diurutkan berdasarkan waktu.
|
| 390 |
+
2. Sebuah "segmen" produksi diidentifikasi sebagai blok baris yang berurutan
|
| 391 |
+
di mana nama produknya sama.
|
| 392 |
+
3. Jika nama produk pada baris saat ini berbeda dari baris sebelumnya,
|
| 393 |
+
maka itu dianggap sebagai awal dari segmen baru.
|
| 394 |
+
4. Durasi dihitung untuk setiap segmen (end_time - start_time).
|
| 395 |
+
5. Total durasi untuk satu produk adalah jumlah dari semua durasi segmennya.
|
| 396 |
+
"""
|
| 397 |
+
print("\n" + "="*80)
|
| 398 |
+
print("STEP 7: PERHITUNGAN DURASI PRODUKSI (METODE SEGMENTASI)")
|
| 399 |
+
print("="*80)
|
| 400 |
+
|
| 401 |
+
# Pastikan tipe data dan urutan sudah benar
|
| 402 |
+
try:
|
| 403 |
+
self.df['Date_time'] = pd.to_datetime(self.df['Date_time'])
|
| 404 |
+
except Exception as e:
|
| 405 |
+
print(f"Error saat konversi 'Date_time': {e}")
|
| 406 |
+
return
|
| 407 |
+
|
| 408 |
+
if self.df.empty:
|
| 409 |
+
print("DataFrame kosong, tidak ada durasi untuk dihitung.")
|
| 410 |
+
return
|
| 411 |
+
|
| 412 |
+
# Urutkan dataframe berdasarkan waktu, ini krusial untuk logika segmentasi
|
| 413 |
+
df_sorted = self.df.sort_values('Date_time').copy()
|
| 414 |
+
|
| 415 |
+
# --- Logika Inti: Identifikasi Segmen Produksi ---
|
| 416 |
+
# Buat kolom 'segment_id' yang akan unik untuk setiap blok produksi yang berkelanjutan.
|
| 417 |
+
# .shift() membandingkan produk di baris saat ini dengan baris sebelumnya.
|
| 418 |
+
# .cumsum() akan mengakumulasi nilai (True=1, False=0), sehingga menciptakan ID unik untuk setiap segmen.
|
| 419 |
+
df_sorted['segment_id'] = (df_sorted['Product'] != df_sorted['Product'].shift()).cumsum()
|
| 420 |
+
|
| 421 |
+
# Kelompokkan berdasarkan Produk dan ID Segmen untuk mendapatkan start dan end time setiap segmen
|
| 422 |
+
production_segments = df_sorted.groupby(['Product', 'segment_id']).agg(
|
| 423 |
+
Start_Time=('Date_time', 'min'),
|
| 424 |
+
End_Time=('Date_time', 'max'),
|
| 425 |
+
Data_Points=('Date_time', 'count')
|
| 426 |
+
).reset_index()
|
| 427 |
+
|
| 428 |
+
# Hitung durasi untuk setiap segmen
|
| 429 |
+
production_segments['Duration'] = production_segments['End_Time'] - production_segments['Start_Time']
|
| 430 |
+
|
| 431 |
+
# Filter hanya untuk produk produksi (bukan CIP)
|
| 432 |
+
production_segments_filtered = production_segments[
|
| 433 |
+
~production_segments['Product'].isin(['CIP', 'CIP CHAMBER'])
|
| 434 |
+
].copy()
|
| 435 |
+
|
| 436 |
+
if production_segments_filtered.empty:
|
| 437 |
+
print("Tidak ada data produksi (non-CIP) untuk dihitung durasinya.")
|
| 438 |
+
return
|
| 439 |
+
|
| 440 |
+
# Hitung total durasi dengan menjumlahkan durasi dari semua segmen per produk
|
| 441 |
+
total_durations = production_segments_filtered.groupby('Product')['Duration'].sum().reset_index()
|
| 442 |
+
|
| 443 |
+
# Konversi total durasi ke jam
|
| 444 |
+
total_durations['Total_Duration_Hours'] = round(total_durations['Duration'].dt.total_seconds() / 3600, 2)
|
| 445 |
+
|
| 446 |
+
# Gabungkan dengan jumlah data points
|
| 447 |
+
total_data_points = production_segments_filtered.groupby('Product')['Data_Points'].sum().reset_index()
|
| 448 |
+
summary_df = pd.merge(total_durations, total_data_points, on='Product')
|
| 449 |
+
|
| 450 |
+
print("--- RINGKASAN TOTAL DURASI PRODUKSI PER PRODUK ---")
|
| 451 |
+
print(summary_df[['Product', 'Total_Duration_Hours', 'Data_Points']].to_string(index=False))
|
| 452 |
+
|
| 453 |
+
print("\n" + "-"*80)
|
| 454 |
+
print("--- DETAIL SEGMEN PRODUKSI ---")
|
| 455 |
+
# Tampilkan detail setiap segmen untuk setiap produk
|
| 456 |
+
for product in summary_df['Product'].unique():
|
| 457 |
+
print(f"\nProduk: {product}")
|
| 458 |
+
product_segment_details = production_segments_filtered[production_segments_filtered['Product'] == product].copy()
|
| 459 |
+
|
| 460 |
+
# Konversi durasi segmen ke menit untuk keterbacaan
|
| 461 |
+
product_segment_details['Duration_Minutes'] = round(product_segment_details['Duration'].dt.total_seconds() / 60, 2)
|
| 462 |
+
|
| 463 |
+
print(product_segment_details[[
|
| 464 |
+
'Start_Time',
|
| 465 |
+
'End_Time',
|
| 466 |
+
'Duration_Minutes',
|
| 467 |
+
'Data_Points'
|
| 468 |
+
]].to_string(index=False))
|
| 469 |
+
|
| 470 |
+
# ============= STEP 8: PEMBUATAN KOLOM FIXED_ROUNDED_TIME =============
|
| 471 |
+
def create_fixed_rounded_time(self):
|
| 472 |
+
"""
|
| 473 |
+
Step 8: Buat kolom fixed_rounded_time jika belum ada
|
| 474 |
+
"""
|
| 475 |
+
print("\n" + "="*80)
|
| 476 |
+
print("STEP 8: PEMBUATAN KOLOM FIXED_ROUNDED_TIME")
|
| 477 |
+
print("="*80)
|
| 478 |
+
|
| 479 |
+
if 'fixed_rounded_time' not in self.df.columns:
|
| 480 |
+
print("Membuat kolom fixed_rounded_time...")
|
| 481 |
+
self.df['Date_time'] = pd.to_datetime(self.df['Date_time'])
|
| 482 |
+
self.df['fixed_rounded_time'] = (self.df['Date_time'] + pd.Timedelta(hours=1)).dt.floor('H')
|
| 483 |
+
|
| 484 |
+
print("Sample hasil:")
|
| 485 |
+
print(self.df[['Date_time', 'fixed_rounded_time']].head(10))
|
| 486 |
+
|
| 487 |
+
# Update product dataframes
|
| 488 |
+
for product in self.product_dataframes.keys():
|
| 489 |
+
self.product_dataframes[product] = self.df[self.df['Product'] == product].copy()
|
| 490 |
+
else:
|
| 491 |
+
print("Kolom fixed_rounded_time sudah ada")
|
| 492 |
+
|
| 493 |
+
# ============= STEP 9: PERHITUNGAN JUMLAH MENIT =============
|
| 494 |
+
def calculate_minutes_per_hour(self):
|
| 495 |
+
"""
|
| 496 |
+
Step 9: Hitung jumlah data per jam berdasarkan fixed_rounded_time
|
| 497 |
+
"""
|
| 498 |
+
print("\n" + "="*80)
|
| 499 |
+
print("STEP 9: PERHITUNGAN JUMLAH DATA PER JAM")
|
| 500 |
+
print("="*80)
|
| 501 |
+
|
| 502 |
+
if 'fixed_rounded_time' not in self.df.columns:
|
| 503 |
+
print("ERROR: Kolom fixed_rounded_time tidak ditemukan!")
|
| 504 |
+
return
|
| 505 |
+
|
| 506 |
+
# Hitung jumlah data per jam
|
| 507 |
+
jumlah_data_per_jam = self.df.groupby('fixed_rounded_time').size()
|
| 508 |
+
jumlah_data_per_jam_df = jumlah_data_per_jam.reset_index(name='Jumlah Data Per Jam')
|
| 509 |
+
|
| 510 |
+
# Filter data dengan jumlah kurang dari 60
|
| 511 |
+
jumlah_data_kurang_60 = jumlah_data_per_jam_df[jumlah_data_per_jam_df['Jumlah Data Per Jam'] <= 60]
|
| 512 |
+
|
| 513 |
+
# Urutkan dari jumlah terkecil ke terbesar
|
| 514 |
+
jumlah_data_kurang_60 = jumlah_data_kurang_60.sort_values(by='Jumlah Data Per Jam', ascending=True)
|
| 515 |
+
|
| 516 |
+
print(f"Jam dengan data < 60 menit ({len(jumlah_data_kurang_60)} jam):")
|
| 517 |
+
pd.set_option("display.max_rows", None)
|
| 518 |
+
print(jumlah_data_kurang_60.to_string())
|
| 519 |
+
pd.set_option("display.max_rows", 10)
|
| 520 |
+
|
| 521 |
+
# ============= STEP 10: VISUALISASI DATA =============
|
| 522 |
+
def create_line_plots(self, show_all_products=True, show_overall=True):
|
| 523 |
+
"""
|
| 524 |
+
Step 10: Buat line plot untuk visualisasi data
|
| 525 |
+
"""
|
| 526 |
+
print("\n" + "="*80)
|
| 527 |
+
print("STEP 10: VISUALISASI DATA (LINE PLOTS)")
|
| 528 |
+
print("="*80)
|
| 529 |
+
|
| 530 |
+
numeric_columns = self.df.select_dtypes(include=[np.number]).columns
|
| 531 |
+
numeric_columns = [col for col in numeric_columns if col not in ['Drier_On_Product']]
|
| 532 |
+
|
| 533 |
+
# Plot untuk keseluruhan data
|
| 534 |
+
if show_overall:
|
| 535 |
+
print("\nMembuat plot untuk keseluruhan produk...")
|
| 536 |
+
df_plot = self.df[self.df['Drier_On_Product'] == 1].copy()
|
| 537 |
+
|
| 538 |
+
if len(df_plot) > 0:
|
| 539 |
+
df_plot = df_plot.sort_values('Date_time')
|
| 540 |
+
|
| 541 |
+
for column in numeric_columns:
|
| 542 |
+
if column in df_plot.columns:
|
| 543 |
+
plt.figure(figsize=(30, 5))
|
| 544 |
+
plt.plot(df_plot['Date_time'], df_plot[column], marker='o', markersize=2, label=column)
|
| 545 |
+
plt.title(f'Line Plot of {column} Over Time - All Products', fontsize=14)
|
| 546 |
+
plt.xlabel('Date_time')
|
| 547 |
+
plt.ylabel(column)
|
| 548 |
+
plt.xticks(rotation=45)
|
| 549 |
+
plt.legend()
|
| 550 |
+
plt.grid(True, alpha=0.3)
|
| 551 |
+
plt.tight_layout()
|
| 552 |
+
plt.show()
|
| 553 |
+
|
| 554 |
+
# Plot untuk setiap produk
|
| 555 |
+
if show_all_products:
|
| 556 |
+
for product, df_product in self.product_dataframes.items():
|
| 557 |
+
if product not in ['CIP', 'CIP CHAMBER']:
|
| 558 |
+
print(f"\nMembuat plot untuk produk: {product}")
|
| 559 |
+
df_plot = df_product[df_product['Drier_On_Product'] == 1].copy()
|
| 560 |
+
|
| 561 |
+
if len(df_plot) > 0:
|
| 562 |
+
df_plot = df_plot.sort_values('Date_time')
|
| 563 |
+
|
| 564 |
+
for column in numeric_columns:
|
| 565 |
+
if column in df_plot.columns:
|
| 566 |
+
plt.figure(figsize=(20, 4))
|
| 567 |
+
plt.plot(df_plot['Date_time'], df_plot[column], marker='o', markersize=3, label=column)
|
| 568 |
+
plt.title(f'{product} - {column} Over Time', fontsize=12)
|
| 569 |
+
plt.xlabel('Date_time')
|
| 570 |
+
plt.ylabel(column)
|
| 571 |
+
plt.xticks(rotation=45)
|
| 572 |
+
plt.legend()
|
| 573 |
+
plt.grid(True, alpha=0.3)
|
| 574 |
+
plt.tight_layout()
|
| 575 |
+
plt.show()
|
| 576 |
+
|
| 577 |
+
# ============= STEP 11: IDENTIFIKASI OUTLIERS =============
|
| 578 |
+
def identify_outliers(self, show_plots=True):
|
| 579 |
+
"""
|
| 580 |
+
Step 11: Identifikasi outliers menggunakan metode IQR
|
| 581 |
+
"""
|
| 582 |
+
print("\n" + "="*80)
|
| 583 |
+
print("STEP 11: IDENTIFIKASI OUTLIERS")
|
| 584 |
+
print("="*80)
|
| 585 |
+
|
| 586 |
+
def analyze_outliers(dataframe, product_name="Overall"):
|
| 587 |
+
"""Analisis outliers untuk dataframe tertentu"""
|
| 588 |
+
|
| 589 |
+
df_copy = dataframe.copy()
|
| 590 |
+
df_copy['Date_time'] = pd.to_datetime(df_copy['Date_time'])
|
| 591 |
+
drier_on_data = df_copy[df_copy['Drier_On_Product'] == 1].copy()
|
| 592 |
+
|
| 593 |
+
if drier_on_data.empty:
|
| 594 |
+
print(f"Tidak ada data dengan Drier_On_Product == 1 untuk {product_name}")
|
| 595 |
+
return None, None
|
| 596 |
+
|
| 597 |
+
print(f"\n{'='*60}")
|
| 598 |
+
print(f"Analisis Outliers - {product_name}")
|
| 599 |
+
print(f"Total data yang dianalisis: {len(drier_on_data)} baris")
|
| 600 |
+
print(f"{'='*60}")
|
| 601 |
+
|
| 602 |
+
all_stats_data = []
|
| 603 |
+
list_of_outliers = []
|
| 604 |
+
|
| 605 |
+
numeric_columns = drier_on_data.select_dtypes(include=np.number).columns.drop('Drier_On_Product', errors='ignore')
|
| 606 |
+
|
| 607 |
+
for column in numeric_columns:
|
| 608 |
+
if column in drier_on_data.columns:
|
| 609 |
+
param_data = drier_on_data[column].dropna()
|
| 610 |
+
|
| 611 |
+
if len(param_data) > 0:
|
| 612 |
+
Q1 = param_data.quantile(0.25)
|
| 613 |
+
Q3 = param_data.quantile(0.75)
|
| 614 |
+
IQR = Q3 - Q1
|
| 615 |
+
lower_bound = Q1 - 1.5 * IQR
|
| 616 |
+
upper_bound = Q3 + 1.5 * IQR
|
| 617 |
+
|
| 618 |
+
outliers = param_data[(param_data < lower_bound) | (param_data > upper_bound)]
|
| 619 |
+
has_outliers = not outliers.empty
|
| 620 |
+
|
| 621 |
+
mean_val = param_data.mean()
|
| 622 |
+
median_val = param_data.median()
|
| 623 |
+
std_val = param_data.std()
|
| 624 |
+
chosen_val = median_val if has_outliers else mean_val
|
| 625 |
+
|
| 626 |
+
all_stats_data.append({
|
| 627 |
+
'Parameter': column,
|
| 628 |
+
'Mean': mean_val,
|
| 629 |
+
'Median': median_val,
|
| 630 |
+
'Std_Dev': std_val,
|
| 631 |
+
'Batas_Bawah': lower_bound,
|
| 632 |
+
'Batas_Atas': upper_bound,
|
| 633 |
+
'Has_Outliers': has_outliers,
|
| 634 |
+
'Outliers_Count': len(outliers),
|
| 635 |
+
'Chosen_Value': chosen_val
|
| 636 |
+
})
|
| 637 |
+
|
| 638 |
+
if has_outliers:
|
| 639 |
+
outlier_mask = (drier_on_data[column] < lower_bound) | (drier_on_data[column] > upper_bound)
|
| 640 |
+
outlier_rows = drier_on_data[outlier_mask]
|
| 641 |
+
|
| 642 |
+
for index, row in outlier_rows.iterrows():
|
| 643 |
+
list_of_outliers.append({
|
| 644 |
+
'Tanggal dan Jam': row['Date_time'],
|
| 645 |
+
'Kolom Outliers': column,
|
| 646 |
+
'Nilai Outliers': row[column],
|
| 647 |
+
'Produk': row['Product'] if 'Product' in row else product_name
|
| 648 |
+
})
|
| 649 |
+
|
| 650 |
+
# Visualisasi jika diminta
|
| 651 |
+
if show_plots and has_outliers:
|
| 652 |
+
fig, axes = plt.subplots(1, 2, figsize=(18, 5))
|
| 653 |
+
|
| 654 |
+
# Histogram
|
| 655 |
+
axes[0].hist(param_data, bins=30, edgecolor='black', alpha=0.7)
|
| 656 |
+
axes[0].axvline(mean_val, color='green', linestyle='--', label=f'Mean: {mean_val:.2f}')
|
| 657 |
+
axes[0].axvline(median_val, color='red', linestyle='--', label=f'Median: {median_val:.2f}')
|
| 658 |
+
axes[0].set_title(f'Distribution - {column}')
|
| 659 |
+
axes[0].legend()
|
| 660 |
+
|
| 661 |
+
# Time series with outliers
|
| 662 |
+
axes[1].plot(range(len(param_data)), param_data.values, 'b-', alpha=0.5)
|
| 663 |
+
axes[1].axhline(upper_bound, color='purple', linestyle='--', label=f'Upper: {upper_bound:.2f}')
|
| 664 |
+
axes[1].axhline(lower_bound, color='orange', linestyle='--', label=f'Lower: {lower_bound:.2f}')
|
| 665 |
+
|
| 666 |
+
if has_outliers:
|
| 667 |
+
outlier_indices = []
|
| 668 |
+
outlier_values = []
|
| 669 |
+
for i, (idx, val) in enumerate(param_data.items()):
|
| 670 |
+
if val < lower_bound or val > upper_bound:
|
| 671 |
+
outlier_indices.append(i)
|
| 672 |
+
outlier_values.append(val)
|
| 673 |
+
axes[1].scatter(outlier_indices, outlier_values, color='red', s=50, zorder=5, label='Outliers')
|
| 674 |
+
|
| 675 |
+
axes[1].set_title(f'Time Series - {column}')
|
| 676 |
+
axes[1].legend()
|
| 677 |
+
|
| 678 |
+
plt.suptitle(f'{product_name}: {column}', fontsize=14)
|
| 679 |
+
plt.tight_layout()
|
| 680 |
+
plt.show()
|
| 681 |
+
|
| 682 |
+
result_df = pd.DataFrame(all_stats_data) if all_stats_data else None
|
| 683 |
+
outliers_df = pd.DataFrame(list_of_outliers) if list_of_outliers else None
|
| 684 |
+
|
| 685 |
+
if outliers_df is not None and not outliers_df.empty:
|
| 686 |
+
outliers_df = outliers_df.sort_values(by='Tanggal dan Jam').reset_index(drop=True)
|
| 687 |
+
|
| 688 |
+
return result_df, outliers_df
|
| 689 |
+
|
| 690 |
+
# Analisis untuk keseluruhan data
|
| 691 |
+
print("\n" + "="*70)
|
| 692 |
+
print("ANALISIS OUTLIERS - KESELURUHAN DATA")
|
| 693 |
+
print("="*70)
|
| 694 |
+
overall_stats, overall_outliers = analyze_outliers(self.df, "OVERALL")
|
| 695 |
+
|
| 696 |
+
if overall_stats is not None:
|
| 697 |
+
print("\nRingkasan Statistik - Keseluruhan:")
|
| 698 |
+
print(overall_stats.to_string())
|
| 699 |
+
|
| 700 |
+
if overall_outliers is not None and not overall_outliers.empty:
|
| 701 |
+
print(f"\nTotal Outliers Keseluruhan: {len(overall_outliers)}")
|
| 702 |
+
print("\nSample Outliers (10 pertama):")
|
| 703 |
+
print(overall_outliers.head(10).to_string())
|
| 704 |
+
|
| 705 |
+
# Analisis untuk setiap produk
|
| 706 |
+
for product, df_product in self.product_dataframes.items():
|
| 707 |
+
if product not in ['CIP', 'CIP CHAMBER']:
|
| 708 |
+
stats, outliers = analyze_outliers(df_product, product)
|
| 709 |
+
|
| 710 |
+
if stats is not None:
|
| 711 |
+
print(f"\n{'='*50}")
|
| 712 |
+
print(f"Ringkasan Statistik - {product}:")
|
| 713 |
+
print(stats.to_string())
|
| 714 |
+
|
| 715 |
+
if outliers is not None and not outliers.empty:
|
| 716 |
+
print(f"\nTotal Outliers {product}: {len(outliers)}")
|
| 717 |
+
|
| 718 |
+
# ============= MAIN PIPELINE EXECUTION =============
def run_full_pipeline(self, show_visualizations=True):
    """Run the complete EDA pipeline from column checks to outlier detection.

    Parameters
    ----------
    show_visualizations : bool
        When True, render line plots and outlier plots along the way.

    Returns
    -------
    tuple(pd.DataFrame, dict)
        The processed dataframe and the per-product dataframe mapping.
    """
    banner = "=" * 80
    print("\n" + banner)
    print(" " * 20 + "SPRAY DRYER EDA PIPELINE")
    print(" " * 25 + "STARTING ANALYSIS")
    print(banner)

    try:
        # Steps 1-9: data preparation and validation, in a fixed order.
        self.check_and_fix_columns()
        self.validate_product_names()
        self.separate_data_by_product()
        self.identify_anomalies()
        self.validate_drier_on_product()
        self.check_missing_and_duplicates()
        self.calculate_production_duration()
        self.create_fixed_rounded_time()
        self.calculate_minutes_per_hour()

        # Step 10: optional visual inspection of the series.
        if show_visualizations:
            self.create_line_plots(show_all_products=False, show_overall=True)

        # Step 11: IQR-based outlier identification.
        self.identify_outliers(show_plots=show_visualizations)

        print("\n" + banner)
        print(" " * 25 + "PIPELINE COMPLETED SUCCESSFULLY")
        print(banner)

        return self.df, self.product_dataframes

    except Exception as e:
        # Surface the failure, then let the caller decide how to handle it.
        print(f"\nERROR dalam pipeline: {str(e)}")
        raise
|
| 772 |
+
|
| 773 |
+
def get_summary(self):
    """Assemble and print a compact summary report of the analysed dataset.

    Returns
    -------
    dict
        Row/column counts, unique products, date range, per-column missing
        values, and per-product row counts.
    """
    dates = self.df['Date_time']
    products = self.df['Product']

    summary = {
        'total_rows': len(self.df),
        'total_columns': len(self.df.columns),
        'unique_products': products.nunique(),
        'date_range': {
            'start': dates.min(),
            'end': dates.max()
        },
        'missing_values': self.df.isnull().sum().to_dict(),
        'product_counts': products.value_counts().to_dict()
    }

    sep = "=" * 50
    print("\n" + sep)
    print("📊 DATA SUMMARY")
    print(sep)
    print(f"{'Total rows':20}: {summary['total_rows']:,}")
    print(f"{'Total columns':20}: {summary['total_columns']}")
    print(f"{'Unique products':20}: {summary['unique_products']}")
    print(f"{'Date range':20}: {summary['date_range']['start']} → {summary['date_range']['end']}")

    print("\n🔍 Missing values per column")
    print("-" * 50)
    for col, val in summary['missing_values'].items():
        print(f"{col:25} : {val}")

    print("\n📦 Product counts")
    print("-" * 50)
    for prod, count in summary['product_counts'].items():
        print(f"{prod:25} : {count:,}")

    return summary
|
| 808 |
+
|
| 809 |
+
# ======================================================================
|
| 810 |
+
# HELPER FUNCTIONS UNTUK DASHBOARD STREAMLIT (EDA)
|
| 811 |
+
# ======================================================================
|
| 812 |
+
|
| 813 |
+
def compute_eda_summary(df: pd.DataFrame,
                        date_col: str = "Date_time",
                        product_col: str = "Product") -> dict:
    """Build the headline dataset metrics shown on the dashboard cards.

    Missing ``date_col`` yields NaT bounds; missing ``product_col`` yields
    an empty product breakdown. The input frame is never mutated.
    """
    data = df.copy()

    # Date range (NaT when the date column is absent).
    if date_col in data.columns:
        data[date_col] = pd.to_datetime(data[date_col], errors="coerce")
        start, end = data[date_col].min(), data[date_col].max()
    else:
        start, end = pd.NaT, pd.NaT

    # Product breakdown (empty when the product column is absent).
    if product_col in data.columns:
        counts = data[product_col].value_counts().to_dict()
        n_products = int(data[product_col].nunique())
    else:
        counts, n_products = {}, 0

    return {
        "total_rows": len(data),
        "total_columns": data.shape[1],
        "date_min": start,
        "date_max": end,
        "total_missing": int(data.isna().sum().sum()),
        "duplicate_rows": int(data.duplicated().sum()),
        "unique_products": n_products,
        "product_counts": counts,
    }
|
| 850 |
+
|
| 851 |
+
|
| 852 |
+
def compute_anomaly_table(df: pd.DataFrame,
                          product_col: str = "Product") -> pd.DataFrame:
    """Apply the technical validation rules per product and tabulate hits.

    Returns a dataframe with columns ``Product | Column | Anomaly | Count``
    (empty when ``product_col`` is missing or no rule fires).
    """
    out_cols = ["Product", "Column", "Anomaly", "Count"]
    if product_col not in df.columns:
        return pd.DataFrame(columns=out_cols)

    # Static per-sensor validation rules.
    rules_by_column = {
        'D101330TT': {'min': 20, 'max': 130, 'zero_anomaly': True},
        'D102265TIC_PV': {'min': 20, 'zero_anomaly': True},
        'D102265TIC_CV': {'zero_allowed_products': ['CIP', 'CIP CHAMBER']},
        'D102266TIC': {'zero_anomaly': True}
    }

    records = []

    def _record(product, column, label, count):
        # Only keep rules that actually fired.
        if count > 0:
            records.append({
                "Product": product,
                "Column": column,
                "Anomaly": label,
                "Count": int(count),
            })

    for product, sub in df.groupby(product_col):
        for column, rule in rules_by_column.items():
            if column not in sub.columns:
                continue
            values = sub[column]

            # Zero readings that are always anomalous for this sensor.
            if rule.get("zero_anomaly", False):
                _record(product, column, "Nilai 0", (values == 0).sum())

            # Zero readings only permitted for specific products.
            allowed = rule.get("zero_allowed_products")
            if allowed is not None and product not in allowed:
                _record(product, column,
                        "Nilai 0 (tidak diizinkan untuk produk ini)",
                        (values == 0).sum())

            # Range checks.
            if "min" in rule:
                _record(product, column, f"Nilai < {rule['min']}",
                        (values < rule["min"]).sum())
            if "max" in rule:
                _record(product, column, f"Nilai > {rule['max']}",
                        (values > rule["max"]).sum())

    if not records:
        return pd.DataFrame(columns=out_cols)

    return (pd.DataFrame(records)
            .groupby(["Product", "Column", "Anomaly"], as_index=False)["Count"]
            .sum())
|
| 929 |
+
|
| 930 |
+
|
| 931 |
+
def compute_production_segments(df: pd.DataFrame,
                                product_col: str = "Product",
                                time_col: str = "Date_time") -> pd.DataFrame:
    """Derive contiguous production runs per product.

    Output columns:
        Product | Start_Time | End_Time | Duration_Minutes | Data_Points
    CIP / CIP CHAMBER segments are excluded (cleaning, not production).
    """
    empty_cols = ["Product", "Start_Time", "End_Time", "Duration_Minutes", "Data_Points"]
    if product_col not in df.columns or time_col not in df.columns:
        return pd.DataFrame(columns=empty_cols)

    work = df[[product_col, time_col]].copy()
    work[time_col] = pd.to_datetime(work[time_col], errors="coerce")
    work = work.dropna(subset=[time_col]).sort_values(time_col)

    # A new segment starts whenever the product label changes between rows.
    work["segment_id"] = (work[product_col] != work[product_col].shift()).cumsum()

    segments = work.groupby([product_col, "segment_id"]).agg(
        Start_Time=(time_col, "min"),
        End_Time=(time_col, "max"),
        Data_Points=(time_col, "count"),
    ).reset_index()

    segments["Duration"] = segments["End_Time"] - segments["Start_Time"]
    # Drop cleaning cycles; they are not production runs.
    segments = segments[~segments[product_col].isin(["CIP", "CIP CHAMBER"])]

    if segments.empty:
        return pd.DataFrame(columns=empty_cols)

    segments["Duration_Minutes"] = segments["Duration"].dt.total_seconds() / 60.0

    out = segments[[product_col, "Start_Time", "End_Time",
                    "Duration_Minutes", "Data_Points"]].copy()
    out.rename(columns={product_col: "Product"}, inplace=True)
    return out
|
| 969 |
+
|
| 970 |
+
|
| 971 |
+
def create_line_plots(df: pd.DataFrame,
                      params: list,
                      product_label: str = "All Data",
                      time_col: str = "Date_time"):
    """Plot up to six process parameters against time on a 2x3 grid.

    Returns a single matplotlib Figure suitable for embedding in Streamlit.
    Parameters missing from the frame get a blanked panel labelled
    "(not found)"; unused panels are switched off.
    """
    data = df.copy()
    if time_col in data.columns:
        data[time_col] = pd.to_datetime(data[time_col], errors="coerce")
        data = data.dropna(subset=[time_col]).sort_values(time_col)

    # 2x3 grid sharing the time axis.
    fig, grid = plt.subplots(2, 3, figsize=(18, 8), sharex=True)
    grid = grid.flatten()

    for idx, name in enumerate(params):
        panel = grid[idx]
        if name not in data.columns:
            panel.set_title(f"{name} (not found)")
            panel.axis("off")
            continue
        panel.plot(data[time_col], data[name], marker=".", linewidth=0.7)
        panel.set_title(name)
        panel.grid(True, alpha=0.3)

    # Blank out any panels beyond the requested parameters.
    for idx in range(len(params), 6):
        grid[idx].axis("off")

    fig.suptitle(f"Distribusi Parameter Proses – {product_label}", fontsize=14)
    fig.tight_layout(rect=[0, 0.03, 1, 0.95])
    return fig
|
| 1005 |
+
|
| 1006 |
+
|
| 1007 |
+
def identify_outliers(df: pd.DataFrame,
                      params: list,
                      product_label: str = "All Data",
                      time_col: str = "Date_time"):
    """Detect IQR outliers for each parameter and plot them on a 2x3 grid.

    Returns
    -------
    tuple
        - fig : matplotlib Figure with time series, bounds and flagged points
        - total_outliers : grand total of outliers across all parameters
        - outlier_stats_df : per-parameter Q1/Q3/IQR/bounds/count table
    """
    data = df.copy()
    if time_col in data.columns:
        data[time_col] = pd.to_datetime(data[time_col], errors="coerce")
        data = data.dropna(subset=[time_col]).sort_values(time_col)

    summary_rows = []
    grand_total = 0

    fig, grid = plt.subplots(2, 3, figsize=(18, 8), sharex=True)
    grid = grid.flatten()

    for idx, name in enumerate(params):
        panel = grid[idx]
        if name not in data.columns:
            panel.set_title(f"{name} (not found)")
            panel.axis("off")
            continue

        values = data[name].astype(float)
        valid = values.dropna()
        if valid.empty:
            panel.set_title(f"{name} (no data)")
            panel.axis("off")
            continue

        # Tukey fences at 1.5 * IQR (computed on non-NaN values only).
        q1 = valid.quantile(0.25)
        q3 = valid.quantile(0.75)
        iqr = q3 - q1
        lo = q1 - 1.5 * iqr
        hi = q3 + 1.5 * iqr

        mask = (values < lo) | (values > hi)
        n_out = int(mask.sum())
        grand_total += n_out

        summary_rows.append({
            "Parameter": name,
            "Q1": q1,
            "Q3": q3,
            "IQR": iqr,
            "Lower_Bound": lo,
            "Upper_Bound": hi,
            "Outliers_Count": n_out,
        })

        # Time series with flagged points and dashed fence lines.
        panel.plot(data[time_col], values, linewidth=0.7)
        if n_out > 0:
            flagged = data.index[mask]
            panel.scatter(data.loc[flagged, time_col], values[mask], s=15)
        panel.axhline(lo, linestyle="--")
        panel.axhline(hi, linestyle="--")
        panel.set_title(f"{name} (outliers: {n_out})")
        panel.grid(True, alpha=0.3)

    # Switch off any unused panels.
    for idx in range(len(params), 6):
        grid[idx].axis("off")

    fig.suptitle(f"Outlier Detection – {product_label}", fontsize=14)
    fig.tight_layout(rect=[0, 0.03, 1, 0.95])

    return fig, grand_total, pd.DataFrame(summary_rows)
|
| 1085 |
+
|
| 1086 |
+
|
| 1087 |
+
def compute_stats_table(df: pd.DataFrame,
                        params: list,
                        target_col: str = None) -> pd.DataFrame:
    """Descriptive statistics for process parameters plus an optional target.

    Parameters
    ----------
    df : pd.DataFrame
        Source data.
    params : list
        Candidate parameter column names; those missing from ``df`` are
        silently skipped.
    target_col : str, optional
        Extra column (e.g. the gas target) appended to the table when present.

    Returns
    -------
    pd.DataFrame
        Transposed ``describe()`` output indexed by column name, or an empty
        frame when none of the requested columns exist.
    """
    cols = [c for c in params if c in df.columns]
    # Fix: do not describe the target twice when it already appears in
    # `params` (the original appended unconditionally, duplicating the row).
    if target_col and target_col in df.columns and target_col not in cols:
        cols.append(target_col)

    if not cols:
        return pd.DataFrame()

    return df[cols].describe().T  # index = parameter name
|
| 1102 |
+
|
| 1103 |
+
|
| 1104 |
+
# ============= CARA PENGGUNAAN =============
if __name__ == "__main__":
    # Load the raw disaggregated spray-dryer export.
    # NOTE(review): hard-coded absolute Windows path — works only on the
    # author's machine; parameterize before deploying.
    df = pd.read_csv(r"C:\Dokumen\One To Many_17_10_2025\MMBTU\DASHBOARD\One To Many\disagregasi_data_spraydryer_terbaru_10_17_2025.csv")
    # Run the full EDA pipeline with visualizations enabled.
    pipeline = SprayDryerEDAPipeline(dataframe=df)
    processed_df, product_dfs = pipeline.run_full_pipeline(show_visualizations=True)
    # Persist the processed dataframe (relative path: current working dir).
    processed_df.to_csv(r"Processed Data Pipeline EDA_21_10_2025.csv", index=False)
    # Print the textual summary report.
    summary = pipeline.get_summary()
    print(summary)
|
filter_rule_engine.py
ADDED
|
@@ -0,0 +1,361 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
# =========================
|
| 5 |
+
# RULE 1 (detect_anomaly_new_rules)
|
| 6 |
+
# =========================
|
| 7 |
+
def _detect_anomaly_rule_1(df: pd.DataFrame) -> pd.Series:
|
| 8 |
+
"""
|
| 9 |
+
LOGIKA ASLI (tidak diubah)
|
| 10 |
+
"""
|
| 11 |
+
NOZZLE_PRESSURE = 'D101463PIC_PV' # Tekanan nozzle
|
| 12 |
+
TIC_PV = 'D102265TIC_PV' # Inlet Temperature
|
| 13 |
+
TOC = 'D101330TT' # Outlet Temperature
|
| 14 |
+
HP_CV = 'D102260TIC_CV' # Steam Damper CV (High Pressure)
|
| 15 |
+
LP_CV = 'D102265TIC_CV' # Steam Damper CV (Low Pressure)
|
| 16 |
+
|
| 17 |
+
required_cols = [NOZZLE_PRESSURE, TIC_PV, TOC, HP_CV, LP_CV, 'Date_time']
|
| 18 |
+
for col in required_cols:
|
| 19 |
+
if col not in df.columns:
|
| 20 |
+
raise ValueError(f"❌ Kolom yang dibutuhkan '{col}' tidak ada di DataFrame.")
|
| 21 |
+
|
| 22 |
+
tmp = df.copy()
|
| 23 |
+
tmp['Date_time'] = pd.to_datetime(tmp['Date_time'])
|
| 24 |
+
tmp = tmp.sort_values('Date_time').reset_index(drop=True)
|
| 25 |
+
|
| 26 |
+
# 1 menit sebelumnya
|
| 27 |
+
rename_prev = {c: f"prev_{c}" for c in [TIC_PV, TOC, HP_CV, LP_CV]}
|
| 28 |
+
df_prev = tmp[['Date_time'] + list(rename_prev.keys())].rename(columns=rename_prev)
|
| 29 |
+
df_prev['Date_time'] = df_prev['Date_time'] + pd.Timedelta(minutes=1)
|
| 30 |
+
|
| 31 |
+
tmp = tmp.merge(df_prev, on='Date_time', how='left')
|
| 32 |
+
|
| 33 |
+
# Delta
|
| 34 |
+
tmp['delta_TIC'] = tmp[TIC_PV] - tmp[f'prev_{TIC_PV}']
|
| 35 |
+
tmp['delta_TOC'] = tmp[TOC] - tmp[f'prev_{TOC}']
|
| 36 |
+
tmp['delta_HP'] = tmp[HP_CV] - tmp[f'prev_{HP_CV}']
|
| 37 |
+
tmp['delta_LP'] = tmp[LP_CV] - tmp[f'prev_{LP_CV}']
|
| 38 |
+
|
| 39 |
+
# Kondisi dasar: mesin running
|
| 40 |
+
is_running = tmp[NOZZLE_PRESSURE] >= 135
|
| 41 |
+
|
| 42 |
+
# Case A: Inlet TURUN
|
| 43 |
+
is_tic_delta_eq_neg_3 = (tmp['delta_TIC'] == -3)
|
| 44 |
+
is_toc_up = (tmp['delta_TOC'] > 0)
|
| 45 |
+
anomaly_case_A1 = is_running & is_tic_delta_eq_neg_3 & is_toc_up
|
| 46 |
+
|
| 47 |
+
is_tic_delta_lt_neg_3 = (tmp['delta_TIC'] < -3)
|
| 48 |
+
is_damper_up = (tmp['delta_HP'] > 0) | (tmp['delta_LP'] > 0)
|
| 49 |
+
anomaly_case_A2 = is_running & is_tic_delta_lt_neg_3 & is_damper_up
|
| 50 |
+
|
| 51 |
+
# Case B: Inlet NAIK
|
| 52 |
+
is_tic_delta_eq_pos_3 = (tmp['delta_TIC'] == 3)
|
| 53 |
+
is_toc_down = (tmp['delta_TOC'] < 0)
|
| 54 |
+
anomaly_case_B1 = is_running & is_tic_delta_eq_pos_3 & is_toc_down
|
| 55 |
+
|
| 56 |
+
is_tic_delta_gt_pos_3 = (tmp['delta_TIC'] > 3)
|
| 57 |
+
is_damper_down = (tmp['delta_HP'] < 0) | (tmp['delta_LP'] < 0)
|
| 58 |
+
anomaly_case_B2 = is_running & is_tic_delta_gt_pos_3 & is_damper_down
|
| 59 |
+
|
| 60 |
+
anomaly_flags = anomaly_case_A1 | anomaly_case_A2 | anomaly_case_B1 | anomaly_case_B2
|
| 61 |
+
anomaly_flags.name = "anomaly_flag"
|
| 62 |
+
|
| 63 |
+
print(f"✅ Jumlah anomali (Rule 1 - new_rules) terdeteksi: {anomaly_flags.sum()} dari {len(tmp)} baris data")
|
| 64 |
+
return anomaly_flags
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
# =========================
|
| 68 |
+
# RULE 2 (detect_anomaly_rule_2_revised)
|
| 69 |
+
# =========================
|
| 70 |
+
def _detect_anomaly_rule_2(df: pd.DataFrame) -> pd.Series:
|
| 71 |
+
"""
|
| 72 |
+
LOGIKA ASLI (tidak diubah)
|
| 73 |
+
"""
|
| 74 |
+
NOZZLE_PRESSURE = 'D101463PIC_PV'
|
| 75 |
+
FF = 'D101264FTSCL' # Flow Feed
|
| 76 |
+
TOC = 'D101330TT' # Outlet Temperature
|
| 77 |
+
|
| 78 |
+
required_cols = [NOZZLE_PRESSURE, FF, TOC, 'Date_time']
|
| 79 |
+
for col in required_cols:
|
| 80 |
+
if col not in df.columns:
|
| 81 |
+
raise ValueError(f"❌ Kolom '{col}' tidak ada di DataFrame.")
|
| 82 |
+
|
| 83 |
+
tmp = df.copy()
|
| 84 |
+
tmp['Date_time'] = pd.to_datetime(tmp['Date_time'])
|
| 85 |
+
tmp = tmp.sort_values('Date_time').reset_index(drop=True)
|
| 86 |
+
|
| 87 |
+
# 1 menit sebelumnya
|
| 88 |
+
rename_prev = {c: f'prev_{c}' for c in [FF, TOC]}
|
| 89 |
+
df_prev = tmp[['Date_time'] + list(rename_prev.keys())].rename(columns=rename_prev)
|
| 90 |
+
df_prev['Date_time'] = df_prev['Date_time'] + pd.Timedelta(minutes=1)
|
| 91 |
+
tmp = tmp.merge(df_prev, on='Date_time', how='left')
|
| 92 |
+
|
| 93 |
+
# Delta
|
| 94 |
+
tmp['delta_FF'] = tmp[FF] - tmp[f'prev_{FF}']
|
| 95 |
+
tmp['delta_TOC'] = tmp[TOC] - tmp[f'prev_{TOC}']
|
| 96 |
+
|
| 97 |
+
# Kondisi dasar: mesin running
|
| 98 |
+
is_running = tmp[NOZZLE_PRESSURE] >= 135
|
| 99 |
+
|
| 100 |
+
# Kondisi 1: FF turun & TOC turun signifikan
|
| 101 |
+
is_ff_down = tmp['delta_FF'] < 0
|
| 102 |
+
is_toc_drop_significant = tmp['delta_TOC'] <= -1.0
|
| 103 |
+
anomaly_case_1 = is_running & is_ff_down & is_toc_drop_significant
|
| 104 |
+
|
| 105 |
+
# Kondisi 2: FF naik & TOC naik 1 s/d 12.59
|
| 106 |
+
is_ff_up = tmp['delta_FF'] > 0
|
| 107 |
+
is_toc_rise_in_range = (tmp['delta_TOC'] >= 1.0) & (tmp['delta_TOC'] <= 12.59)
|
| 108 |
+
anomaly_case_2 = is_running & is_ff_up & is_toc_rise_in_range
|
| 109 |
+
|
| 110 |
+
anomaly_flags = anomaly_case_1 | anomaly_case_2
|
| 111 |
+
anomaly_flags.name = "anomaly_flag_2"
|
| 112 |
+
|
| 113 |
+
print(f"✅ Jumlah anomali (Rule 2 Rev) terdeteksi: {anomaly_flags.sum()} dari {len(tmp)} baris data")
|
| 114 |
+
return anomaly_flags
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
# =========================
|
| 118 |
+
# RULE 3 (detect_anomaly_rule_3_revised)
|
| 119 |
+
# =========================
|
| 120 |
+
def _detect_anomaly_rule_3(df: pd.DataFrame) -> pd.Series:
|
| 121 |
+
"""
|
| 122 |
+
LOGIKA ASLI (tidak diubah)
|
| 123 |
+
"""
|
| 124 |
+
NOZZLE_PRESSURE = 'D101463PIC_PV'
|
| 125 |
+
FF = 'D101264FTSCL'
|
| 126 |
+
TIC_PV = 'D102265TIC_PV'
|
| 127 |
+
GAS_MMBTU = 'GAS_MMBTU_Disaggregated'
|
| 128 |
+
|
| 129 |
+
required_cols = [NOZZLE_PRESSURE, FF, TIC_PV, GAS_MMBTU, 'Date_time']
|
| 130 |
+
for col in required_cols:
|
| 131 |
+
if col not in df.columns:
|
| 132 |
+
raise ValueError(f"❌ Kolom '{col}' tidak ada di DataFrame.")
|
| 133 |
+
|
| 134 |
+
tmp = df.copy()
|
| 135 |
+
tmp['Date_time'] = pd.to_datetime(tmp['Date_time'])
|
| 136 |
+
tmp = tmp.sort_values('Date_time').reset_index(drop=True)
|
| 137 |
+
|
| 138 |
+
# 1 menit sebelumnya
|
| 139 |
+
rename_prev = {c: f'prev_{c}' for c in [FF, TIC_PV, GAS_MMBTU]}
|
| 140 |
+
df_prev = tmp[['Date_time'] + list(rename_prev.keys())].rename(columns=rename_prev)
|
| 141 |
+
df_prev['Date_time'] = df_prev['Date_time'] + pd.Timedelta(minutes=1)
|
| 142 |
+
tmp = tmp.merge(df_prev, on='Date_time', how='left')
|
| 143 |
+
|
| 144 |
+
# Delta
|
| 145 |
+
tmp['delta_FF'] = tmp[FF] - tmp[f'prev_{FF}']
|
| 146 |
+
tmp['delta_TIC'] = tmp[TIC_PV] - tmp[f'prev_{TIC_PV}']
|
| 147 |
+
tmp['delta_MMBTU'] = tmp[GAS_MMBTU] - tmp[f'prev_{GAS_MMBTU}']
|
| 148 |
+
|
| 149 |
+
# Kondisi dasar: mesin running
|
| 150 |
+
is_running = tmp[NOZZLE_PRESSURE] >= 135
|
| 151 |
+
|
| 152 |
+
# Rule 1: FF naik, TIC naik, tapi GAS turun
|
| 153 |
+
is_ff_up = tmp['delta_FF'] > 0
|
| 154 |
+
is_tic_up = tmp['delta_TIC'] > 0
|
| 155 |
+
is_mmbtu_down = tmp['delta_MMBTU'] < 0
|
| 156 |
+
anomaly_case_1 = is_running & is_ff_up & is_tic_up & is_mmbtu_down
|
| 157 |
+
|
| 158 |
+
# Rule 2: FF turun, TIC turun, tapi GAS naik
|
| 159 |
+
is_ff_down = tmp['delta_FF'] < 0
|
| 160 |
+
is_tic_down = tmp['delta_TIC'] < 0
|
| 161 |
+
is_mmbtu_up = tmp['delta_MMBTU'] > 0
|
| 162 |
+
anomaly_case_2 = is_running & is_ff_down & is_tic_down & is_mmbtu_up
|
| 163 |
+
|
| 164 |
+
anomaly_flags = anomaly_case_1 | anomaly_case_2
|
| 165 |
+
anomaly_flags.name = "anomaly_flag_3"
|
| 166 |
+
|
| 167 |
+
print(f"✅ Jumlah anomali (Rule 3 Rev) terdeteksi: {anomaly_flags.sum()} dari {len(tmp)} baris data")
|
| 168 |
+
return anomaly_flags
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
# =========================
|
| 172 |
+
# RULE 4 (detect_spray_dryer_anomalies)
|
| 173 |
+
# =========================
|
| 174 |
+
def _detect_anomaly_rule_4(df: pd.DataFrame) -> pd.Series:
|
| 175 |
+
"""
|
| 176 |
+
LOGIKA ASLI (tidak diubah)
|
| 177 |
+
"""
|
| 178 |
+
NOZZLE_PRESSURE = 'D101463PIC_PV'
|
| 179 |
+
HP_CV = 'D102260TIC_CV'
|
| 180 |
+
LP_CV = 'D102265TIC_CV'
|
| 181 |
+
TIC = 'D102265TIC_PV'
|
| 182 |
+
TOC = 'D101330TT'
|
| 183 |
+
|
| 184 |
+
required_cols = [NOZZLE_PRESSURE, HP_CV, LP_CV, TIC, TOC, 'Date_time']
|
| 185 |
+
for col in required_cols:
|
| 186 |
+
if col not in df.columns:
|
| 187 |
+
raise ValueError(f"❌ Kolom '{col}' tidak ada di DataFrame.")
|
| 188 |
+
|
| 189 |
+
tmp = df.copy()
|
| 190 |
+
tmp['Date_time'] = pd.to_datetime(tmp['Date_time'])
|
| 191 |
+
tmp = tmp.sort_values('Date_time').reset_index(drop=True)
|
| 192 |
+
|
| 193 |
+
# 1 menit sebelumnya untuk HP & LP
|
| 194 |
+
rename_prev = {c: f'prev_{c}' for c in [HP_CV, LP_CV]}
|
| 195 |
+
df_prev = tmp[['Date_time'] + list(rename_prev.keys())].rename(columns=rename_prev)
|
| 196 |
+
df_prev['Date_time'] = df_prev['Date_time'] + pd.Timedelta(minutes=1)
|
| 197 |
+
tmp = tmp.merge(df_prev, on='Date_time', how='left')
|
| 198 |
+
|
| 199 |
+
# Delta
|
| 200 |
+
tmp['delta_HP'] = tmp[HP_CV] - tmp[f'prev_{HP_CV}']
|
| 201 |
+
tmp['delta_LP'] = tmp[LP_CV] - tmp[f'prev_{LP_CV}']
|
| 202 |
+
|
| 203 |
+
# Kondisi dasar: mesin running
|
| 204 |
+
is_running = tmp[NOZZLE_PRESSURE] >= 135
|
| 205 |
+
|
| 206 |
+
# Aturan 1: HP naik, LP turun > 1
|
| 207 |
+
is_hp_up = tmp['delta_HP'] > 0
|
| 208 |
+
is_lp_down_significant = tmp['delta_LP'] < -1.0
|
| 209 |
+
anomaly_1 = is_hp_up & is_lp_down_significant
|
| 210 |
+
|
| 211 |
+
# Aturan 2: HP turun, LP naik > 1
|
| 212 |
+
is_hp_down = tmp['delta_HP'] < 0
|
| 213 |
+
is_lp_up_significant = tmp['delta_LP'] > 1.0
|
| 214 |
+
anomaly_2 = is_hp_down & is_lp_up_significant
|
| 215 |
+
|
| 216 |
+
# Aturan 3: TOC > TIC
|
| 217 |
+
anomaly_3 = tmp[TOC] > tmp[TIC]
|
| 218 |
+
|
| 219 |
+
final_anomaly_flags = is_running & (anomaly_1 | anomaly_2 | anomaly_3)
|
| 220 |
+
final_anomaly_flags.name = "anomaly_flag_4"
|
| 221 |
+
|
| 222 |
+
print(f"✅ Jumlah anomali (Rule 4) terdeteksi: {final_anomaly_flags.sum()} dari {len(tmp)} baris data")
|
| 223 |
+
return final_anomaly_flags
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
# =========================
|
| 227 |
+
# PIPELINE UTAMA
|
| 228 |
+
# =========================
|
| 229 |
+
def apply_rule_engine(df: pd.DataFrame):
    """
    Integrated pipeline with a CIP / CIP CHAMBER pre-filter.

    CIP rows are removed before the four anomaly rules run, then all removed
    rows (CIP + rule hits) are combined into one anomaly frame.

    Returns:
        - df_clean : data with CIP rows and rule-engine anomalies removed
        - df_anomalies : every removed row, plus a reason column ('anomaly_reason')
        - summary_report: dict of summary figures (totals, percentages, CIP, etc.)
    """
    df_original = df.copy()
    total_rows_initial = len(df_original)

    # --- 1) Remove CIP & CIP CHAMBER rows first ---
    if "Product" in df_original.columns:
        cip_mask = df_original["Product"].isin(["CIP", "CIP CHAMBER"])
    else:
        # No Product column: nothing can be classified as CIP.
        cip_mask = pd.Series(False, index=df_original.index)

    df_cip = df_original[cip_mask].copy()
    df_non_cip = df_original[~cip_mask].copy()
    cip_removed = int(cip_mask.sum())

    # Pre-populate flag columns on the CIP slice so its schema matches the
    # rule-engine output when both slices are concatenated in step 3.
    rule_flag_cols = ["anomaly_flag", "anomaly_flag_2", "anomaly_flag_3", "anomaly_flag_4"]
    if not df_cip.empty:
        for c in rule_flag_cols + ["anomaly_any"]:
            df_cip[c] = False
        df_cip["anomaly_reason"] = "Produk CIP / CIP CHAMBER (dihapus sebelum rule engine)"

    # --- 2) Run the 4 rules on the non-CIP data ---
    if len(df_non_cip) > 0:
        rule1_flags = _detect_anomaly_rule_1(df_non_cip)
        rule2_flags = _detect_anomaly_rule_2(df_non_cip)
        rule3_flags = _detect_anomaly_rule_3(df_non_cip)
        rule4_flags = _detect_anomaly_rule_4(df_non_cip)

        df_with_flags = df_non_cip.copy()
        # .values: the rule functions re-sort/reset their index internally,
        # so positional assignment assumes df_non_cip is already time-sorted
        # — NOTE(review): verify upstream data is sorted by Date_time.
        df_with_flags["anomaly_flag"] = rule1_flags.values
        df_with_flags["anomaly_flag_2"] = rule2_flags.values
        df_with_flags["anomaly_flag_3"] = rule3_flags.values
        df_with_flags["anomaly_flag_4"] = rule4_flags.values

        # A row is anomalous when any of the four rules fired.
        df_with_flags["anomaly_any"] = df_with_flags[rule_flag_cols].any(axis=1)

        df_clean_rules = df_with_flags[~df_with_flags["anomaly_any"]].reset_index(drop=True)
        df_anomaly_rules = df_with_flags[df_with_flags["anomaly_any"]].reset_index(drop=True)

        # Build a human-readable reason string for each anomalous row.
        def _build_reasons(row):
            reasons = []
            if row["anomaly_flag"]:
                reasons.append("Rule 1 – TIC/TOC vs Damper dynamics")
            if row["anomaly_flag_2"]:
                reasons.append("Rule 2 – Flow Feed vs TOC")
            if row["anomaly_flag_3"]:
                reasons.append("Rule 3 – Flow & TIC vs GAS MMBTU")
            if row["anomaly_flag_4"]:
                reasons.append("Rule 4 – HP/LP damper & TOC>TIC")
            return "; ".join(reasons)

        if not df_anomaly_rules.empty:
            df_anomaly_rules["anomaly_reason"] = df_anomaly_rules.apply(_build_reasons, axis=1)
    else:
        # No non-CIP data at all: keep empty frames with the input schema.
        df_clean_rules = df_non_cip.copy()
        df_anomaly_rules = df_non_cip.iloc[0:0].copy()
        df_with_flags = df_non_cip.copy()

    # --- 3) Combine all removed rows: CIP + rule-engine anomalies ---
    if not df_cip.empty and not df_anomaly_rules.empty:
        df_anomalies = pd.concat([df_cip, df_anomaly_rules], ignore_index=True, sort=False)
    elif not df_cip.empty:
        df_anomalies = df_cip.copy()
    else:
        df_anomalies = df_anomaly_rules.copy()

    # Final clean data = non-CIP rows that passed every rule.
    df_clean = df_clean_rules.copy()

    # --- 4) Summary figures ---
    total_rows_after_filter = len(df_clean)
    total_rows_removed = total_rows_initial - total_rows_after_filter
    percent_clean = float(total_rows_after_filter / total_rows_initial * 100) if total_rows_initial > 0 else 0.0

    # Breakdown: CIP removals vs rule-engine removals.
    rule_rows_removed = len(df_anomaly_rules)
    rule_percent = float(rule_rows_removed / total_rows_initial * 100) if total_rows_initial > 0 else 0.0
    cip_percent = float(cip_removed / total_rows_initial * 100) if total_rows_initial > 0 else 0.0

    # Number of distinct anomaly reasons observed.
    if not df_anomalies.empty and "anomaly_reason" in df_anomalies.columns:
        num_anomaly_types = int(df_anomalies["anomaly_reason"].nunique())
    else:
        num_anomaly_types = 0

    summary_report = {
        "total_rows_initial": total_rows_initial,
        "total_rows_after_filter": total_rows_after_filter,
        "total_rows_removed": total_rows_removed,
        "percent_clean": percent_clean,
        "cip_rows_removed": cip_removed,
        "cip_percent": cip_percent,
        "rule_rows_removed": rule_rows_removed,
        "rule_percent": rule_percent,
        "num_anomaly_types": num_anomaly_types,
    }

    print(f"✅ Jumlah data awal : {total_rows_initial}")
    print(f"✅ Jumlah data bersih: {total_rows_after_filter}")
    print(f"🗑️ Jumlah baris yang dihapus (CIP + anomali rule): {total_rows_removed}")

    return df_clean, df_anomalies, summary_report
|
| 341 |
+
|
| 342 |
+
|
| 343 |
+
# Backwards-compatible alias kept for callers of the old function name.
def apply_spray_dryer_rule_engine(df: pd.DataFrame):
    """Alias for :func:`apply_rule_engine`; identical return values."""
    return apply_rule_engine(df)
|
| 346 |
+
|
| 347 |
+
|
| 348 |
+
if __name__ == "__main__":
    # Manual usage example: run the rule engine on a sample CSV and print
    # the summary plus a preview of the detected anomalies.
    path_csv = "/work/Dataset 18 Mar - 19 Jun/disagregasi_data_spraydryer_terbaru_10_17_2025.csv"
    try:
        raw_frame = pd.read_csv(path_csv)
        df_clean, df_anom, summary = apply_rule_engine(raw_frame)

        print("\n--- RINGKASAN ---")
        for key, value in summary.items():
            print(f"{key}: {value}")
        print("Contoh anomali:")
        print(df_anom.head())
    except FileNotFoundError:
        # The sample file only exists on the original workspace.
        print(f"File contoh tidak ditemukan: {path_csv}")
|
inverse_model_forward.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import joblib
|
| 5 |
+
import os
|
| 6 |
+
|
| 7 |
+
# --- PATH CONFIGURATION ---
# Folder holding the per-product XGBoost checkpoint bundles. The hard-coded
# Windows path only exists on the original author's machine, so allow an
# environment-variable override for deployment; the original path remains the
# default, keeping existing local runs unchanged.
BASE_MODEL_FOLDER = os.environ.get(
    "INVERSE_MODEL_FOLDER",
    r"C:\Dokumen\One To Many_17_10_2025\MMBTU\DASHBOARD\One To Many\MODEL CHECKPOINT FOR INVERSE MODEL",
)

# Products for which a checkpoint file is expected to exist.
AVAILABLE_PRODUCTS = ["BMR BASE", "CKP BASE", "CKR BASE", "CMR BASE", "MORIGRO BASE"]

# Raw sensor-tag features fed to the forward model (order matters downstream).
INPUT_FEATURES = [
    "D101330TT", "D102260TIC_CV", "D102265TIC_PV",
    "D102265TIC_CV", "D102266TIC", "D101264FTSCL",
]
|
| 14 |
+
@st.cache_resource(show_spinner="Memuat model...")
def load_model_artifacts(product_name, base_folder):
    """Load the XGBoost deployment bundle for one product (cached by Streamlit).

    Returns a 4-tuple ``(model, poly_transformer, input_features,
    poly_feature_names)``; on any failure a Streamlit error is shown and
    ``(None, None, None, None)`` is returned instead.
    """
    model_path = os.path.join(base_folder, f"model_checkpoint_xgb_{product_name}.joblib")
    failure = (None, None, None, None)

    if not os.path.exists(model_path):
        st.error(f"File model tidak ditemukan untuk **{product_name}** di: **{model_path}**")
        return failure

    try:
        bundle = joblib.load(model_path)

        # Pull the four expected artifacts out of the bundle, in order.
        artifacts = tuple(
            bundle.get(key)
            for key in ('model', 'poly_transformer', 'input_features', 'poly_feature_names')
        )

        # All four artifacts must be present for the dashboard to work.
        if any(item is None for item in artifacts):
            st.error(f"Salah satu artefak (model, poly_transformer, input_features, poly_feature_names) hilang dalam file joblib **{product_name}**.")
            return failure

        return artifacts

    except Exception as e:
        st.error(f"Gagal memuat atau membaca file joblib. Error: {e}")
        return failure
|
| 41 |
+
|
| 42 |
+
# --- STREAMLIT UI ---
st.set_page_config(page_title="Dashboard Prediksi GAS MMBTU", layout="wide")

with st.sidebar:
    st.header("⚙️ Konfigurasi Model")
    # Product picker; pre-select "CKR BASE" when it is in the list.
    default_index = AVAILABLE_PRODUCTS.index("CKR BASE") if "CKR BASE" in AVAILABLE_PRODUCTS else 0
    selected_product = st.selectbox(
        "Pilih Produk Target:",
        AVAILABLE_PRODUCTS,
        index=default_index,
    )
    st.markdown(f"**Folder Model:** `{BASE_MODEL_FOLDER}`")
|
| 53 |
+
st.title(f"🔥 Dashboard Prediksi GAS MMBTU: **{selected_product}**")
st.markdown("Dashboard ini menggunakan model **XGBoost** dan transformasi **Polinomial**.")
st.markdown("---")
# Load the (Streamlit-cached) artifacts for the product picked in the sidebar.
model, poly_transformer, input_features_loaded, poly_feature_names = load_model_artifacts(
    selected_product, BASE_MODEL_FOLDER
)

if model is None:
    st.stop()  # Halt this script run when the model failed to load

# Check that the feature list stored in the joblib bundle matches the
# hard-coded default; when it differs, the bundle's list wins.
if set(input_features_loaded) != set(INPUT_FEATURES):
    st.warning("Fitur Input yang dimuat dari joblib berbeda dengan daftar fitur default. Menggunakan fitur dari joblib.")
    INPUT_FEATURES = input_features_loaded
|
| 67 |
+
st.subheader("🧪 Masukkan Nilai Input Mentah")
st.markdown("Harap masukkan nilai numerik untuk 6 fitur di bawah:")

# Example default value per sensor tag; tags not listed fall back to 0.0.
_FEATURE_DEFAULTS = {
    "D101330TT": 95.0,
    "D102260TIC_CV": 45.0,
    "D102265TIC_PV": 185.0,
    "D102265TIC_CV": 17.0,
    "D102266TIC": 16.0,
    "D101264FTSCL": 3800.0,
}

# One column (and one number input) per raw feature.
cols = st.columns(len(INPUT_FEATURES))
user_raw_data = {}
for col, feature in zip(cols, INPUT_FEATURES):
    user_raw_data[feature] = col.number_input(
        feature,
        value=_FEATURE_DEFAULTS.get(feature, 0.0),
        format="%.4f",
        key=f"input_{feature}"
    )

st.markdown("---")
|
| 92 |
+
|
| 93 |
+
# Prediction trigger button; the block below runs only on click.
if st.button("🔮 Prediksi GAS MMBTU Sekarang", type="primary", use_container_width=True):

    # Build a one-row DataFrame from the raw widget values, reordered to
    # match the model's expected feature order.
    sim_input_df = pd.DataFrame([user_raw_data])[INPUT_FEATURES]

    st.subheader("⚙️ Proses Transformasi & Prediksi")

    # 1. Polynomial transformation, using the transformer loaded from the
    #    same joblib bundle as the model.
    with st.spinner("1. Menerapkan transformasi polinomial..."):
        transformed_input_np = poly_transformer.transform(sim_input_df)
        transformed_input_df = pd.DataFrame(
            transformed_input_np,
            columns=poly_feature_names,
            index=sim_input_df.index
        )
    # 2. Model prediction on the expanded feature set.
    with st.spinner("2. Melakukan prediksi dengan model..."):
        predictions = model.predict(transformed_input_df)
        prediksi_final = predictions[0]

    st.markdown("### ✅ Hasil Prediksi")
    st.metric(
        f"Prediksi Kebutuhan **GAS MMBTU** untuk {selected_product}",
        f"{prediksi_final:.6f}MMBTU"
    )
    st.markdown("---")
|
prediksi_model_inverse.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import streamlit as st
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import numpy as np
|
| 5 |
+
import joblib
|
| 6 |
+
import os
|
| 7 |
+
from datetime import datetime
|
| 8 |
+
from sklearn.preprocessing import MinMaxScaler
|
| 9 |
+
import openpyxl
|
| 10 |
+
from scipy.optimize import differential_evolution
|
| 11 |
+
|
| 12 |
+
def predict_forward_from_params(product_name: str, params_dict: dict, model_folder: str) -> float:
    """Run the many-to-one XGBoost forward model on a single parameter row.

    Used by both the validation and the simulation paths so the two stay
    consistent.

    Args:
        product_name: Product key; selects ``model_checkpoint_xgb_<name>.joblib``.
        params_dict: Mapping of feature name -> raw value for one observation.
        model_folder: Directory containing the checkpoint files.

    Returns:
        The model's prediction as a plain float.

    Raises:
        FileNotFoundError: when no checkpoint exists for *product_name*.
        ValueError: when *params_dict* lacks features the model requires.
    """
    model_path = os.path.join(model_folder, f"model_checkpoint_xgb_{product_name}.joblib")
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"File model untuk produk {product_name} tidak ditemukan: {model_path}")

    artifacts = joblib.load(model_path)

    # Bundle layout: dict with keys 'model', 'poly_transformer',
    # 'input_features', 'poly_feature_names'. Anything else is treated as a
    # bare estimator for backward compatibility.
    if isinstance(artifacts, dict):
        fwd_model = artifacts.get("model", artifacts)
        poly_transformer = artifacts.get("poly_transformer", None)
        input_features = artifacts.get("input_features", list(params_dict.keys()))
        poly_feature_names = artifacts.get("poly_feature_names", None)
    else:
        fwd_model = artifacts
        poly_transformer = None
        input_features = list(params_dict.keys())
        poly_feature_names = None

    # Single-row frame built from the caller's parameters.
    frame = pd.DataFrame([params_dict])

    # Every feature the model was trained on must be supplied.
    missing = [name for name in input_features if name not in frame.columns]
    if missing:
        raise ValueError(
            "Fitur berikut dibutuhkan oleh model namun tidak ada di input: "
            + ", ".join(missing)
        )

    # Column order must match the training order.
    frame = frame[input_features]

    # Expand with polynomial features when the bundle carries a transformer.
    if poly_transformer is None:
        model_input = frame
    else:
        expanded = poly_transformer.transform(frame)
        names_fit = poly_feature_names is not None and len(poly_feature_names) == expanded.shape[1]
        if names_fit:
            model_input = pd.DataFrame(expanded, columns=poly_feature_names)
        else:
            model_input = pd.DataFrame(expanded)

    return float(fwd_model.predict(model_input)[0])
|
requirements.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
pandas
|
| 3 |
+
numpy
|
| 4 |
+
joblib
|
| 5 |
+
scikit-learn
|
| 6 |
+
openpyxl
|
| 7 |
+
scipy
|
| 8 |
+
matplotlib
|
| 9 |
+
seaborn
|
| 10 |
+
xgboost
|