# -*- coding: utf-8 -*-
"""XGBOOST.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1tDtwG0rHNfmKvSU-iQ32jGa1xB4HAqCn
"""

# -*- coding: utf-8 -*-
"""
BTA_Predict_FINAL.ipynb
================================================
BTA Thickness Prediction — Production-Ready Version
Strategy: Anchor-Based Wear Rate Model
(based on the insight that:
 - Thickness is measured manually → discrete, not daily
 - Correlation of time vs thickness = -0.995
 - Correlation of temperature vs thickness = -0.16 (weak)
 → Prediction = last anchor + wear rate + temperature modifier)
================================================
"""

# ============================================================
# CELL 1: Install & Import
# ============================================================
# !pip install -q xgboost scikit-learn pandas numpy matplotlib seaborn

import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, mean_squared_error
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import warnings, io
warnings.filterwarnings('ignore')

print("✅ Libraries loaded.")

# ============================================================
# CELL 2: Upload & Load Data
# ============================================================
import os
import sys
import glob

# Collect every CSV in the current directory. '*.csv' already matches
# '*.csv.csv'; sorting keeps the numbered menu below stable between runs.
csv_files = sorted({os.path.basename(path) for path in glob.glob('*.csv')})

# A command-line argument takes priority over auto-discovery.
# NOTE(review): under Jupyter/Colab sys.argv[1] is typically the kernel's
# '-f' option rather than a data file, so option-like arguments are ignored.
cli_arg = sys.argv[1] if len(sys.argv) > 1 and not sys.argv[1].startswith('-') else None

if cli_arg is not None:
    filename = cli_arg
    if not os.path.exists(filename):
        print(f"❌ File '{filename}' yang diberikan melalui argumen tidak ditemukan!")
        sys.exit(1)
elif not csv_files:
    raise FileNotFoundError("Tidak ditemukan file CSV di direktori saat ini.")
elif len(csv_files) == 1:
    filename = csv_files[0]
else:
    print("\n📂 Ditemukan beberapa file CSV di direktori saat ini:")
    for idx, f in enumerate(csv_files, 1):
        print(f" [{idx}] {f}")

    # Pick the conventional default file when it is present.
    default_file = 'data-temp-clean.csv'
    if default_file not in csv_files and 'data-temp-clean.csv.csv' in csv_files:
        default_file = 'data-temp-clean.csv.csv'
    default_idx = csv_files.index(default_file) + 1 if default_file in csv_files else 1

    try:
        choice = input(f"Pilih file untuk diproses (Tekan Enter untuk default [{csv_files[default_idx-1]}]): ").strip()
        if choice == "":
            filename = csv_files[default_idx-1]
        else:
            choice_idx = int(choice) - 1
            if 0 <= choice_idx < len(csv_files):
                filename = csv_files[choice_idx]
            else:
                print("❌ Pilihan tidak valid. Menggunakan file default.")
                filename = csv_files[default_idx-1]
    except Exception:
        # Deliberate best effort: a non-numeric answer or an unavailable
        # stdin falls back to the default file instead of aborting.
        filename = csv_files[default_idx-1]

print(f"📂 Membaca file data: '{filename}'")
df = pd.read_csv(filename)

# Base name for the output artefacts. Only the file's own name is used so a
# path such as 'data/foo.csv' does not turn into an output path that points
# into a non-existent directory.
base_name = os.path.splitext(os.path.basename(filename))[0]
if base_name.endswith('.csv'):
    base_name = os.path.splitext(base_name)[0]

# Strip stray whitespace from the column names.
df.columns = [str(c).strip() for c in df.columns]

# Rename the columns to short identifiers.
df = df.rename(columns={
    'Tanggal'           : 'tanggal',
    'Cone Depan (°C)'   : 'cone_depan',
    'Bodi Tengah (°C)'  : 'body_tengah',
    'Cone Belakang (°C)': 'cone_belakang',
    'Ketebalan BTA (mm)': 'ketebalan'
})

# Coerce the measurement columns to numbers and the date column to datetime;
# unparsable cells become NaN/NaT.
for col in ['cone_depan', 'body_tengah', 'cone_belakang', 'ketebalan']:
    df[col] = pd.to_numeric(df[col], errors='coerce')

df['tanggal'] = pd.to_datetime(df['tanggal'], dayfirst=False, errors='coerce')
df = df.dropna(subset=['cone_depan', 'ketebalan', 'tanggal']).sort_values('tanggal').reset_index(drop=True)

if df.empty:
    raise ValueError(f"Tidak ada baris data yang valid di '{filename}'.")

# Time reference: the first day present in the data.
T0 = df['tanggal'].min()
df['hari_ke'] = (df['tanggal'] - T0).dt.days

print(f"✅ Data: {len(df)} baris | {df['tanggal'].min().date()} s/d {df['tanggal'].max().date()}")
print(f" Ketebalan: min={df['ketebalan'].min():.0f}mm | max={df['ketebalan'].max():.0f}mm")
print(f" Pengukuran terakhir: {df['ketebalan'].iloc[-1]:.0f}mm pada {df['tanggal'].iloc[-1].date()}")

# ============================================================
# CELL 3: Measurement Points & Wear Rate
# ============================================================
#
# INSIGHT: BTA thickness is measured manually at intervals (not daily).
# The daily rows merely repeat the last measured value until a new
# measurement arrives. The model is therefore:
#   thickness_today = last_measured_thickness + (wear_rate × days_elapsed)
#

# A row is a measurement point when its thickness differs from the previous row.
mask_change = df['ketebalan'] != df['ketebalan'].shift(1)
df_ukur = df[mask_change].copy().reset_index(drop=True)

# Wear rate between consecutive measurements.
df_ukur['delta_tebal'] = df_ukur['ketebalan'].diff()   # negative = thinning
df_ukur['delta_hari'] = df_ukur['hari_ke'].diff()
df_ukur['laju_aus'] = df_ukur['delta_tebal'] / df_ukur['delta_hari']   # mm/day
# Two change points on the same day give delta_hari == 0 and an infinite
# rate; treat those as missing so they cannot poison the means below.
df_ukur['laju_aus'] = df_ukur['laju_aus'].replace([np.inf, -np.inf], np.nan)

# Mean temperature over each inter-measurement period. The column is created
# up front so it exists even when there is only one measurement point.
df_ukur['suhu_avg_periode'] = np.nan
suhu_avg_harian = (df['cone_depan'] + df['body_tengah'] + df['cone_belakang']) / 3
for i in range(1, len(df_ukur)):
    h_start = df_ukur.loc[i-1, 'hari_ke']
    h_end = df_ukur.loc[i, 'hari_ke']
    mask = (df['hari_ke'] >= h_start) & (df['hari_ke'] < h_end)
    df_ukur.loc[i, 'suhu_avg_periode'] = suhu_avg_harian[mask].mean()

df_ukur = df_ukur.dropna(subset=['laju_aus'])

if df_ukur.empty:
    raise ValueError(
        "Data hanya memuat satu titik pengukuran ketebalan; "
        "minimal dua titik diperlukan untuk menghitung laju aus."
    )

# Wear-rate statistics.
laju_mean = df_ukur['laju_aus'].mean()             # mm/day (negative)
laju_recent = df_ukur['laju_aus'].tail(5).mean()   # mean of the last 5 periods

print(f"\n📊 Analisis Laju Aus:")
print(f" Rata-rata historis : {laju_mean:.4f} mm/hari")
print(f" Rata-rata 5 terbaru : {laju_recent:.4f} mm/hari")
print(f" Pengukuran terakhir : {df_ukur['ketebalan'].iloc[-1]:.0f}mm "
      f"pada hari ke-{df_ukur['hari_ke'].iloc[-1]}")
print(f"\n{df_ukur[['tanggal','ketebalan','delta_hari','laju_aus','suhu_avg_periode']].to_string(index=False)}")

# ============================================================
# CELL 4: XGBoost Model — Predict Wear Rate from Temperature
# ============================================================
#
# The wear rate varies between periods, so XGBoost is used to learn
# "how fast does the BTA thin out for a given temperature?".
# That rate is then used for the forward projection.
#

# Feature: mean temperature over one inter-measurement period
# (missing values are filled with the column mean).
X_rate = df_ukur[['suhu_avg_periode']].fillna(df_ukur['suhu_avg_periode'].mean())
y_rate = df_ukur['laju_aus']   # target: wear rate (mm/day)

model_rate = XGBRegressor(
    n_estimators=200,
    learning_rate=0.05,
    max_depth=3,
    subsample=0.8,
    random_state=42
)
model_rate.fit(X_rate, y_rate)
print(f"\n✅ Model laju aus dilatih pada {len(df_ukur)} titik pengukuran.")

# ============================================================
# CELL 5: Daily Prediction Function (PRODUCTION READY)
# ============================================================

# Last known state. The anchor date is the day the thickness value last
# changed (the last actual measurement), not the last daily row: the daily
# rows only repeat that measurement, and anchoring on the last row would
# ignore the wear accumulated since it was taken.
TEBAL_TERAKHIR = float(df['ketebalan'].iloc[-1])             # mm
TANGGAL_UKUR = df.loc[mask_change, 'tanggal'].iloc[-1]       # date of the last actual measurement
HARI_UKUR = int(df.loc[mask_change, 'hari_ke'].iloc[-1])     # day index of that measurement

# Operating limits
BATAS_KRITIS = 115.0    # mm
BATAS_WARNING = 130.0   # mm
BATAS_SUHU = 400.0      # °C
WARN_SUHU = 375.0       # °C


def predict_bta_daily(t_depan: float, t_tengah: float, t_belakang: float,
                      tanggal_cek: str = None, tebal_aktual: float = None):
    """
    Predict the BTA thickness for daily production monitoring and print a report.

    The prediction is ``anchor + effective_rate * days_since_anchor`` where the
    effective rate blends the model's temperature-based rate (40%) with the
    mean of the most recent historical rates (60%). The result is floored at
    100 mm.

    Parameters
    ----------
    t_depan      : Front cone temperature (°C) today
    t_tengah     : Middle body temperature (°C) today
    t_belakang   : Rear cone temperature (°C) today
    tanggal_cek  : Check date as 'DD/MM/YYYY' (default: now)
    tebal_aktual : (Optional) a BTA thickness measured on the check date;
                   when given, the module-level anchor (TEBAL_TERAKHIR,
                   TANGGAL_UKUR, HARI_UKUR) is updated to it

    Returns
    -------
    dict with keys 'tebal_prediksi', 'laju_aus', 'sisa_hari', 'est_kritis'
    and 'status'. 'sisa_hari' is 0 and 'est_kritis' is the check date when
    the thickness is already at/below the critical limit or when the
    effective wear rate is not negative (no estimate possible).

    Raises
    ------
    ValueError if ``tanggal_cek`` does not match 'DD/MM/YYYY'.
    """
    global TEBAL_TERAKHIR, TANGGAL_UKUR, HARI_UKUR

    # Resolve the check date once; it is also the new anchor date when a
    # fresh measurement is supplied.
    tgl_cek = datetime.now() if tanggal_cek is None else datetime.strptime(tanggal_cek, '%d/%m/%Y')

    # Update the anchor when a new actual measurement is provided.
    if tebal_aktual is not None:
        TEBAL_TERAKHIR = float(tebal_aktual)
        TANGGAL_UKUR = tgl_cek
        HARI_UKUR = int((TANGGAL_UKUR - T0).days)
        print(f"🔄 Anchor diperbarui: {TEBAL_TERAKHIR}mm pada {TANGGAL_UKUR.date()}")

    # Normalise the anchor (pandas Timestamp or datetime) to a plain datetime.
    anchor_dt = pd.Timestamp(TANGGAL_UKUR).to_pydatetime()
    hari_sejak_ukur = (tgl_cek - anchor_dt).days

    # Today's mean temperature.
    suhu_avg = (t_depan + t_tengah + t_belakang) / 3

    # Wear rate predicted from today's temperature.
    laju_pred = model_rate.predict(pd.DataFrame([[suhu_avg]], columns=['suhu_avg_periode']))[0]

    # Blend model and history; data is scarce, so history gets more weight.
    laju_efektif = 0.4 * laju_pred + 0.6 * laju_recent

    # Projected thickness today, floored at the physical minimum.
    tebal_pred = TEBAL_TERAKHIR + (laju_efektif * hari_sejak_ukur)
    tebal_pred = max(tebal_pred, 100.0)

    # Remaining days until the critical limit. Only meaningful while above
    # the limit AND actually wearing down.
    di_atas_kritis = tebal_pred > BATAS_KRITIS
    bisa_estimasi = di_atas_kritis and laju_efektif < 0
    if bisa_estimasi:
        sisa_tebal = tebal_pred - BATAS_KRITIS
        sisa_hari = int(sisa_tebal / abs(laju_efektif))
        tgl_kritis = tgl_cek + timedelta(days=sisa_hari)
    else:
        sisa_hari = 0
        tgl_kritis = tgl_cek

    # Status & alert. "<=" keeps the status in line with the report below,
    # which treats a thickness exactly at the limit as "at/below critical".
    if suhu_avg > BATAS_SUHU or tebal_pred <= BATAS_KRITIS:
        status = "🔴 CRITICAL"
        aksi = "SEGERA PERBAIKAN / SLAGGING — Koordinasi maintenance sekarang!"
        border = "!"*52
    elif suhu_avg > WARN_SUHU or tebal_pred < BATAS_WARNING:
        status = "🟡 WARNING"
        aksi = "Siapkan jadwal maintenance. Monitor lebih sering."
        border = "="*52
    else:
        status = "🟢 AMAN"
        aksi = "Operasi normal. Lanjutkan monitoring rutin."
        border = "-"*52

    print(f"\n{border}")
    print(f" 🏭 BTA DAILY MONITORING — {tgl_cek.strftime('%d %B %Y')}")
    print(f"{border}")
    print(f" 📡 Input Suhu : Depan={t_depan}°C | Tengah={t_tengah}°C | Belakang={t_belakang}°C")
    print(f" 🌡️ Suhu Rata-rata : {suhu_avg:.1f}°C")
    print(f" 📅 Anchor Terakhir : {TEBAL_TERAKHIR:.0f}mm ({anchor_dt.strftime('%d %b %Y')})")
    print(f" ⏱️ Hari sejak ukur : {hari_sejak_ukur} hari")
    print(f" 📉 Laju Aus Est. : {laju_efektif:.4f} mm/hari")
    print(f"{'='*52}")
    print(f" 🧱 PREDIKSI TEBAL : {tebal_pred:.1f} mm")
    print(f" 📊 Status : {status}")
    print(f" ⚡ Aksi : {aksi}")
    print(f"{'-'*52}")
    if bisa_estimasi:
        print(f" ⏳ Estimasi Sisa : ± {sisa_hari} hari lagi")
        print(f" 🔧 Est. Kritis : {tgl_kritis.strftime('%d %B %Y')}")
    elif di_atas_kritis:
        # A non-negative rate never reaches the limit; reporting "0 days
        # left" here would contradict the status line.
        print(f" ⏳ Estimasi Sisa : tidak dapat diestimasi (laju aus tidak negatif)")
    else:
        print(f" ⏳ Status : Ketebalan sudah di/bawah batas kritis!")
    print(f"{border}\n")

    return {
        'tebal_prediksi': round(tebal_pred, 1),
        'laju_aus'      : round(laju_efektif, 4),
        'sisa_hari'     : sisa_hari,
        'est_kritis'    : tgl_kritis.strftime('%d %B %Y'),
        'status'        : status
    }


# ============================================================
# CELL 6: Trial Predictions
# ============================================================
print("=" * 60)
print("🧪 UJI COBA PREDIKSI HARIAN")
print("=" * 60)

# Example 1: normal temperature
_ = predict_bta_daily(t_depan=378, t_tengah=310, t_belakang=355)

# Example 2: high temperature (approaching critical)
_ = predict_bta_daily(t_depan=435, t_tengah=410, t_belakang=372)

# Example 3: low temperature (good condition)
_ = predict_bta_daily(t_depan=320, t_tengah=295, t_belakang=340)

# Example 4: a new BTA measurement today (e.g. measured = 118mm).
# This updates the anchor so later predictions are more accurate.
# _ = predict_bta_daily(t_depan=350, t_tengah=320, t_belakang=360, tebal_aktual=118)

# ============================================================
# CELL 7: Historical Visualisation + Projection
# ============================================================
sns.set_theme(style='whitegrid')
fig, axes = plt.subplots(2, 1, figsize=(16, 12))
fig.suptitle('BTA Thickness Monitoring — Rotary Furnace', fontsize=15, fontweight='bold')

# --- Panel 1: History + Projection ---
ax1 = axes[0]

# Actual data (stepwise — the value only changes when measured)
ax1.step(df['tanggal'], df['ketebalan'], where='post',
         color='royalblue', linewidth=2.5, label='Ketebalan Aktual (Pengukuran Manual)', zorder=3)

# Actual measurement points
ax1.scatter(df_ukur['tanggal'], df_ukur['ketebalan'],
            color='royalblue', s=80, zorder=5, label='Titik Pengukuran Aktual')

# Projection window: 90 days past the last data row
tgl_terakhir = df['tanggal'].max()
proj_hari = 90
proj_dates = [tgl_terakhir + timedelta(days=i) for i in range(0, proj_hari+1)]

# Projection rate: model prediction blended with the recent historical rate
suhu_asumsi = df['cone_depan'].add(df['body_tengah']).add(df['cone_belakang']).div(3).tail(14).mean()
laju_proj = model_rate.predict(pd.DataFrame([[suhu_asumsi]], columns=['suhu_avg_periode']))[0]
laju_proj = 0.4 * laju_proj + 0.6 * laju_recent   # weighted
proj_tebal = [max(TEBAL_TERAKHIR + laju_proj * i, 95) for i in range(0, proj_hari+1)]

ax1.plot(proj_dates, proj_tebal, color='darkorange', linewidth=2,
         linestyle='--', label=f'Proyeksi 90 Hari (laju={laju_proj:.4f} mm/hari)', zorder=4)

# Projection band: the same line with the rate scaled by 0.7 and 1.3
laju_hi = laju_proj * 0.7   # slower wear (optimistic)
laju_lo = laju_proj * 1.3   # faster wear (pessimistic)
proj_hi = [max(TEBAL_TERAKHIR + laju_hi * i, 95) for i in range(0, proj_hari+1)]
proj_lo = [max(TEBAL_TERAKHIR + laju_lo * i, 95) for i in range(0, proj_hari+1)]
ax1.fill_between(proj_dates, proj_lo, proj_hi, alpha=0.15, color='darkorange', label='Range Proyeksi (±30%)')

# Threshold lines
ax1.axhline(y=BATAS_KRITIS, color='red', linestyle=':', linewidth=2, label=f'Batas Kritis ({BATAS_KRITIS}mm)')
ax1.axhline(y=BATAS_WARNING, color='orange', linestyle='--', linewidth=1.5, label=f'Batas Warning ({BATAS_WARNING}mm)')
ax1.axvline(x=tgl_terakhir, color='gray', linestyle=':', linewidth=1, label='Data Terakhir')

# Annotate the first projected day at/below the critical limit, if any
tgl_kritis_proj = next(
    (tgl for tgl, tp in zip(proj_dates, proj_tebal) if tp <= BATAS_KRITIS),
    None,
)
if tgl_kritis_proj is not None:
    ax1.annotate(f'Est. Kritis:\n{tgl_kritis_proj.strftime("%d %b %Y")}',
                 xy=(tgl_kritis_proj, BATAS_KRITIS), xytext=(-100, 30),
                 textcoords='offset points', color='red', fontsize=9, fontweight='bold',
                 arrowprops=dict(arrowstyle='->', color='red', lw=1.5))

ax1.set_title('Historis Ketebalan BTA & Proyeksi ke Depan', fontsize=12)
ax1.set_ylabel('Ketebalan (mm)')
ax1.legend(loc='upper right', fontsize=8.5)
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
ax1.xaxis.set_major_locator(mdates.MonthLocator(interval=2))
plt.setp(ax1.xaxis.get_majorticklabels(), rotation=30)
ax1.set_ylim(90, 245)

# --- Panel 2: Temperature Monitoring ---
ax2 = axes[1]
suhu_avg_series = (df['cone_depan'] + df['body_tengah'] + df['cone_belakang']) / 3
ma7 = suhu_avg_series.rolling(7, min_periods=1).mean()

ax2.plot(df['tanggal'], suhu_avg_series, color='lightcoral', alpha=0.35, linewidth=1, label='Suhu Avg (Raw)')
ax2.plot(df['tanggal'], ma7, color='crimson', linewidth=2, label='Suhu Avg MA-7')
ax2.axhline(y=BATAS_SUHU, color='darkred', linestyle='--', linewidth=1.5, label=f'Batas Suhu ({BATAS_SUHU}°C)')
ax2.axhline(y=WARN_SUHU, color='orange', linestyle=':', linewidth=1.2, label=f'Warning Suhu ({WARN_SUHU}°C)')
ax2.axvline(x=tgl_terakhir, color='gray', linestyle=':', linewidth=1)

ax2.set_title('Monitoring Suhu Harian', fontsize=12)
ax2.set_ylabel('Suhu (°C)')
ax2.set_xlabel('Tanggal')
ax2.legend(fontsize=8.5)
ax2.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
ax2.xaxis.set_major_locator(mdates.MonthLocator(interval=2))
plt.setp(ax2.xaxis.get_majorticklabels(), rotation=30)

plt.tight_layout()
output_monitoring = f'bta_monitoring_{base_name}.png'
plt.savefig(output_monitoring, dpi=150, bbox_inches='tight')
plt.show()
print(f"✅ Grafik disimpan: '{output_monitoring}'")

# ============================================================
# CELL 8: Save Model & State
# ============================================================
import pickle

output_model = f'model_rate_{base_name}.pkl'
with open(output_model, 'wb') as f:
    pickle.dump(model_rate, f)

# Also save in XGBoost's native JSON format for Hugging Face compatibility
output_model_json = f'model_rate_{base_name}.json'
model_rate.save_model(output_model_json)
model_rate.save_model('xgboost_bta.json')

state = {
    'T0'            : T0,
    'tebal_terakhir': TEBAL_TERAKHIR,
    'tanggal_ukur'  : TANGGAL_UKUR,
    'hari_ukur'     : HARI_UKUR,
    'laju_mean'     : laju_mean,
    'laju_recent'   : laju_recent,
    'batas_kritis'  : BATAS_KRITIS,
    'batas_warning' : BATAS_WARNING,
}
output_state = f'bta_state_{base_name}.pkl'
with open(output_state, 'wb') as f:
    pickle.dump(state, f)

print("\n✅ Model & State disimpan:")
print(f" 📦 {output_model} — Model laju aus XGBoost (Pickle)")
print(f" 📦 xgboost_bta.json — Model laju aus XGBoost (JSON - HF compatible)")
print(f" 📦 {output_state} — State pengukuran terakhir")
print("\n💡 TIP PENGGUNAAN HARIAN:")
print(" Setiap ada pengukuran BTA baru, gunakan parameter tebal_aktual=XXX")
print(" agar anchor diperbarui dan prediksi makin akurat.")
print(" Contoh: predict_bta_daily(350, 320, 360, tebal_aktual=118)")

"""##ACTUAL VS PREDICT"""

import pandas as pd

# Back-test: for every daily row, predict the thickness from the most recent
# anchor (measurement) at or before that row. Done as one vectorised pass —
# a single model call for all rows instead of one call per row.

# A row is an anchor when its date is a measurement date in df_ukur; the very
# first row is always the initial anchor.
is_anchor = df['tanggal'].isin(df_ukur['tanggal'])
is_anchor.iloc[0] = True

# Carry each anchor's thickness and date forward until the next anchor.
anchor_tebal = df['ketebalan'].where(is_anchor).ffill()
anchor_tanggal = df['tanggal'].where(is_anchor).ffill()

# Model wear rate for each day's mean temperature, blended with the recent
# historical rate for stability (same weights as the daily predictor).
suhu_avg_all = (df['cone_depan'] + df['body_tengah'] + df['cone_belakang']) / 3
laju_pred_all = model_rate.predict(
    pd.DataFrame({'suhu_avg_periode': suhu_avg_all})
).astype(float)
laju_efektif_all = 0.4 * laju_pred_all + 0.6 * laju_recent

hari_sejak_all = (df['tanggal'] - anchor_tanggal).dt.days
tebal_pred_all = anchor_tebal + laju_efektif_all * hari_sejak_all

# Plain lists: the Series built from them downstream must take its index
# from pred_dates rather than re-align on df's integer index.
pred_values = tebal_pred_all.tolist()
pred_dates = df['tanggal'].tolist()

# Convert to a date-indexed Series for plotting
historical_predictions = pd.Series(pred_values, index=pred_dates)

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd

# Requires from earlier cells:
# df, df_ukur, historical_predictions, BATAS_KRITIS, BATAS_WARNING

# Comparison plot: actual vs. model-predicted thickness
fig, ax = plt.subplots(figsize=(16, 8))

# Actual BTA thickness (stepwise) and the measurement points
ax.step(df['tanggal'], df['ketebalan'], where='post',
        color='royalblue', linewidth=2.5, label='Ketebalan Aktual (Pengukuran Manual)', zorder=3)
ax.scatter(df_ukur['tanggal'], df_ukur['ketebalan'],
           color='royalblue', s=80, zorder=5, label='Titik Pengukuran Aktual')

# Predicted BTA thickness from the pre-computed back-test series
ax.plot(historical_predictions.index, historical_predictions.values,
        color='darkgreen', linestyle='-', linewidth=1.5, label='Prediksi Model Harian', zorder=2)

# Threshold lines
ax.axhline(y=BATAS_KRITIS, color='red', linestyle=':', linewidth=2, label=f'Batas Kritis ({BATAS_KRITIS}mm)')
ax.axhline(y=BATAS_WARNING, color='orange', linestyle='--', linewidth=1.5, label=f'Batas Warning ({BATAS_WARNING}mm)')

ax.set_title('Perbandingan Ketebalan BTA Aktual vs. Prediksi Model Historis', fontsize=15, fontweight='bold')
ax.set_ylabel('Ketebalan (mm)')
ax.set_xlabel('Tanggal')
ax.legend(loc='upper right', fontsize=10)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=2))
plt.setp(ax.xaxis.get_majorticklabels(), rotation=30)
ax.set_ylim(90, 245)   # same y-range as the monitoring plot
plt.grid(True)
plt.tight_layout()

output_comparison = f'bta_comparison_{base_name}.png'
plt.savefig(output_comparison, dpi=150, bbox_inches='tight')
plt.show()

print(f"✅ Grafik perbandingan aktual vs. prediksi historis ditampilkan dan disimpan ke '{output_comparison}'.")


def _read_temperature(prompt: str) -> float:
    """Prompt until the user enters a number; a decimal comma is accepted."""
    while True:
        raw = input(prompt).strip().replace(',', '.')
        try:
            return float(raw)
        except ValueError:
            # A typo must not throw away the whole run at the very last step.
            print("❌ Masukan harus berupa angka, contoh: 350 atau 350.5")


# Temperature input from the user
t_depan_input = _read_temperature("Masukkan suhu Cone Depan (°C): ")
t_tengah_input = _read_temperature("Masukkan suhu Bodi Tengah (°C): ")
t_belakang_input = _read_temperature("Masukkan suhu Cone Belakang (°C): ")

# Run the daily prediction with the entered temperatures
result = predict_bta_daily(t_depan=t_depan_input, t_tengah=t_tengah_input, t_belakang=t_belakang_input)

print("\nRingkasan Prediksi:")
for key, value in result.items():
    print(f"- {key.replace('_', ' ').title()}: {value}")