"""Streamlit early-warning app that scores a student's dropout risk.

The script loads a CatBoost classifier and a fitted scaler stored next to this
file, collects one student's record from sidebar widgets, predicts the dropout
probability, and explains the prediction with a SHAP waterfall plot.
"""
import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shap
import streamlit as st
from catboost import CatBoostClassifier

# --- PAGE CONFIGURATION ---
st.set_page_config(
    page_title="EWS Prediksi Dropout",
    page_icon="🎓",
    layout="wide"
)


# --- MODEL LOADING ---
@st.cache_resource
def load_resources():
    """Load the CatBoost model and the scaler that live next to this script.

    If either artifact file is missing, an error is shown in the app and the
    current script run is stopped via ``st.stop()``.

    Returns:
        tuple: ``(model, scaler)`` where ``model`` is the loaded
        ``CatBoostClassifier`` and ``scaler`` is the object unpickled from
        ``scaler.pkl``.
    """
    current_dir = os.path.dirname(os.path.abspath(__file__))
    model_path = os.path.join(current_dir, "catboost_dropout_model.cbm")
    scaler_path = os.path.join(current_dir, "scaler.pkl")

    if not os.path.exists(model_path):
        st.error(f"❌ File model tidak ditemukan di: {model_path}")
        st.stop()
    # Check the scaler the same way as the model so a missing file produces a
    # clear message instead of a raw joblib exception.
    if not os.path.exists(scaler_path):
        st.error(f"❌ File scaler tidak ditemukan di: {scaler_path}")
        st.stop()

    model = CatBoostClassifier()
    model.load_model(model_path)
    # NOTE: joblib.load unpickles the file; only ship a trusted scaler.pkl.
    scaler = joblib.load(scaler_path)
    return model, scaler


try:
    model, scaler = load_resources()
except Exception as e:
    # Top-level boundary: report any loading failure in the UI and halt.
    st.error(f"Error loading resources: {e}")
    st.stop()

# --- TITLE ---
st.title("🎓 Early Warning System: Student Dropout Prediction")
st.markdown("Aplikasi ini menggunakan **Machine Learning (CatBoost)** untuk mendeteksi dini mahasiswa yang berisiko *dropout*.")

# --- SIDEBAR INPUT ---
st.sidebar.header("📝 Input Data Mahasiswa")


def user_input_features():
    """Render the sidebar form and return the entered record as a DataFrame.

    Besides the raw widget values, four derived columns are added:
    ``delta_ips_1_4``, ``delta_ips_3_4``, ``rata_rata_total`` and
    ``rasio_mengulang``.

    Returns:
        pandas.DataFrame: a single-row frame (index ``0``) with one column per
        feature, in the order the keys are listed below.
    """
    st.sidebar.subheader("1. Data Administratif")
    jalur_masuk = st.sidebar.selectbox("Jalur Masuk (Kode)", options=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
    provinsi = st.sidebar.number_input("Kode Provinsi", min_value=0, max_value=50, value=35)
    kurikulum = st.sidebar.selectbox("Kurikulum", options=[0, 1, 2, 3])

    st.sidebar.subheader("2. Riwayat Akademik")
    ips1 = st.sidebar.slider("IPS Semester 1", 0.0, 4.0, 3.5)
    ips2 = st.sidebar.slider("IPS Semester 2", 0.0, 4.0, 3.4)
    ips3 = st.sidebar.slider("IPS Semester 3", 0.0, 4.0, 3.2)
    ips4 = st.sidebar.slider("IPS Semester 4", 0.0, 4.0, 3.0)

    nilai1 = st.sidebar.number_input("Nilai Angka Sem 1", 0.0, 100.0, 80.0)
    nilai2 = st.sidebar.number_input("Nilai Angka Sem 2", 0.0, 100.0, 78.0)
    nilai3 = st.sidebar.number_input("Nilai Angka Sem 3", 0.0, 100.0, 75.0)
    nilai4 = st.sidebar.number_input("Nilai Angka Sem 4", 0.0, 100.0, 70.0)

    status1 = st.sidebar.selectbox("Status Sem 1 (1=Aktif)", [0, 1], index=1)
    status2 = st.sidebar.selectbox("Status Sem 2 (1=Aktif)", [0, 1], index=1)
    status3 = st.sidebar.selectbox("Status Sem 3 (1=Aktif)", [0, 1], index=1)
    status4 = st.sidebar.selectbox("Status Sem 4 (1=Aktif)", [0, 1], index=1)

    st.sidebar.subheader("3. Beban Studi")
    jumlah_mk = st.sidebar.number_input("Total MK Diambil", min_value=10, max_value=150, value=40)
    banyak_mk_ulang = st.sidebar.number_input("Total MK Mengulang", min_value=0, max_value=50, value=0)

    # Feature engineering
    delta_ips_1_4 = ips4 - ips1
    delta_ips_3_4 = ips4 - ips3
    rata_rata_total = (nilai1 + nilai2 + nilai3 + nilai4) / 4
    # The small epsilon keeps the ratio defined even for a zero denominator.
    rasio_mengulang = banyak_mk_ulang / (jumlah_mk + 1e-5)

    data = {
        'provinsi': provinsi,
        'jalur masuk': jalur_masuk,
        'kurikulum': kurikulum,
        'jumlah mk diambil': jumlah_mk,
        'banyak mk mengulang': banyak_mk_ulang,
        'rata rata nilai semester 1': nilai1,
        'rata rata nilai semester 2': nilai2,
        'rata rata nilai semester 3': nilai3,
        'rata rata nilai semester 4': nilai4,
        'ips semester 1': ips1,
        'ips semester 2': ips2,
        'ips semester 3': ips3,
        'ips semester 4': ips4,
        'status semester 1': status1,
        'status semester 2': status2,
        'status semester 3': status3,
        'status semester 4': status4,
        'delta_ips_1_4': delta_ips_1_4,
        'delta_ips_3_4': delta_ips_3_4,
        'rata_rata_total': rata_rata_total,
        'rasio_mengulang': rasio_mengulang
    }
    return pd.DataFrame(data, index=[0])


input_df = user_input_features()

# --- MAIN LAYOUT ---
col1, col2 = st.columns([1, 2])

with col1:
    st.info("Pastikan data di sidebar sudah benar sebelum menekan tombol analisis.")
    st.write("**Preview Data Input:**")
    st.dataframe(input_df.T, height=400)

with col2:
    if st.button("🚀 Analisis Risiko Dropout", type="primary"):
        with st.spinner('Sedang menganalisis...'):
            try:
                # 1. Transform the input with the loaded scaler
                # NOTE(review): assumes the column order above matches the
                # order the scaler/model were fitted on - confirm against the
                # training pipeline.
                input_scaled = scaler.transform(input_df)

                # 2. Predict the class and the probability of class 1
                prediction = model.predict(input_scaled)
                proba = model.predict_proba(input_scaled)[0][1]

                # 3. Show the result
                st.subheader("Hasil Analisis")

                if prediction[0] == 1:
                    st.error("⚠️ **STATUS: BERISIKO DROPOUT**")
                    st.metric("Probabilitas Dropout", f"{proba*100:.1f}%")
                    st.warning("Mahasiswa ini memerlukan perhatian akademik khusus.")
                else:
                    st.success("✅ **STATUS: AMAN (Pass)**")
                    st.metric("Probabilitas Dropout", f"{proba*100:.1f}%")
                    st.info("Performa akademik mahasiswa terpantau stabil.")

                # 4. SHAP visualisation (waterfall plot)
                st.markdown("---")
                st.subheader("📊 Faktor Penentu Keputusan")
                st.caption("Grafik ini menunjukkan fitur apa yang paling mendorong (+) atau mengurangi (-) risiko dropout.")

                explainer = shap.TreeExplainer(model)
                shap_values = explainer.shap_values(input_scaled)

                # SHAP values come from the scaled row, while the unscaled
                # inputs are passed as display data for the plot labels.
                fig, _ = plt.subplots(figsize=(8, 6))
                try:
                    shap.waterfall_plot(
                        shap.Explanation(
                            values=shap_values[0],
                            base_values=explainer.expected_value,
                            data=input_df.iloc[0],
                            feature_names=input_df.columns
                        ),
                        max_display=10,
                        show=False
                    )
                    st.pyplot(fig)  # render the waterfall figure
                finally:
                    # pyplot keeps every figure registered until it is closed;
                    # close it so repeated analyses do not accumulate figures.
                    plt.close(fig)

            except Exception as e:
                # UI boundary: surface any failure to the user instead of
                # crashing the script run.
                st.error(f"Terjadi kesalahan: {e}")

# --- FOOTER ---
st.markdown("---")
st.markdown("© 2025 EWS System | Institut Teknologi Sepuluh Nopember")