Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| from catboost import CatBoostClassifier | |
| import joblib | |
| import shap | |
| import matplotlib.pyplot as plt | |
| import os | |
# --- PAGE CONFIGURATION ---
# Browser-tab title and icon, plus the wide page layout used by the two-column view.
st.set_page_config(page_title="EWS Prediksi Dropout", page_icon="π", layout="wide")
# --- MODEL LOADING ---
@st.cache_resource(show_spinner=False)
def _load_artifacts(model_path, scaler_path):
    """Read the CatBoost model and the fitted scaler from disk.

    Wrapped in ``st.cache_resource`` so the files are read once per server
    process instead of on every script rerun (Streamlit re-executes the whole
    script on each widget interaction).

    Args:
        model_path: Path to the CatBoost ``.cbm`` model file.
        scaler_path: Path to the joblib-serialized scaler.

    Returns:
        tuple: ``(model, scaler)``.
    """
    model = CatBoostClassifier()
    model.load_model(model_path)
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code.
    # Only ship a scaler.pkl that was produced by a trusted training run.
    scaler = joblib.load(scaler_path)
    return model, scaler


def load_resources():
    """Locate and load the model and scaler that sit next to this script.

    If either file is missing, an error is shown in the page and the script
    run is stopped. The existence checks run on every call; only the actual
    disk reads are cached (see ``_load_artifacts``).

    Returns:
        tuple: ``(model, scaler)`` - the loaded ``CatBoostClassifier`` and the
        object stored in ``scaler.pkl``.
    """
    current_dir = os.path.dirname(os.path.abspath(__file__))
    model_path = os.path.join(current_dir, "catboost_dropout_model.cbm")
    scaler_path = os.path.join(current_dir, "scaler.pkl")

    if not os.path.exists(model_path):
        # NOTE(review): the leading glyph looks mis-encoded in this source;
        # confirm the intended icon before changing it.
        st.error(f"β File model tidak ditemukan di: {model_path}")
        st.stop()
    if not os.path.exists(scaler_path):
        # Same explicit check as for the model, so a missing scaler is
        # reported with its path rather than a raw loader exception.
        st.error(f"File scaler tidak ditemukan di: {scaler_path}")
        st.stop()

    return _load_artifacts(model_path, scaler_path)
# Load the model and scaler at startup; on any failure show the error in the
# page and halt this script run, since nothing below can work without them.
try:
    model, scaler = load_resources()
except Exception as load_error:
    st.error(f"Error loading resources: {load_error}")
    st.stop()
# --- TITLE ---
st.title("π Early Warning System: Student Dropout Prediction")
st.markdown(
    "Aplikasi ini menggunakan **Machine Learning (CatBoost)** untuk mendeteksi dini "
    "mahasiswa yang berisiko *dropout*."
)

# --- SIDEBAR INPUT ---
st.sidebar.header("π Input Data Mahasiswa")
def user_input_features():
    """Render the sidebar input widgets and return their values as one row.

    The sidebar has three sections: administrative data, academic history
    for semesters 1-4 (IPS, numeric score, status) and study load. Four
    derived columns are added to the raw widget values:

    * ``delta_ips_1_4``   - IPS of semester 4 minus IPS of semester 1
    * ``delta_ips_3_4``   - IPS of semester 4 minus IPS of semester 3
    * ``rata_rata_total`` - mean of the four semester scores
    * ``rasio_mengulang`` - repeated courses divided by courses taken

    Returns:
        pandas.DataFrame: a single row (index ``0``) with 21 columns, in the
        order they are inserted below.
    """
    sidebar = st.sidebar
    semesters = (1, 2, 3, 4)

    sidebar.subheader("1. Data Administratif")
    jalur_masuk = sidebar.selectbox("Jalur Masuk (Kode)", options=list(range(15)))
    provinsi = sidebar.number_input("Kode Provinsi", min_value=0, max_value=50, value=35)
    kurikulum = sidebar.selectbox("Kurikulum", options=list(range(4)))

    sidebar.subheader("2. Riwayat Akademik")
    # Widgets are created in the same order as before: all IPS sliders,
    # then all score inputs, then all status selectors.
    ips = [
        sidebar.slider(f"IPS Semester {sem}", 0.0, 4.0, default)
        for sem, default in zip(semesters, (3.5, 3.4, 3.2, 3.0))
    ]
    nilai = [
        sidebar.number_input(f"Nilai Angka Sem {sem}", 0.0, 100.0, default)
        for sem, default in zip(semesters, (80.0, 78.0, 75.0, 70.0))
    ]
    status = [
        sidebar.selectbox(f"Status Sem {sem} (1=Aktif)", [0, 1], index=1)
        for sem in semesters
    ]

    sidebar.subheader("3. Beban Studi")
    jumlah_mk = sidebar.number_input("Total MK Diambil", min_value=10, max_value=150, value=40)
    banyak_mk_ulang = sidebar.number_input("Total MK Mengulang", min_value=0, max_value=50, value=0)

    # Insertion order defines the DataFrame column order, so it is kept
    # exactly as in the explicit dictionary this replaces.
    row = {
        'provinsi': provinsi,
        'jalur masuk': jalur_masuk,
        'kurikulum': kurikulum,
        'jumlah mk diambil': jumlah_mk,
        'banyak mk mengulang': banyak_mk_ulang,
    }
    row.update({f'rata rata nilai semester {sem}': val for sem, val in zip(semesters, nilai)})
    row.update({f'ips semester {sem}': val for sem, val in zip(semesters, ips)})
    row.update({f'status semester {sem}': val for sem, val in zip(semesters, status)})

    # Feature engineering
    row['delta_ips_1_4'] = ips[3] - ips[0]
    row['delta_ips_3_4'] = ips[3] - ips[2]
    row['rata_rata_total'] = sum(nilai) / 4
    # The small constant in the denominator is kept as-is.
    # NOTE(review): presumably it mirrors the training-time feature; confirm.
    row['rasio_mengulang'] = banyak_mk_ulang / (jumlah_mk + 1e-5)

    return pd.DataFrame(row, index=[0])
# Collect the current sidebar values as a one-row DataFrame.
input_df = user_input_features()

# --- MAIN LAYOUT ---
# Two columns with a 1:2 width ratio: input preview on the left, analysis on the right.
col1, col2 = st.columns([1, 2])

# Left column: a reminder plus the input row, transposed so each feature is on its own line.
col1.info("Pastikan data di sidebar sudah benar sebelum menekan tombol analisis.")
col1.write("**Preview Data Input:**")
col1.dataframe(input_df.T, height=400)
# Right column: on button click, scale the input, predict the dropout risk
# and explain the prediction with a SHAP waterfall plot.
with col2:
    if st.button("π Analisis Risiko Dropout", type="primary"):
        with st.spinner('Sedang menganalisis...'):
            fig = None  # set once the figure exists so `finally` can release it
            try:
                # 1. Scale the raw inputs with the loaded scaler.
                input_scaled = scaler.transform(input_df)

                # 2. Predict the class and the probability of the second
                #    class column, which the UI reports as the dropout risk.
                prediction = model.predict(input_scaled)
                proba = model.predict_proba(input_scaled)[0][1]

                # 3. Show the result.
                st.subheader("Hasil Analisis")
                if prediction[0] == 1:
                    st.error("⚠️ **STATUS: BERISIKO DROPOUT**")
                    st.metric("Probabilitas Dropout", f"{proba*100:.1f}%")
                    st.warning("Mahasiswa ini memerlukan perhatian akademik khusus.")
                else:
                    # NOTE(review): the leading glyph looks mis-encoded in
                    # this source; confirm the intended icon.
                    st.success("β **STATUS: AMAN (Pass)**")
                    st.metric("Probabilitas Dropout", f"{proba*100:.1f}%")
                    st.info("Performa akademik mahasiswa terpantau stabil.")

                # 4. SHAP visualization (waterfall plot).
                st.markdown("---")
                st.subheader("π Faktor Penentu Keputusan")
                st.caption("Grafik ini menunjukkan fitur apa yang paling mendorong (+) atau mengurangi (-) risiko dropout.")

                explainer = shap.TreeExplainer(model)
                shap_values = explainer.shap_values(input_scaled)

                # SHAP values come from the scaled row, but the plot is
                # labelled with the raw (unscaled) inputs from `input_df`.
                fig, _ = plt.subplots(figsize=(8, 6))
                shap.waterfall_plot(
                    shap.Explanation(
                        values=shap_values[0],
                        base_values=explainer.expected_value,
                        data=input_df.iloc[0],
                        feature_names=input_df.columns,
                    ),
                    max_display=10,
                    show=False,
                )
                st.pyplot(fig)
            except Exception as e:
                st.error(f"Terjadi kesalahan: {e}")
            finally:
                # Without an explicit close, every click leaves a figure
                # registered in pyplot's global state for the lifetime of
                # the server process.
                if fig is not None:
                    plt.close(fig)
# --- FOOTER ---
# Horizontal rule followed by the copyright line.
st.markdown("---")
st.markdown("© 2025 EWS System | Institut Teknologi Sepuluh Nopember")