# ITS-Dropout-EWS / src/streamlit_app.py
# Ferli28's picture
# Update src/streamlit_app.py
# 704fb57 verified
import streamlit as st
import pandas as pd
import numpy as np
from catboost import CatBoostClassifier
import joblib
import shap
import matplotlib.pyplot as plt
import os
# --- PAGE CONFIGURATION ---
# Sets the page title, page icon and the wide layout for the whole app.
st.set_page_config(
    page_title="EWS Prediksi Dropout",
    # Graduation-cap emoji, restored from a mis-decoded UTF-8 byte sequence.
    page_icon="🎓",
    layout="wide",
)
# --- MODEL LOADING ---
@st.cache_resource
def load_resources():
    """Load the trained CatBoost model and the fitted scaler from disk.

    Both artifacts are looked up in the directory that contains this script:
    ``catboost_dropout_model.cbm`` and ``scaler.pkl``. If either file is
    missing, an error is shown in the app and the script run is stopped.

    Returns:
        tuple: ``(model, scaler)`` where ``model`` is a ``CatBoostClassifier``
        with the saved model loaded and ``scaler`` is whatever object was
        stored in ``scaler.pkl``.
    """
    current_dir = os.path.dirname(os.path.abspath(__file__))
    model_path = os.path.join(current_dir, "catboost_dropout_model.cbm")
    scaler_path = os.path.join(current_dir, "scaler.pkl")

    # Check both artifacts up front so a missing scaler is reported as clearly
    # as a missing model instead of surfacing as a generic load exception.
    if not os.path.exists(model_path):
        st.error(f"❌ File model tidak ditemukan di: {model_path}")
        st.stop()
    if not os.path.exists(scaler_path):
        st.error(f"❌ File scaler tidak ditemukan di: {scaler_path}")
        st.stop()

    model = CatBoostClassifier()
    model.load_model(model_path)

    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code. Only ship a scaler.pkl that comes from a trusted source.
    scaler = joblib.load(scaler_path)
    return model, scaler
# Load the model and scaler once at start-up; if anything goes wrong, report
# it in the UI and halt this script run instead of continuing without them.
try:
    model, scaler = load_resources()
except Exception as load_error:
    st.error(f"Error loading resources: {load_error}")
    st.stop()
# --- TITLE ---
# The emoji literals below were restored from mis-decoded UTF-8 sequences.
st.title("🎓 Early Warning System: Student Dropout Prediction")
st.markdown("Aplikasi ini menggunakan **Machine Learning (CatBoost)** untuk mendeteksi dini mahasiswa yang berisiko *dropout*.")

# --- SIDEBAR INPUT ---
# NOTE(review): the original sidebar emoji lost its final byte; the memo emoji
# is the most plausible reconstruction - confirm against the intended design.
st.sidebar.header("📝 Input Data Mahasiswa")
def user_input_features():
    """Read the student's data from sidebar widgets and build the model input.

    The sidebar is organised in three sections (administrative data, academic
    history for semesters 1-4, and study load). Four derived features are
    computed from the raw inputs and appended after them.

    Returns:
        pandas.DataFrame: a single-row frame (index ``0``) containing the raw
        inputs followed by the derived features, in the order built below.
    """
    sidebar = st.sidebar
    semesters = (1, 2, 3, 4)

    # Section 1: administrative data, entered as integer codes.
    sidebar.subheader("1. Data Administratif")
    jalur_masuk = sidebar.selectbox("Jalur Masuk (Kode)", options=list(range(15)))
    provinsi = sidebar.number_input("Kode Provinsi", min_value=0, max_value=50, value=35)
    kurikulum = sidebar.selectbox("Kurikulum", options=list(range(4)))

    # Section 2: per-semester academic history. Widgets are created in the
    # same order as they appear in the sidebar: all GPAs, all scores, all
    # status flags.
    sidebar.subheader("2. Riwayat Akademik")
    ips = [
        sidebar.slider(f"IPS Semester {sem}", 0.0, 4.0, default)
        for sem, default in zip(semesters, (3.5, 3.4, 3.2, 3.0))
    ]
    nilai = [
        sidebar.number_input(f"Nilai Angka Sem {sem}", 0.0, 100.0, default)
        for sem, default in zip(semesters, (80.0, 78.0, 75.0, 70.0))
    ]
    status = [
        sidebar.selectbox(f"Status Sem {sem} (1=Aktif)", [0, 1], index=1)
        for sem in semesters
    ]

    # Section 3: study load.
    sidebar.subheader("3. Beban Studi")
    jumlah_mk = sidebar.number_input("Total MK Diambil", min_value=10, max_value=150, value=40)
    banyak_mk_ulang = sidebar.number_input("Total MK Mengulang", min_value=0, max_value=50, value=0)

    # Raw inputs first; insertion order defines the DataFrame column order.
    row = {
        'provinsi': provinsi,
        'jalur masuk': jalur_masuk,
        'kurikulum': kurikulum,
        'jumlah mk diambil': jumlah_mk,
        'banyak mk mengulang': banyak_mk_ulang,
    }
    for sem, value in zip(semesters, nilai):
        row[f'rata rata nilai semester {sem}'] = value
    for sem, value in zip(semesters, ips):
        row[f'ips semester {sem}'] = value
    for sem, value in zip(semesters, status):
        row[f'status semester {sem}'] = value

    # Feature engineering: GPA change (semester 1 -> 4 and 3 -> 4), mean
    # score over the four semesters, and the share of repeated courses
    # (the small epsilon guards the division).
    row['delta_ips_1_4'] = ips[3] - ips[0]
    row['delta_ips_3_4'] = ips[3] - ips[2]
    row['rata_rata_total'] = (nilai[0] + nilai[1] + nilai[2] + nilai[3]) / 4
    row['rasio_mengulang'] = banyak_mk_ulang / (jumlah_mk + 1e-5)

    return pd.DataFrame(row, index=[0])
# Collect the sidebar inputs into a single-row DataFrame.
input_df = user_input_features()

# --- MAIN LAYOUT ---
# Two columns with a 1:2 width ratio: input preview on the left, analysis on
# the right.
col1, col2 = st.columns([1, 2])

# Left column: a reminder plus the inputs, transposed so that each feature is
# shown on its own row.
col1.info("Pastikan data di sidebar sudah benar sebelum menekan tombol analisis.")
col1.write("**Preview Data Input:**")
col1.dataframe(input_df.T, height=400)
# Right column: run the prediction on demand and explain it with SHAP.
with col2:
    if st.button("🚀 Analisis Risiko Dropout", type="primary"):
        with st.spinner('Sedang menganalisis...'):
            try:
                # 1. Transform the raw inputs with the loaded scaler.
                input_scaled = scaler.transform(input_df)

                # 2. Predict the class label and take the probability in
                #    column 1 of predict_proba (presumably the dropout
                #    class - confirm against the training labels).
                prediction = model.predict(input_scaled)
                proba = model.predict_proba(input_scaled)[0][1]

                # 3. Show the result.
                st.subheader("Hasil Analisis")
                if prediction[0] == 1:
                    st.error("⚠️ **STATUS: BERISIKO DROPOUT**")
                    st.metric("Probabilitas Dropout", f"{proba*100:.1f}%")
                    st.warning("Mahasiswa ini memerlukan perhatian akademik khusus.")
                else:
                    st.success("✅ **STATUS: AMAN (Pass)**")
                    st.metric("Probabilitas Dropout", f"{proba*100:.1f}%")
                    st.info("Performa akademik mahasiswa terpantau stabil.")

                # 4. SHAP visualisation (waterfall plot).
                st.markdown("---")
                st.subheader("📊 Faktor Penentu Keputusan")
                st.caption("Grafik ini menunjukkan fitur apa yang paling mendorong (+) atau mengurangi (-) risiko dropout.")

                explainer = shap.TreeExplainer(model)
                shap_values = explainer.shap_values(input_scaled)

                # The waterfall plot draws on the current pyplot figure, so
                # create one explicitly and hand that same figure to Streamlit.
                fig, _ = plt.subplots(figsize=(8, 6))
                try:
                    shap.waterfall_plot(
                        shap.Explanation(
                            values=shap_values[0],
                            base_values=explainer.expected_value,
                            # Show the unscaled inputs next to each feature
                            # so the values are readable by the user.
                            data=input_df.iloc[0],
                            feature_names=input_df.columns
                        ),
                        max_display=10,
                        show=False
                    )
                    st.pyplot(fig)
                finally:
                    # pyplot keeps every figure registered until it is
                    # closed; without this each analysis run would leave one
                    # more figure in memory.
                    plt.close(fig)

            except Exception as e:
                # UI boundary: report any failure to the user instead of
                # crashing the script run.
                st.error(f"Terjadi kesalahan: {e}")
# --- FOOTER ---
# The copyright sign was restored from a mis-decoded UTF-8 sequence.
st.markdown("---")
st.markdown("© 2025 EWS System | Institut Teknologi Sepuluh Nopember")