Spaces:
Build error
Build error
Upload 4 files
Browse files- best_pipeline.pkl +3 -0
- eda.py +88 -0
- prediction.py +70 -0
- requirements.txt +6 -0
best_pipeline.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ec189ca71f8636f9cd279ce9d142de9bba3622154a42b0e4c9a18a38c9d2b33c
|
| 3 |
+
size 1100816
|
eda.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#import libraries
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import streamlit as st
|
| 5 |
+
import matplotlib.pyplot as plt
|
| 6 |
+
import seaborn as sns
|
| 7 |
+
import plotly.express as px
|
| 8 |
+
|
| 9 |
+
def run():
    """Render the exploratory data analysis page.

    Reads ``StudentPerformanceFactors.csv`` from the working directory and
    draws, in order: the title and banner image, the first rows of the data,
    the project background text, a pie chart of ``Distance_from_Home``, a
    box plot of ``Exam_Score`` by ``Parental_Involvement``, and a histogram
    of the column chosen in the select box.

    Every chart is drawn on its own explicit figure, handed to
    ``st.pyplot`` and then closed, so that it actually shows up on the page
    and does not pile up in pyplot's figure registry across reruns.
    """
    # Introduction
    st.title("Exploratory Data Analysis of Student Performance")
    st.write('This page containts Exploratory Data Analysis of Student Performance based on previous model')
    st.write('---')

    # Banner image
    link_gambar = 'https://tse4.mm.bing.net/th?id=OIP.2z-TeTNwM-ATHuzdStRADgHaDt&pid=Api&P=0&h=220'
    st.image(link_gambar, caption='source:google.com')

    # Preview of the dataframe
    st.write('## Dataframe')

    teams = pd.read_csv('StudentPerformanceFactors.csv')
    st.dataframe(teams.head())

    # Project background (kept verbatim; it is user-facing page content).
    st.write('''
Indonesia menduduki peringkat 68 dari 81 negara di dunia dalam kemampuan matematika yang erat hubungannya dengan kemampuan bernalar.
Hal ini cukup mengkhawatirkan karena Indonesia sendiri digadang-gadang akan memiliki bonus demografi di tahun 2045 di mana
jumlah penduduk yang berusia muda akan lebih banyak dari jumlah penduduk yang berusia senja. Generasi muda akan mulai mengambil alih kendali
atas negeri ini yang mana akan sangat berbahaya apabila generasi penerus tidak memiliki kemampuan bernalar yang mumpuni. Sebagai tambahan,
tidak seimbangnya kemampuan bernalar dengan kebutuhan industri di masa depan akan menyebabkan lebih banyak pengangguran yang akan membebani ekonomi
baik bagi negara maupun bagi individu itu sendiri. Program yang akan dibuat ini akan membantu untuk memprediksi performa matematika seorang pelajar yang
dinyatakan dalam skor berdasarkan beberapa faktor diantaranya latar belakang keluarga, pengaruh teman sejawat, jarak antara rumah dan sekolah, serta fasilitas
pendukung proses belajar anak seperti diadakannya kegiatan ektrakurikuler.
- Bagi orang tua, program ini bisa dijadikan acuan untuk pertimbangan memilih sekolah dan acuan untuk lebih memperhatikan fasilitas dan dukungan mental yang dapat meningkatkan performa anak-anaknya.
- Bagi pelajar, program ini bisa dijadikan acuan untuk menjaga diri dari hal-hal yang mungkin dapat berpengaruh terhadap performanya di sekolah, khususnya di bidang matematika.
''')

    # --- Visualisations ---

    # Share of each home-to-school distance category, in percent.
    percentage = teams['Distance_from_Home'].value_counts(normalize=True) * 100

    fig, ax = plt.subplots(figsize=(6, 6))
    ax.pie(percentage, labels=percentage.index, autopct='%1.1f%%', startangle=90,
           colors=['#66c2a5', '#fc8d62', '#8da0cb'])
    ax.set_title('Percentage of Home-to-School Distance')
    ax.set_ylabel('')
    st.pyplot(fig)
    plt.close(fig)

    st.write('**Insight:**')
    st.write('Sebanyak 10.1% orang tua memilih menyekolahkan anaknya di sekolah yang jauh dan sebanyak 30.6% memilih sekolah yang tidak terlalu jauh maupun tidak terlalu dekat dengan rumah. Hal ini bisa jadi karena faktor kualitas sekolah menjadi hal utama dalam pengambilan keputusan')

    # Box plot of exam score per parental-involvement level.
    # This used to end with plt.show(), which does not put anything on a
    # Streamlit page; the figure has to be passed to st.pyplot instead.
    box_fig, box_ax = plt.subplots(figsize=(8, 6))
    sns.boxplot(x='Parental_Involvement', y='Exam_Score', data=teams,
                palette='Set2', ax=box_ax)
    box_ax.set_title('Distribusi Skor Ujian Berdasarkan Keterlibatan Orang Tua', fontsize=16)
    box_ax.set_xlabel('Tingkat Keterlibatan Orang Tua', fontsize=12)
    box_ax.set_ylabel('Skor Ujian', fontsize=12)
    st.pyplot(box_fig)
    plt.close(box_fig)

    # Dynamic histogram: the user picks which column to plot.
    st.write('### Distribusi Data')

    opsi = st.selectbox('Pilih Data:',
                        ('Hours_Studied', 'Attendance', 'Parental_Involvement',
                         'Access_to_Resources', 'Extracurricular_Activities', 'Sleep_Hours',
                         'Previous_Scores', 'Motivation_Level', 'Tutoring_Sessions',
                         'Family_Income', 'Teacher_Quality', 'School_Type', 'Peer_Influence',
                         'Physical_Activity', 'Learning_Disabilities',
                         'Parental_Education_Level', 'Distance_from_Home'))

    hist_fig, hist_ax = plt.subplots(figsize=(8, 5))
    sns.histplot(teams[opsi], bins=20, kde=True, ax=hist_ax)
    st.pyplot(hist_fig)
    plt.close(hist_fig)
|
| 86 |
+
|
| 87 |
+
# Render the EDA page when this file is executed as the entry script.
if __name__ == "__main__":
    run()
|
prediction.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
import streamlit as st
|
| 4 |
+
import pickle
|
| 5 |
+
|
| 6 |
+
#load model
|
| 7 |
+
# Load the fitted pipeline once, at import time, so every call to run()
# reuses it. Unpickling executes code embedded in the file, so
# best_pipeline.pkl must be a trusted artifact shipped with the app.
with open('best_pipeline.pkl', mode='rb') as pipeline_file:
    model = pickle.load(pipeline_file)
|
| 9 |
+
|
| 10 |
+
def run():
    """Render the prediction page.

    Shows the title, banner image and description, collects the feature
    values through a form, displays them as a one-row dataframe, and, once
    the form has been submitted, writes the score predicted by the
    module-level ``model``.
    """
    st.title('Student Performance')
    st.write('---')

    st.image(
        'https://tse2.mm.bing.net/th?id=OIP.-i29HcwAtH6ZUfKAXVLYSQHaEK&pid=Api&P=0&h=220',
        caption='source: google.com',
    )

    # Page description
    st.write('This page contents prediction model that can predict student performance based on attributes required')

    # Option sets shared by several select boxes.
    tingkat = ('Low', 'Medium', 'High')
    ya_tidak = ('Yes', 'No')

    # Input form. The dict literal is evaluated top to bottom, so the
    # widgets appear on the page in exactly this order.
    with st.form(key='form parameters'):
        fitur = {
            'Hours_Studied': st.number_input('Hours Studied: ', min_value=1, max_value=44, value=8),
            'Attendance': st.slider('Attendences: ', min_value=48, max_value=100, value=60),
            'Parental_Involvement': st.selectbox('Parental Involvement: ', tingkat),
            'Access_to_Resources': st.selectbox('Access to Resources: ', tingkat),
            'Extracurricular_Activities': st.selectbox('Extracuriccular Activities: ', ya_tidak),
            'Sleep_Hours': st.slider('Sleep Hours: ', min_value=4, max_value=10, value=6),
            'Previous_Scores': st.number_input('Previous Scores: ', min_value=50, max_value=100, value=70),
            'Motivation_Level': st.selectbox('Motivation Level: ', tingkat),
            'Tutoring_Sessions': st.slider('Tutoring Sessions: ', min_value=0, max_value=8, value=0),
            'Family_Income': st.selectbox('Family Income: ', tingkat),
            'Teacher_Quality': st.selectbox('Teacher Quality: ', tingkat),
            'School_Type': st.selectbox('School Type: ', ('Private', 'Public')),
            # NOTE(review): only two choices are offered here; confirm the
            # training data has no other Peer_Influence category.
            'Peer_Influence': st.selectbox('Peer Influence: ', ('Positive', 'Negative')),
            'Physical_Activity': st.slider('Physical Activity: ', min_value=0, max_value=6, value=3),
            'Learning_Disabilities': st.selectbox('Learning Disabilities: ', ya_tidak),
            'Parental_Education_Level': st.selectbox(
                'Parental Educational Level: ', ('High School', 'College', 'Postgraduate')),
            'Distance_from_Home': st.selectbox('Distances from Home: ', ('Near', 'Moderate', 'Far')),
        }

        submit = st.form_submit_button('Prediksi')

    # One-row frame whose columns are the feature names used as dict keys.
    data = pd.DataFrame([fitur])
    st.dataframe(data)

    # Predict only after the form has been submitted.
    if not submit:
        return
    result = model.predict(data)
    st.write(f'### Scores predicted: {result[0]:.2f}')
|
| 68 |
+
|
| 69 |
+
# Render the prediction page when this file is executed as the entry script.
if __name__ == "__main__":
    run()
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pandas
|
| 2 |
+
numpy
|
| 3 |
+
plotly
|
| 4 |
+
seaborn
|
| 5 |
+
matplotlib
|
| 6 |
+
scikit-learn
|