# usernameiskheejay
# fix things
# d1cbdd0
import pickle
import streamlit as st
import pandas as pd
# Deserialize the trained model from disk.
# NOTE: unpickling can execute arbitrary code embedded in the file, so
# "model.pkl" must only ever come from a trusted source.
with open("model.pkl", mode="rb") as model_file:
    model = pickle.load(model_file)

# Sample dataset shown in the UI (change to the actual dataset path if needed).
df = pd.read_csv("student-scores.csv")
# Career aspiration categories offered in the UI. Each one maps to a
# "career_aspiration_<name>" one-hot feature column, in this same order.
career_options = [
    "Artist",
    "Banker",
    "Business Owner",
    "Construction Engineer",
    "Designer",
    "Doctor",
    "Game Developer",
    "Government Officer",
    "Lawyer",
    "Real Estate Developer",
    "Scientist",
    "Software Engineer",
    "Stock Investor",
    "Teacher",
    "Unknown",
    "Writer",
]
career_columns = ["career_aspiration_" + option for option in career_options]
# Page title.
st.title("Multivariate Linear Regression Model: Student Scores Prediction")

# Text for the collapsible "About" section. The literal is kept flush-left so
# the markdown passed to Streamlit carries no leading indentation.
about_text = """
This app uses a **Multivariate Linear Regression** model that can predict multiple student scores
based on independent variables: **absence days, weekly self-study hours, extracurricular activities,
part-time job, and career aspiration**. The model predicts the following dependent variables:
**math score, history score, physics score, chemistry score, biology score, english score, and geography score**.
"""

# Collapsible "About" section: model description plus a pointer to the notebook.
with st.expander("About This App"):
    st.markdown(about_text)
    st.markdown("Model's notebook is located in the files :)")

# Preview of the first rows of the loaded dataset.
st.subheader("Dataset Sample")
dataset_preview = df.head()
st.dataframe(dataset_preview)
# Collect the raw feature values from the user. The widgets are created in the
# same order as they should appear on the page.
absence_days = st.number_input("Absence Days", min_value=0, step=1)
weekly_self_study_hours = st.number_input("Weekly Self-Study Hours", min_value=0.0, step=0.5)
extracurricular_choice = st.selectbox("Extracurricular Activities", ['Yes', 'No'])
part_time_choice = st.selectbox("Part-time Job", ['Yes', 'No'])
career_aspiration = st.selectbox("Career Aspiration", career_options)

# Encode the Yes/No answers as 1/0 integers for the model.
part_time_job = int(part_time_choice == "Yes")
extracurricular_activities = int(extracurricular_choice == "Yes")
# One-hot encode the chosen career: every career column starts at 0 and only
# the column for the selected aspiration is set to 1.
career_encoded = dict.fromkeys(career_columns, 0)
career_encoded["career_aspiration_" + career_aspiration] = 1

# Merge the numeric/binary features with the one-hot career columns.
input_features = dict(
    absence_days=absence_days,
    weekly_self_study_hours=weekly_self_study_hours,
    extracurricular_activities=extracurricular_activities,
    part_time_job=part_time_job,
)
input_features.update(career_encoded)

# Column order the model is fed with: base features first, then career columns.
# NOTE(review): presumably this mirrors the training-time column order; verify
# against the training notebook.
expected_columns = [
    'absence_days',
    'weekly_self_study_hours',
    'extracurricular_activities',
    'part_time_job',
    *career_columns,
]

# Single-row frame, reordered explicitly to the expected column order.
input_df = pd.DataFrame([input_features])
input_df = input_df[expected_columns]
# Prediction: this branch runs only when the "Predict Scores" button is clicked.
if st.button("Predict Scores"):
    # input_df holds a single row, so take the first row of the model output.
    prediction = model.predict(input_df)[0]
    # NOTE(review): the label order is assumed to match the order of the
    # model's output columns; confirm against the training notebook.
    subjects = ['Math', 'History', 'Physics', 'Chemistry', 'Biology', 'English', 'Geography']

    # Per-subject predictions, so the heading names them as predicted scores
    # rather than as an average.
    st.write("### Predicted Scores:")
    for subject, score in zip(subjects, prediction):
        st.write(f"{subject}: {round(score, 2)}")

    # The value below is sum / count, i.e. the mean of the predicted scores,
    # so it is labelled as an average and not as a total.
    st.write("### Average Score:")
    st.write(round(sum(prediction) / len(prediction), 2))