Spaces:
Build error
Build error
| import pickle | |
| import streamlit as st | |
| import pandas as pd | |
# Load the trained model from model.pkl (path is relative to the working directory).
# NOTE: unpickling can execute arbitrary code, so model.pkl must be a file you trust.
with open("model.pkl", mode="rb") as model_file:
    model = pickle.load(model_file)

# Load the sample dataset; its first rows are previewed on the page below.
df = pd.read_csv("student-scores.csv")  # Change to the actual dataset path
# Career aspiration categories offered in the UI, listed alphabetically.
career_options = [
    "Artist",
    "Banker",
    "Business Owner",
    "Construction Engineer",
    "Designer",
    "Doctor",
    "Game Developer",
    "Government Officer",
    "Lawyer",
    "Real Estate Developer",
    "Scientist",
    "Software Engineer",
    "Stock Investor",
    "Teacher",
    "Unknown",
    "Writer",
]

# One indicator column name per category, in the same order as career_options.
# Presumably these mirror the one-hot column names used at training time -- confirm.
career_columns = ["career_aspiration_" + option for option in career_options]
# Streamlit UI: page title
st.title("Multivariate Linear Regression Model: Student Scores Prediction")

# Markdown shown in the collapsible "About" section
about_text = """
This app uses a **Multivariate Linear Regression** model that can predict multiple student scores
based on independent variables: **absence days, weekly self-study hours, extracurricular activities,
part-time job, and career aspiration**. The model predicts the following dependent variables:
**math score, history score, physics score, chemistry score, biology score, english score, and geography score**.
"""

# About section in dropdown
with st.expander("About This App"):
    st.markdown(about_text)
    # NOTE(review): the listing this was recovered from had no indentation;
    # this line is assumed to belong inside the expander -- confirm.
    st.markdown("""Model's notebook is located in the files :)""")

# Display dataset sample (first rows of the loaded CSV)
st.subheader("Dataset Sample")
dataset_preview = df.head()
st.dataframe(dataset_preview)
# User inputs -- widgets are created in the same order they appear on the page
absence_days = st.number_input("Absence Days", min_value=0, step=1)
weekly_self_study_hours = st.number_input(
    "Weekly Self-Study Hours", min_value=0.0, step=0.5
)
extracurricular_activities = st.selectbox("Extracurricular Activities", ['Yes', 'No'])
part_time_job = st.selectbox("Part-time Job", ['Yes', 'No'])
career_aspiration = st.selectbox("Career Aspiration", career_options)

# Encode the Yes/No answers as 1/0
part_time_job = int(part_time_job == "Yes")
extracurricular_activities = int(extracurricular_activities == "Yes")

# One-hot encode the chosen career: 1 for the selected column, 0 for every other
selected_column = f"career_aspiration_{career_aspiration}"
career_encoded = {col: int(col == selected_column) for col in career_columns}

# Assemble the single-row feature mapping: scalar features first, then the indicators
base_columns = ['absence_days', 'weekly_self_study_hours', 'extracurricular_activities', 'part_time_job']
base_values = [absence_days, weekly_self_study_hours, extracurricular_activities, part_time_job]
input_features = dict(zip(base_columns, base_values))
input_features.update(career_encoded)

# Ensure the column order matches the training data
expected_columns = base_columns + career_columns
input_df = pd.DataFrame([input_features])[expected_columns]
# Prediction: runs only on the rerun triggered by clicking the button
if st.button("Predict Scores"):
    # input_df holds a single row, so take the first (only) row of the output.
    prediction = model.predict(input_df)[0]

    # Display names paired positionally with the predicted values.
    # NOTE(review): assumes the model's outputs are in this subject order -- confirm
    # against the training notebook.
    subjects = ['Math', 'History', 'Physics', 'Chemistry', 'Biology', 'English', 'Geography']

    # Per-subject predictions (this heading previously read "Average Score",
    # which did not describe the list beneath it).
    st.write("### Predicted Scores:")
    for subject, score in zip(subjects, prediction):
        st.write(f"{subject}: {round(score, 2)}")

    # sum / len is the mean across subjects, not a total, so the heading says
    # "Average" (it previously read "Predicted Total Score").
    st.write("### Predicted Average Score:")
    st.write(round(sum(prediction) / len(prediction), 2))