Spaces:

kheejay88
/

linear-regression-student-scores

Build error

linear-regression-student-scores / app.py

usernameiskheejay

fix things

d1cbdd0 about 1 year ago

3.12 kB

	import pickle
	import streamlit as st
	import pandas as pd

	# Load the trained model.
	# NOTE: unpickling can execute arbitrary code embedded in the file, so
	# "model.pkl" must only ever be an artifact you produced or otherwise trust.
	with open("model.pkl", "rb") as file:
	    # The read has to sit inside the `with` suite: the handle is closed as
	    # soon as the suite ends.
	    model = pickle.load(file)

	# Load sample dataset
	df = pd.read_csv("student-scores.csv") # Change to the actual dataset path

	# Career aspiration categories, one entry per line.
	career_options = [
	    "Artist",
	    "Banker",
	    "Business Owner",
	    "Construction Engineer",
	    "Designer",
	    "Doctor",
	    "Game Developer",
	    "Government Officer",
	    "Lawyer",
	    "Real Estate Developer",
	    "Scientist",
	    "Software Engineer",
	    "Stock Investor",
	    "Teacher",
	    "Unknown",
	    "Writer",
	]
	# Matching column name for each category, in the same order as the list above.
	career_columns = ["career_aspiration_" + option for option in career_options]

	# Streamlit UI
	st.title("Multivariate Linear Regression Model: Student Scores Prediction")

	# About section, rendered inside an expander
	with st.expander("About This App"):
	    # Both markdown calls belong to the expander's suite; without the
	    # indentation the `with` statement has no body and the script cannot run.
	    # The literal's own lines are left exactly as they were so the text
	    # passed to st.markdown is unchanged.
	    st.markdown(
	        """
	This app uses a Multivariate Linear Regression model that can predict multiple student scores
	based on independent variables: **absence days, weekly self-study hours, extracurricular activities,
	part-time job, and career aspiration**. The model predicts the following dependent variables:
	math score, history score, physics score, chemistry score, biology score, english score, and geography score.
	"""
	    )
	    # NOTE(review): assumed to be part of the About text rather than the
	    # main page — confirm against the intended layout.
	    st.markdown("""Model's notebook is located in the files :)""")

	# Display dataset sample (whatever rows DataFrame.head() returns)
	st.subheader("Dataset Sample")
	st.dataframe(df.head())

	# User inputs -- widgets are created in the order they should appear on the page
	absence_days = st.number_input("Absence Days", min_value=0, step=1)
	weekly_self_study_hours = st.number_input(
	    "Weekly Self-Study Hours", min_value=0.0, step=0.5
	)
	extracurricular_activities = st.selectbox("Extracurricular Activities", ['Yes', 'No'])
	part_time_job = st.selectbox("Part-time Job", ['Yes', 'No'])
	career_aspiration = st.selectbox("Career Aspiration", career_options)

	# Convert the Yes/No answers to 1/0 flags
	extracurricular_activities = int(extracurricular_activities == "Yes")
	part_time_job = int(part_time_job == "Yes")

	# One-hot encode the chosen career: every column starts at 0, the selected one becomes 1
	career_encoded = dict.fromkeys(career_columns, 0)
	career_encoded[f"career_aspiration_{career_aspiration}"] = 1

	# Collect all features for the single row to be scored
	input_features = {
	    "absence_days": absence_days,
	    "weekly_self_study_hours": weekly_self_study_hours,
	    "extracurricular_activities": extracurricular_activities,
	    "part_time_job": part_time_job,
	}
	input_features.update(career_encoded)

	# Column order the frame is forced into (expected to match the training data)
	expected_columns = [
	    'absence_days',
	    'weekly_self_study_hours',
	    'extracurricular_activities',
	    'part_time_job',
	    *career_columns,
	]

	# Build the one-row DataFrame and select the columns in that order
	input_df = pd.DataFrame([input_features])[expected_columns]

	# Prediction: everything below runs only when the button reports a click
	if st.button("Predict Scores"):
	    # input_df holds a single row, so take the first row of the model output.
	    prediction = model.predict(input_df)[0]
	    # NOTE(review): assumes the model emits one value per subject in exactly
	    # this order — confirm against the training notebook.
	    subjects = ['Math', 'History', 'Physics', 'Chemistry', 'Biology', 'English', 'Geography']

	    # Per-subject predictions. The heading used to read "Average Score",
	    # which describes the figure shown further down, not this list.
	    st.write("### Predicted Scores by Subject:")
	    for subject, score in zip(subjects, prediction):
	        st.write(f"{subject}: {round(score, 2)}")

	    # sum / count is the mean of the predicted scores, not their total, so
	    # the heading now says "Average" instead of "Total".
	    st.write("### Predicted Average Score:")
	    st.write(round(sum(prediction) / len(prediction), 2))