Spaces:

V8055
/

project4

Sleeping

App Files Files Community

project4 / app.py

V8055

Update app.py

faa3ed6 verified about 1 year ago

raw

history blame contribute delete

6.45 kB

	# Comprehensive imports with error handling
	import sys
	import subprocess

	# Ensure required packages are installed
	def install(package):
	    """Install *package* with pip, using the interpreter running this script.

	    The command is executed as ``<sys.executable> -m pip install <package>``
	    in a child process, so the package lands in the same environment the
	    app itself is running in.

	    Raises:
	        subprocess.CalledProcessError: if pip exits with a non-zero status.
	    """
	    pip_command = [sys.executable, "-m", "pip", "install", package]
	    subprocess.check_call(pip_command)

	# Install whatever third-party dependencies are missing *before* importing
	# them. Each entry maps the import name to the PyPI distribution providing
	# it; the two differ for scikit-learn, so the pip target must not be derived
	# from the ImportError message.
	import importlib.util

	_REQUIRED_PACKAGES = {
	    "streamlit": "streamlit",
	    "pandas": "pandas",
	    "numpy": "numpy",
	    "sklearn": "scikit-learn",
	    "matplotlib": "matplotlib",
	    "seaborn": "seaborn",
	}

	# Check every dependency, not just the first one that fails, so a fresh
	# environment with several missing packages is fully provisioned in one pass.
	for _module_name, _pip_name in _REQUIRED_PACKAGES.items():
	    if importlib.util.find_spec(_module_name) is None:
	        print(f"Missing package: No module named {_module_name!r}")
	        install(_pip_name)

	# Let the import system notice distributions that were installed just now.
	importlib.invalidate_caches()

	import streamlit as st
	import pandas as pd
	import numpy as np
	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import StandardScaler
	from sklearn.ensemble import RandomForestRegressor
	import matplotlib.pyplot as plt
	import seaborn as sns

	# Seed NumPy's global RNG so the np.random draws used to synthesise the
	# dataset produce the same values on every run of the script.
	np.random.seed(42)

	# Function to generate student data
	def generate_student_data(n_samples: int = 500) -> pd.DataFrame:
	    """Build a synthetic student dataset with a derived ``final_grade`` column.

	    Six feature columns are drawn from NumPy's global random state (seed it
	    beforehand for reproducible output). ``final_grade`` is a weighted sum of
	    those features plus Gaussian noise, min-max rescaled to the range 0-10.

	    Args:
	        n_samples: Number of rows to generate.

	    Returns:
	        A DataFrame with ``n_samples`` rows and the columns
	        ``attendance_rate``, ``study_hours_per_week``,
	        ``previous_semester_gpa``, ``socioeconomic_status``,
	        ``extracurricular_activities``, ``family_support`` and
	        ``final_grade`` (in that order). When the raw grades have no spread
	        (for example a single row) every ``final_grade`` is 0.0; with
	        ``n_samples=0`` the frame is empty.
	    """
	    # The order of these draws determines the values obtained from a given
	    # seed, so it must not be rearranged.
	    data = {
	        'attendance_rate': np.random.uniform(0.5, 1, n_samples),
	        'study_hours_per_week': np.random.uniform(0, 40, n_samples),
	        'previous_semester_gpa': np.random.uniform(2.0, 4.0, n_samples),
	        'socioeconomic_status': np.random.choice([1, 2, 3], n_samples),
	        'extracurricular_activities': np.random.randint(0, 5, n_samples),
	        'family_support': np.random.choice([0, 1], n_samples),
	    }

	    # Target variable: weighted combination of the features plus noise.
	    raw_grade = (
	        0.3 * data['attendance_rate'] * 10 +
	        0.25 * data['study_hours_per_week'] +
	        0.2 * data['previous_semester_gpa'] * 2 +
	        0.1 * (data['socioeconomic_status'] * 2) +
	        0.05 * data['extracurricular_activities'] +
	        0.1 * (data['family_support'] * 3) +
	        np.random.normal(0, 1, n_samples)
	    )

	    if raw_grade.size == 0:
	        # Nothing to rescale; np.min/np.max would raise on an empty array.
	        final_grade = raw_grade
	    else:
	        lowest = np.min(raw_grade)
	        spread = np.max(raw_grade) - lowest
	        if spread == 0:
	            # A zero range would divide by zero and fill the column with
	            # NaN; map the degenerate case to the bottom of the scale.
	            final_grade = np.zeros_like(raw_grade)
	        else:
	            # Min-max rescale to 0-10; the clip only guards against
	            # floating-point overshoot at the ends.
	            final_grade = np.clip((raw_grade - lowest) / spread * 10, 0, 10)

	    data['final_grade'] = final_grade
	    return pd.DataFrame(data)

	# Main application logic
	# (minimum grade, banner colour, label) in descending order of threshold.
	_PERFORMANCE_BANDS = (
	    (8, "green", "Excellent"),
	    (6, "blue", "Good"),
	    (4, "orange", "Average"),
	)


	def _train_model(df):
	    """Fit a StandardScaler and a random forest on *df*.

	    Returns a ``(feature_names, scaler, model)`` tuple, where
	    ``feature_names`` lists the training columns in the order the scaler and
	    the model expect them.
	    """
	    features = df.drop('final_grade', axis=1)
	    target = df['final_grade']

	    # Only the training part of the split is used; the held-out part is
	    # discarded, but the split arguments are kept so the fitted model stays
	    # the same.
	    X_train, _, y_train, _ = train_test_split(
	        features, target, test_size=0.2, random_state=42
	    )

	    scaler = StandardScaler()
	    X_train_scaled = scaler.fit_transform(X_train)

	    model = RandomForestRegressor(n_estimators=100, random_state=42)
	    model.fit(X_train_scaled, y_train)
	    return list(features.columns), scaler, model


	def _performance_band(grade):
	    """Return the ``(colour, label)`` pair for a predicted grade."""
	    for threshold, color, label in _PERFORMANCE_BANDS:
	        if grade >= threshold:
	            return color, label
	    return "red", "Needs Improvement"


	def _show_feature_importance(feature_names, model):
	    """Render a bar chart of the model's feature importances in the app."""
	    feature_importance = pd.DataFrame({
	        'feature': feature_names,
	        'importance': model.feature_importances_
	    }).sort_values('importance', ascending=False)

	    fig, ax = plt.subplots(figsize=(10, 6))
	    sns.barplot(x='importance', y='feature', data=feature_importance, ax=ax)
	    # Label through the Axes rather than pyplot's implicit "current" figure.
	    ax.set_title('Factors Influencing Student Performance')
	    ax.set_xlabel('Importance Score')
	    st.pyplot(fig)
	    # pyplot keeps every figure it creates alive until it is closed; release
	    # it so repeated runs of the script do not accumulate figures.
	    plt.close(fig)


	def main():
	    """Train the model and render the Streamlit prediction page.

	    Generates the synthetic dataset, fits the scaler and random forest,
	    builds the sidebar inputs, and - once the "Predict Performance" button
	    is pressed - shows the predicted grade, its performance band and the
	    feature-importance chart, followed by a short description of the model.
	    """
	    # Generate dataset and prepare model
	    feature_names, scaler, rf_model = _train_model(generate_student_data())

	    # Streamlit App Configuration
	    st.set_page_config(page_title="Student Performance Predictor", page_icon="🎓")

	    # App Title
	    st.title("🎓 Student Performance Prediction Model")

	    # Sidebar for Input
	    st.sidebar.header("Student Information")

	    # Input Sliders and Selectors
	    attendance_rate = st.sidebar.slider(
	        "Attendance Rate",
	        min_value=0.5,
	        max_value=1.0,
	        value=0.85,
	        step=0.05
	    )

	    study_hours = st.sidebar.slider(
	        "Weekly Study Hours",
	        min_value=0,
	        max_value=40,
	        value=25
	    )

	    prev_gpa = st.sidebar.slider(
	        "Previous Semester GPA",
	        min_value=2.0,
	        max_value=4.0,
	        value=3.5,
	        step=0.1
	    )

	    status_labels = {1: "Low", 2: "Medium", 3: "High"}
	    socioeconomic_status = st.sidebar.selectbox(
	        "Socioeconomic Status",
	        [1, 2, 3],
	        index=1,
	        format_func=lambda x: status_labels[x]
	    )

	    extracurricular_activities = st.sidebar.slider(
	        "Extracurricular Activities",
	        min_value=0,
	        max_value=4,
	        value=2
	    )

	    family_support = st.sidebar.selectbox(
	        "Family Support",
	        [0, 1],
	        index=1,
	        format_func=lambda x: "Yes" if x == 1 else "No"
	    )

	    # Predict Button
	    # NOTE(review): everything down to the feature-importance chart is shown
	    # only after the button is pressed - confirm this matches the intended
	    # page layout.
	    if st.sidebar.button("Predict Performance"):
	        # Prepare input data as a one-row frame keyed by feature name, then
	        # select the columns in training order. The scaler was fitted on a
	        # DataFrame, so this keeps the feature names consistent and removes
	        # the dependence on positional ordering.
	        input_data = pd.DataFrame([{
	            'attendance_rate': attendance_rate,
	            'study_hours_per_week': study_hours,
	            'previous_semester_gpa': prev_gpa,
	            'socioeconomic_status': socioeconomic_status,
	            'extracurricular_activities': extracurricular_activities,
	            'family_support': family_support,
	        }])[feature_names]

	        # Make prediction
	        input_scaled = scaler.transform(input_data)
	        predicted_grade = np.round(rf_model.predict(input_scaled)[0], 2)

	        # Display prediction
	        st.subheader("Prediction Results")

	        # Color-coded performance indicator
	        color, performance = _performance_band(predicted_grade)

	        st.markdown(f"""
	        <div style="background-color:{color}; padding:10px; border-radius:10px;">
	        <h2 style="color:white; text-align:center;">
	        Predicted Final Grade: {predicted_grade}/10
	        <br>Performance: {performance}
	        </h2>
	        </div>
	        """, unsafe_allow_html=True)

	        # Feature Importance Visualization
	        st.subheader("Feature Importance")
	        _show_feature_importance(feature_names, rf_model)

	    # Additional Information
	    st.markdown("""
	    ### About the Model
	    This machine learning model predicts student performance based on:
	    - Attendance Rate
	    - Weekly Study Hours
	    - Previous Semester GPA
	    - Socioeconomic Status
	    - Extracurricular Activities
	    - Family Support
	    """)

	# Entry point: build and render the app only when this file is executed as
	# a script, not when it is imported as a module.
	if __name__ == "__main__":
	    main()