Spaces:

Shortheadband
/

GradeOP

Sleeping

App Files Files Community

GradeOP / app.py

Shortheadband

Update app.py

a0265a1 verified 4 months ago

raw

history blame contribute delete

2.55 kB

	import pandas as pd
	from sklearn.ensemble import RandomForestRegressor
	from sklearn.model_selection import train_test_split
	from sklearn.metrics import r2_score
	import gradio as gr

	# ----------------------------
	# Read the training data from disk
	CSV_PATH = "expanded_test_score_dataset_filled.csv"
	df = pd.read_csv(CSV_PATH)

	# Clean up the header row: trim surrounding whitespace, then turn both
	# spaces and hyphens into underscores so columns can be looked up by a
	# predictable name.
	df.columns = (
	    df.columns
	    .str.strip()
	    .str.replace(" ", "_")
	    .str.replace("-", "_")
	)

	# Echo the cleaned names so mismatches with the feature list are easy to spot
	print("Columns in dataset:", list(df.columns))

	# Feature columns, spelled exactly as they appear AFTER header normalization.
	# NOTE(review): the first entry was flagged as possibly cut off in the source
	# data ("Weighted" GPA) -- confirm the real column name against the CSV.
	feature_cols = [
	    "Weighted",
	    "Credits_Ea",
	    "AP",
	    "Honors",
	    "Teacher_Experience",
	    "Study_Hours",
	    "Confidence",
	    "Procrastination",
	    "Field_Average_F",
	    "Study_Quality",
	    "HW_Hour",
	    "Participation",
	    "Attendance",
	    "Workday_Positivity",
	    "Incentive",
	    "Field_Proficiency",
	]

	# Force every available feature column to a numeric dtype. Values that
	# cannot be parsed become NaN (errors="coerce") and are then replaced by 0.
	# Columns missing from the dataset are reported and skipped.
	for col in feature_cols:
	    if col not in df.columns:
	        print(f"⚠️ Warning: column {col} not found in dataset")
	        continue
	    df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)

	# Regression target: the numeric "Predicted_Test_Score" column (unparseable
	# or missing values become 0). When the column is absent altogether, fall
	# back to an all-zero placeholder series of the same length as the data.
	y = (
	    pd.to_numeric(df["Predicted_Test_Score"], errors="coerce").fillna(0)
	    if "Predicted_Test_Score" in df.columns
	    else pd.Series([0] * len(df))
	)

	# Feature matrix: only the requested columns that actually exist, kept in
	# the order given by feature_cols.
	X = df[[name for name in feature_cols if name in df.columns]]

	# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
	# reproducible between runs.
	X_train, X_test, y_train, y_test = train_test_split(
	    X,
	    y,
	    test_size=0.2,
	    random_state=42,
	)

	# Fit a 100-tree random forest on the training portion
	model = RandomForestRegressor(n_estimators=100, random_state=42).fit(
	    X_train, y_train
	)

	# Goodness of fit (R²) on the training rows and on the held-out rows
	train_r2 = r2_score(y_train, model.predict(X_train))
	test_r2 = r2_score(y_test, model.predict(X_test))

	# ----------------------------
	# Prediction function
	def predict(*vals):
	    """Predict a test score for one set of feature values.

	    Args:
	        *vals: One value per model feature, in the same order as the
	            columns of ``X`` (the matrix the model was trained on).

	    Returns:
	        A 3-tuple ``(prediction, train_r2, test_r2)``: the predicted score
	        rounded to 2 decimals, followed by the training and testing R²
	        scores rounded to 3 decimals.

	    Raises:
	        ValueError: If the number of values does not match the number of
	            columns in ``X``.
	    """
	    # The model was fitted on a DataFrame, so query it with a DataFrame that
	    # carries the same column labels. Passing a bare nested list makes
	    # scikit-learn warn about missing feature names on every call and
	    # skips its column-consistency check.
	    features = pd.DataFrame([vals], columns=X.columns)
	    prediction = float(model.predict(features)[0])
	    return round(prediction, 2), round(train_r2, 3), round(test_r2, 3)

	# ----------------------------
	# Gradio UI
	with gr.Blocks() as demo:
	    gr.Markdown("# 📊 Expanded Test Score Predictor")

	    # One integer slider (1 to 10) for each feature column that exists in
	    # the dataset, created in feature_cols order so the values reach
	    # predict() in that same order.
	    inputs = [
	        gr.Slider(minimum=1, maximum=10, step=1, label=name)
	        for name in feature_cols
	        if name in df.columns
	    ]

	    # Read-only result fields
	    output_score = gr.Number(label="Predicted Test Score")
	    output_train = gr.Number(label="Training R² Score")
	    output_test = gr.Number(label="Testing R² Score")

	    # Clicking the button runs predict() on the current slider values
	    btn = gr.Button("Predict")
	    btn.click(
	        fn=predict,
	        inputs=inputs,
	        outputs=[output_score, output_train, output_test],
	    )

	demo.launch()