# GradeOP / app.py
# Shortheadband's picture
# Update app.py
# a0265a1 verified
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import gradio as gr
# ----------------------------
# Dataset loading
# CSV_PATH is a relative path, so it is resolved against the current
# working directory of the process.
CSV_PATH = "expanded_test_score_dataset_filled.csv"
df = pd.read_csv(CSV_PATH)

# Header clean-up: trim surrounding whitespace, then turn spaces and hyphens
# into underscores so the headers can match the names in ``feature_cols``.
_clean_headers = df.columns.str.strip()
for _separator in (" ", "-"):
    _clean_headers = _clean_headers.str.replace(_separator, "_")
df.columns = _clean_headers

# Echo the final header list so mismatched feature names are easy to spot.
print("Columns in dataset:", df.columns.tolist())
# Feature columns, spelled exactly as the headers read after normalization.
# Order matters: the training matrix, the UI sliders and the positional
# arguments of ``predict`` all follow this sequence.
feature_cols = [
    # NOTE(review): presumably the weighted GPA; the header looked truncated
    # earlier, so confirm the real column name against the CSV.
    "Weighted",
    "Credits_Ea", "AP", "Honors",
    "Teacher_Experience", "Study_Hours", "Confidence", "Procrastination",
    "Field_Average_F", "Study_Quality", "HW_Hour",
    "Participation", "Attendance", "Workday_Positivity",
    "Incentive", "Field_Proficiency",
]
# Coerce every available feature column to numbers.
# Values that cannot be parsed become NaN (errors="coerce") and are then
# replaced with 0; feature names absent from the dataset are only reported.
for col in feature_cols:
    if col not in df.columns:
        print(f"⚠️ Warning: column {col} not found in dataset")
        continue
    _as_numbers = pd.to_numeric(df[col], errors="coerce")
    df[col] = _as_numbers.fillna(0)
# Target
# ``y`` is the column the model learns to predict. Both fallbacks below keep
# the app running, but they are now reported instead of happening silently,
# matching the warning already printed for missing feature columns.
if "Predicted_Test_Score" in df.columns:
    _raw_target = pd.to_numeric(df["Predicted_Test_Score"], errors="coerce")
    _unusable_targets = int(_raw_target.isna().sum())
    if _unusable_targets:
        # These rows are trained with a score of 0, which skews the model.
        print(
            f"⚠️ Warning: {_unusable_targets} missing or non-numeric "
            "Predicted_Test_Score values were replaced with 0"
        )
    y = _raw_target.fillna(0)
else:
    # Without a target the model can only learn to predict 0.
    print(
        "⚠️ Warning: column Predicted_Test_Score not found in dataset; "
        "using an all-zero placeholder target"
    )
    # Build the placeholder on df's own index so it stays aligned with X.
    y = pd.Series(0, index=df.index)

# Feature matrix: only the requested columns that exist, in feature_cols order.
X = df[[c for c in feature_cols if c in df.columns]]
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# the same from run to run.
_split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = _split_parts

# Fit a 100-tree random forest on the training portion (``fit`` returns the
# estimator itself, so construction and fitting can be chained).
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Goodness of fit (R²) on the rows the model saw and on the held-out rows.
_train_predictions = model.predict(X_train)
_test_predictions = model.predict(X_test)
train_r2 = r2_score(y_train, _train_predictions)
test_r2 = r2_score(y_test, _test_predictions)
# ----------------------------
# Prediction function
def predict(*vals):
    """Predict a test score for one set of feature values.

    Args:
        *vals: One value per model feature, given positionally in the same
            order as the columns the model was trained on.

    Returns:
        A 3-tuple ``(prediction, train_r2, test_r2)``: the predicted score
        rounded to 2 decimals, followed by the module-level training and
        testing R² scores rounded to 3 decimals.
    """
    # The model is fitted on a DataFrame, so scikit-learn records the column
    # names. Predicting from a bare nested list makes it emit an
    # "X does not have valid feature names" warning on every call, so hand it
    # a one-row frame with the same columns instead.
    fitted_names = getattr(model, "feature_names_in_", None)
    if fitted_names is not None:
        features = pd.DataFrame([vals], columns=fitted_names)
    else:
        # Model was fitted without column names; keep the plain 2-D input.
        features = [list(vals)]

    # Convert the NumPy scalar to a built-in float before rounding.
    prediction = float(model.predict(features)[0])
    return round(prediction, 2), round(train_r2, 3), round(test_r2, 3)
# ----------------------------
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 📊 Expanded Test Score Predictor")

    # One slider per feature column present in the dataset, created in
    # feature_cols order so the values reach ``predict`` in that order.
    # NOTE(review): every slider uses a fixed 1-10 range with step 1; confirm
    # this matches the value range of each dataset column.
    inputs = [
        gr.Slider(1, 10, step=1, label=col)
        for col in feature_cols
        if col in df.columns
    ]

    # Read-outs for the three values returned by ``predict``.
    output_score = gr.Number(label="Predicted Test Score")
    output_train = gr.Number(label="Training R² Score")
    output_test = gr.Number(label="Testing R² Score")
    result_fields = [output_score, output_train, output_test]

    btn = gr.Button("Predict")
    btn.click(predict, inputs=inputs, outputs=result_fields)

# Start the web server for the interface.
demo.launch()