import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import gradio as gr
# ----------------------------
# Load dataset
CSV_PATH = "expanded_test_score_dataset_filled.csv"
df = pd.read_csv(CSV_PATH)

# Canonicalise the headers: trim surrounding whitespace, then turn spaces and
# hyphens into underscores so the names listed below can be matched literally.
df.columns = (
    df.columns
    .str.strip()
    .str.replace(" ", "_")
    .str.replace("-", "_")
)
print("Columns in dataset:", df.columns.tolist())

# Model inputs, spelled the way they look AFTER the header clean-up above.
feature_cols = [
    # Weighted GPA -- this header appeared truncated in an earlier look at the
    # data; TODO confirm the real column name.
    "Weighted",
    "Credits_Ea",
    "AP",
    "Honors",
    "Teacher_Experience",
    "Study_Hours",
    "Confidence",
    "Procrastination",
    "Field_Average_F",
    "Study_Quality",
    "HW_Hour",
    "Participation",
    "Attendance",
    "Workday_Positivity",
    "Incentive",
    "Field_Proficiency",
]

# Force every available feature to a numeric dtype; cells that cannot be
# parsed (or are missing) become 0. A feature absent from the CSV is only
# reported, not treated as fatal.
for name in feature_cols:
    if name not in df.columns:
        print(f"⚠️ Warning: column {name} not found in dataset")
        continue
    df[name] = pd.to_numeric(df[name], errors="coerce").fillna(0)

# Regression target, cleaned the same way as the features. When the column is
# missing an all-zero placeholder is used so the rest of the script still runs.
y = (
    pd.to_numeric(df["Predicted_Test_Score"], errors="coerce").fillna(0)
    if "Predicted_Test_Score" in df.columns
    else pd.Series([0] * len(df))
)

# Design matrix: only the listed features that really exist, in list order.
X = df[[name for name in feature_cols if name in df.columns]]

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest with a fixed seed.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# R² on both partitions; these are reported next to every prediction.
train_r2 = r2_score(y_train, model.predict(X_train))
test_r2 = r2_score(y_test, model.predict(X_test))
# ----------------------------
# Prediction function
def predict(*vals):
    """Predict a test score from one set of feature values.

    Args:
        *vals: One numeric value per model feature, given in the same order
            as the columns of the training frame ``X``.

    Returns:
        A 3-tuple ``(prediction, train_r2, test_r2)``: the predicted score
        rounded to 2 decimals, then the training and testing R² scores
        (computed once when the script starts) rounded to 3 decimals.
    """
    # The forest was fitted on a DataFrame, so give it a one-row frame with
    # the same column labels. A bare nested list makes scikit-learn emit an
    # "X does not have valid feature names" warning on every call and leaves
    # the value-to-column pairing implicit.
    features = pd.DataFrame([list(vals)], columns=X.columns)
    # Convert the NumPy scalar to a plain float before handing it to the UI.
    prediction = float(model.predict(features)[0])
    return round(prediction, 2), round(train_r2, 3), round(test_r2, 3)
# ----------------------------
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 📊 Expanded Test Score Predictor")

    # One 1-10 integer slider per feature that is actually present in the
    # dataset, in feature-list order (the order predict() receives them in).
    inputs = [
        gr.Slider(1, 10, step=1, label=name)
        for name in feature_cols
        if name in df.columns
    ]

    # Read-only result fields: the prediction plus the two R² scores.
    output_score = gr.Number(label="Predicted Test Score")
    output_train = gr.Number(label="Training R² Score")
    output_test = gr.Number(label="Testing R² Score")

    # Clicking the button feeds every slider value into predict().
    btn = gr.Button("Predict")
    btn.click(
        fn=predict,
        inputs=inputs,
        outputs=[output_score, output_train, output_test],
    )

# Start the web app.
demo.launch()
|