"""Train a random-forest regressor on a test-score CSV and serve it via Gradio.

At import time the script loads ``CSV_PATH``, normalizes the column names,
coerces the known feature columns to numbers, fits a
``RandomForestRegressor`` and builds a Gradio ``Blocks`` UI bound to
``demo``.  The UI is only launched when the file is run as a script.
"""

import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import gradio as gr

# ----------------------------
# Load dataset
CSV_PATH = "expanded_test_score_dataset_filled.csv"
df = pd.read_csv(CSV_PATH)

# Normalize column names: trim surrounding whitespace, then turn spaces and
# hyphens into underscores.  regex=False keeps the replacements literal
# regardless of the pandas version's default.
df.columns = (
    df.columns.str.strip()
    .str.replace(" ", "_", regex=False)
    .str.replace("-", "_", regex=False)
)
print("Columns in dataset:", df.columns.tolist())

# Define features EXACTLY as they appear after normalization
feature_cols = [
    "Weighted",  # Weighted GPA (column looked cut off before, check real name)
    "Credits_Ea",
    "AP",
    "Honors",
    "Teacher_Experience",
    "Study_Hours",
    "Confidence",
    "Procrastination",
    "Field_Average_F",
    "Study_Quality",
    "HW_Hour",
    "Participation",
    "Attendance",
    "Workday_Positivity",
    "Incentive",
    "Field_Proficiency",
]

# Name of the regression target after column normalization.
TARGET_COL = "Predicted_Test_Score"

# Ensure numeric.  Unparseable and missing cells become 0.  The columns that
# actually exist are collected once so training, prediction and the UI all
# use exactly the same feature list in the same order.
available_cols = []
for col in feature_cols:
    if col in df.columns:
        df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)
        available_cols.append(col)
    else:
        print(f"⚠️ Warning: column {col} not found in dataset")

if not available_cols:
    # Fail early with a readable message instead of an opaque error from
    # scikit-learn when it is handed a frame with zero feature columns.
    raise ValueError(
        f"None of the expected feature columns were found in {CSV_PATH!r}"
    )

# Target
if TARGET_COL in df.columns:
    y = pd.to_numeric(df[TARGET_COL], errors="coerce").fillna(0)
else:
    # Best-effort placeholder so the app still starts; the fitted model is
    # meaningless in this case, so say so instead of failing silently.
    print(
        f"⚠️ Warning: target column {TARGET_COL} not found in dataset; "
        "training on an all-zero placeholder target"
    )
    y = pd.Series(0, index=df.index)

X = df[available_cols]

# Train/test split (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Accuracy: R² on the training split and on the held-out split
train_r2 = r2_score(y_train, model.predict(X_train))
test_r2 = r2_score(y_test, model.predict(X_test))


# ----------------------------
# Prediction function
def predict(*vals):
    """Predict a test score from one value per available feature column.

    Args:
        *vals: Feature values, in the same order as ``available_cols``
            (which is also the order of the sliders in the UI).

    Returns:
        A tuple ``(prediction, train_r2, test_r2)`` where the prediction is
        rounded to 2 decimals and the two R² scores to 3 decimals.

    Raises:
        ValueError: If the number of values differs from the number of
            feature columns the model was trained on.
    """
    if len(vals) != len(available_cols):
        raise ValueError(
            f"Expected {len(available_cols)} feature values, got {len(vals)}"
        )
    # Use a DataFrame with the training column names: the model was fitted on
    # a DataFrame, and a bare nested list triggers scikit-learn's
    # "X does not have valid feature names" warning.
    features = pd.DataFrame([vals], columns=available_cols)
    # Convert the NumPy scalar to a plain float before handing it to the UI.
    prediction = float(model.predict(features)[0])
    return round(prediction, 2), round(train_r2, 3), round(test_r2, 3)


# ----------------------------
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 📊 Expanded Test Score Predictor")

    # One slider per feature the model was actually trained on.
    # NOTE(review): every slider uses a fixed 1-10 range; confirm that this
    # matches the scale of each column in the dataset.
    inputs = [gr.Slider(1, 10, step=1, label=col) for col in available_cols]

    output_score = gr.Number(label="Predicted Test Score")
    output_train = gr.Number(label="Training R² Score")
    output_test = gr.Number(label="Testing R² Score")

    btn = gr.Button("Predict")
    btn.click(
        predict,
        inputs=inputs,
        outputs=[output_score, output_train, output_test],
    )

if __name__ == "__main__":
    # Only start the web server when run as a script, so importing the module
    # (e.g. to reuse `model` or `predict`) does not block on the UI.
    demo.launch()