Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| from sklearn.ensemble import RandomForestRegressor | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.metrics import r2_score | |
| import gradio as gr | |
| # ---------------------------- | |
| # Load dataset | |
# Path of the training data CSV.
CSV_PATH = "expanded_test_score_dataset_filled.csv"
df = pd.read_csv(CSV_PATH)

# Normalize the header in three steps: trim surrounding whitespace, then
# turn spaces and hyphens into underscores.
trimmed_names = df.columns.str.strip()
underscored_names = trimmed_names.str.replace(" ", "_")
df.columns = underscored_names.str.replace("-", "_")

# Echo the final names so they can be compared with the feature list.
print("Columns in dataset:", df.columns.tolist())
# Feature columns, spelled EXACTLY as they appear after header normalization.
# NOTE(review): several names ("Weighted", "Credits_Ea", "Field_Average_F",
# "HW_Hour") look truncated -- compare them with the printed column list.
feature_cols = [
    "Weighted",  # Weighted GPA (column looked cut off before, check real name)
    "Credits_Ea",
    "AP",
    "Honors",
    "Teacher_Experience",
    "Study_Hours",
    "Confidence",
    "Procrastination",
    "Field_Average_F",
    "Study_Quality",
    "HW_Hour",
    "Participation",
    "Attendance",
    "Workday_Positivity",
    "Incentive",
    "Field_Proficiency",
]

# Coerce every available feature to numbers; cells that are missing or cannot
# be parsed become 0.  Columns absent from the CSV are reported here and are
# left out of X below.
for col in feature_cols:
    if col in df.columns:
        df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)
    else:
        print(f"⚠️ Warning: column {col} not found in dataset")

# Target: same numeric coercion as the features.
target_col = "Predicted_Test_Score"
if target_col in df.columns:
    y = pd.to_numeric(df[target_col], errors="coerce").fillna(0)
else:
    # Keep the script runnable without a target, but report it the same way
    # missing features are reported: a model trained on an all-zero
    # placeholder produces meaningless predictions and R² values.
    print(
        f"⚠️ Warning: column {target_col} not found in dataset; "
        "using an all-zero placeholder target"
    )
    # Built on df's own index so the placeholder lines up with the rows of X.
    y = pd.Series(0, index=df.index)

# Feature matrix: only the requested columns that actually exist, in the
# order given by feature_cols.
X = df[[c for c in feature_cols if c in df.columns]]
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a seeded random forest of 100 trees on the training rows.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# R² (coefficient of determination) on the training rows and on the
# held-out rows; both values are reported alongside every prediction.
train_predictions = model.predict(X_train)
train_r2 = r2_score(y_train, train_predictions)
test_predictions = model.predict(X_test)
test_r2 = r2_score(y_test, test_predictions)
| # ---------------------------- | |
| # Prediction function | |
def predict(*vals):
    """Predict a test score for one set of input values.

    Args:
        *vals: One numeric value per model feature, in the same order as the
            columns the model was trained on.

    Returns:
        A 3-tuple ``(score, train_r2, test_r2)``: the predicted score rounded
        to 2 decimals, followed by the module-level training and testing R²
        values rounded to 3 decimals.
    """
    row = [list(vals)]

    # The model was fitted on a DataFrame, so scikit-learn recorded the column
    # names.  Predicting from a bare list makes it emit an "X does not have
    # valid feature names" UserWarning on every call, so rebuild a one-row
    # frame with the fitted names.  If the names are unavailable or the value
    # count does not match, fall back to the plain list and let scikit-learn
    # validate the input exactly as before.
    fitted_names = getattr(model, "feature_names_in_", None)
    if fitted_names is not None and len(fitted_names) == len(vals):
        features = pd.DataFrame(row, columns=list(fitted_names))
    else:
        features = row

    # Convert the NumPy scalar to a built-in float before rounding so the
    # caller receives plain Python numbers.
    prediction = float(model.predict(features)[0])
    return round(prediction, 2), round(train_r2, 3), round(test_r2, 3)
| # ---------------------------- | |
| # Gradio UI | |
# Build the interface: a title, one slider per feature column present in the
# dataset, three number outputs, and a button wired to ``predict``.
with gr.Blocks() as demo:
    gr.Markdown("# 📊 Expanded Test Score Predictor")

    # Same filter and order as the training matrix, so the slider values
    # reach ``predict`` in the order the model expects.
    # NOTE(review): every slider is fixed to 1-10 in steps of 1; confirm that
    # this matches the value range of each training column.
    available_features = [name for name in feature_cols if name in df.columns]
    inputs = [
        gr.Slider(1, 10, step=1, label=name) for name in available_features
    ]

    output_score = gr.Number(label="Predicted Test Score")
    output_train = gr.Number(label="Training R² Score")
    output_test = gr.Number(label="Testing R² Score")
    result_fields = [output_score, output_train, output_test]

    btn = gr.Button("Predict")
    btn.click(predict, inputs=inputs, outputs=result_fields)

demo.launch()