File size: 2,548 Bytes
8ae80cf
 
5daf30d
9e6aa83
5daf30d
8ae80cf
2e17c37
 
a0265a1
 
8ae80cf
a0265a1
55f6574
 
a0265a1
 
 
2e17c37
a0265a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b6ca6f6
252a2ba
a0265a1
2e17c37
a0265a1
 
 
 
 
 
 
 
 
 
252a2ba
a0265a1
8ae80cf
2e17c37
 
8ae80cf
2e17c37
 
 
5daf30d
2e17c37
 
 
5daf30d
2e17c37
 
a0265a1
 
2e17c37
 
252a2ba
2e17c37
 
5daf30d
2e17c37
5daf30d
2e17c37
 
a0265a1
 
5daf30d
 
 
 
 
 
2e17c37
252a2ba
5daf30d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import gradio as gr

# ----------------------------
# Load dataset
CSV_PATH = "expanded_test_score_dataset_filled.csv"
TARGET_COL = "Predicted_Test_Score"
df = pd.read_csv(CSV_PATH)

# Normalize column names: trim surrounding whitespace, then turn spaces and
# hyphens into underscores. regex=False states the literal-substring intent
# explicitly instead of relying on the pandas default for `regex`.
df.columns = (
    df.columns.str.strip()
    .str.replace(" ", "_", regex=False)
    .str.replace("-", "_", regex=False)
)

print("Columns in dataset:", df.columns.tolist())

# Feature names, spelled EXACTLY as they appear after the normalization above.
# Names that are absent from the CSV are reported below and then skipped.
feature_cols = [
    "Weighted",           # NOTE(review): possibly a truncated "Weighted GPA" header - confirm against the CSV
    "Credits_Ea",
    "AP",
    "Honors",
    "Teacher_Experience",
    "Study_Hours",
    "Confidence",
    "Procrastination",
    "Field_Average_F",
    "Study_Quality",
    "HW_Hour",
    "Participation",
    "Attendance",
    "Workday_Positivity",
    "Incentive",
    "Field_Proficiency",
]

# Coerce every available feature to numeric; unparseable or missing cells
# become 0 so the regressor never sees NaN.
for col in feature_cols:
    if col in df.columns:
        df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)
    else:
        print(f"⚠️ Warning: column {col} not found in dataset")

# Target. Rows without a usable score are dropped rather than filled with 0:
# a fabricated zero label would be learned as if it were a real test score.
if TARGET_COL in df.columns:
    y = pd.to_numeric(df[TARGET_COL], errors="coerce")
    n_missing = int(y.isna().sum())
    if 0 < n_missing < len(y):
        print(f"⚠️ Warning: dropping {n_missing} rows with missing or non-numeric {TARGET_COL}")
        labelled = y.notna()
        # Filter df and y with the same mask so features and target stay aligned.
        df = df.loc[labelled].reset_index(drop=True)
        y = y.loc[labelled].reset_index(drop=True)
    elif n_missing:
        # Not a single usable score: keep the app bootable with a placeholder.
        print(f"⚠️ Warning: column {TARGET_COL} has no numeric values; using placeholder target")
        y = y.fillna(0)
else:
    # Same best-effort placeholder as before, but no longer silent.
    print(f"⚠️ Warning: column {TARGET_COL} not found in dataset; using placeholder target")
    y = pd.Series([0] * len(df))

# Feature matrix: only the requested columns that actually exist, in
# feature_cols order.
X = df[[c for c in feature_cols if c in df.columns]]

# Train/test split (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Goodness of fit (R²) on the training and held-out sets
train_r2 = r2_score(y_train, model.predict(X_train))
test_r2 = r2_score(y_test, model.predict(X_test))

# ----------------------------
# Prediction function
def predict(*vals):
    """Predict a test score for one set of input values.

    Args:
        *vals: One numeric value per model feature, in the same order as the
            columns the model was fitted on.

    Returns:
        A 3-tuple ``(prediction, train_r2, test_r2)``: the predicted score
        rounded to 2 decimals, followed by the module-level training and
        testing R² scores rounded to 3 decimals.
    """
    # The model is fitted on a DataFrame, so scikit-learn (>= 1.0) records the
    # column names and warns on every call if it is later handed an unnamed
    # nested list. Rebuild a one-row frame carrying those names; on versions
    # without `feature_names_in_` this falls back to default column labels.
    names = getattr(model, "feature_names_in_", None)
    features = pd.DataFrame([list(vals)], columns=names)
    # Convert the NumPy scalar to a plain float before rounding.
    prediction = float(model.predict(features)[0])
    return round(prediction, 2), round(train_r2, 3), round(test_r2, 3)

# ----------------------------
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 📊 Expanded Test Score Predictor")

    # One integer slider (1 to 10) per requested feature that exists in the
    # dataset, created in feature_cols order; the click handler passes their
    # values to predict positionally in this same order.
    inputs = [
        gr.Slider(minimum=1, maximum=10, step=1, label=name)
        for name in feature_cols
        if name in df.columns
    ]

    # Read-only result fields, filled from predict's 3-tuple return value.
    output_score = gr.Number(label="Predicted Test Score")
    output_train = gr.Number(label="Training R² Score")
    output_test = gr.Number(label="Testing R² Score")

    btn = gr.Button("Predict")
    btn.click(
        fn=predict,
        inputs=inputs,
        outputs=[output_score, output_train, output_test],
    )

# Start the web server for the app.
demo.launch()