"""Gradio app that predicts weighted GPA points for a single course.

On import the script loads the course-history CSV, fits a random forest
regressor on four course features, persists the fitted model with joblib,
and builds a Gradio interface (``demo``) around :func:`predict`.
"""

import os

import gradio as gr
import joblib
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# --- Configuration ---
# Make sure you upload your CSV into the Space with the same name.
DATA_PATH = "course_history_cleaned.csv"
MODEL_PATH = "gpa_model.pkl"

# Single source of truth for the feature order, shared by training and
# inference so the two can never drift apart.
FEATURE_COLUMNS = ["AP", "Honors", "Credits_Attempted", "Credits_Earned"]
TARGET_COLUMN = "Weighted_GPA_Points"

# --- Load Dataset ---
df = pd.read_csv(DATA_PATH)

# Convert True/False into integers for modeling.
df["AP"] = df["AP"].astype(int)
df["Honors"] = df["Honors"].astype(int)

# --- Features & Target ---
X = df[FEATURE_COLUMNS]
y = df[TARGET_COLUMN]

# Train-test split (the held-out 20% is kept available for evaluation).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Train Model (Random Forest as best baseline) ---
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Save model for persistence, then reload it so the model being served is
# exactly the artifact that was written to disk.
joblib.dump(model, MODEL_PATH)
model = joblib.load(MODEL_PATH)


# --- Prediction Function ---
def predict(ap, honors, credits_attempted, credits_earned):
    """Predict weighted GPA points for one course.

    Args:
        ap: Truthy if the course is an AP course (Gradio checkbox value).
        honors: Truthy if the course is an Honors course.
        credits_attempted: Number of credits attempted.
        credits_earned: Number of credits earned.

    Returns:
        The model's prediction as a built-in ``float`` rounded to two
        decimal places.

    Raises:
        gr.Error: If either credits field is empty (Gradio passes ``None``
            for a cleared number box), so the UI shows a readable message
            instead of an opaque ``TypeError``.
    """
    if credits_attempted is None or credits_earned is None:
        raise gr.Error(
            "Please enter both Credits Attempted and Credits Earned."
        )

    # Build a one-row DataFrame with the training column names: the model
    # was fitted on a DataFrame, and passing a bare list makes scikit-learn
    # warn about missing feature names on every call.
    features = pd.DataFrame(
        [[
            int(ap),
            int(honors),
            float(credits_attempted),
            float(credits_earned),
        ]],
        columns=FEATURE_COLUMNS,
    )
    prediction = model.predict(features)[0]

    # Convert the NumPy scalar to a plain float before rounding so the
    # value handed back to Gradio is a built-in type.
    return round(float(prediction), 2)


# --- Gradio UI ---
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Checkbox(label="AP"),
        gr.Checkbox(label="Honors"),
        gr.Number(label="Credits Attempted"),
        gr.Number(label="Credits Earned"),
    ],
    outputs=gr.Number(label="Predicted GPA Points"),
    title="GPA Prediction Model",
    description="Enter course details to predict GPA points (weighted).",
)

if __name__ == "__main__":
    demo.launch()