import pandas as pd
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import joblib
import os

# --- Configuration ---
# Make sure you upload your CSV into the Space with the same name
DATA_PATH = "course_history_cleaned.csv"
# Where the fitted model is cached between runs.
# Delete this file to force retraining (e.g. after the CSV changes).
MODEL_PATH = "gpa_model.pkl"

# Column names expected in the CSV.
FEATURE_COLUMNS = ["AP", "Honors", "Credits_Attempted", "Credits_Earned"]
TARGET_COLUMN = "Weighted_GPA_Points"


def _train_model():
    """Fit the random forest on the CSV at DATA_PATH and save it to MODEL_PATH.

    Returns:
        The fitted RandomForestRegressor.
    """
    df = pd.read_csv(DATA_PATH)

    # Convert True/False into integers for modeling
    df["AP"] = df["AP"].astype(int)
    df["Honors"] = df["Honors"].astype(int)

    X = df[FEATURE_COLUMNS]
    y = df[TARGET_COLUMN]

    # The model is fitted on the 80% training portion only; the held-out
    # 20% is currently unused.
    X_train, _X_test, y_train, _y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Random Forest as best baseline
    fitted = RandomForestRegressor(n_estimators=100, random_state=42)
    fitted.fit(X_train, y_train)

    # Save model for persistence
    joblib.dump(fitted, MODEL_PATH)
    return fitted


# --- Load or Train Model ---
# Reuse the cached model when one exists so a restart does not retrain;
# otherwise train from the CSV and cache the result.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code.
# Only load a model file that you produced yourself.
if os.path.exists(MODEL_PATH):
    model = joblib.load(MODEL_PATH)
else:
    model = _train_model()

# --- Prediction Function ---
def predict(ap, honors, credits_attempted, credits_earned):
    """Predict weighted GPA points for a single course.

    Args:
        ap: Whether the course is AP (truthy/falsy; converted with int()).
        honors: Whether the course is Honors (truthy/falsy; converted with int()).
        credits_attempted: Credits attempted for the course.
        credits_earned: Credits earned for the course.

    Returns:
        The model's prediction as a plain Python float rounded to 2 decimals.

    Raises:
        gr.Error: If either credits value is missing (None).
    """
    # An empty Number field arrives as None; report that to the user
    # instead of failing inside float() with an opaque TypeError.
    if credits_attempted is None or credits_earned is None:
        raise gr.Error("Please enter both Credits Attempted and Credits Earned.")

    # The model is fitted on a DataFrame with these column names, so predict
    # with the same names and order; a bare list makes scikit-learn warn
    # about missing feature names on every call.
    features = pd.DataFrame(
        [[int(ap), int(honors), float(credits_attempted), float(credits_earned)]],
        columns=["AP", "Honors", "Credits_Attempted", "Credits_Earned"],
    )
    prediction = model.predict(features)[0]
    # Convert the NumPy scalar to a built-in float before rounding.
    return round(float(prediction), 2)

# --- Gradio UI ---
# One input widget per predict() argument, in the same order as its signature.
_input_widgets = [
    gr.Checkbox(label="AP"),
    gr.Checkbox(label="Honors"),
    gr.Number(label="Credits Attempted"),
    gr.Number(label="Credits Earned"),
]
# Single numeric output showing the value returned by predict().
_output_widget = gr.Number(label="Predicted GPA Points")

demo = gr.Interface(
    fn=predict,
    inputs=_input_widgets,
    outputs=_output_widget,
    title="GPA Prediction Model",
    description="Enter course details to predict GPA points (weighted).",
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()