import gradio as gr
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# ------------------- Load Data -------------------
# The CSV path is relative, so it is resolved against the current working
# directory at import time.
df = pd.read_csv("Balanced_Placement_Data.csv")

# Input columns, in the order the prediction function and the UI pass them.
features = [
    'ssc_percentage', 'hsc_percentage', 'undergrad_degree', 'Graduate_degree_percentage',
    'emp_test_percentage', 'Internship_Experience_Months', 'Certifications_Count',
    'Technical_Skills_Score', 'Soft_Skills_Score', 'Hackathons_Participated',
    'Resume_Score', 'Online_Course_Count', 'Social_Media_Presence'
]
# Both targets are predicted together by a multi-output classifier.
target_columns = ['Placement_Status', 'Domain_of_Interest']

X = df[features]
y = df[target_columns]

categorical_features = ['undergrad_degree']
# Preserve the declared order of `features`. A set difference would produce
# an order that varies between interpreter runs (string hash randomisation),
# which changes the column order handed to the classifiers and can make the
# seeded models non-reproducible.
numerical_features = [name for name in features if name not in categorical_features]

# Scale numeric columns and one-hot encode the categorical one.
# NOTE(review): OneHotEncoder keeps its default handle_unknown="error", so a
# degree value never seen during fit raises at predict time.
preprocessor = ColumnTransformer([
    ("num", StandardScaler(), numerical_features),
    ("cat", OneHotEncoder(drop="first"), categorical_features)
])

# Candidate base classifiers, keyed by the display name used in the results.
# NOTE(review): SVC(probability=True) enables predict_proba, but nothing in
# the visible code calls it - confirm whether it is still needed.
models = {
    "Random Forest": RandomForestClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "KNN": KNeighborsClassifier(),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "SVM": SVC(probability=True)
}

# ------------------- Train Models -------------------
def train_models():
    """Fit and score one multi-output pipeline per entry in ``models``.

    The module-level ``X`` / ``y`` are split 80/20 with a fixed seed. Each
    candidate classifier is wrapped in ``MultiOutputClassifier`` behind the
    shared preprocessing recipe, fitted on the training part and scored on
    the held-out part.

    Returns:
        dict: maps each model name to a dict with the keys
        ``"Placement Accuracy"``, ``"Domain Accuracy"`` (accuracy on the
        test split for the respective target) and ``"Model"`` (the fitted
        pipeline).
    """
    # Local import keeps this block self-contained; scikit-learn is already
    # a dependency of this file.
    from sklearn.base import clone

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # predict() yields one column per target in the column order of y_train,
    # so look the positions up by name instead of hard-coding 0 and 1.
    placement_idx = y_train.columns.get_loc("Placement_Status")
    domain_idx = y_train.columns.get_loc("Domain_of_Interest")

    results = {}
    for name, clf in models.items():
        # Pipeline does not copy its steps. Without clone() every stored
        # pipeline would alias the same module-level preprocessor object,
        # which each later fit() would silently refit.
        pipe = Pipeline([
            ("preprocessor", clone(preprocessor)),
            ("classifier", MultiOutputClassifier(clone(clf)))
        ])
        pipe.fit(X_train, y_train)
        y_pred = pipe.predict(X_test)

        placement_acc = accuracy_score(
            y_test["Placement_Status"], y_pred[:, placement_idx]
        )
        domain_acc = accuracy_score(
            y_test["Domain_of_Interest"], y_pred[:, domain_idx]
        )

        results[name] = {
            "Placement Accuracy": placement_acc,
            "Domain Accuracy": domain_acc,
            "Model": pipe
        }
    return results

# Train every candidate once at import time, then keep the pipeline whose
# placement and domain accuracies add up to the highest total. max() returns
# the first maximal key it meets, so ties go to the earliest entry.
results = train_models()
best_model_name = max(
    results,
    key=lambda candidate: (
        results[candidate]["Placement Accuracy"]
        + results[candidate]["Domain Accuracy"]
    ),
)
best_model = results[best_model_name]["Model"]

# ------------------- Prediction Function -------------------
def _as_python_scalar(value):
    """Return *value* as a built-in Python scalar when it is a NumPy scalar."""
    # NumPy scalars expose .item(); plain Python values are passed through.
    return value.item() if hasattr(value, "item") else value


def predict_placement_and_domain(
    ssc_percentage, hsc_percentage, undergrad_degree, Graduate_degree_percentage,
    emp_test_percentage, Internship_Experience_Months, Certifications_Count,
    Technical_Skills_Score, Soft_Skills_Score, Hackathons_Participated,
    Resume_Score, Online_Course_Count, Social_Media_Presence
):
    """Predict placement status and domain of interest for one candidate.

    The arguments correspond one-to-one to the feature columns the model was
    trained on. They are assembled into a single-row DataFrame and passed to
    ``best_model``.

    Returns:
        dict: ``"Placement Status"`` and ``"Domain of Interest"`` (the two
        predicted labels as built-in Python values) plus ``"Best Model"``
        (the name of the pipeline that produced them).

    Raises:
        gr.Error: if any input is ``None``.
    """
    user_input = {
        "ssc_percentage": ssc_percentage,
        "hsc_percentage": hsc_percentage,
        "undergrad_degree": undergrad_degree,
        "Graduate_degree_percentage": Graduate_degree_percentage,
        "emp_test_percentage": emp_test_percentage,
        "Internship_Experience_Months": Internship_Experience_Months,
        "Certifications_Count": Certifications_Count,
        "Technical_Skills_Score": Technical_Skills_Score,
        "Soft_Skills_Score": Soft_Skills_Score,
        "Hackathons_Participated": Hackathons_Participated,
        "Resume_Score": Resume_Score,
        "Online_Course_Count": Online_Course_Count,
        "Social_Media_Presence": Social_Media_Presence
    }

    # An emptied Number box or an unselected Dropdown may arrive as None.
    # Report that clearly instead of letting the pipeline fail on it.
    missing = [field for field, value in user_input.items() if value is None]
    if missing:
        raise gr.Error("Please provide a value for: " + ", ".join(missing))

    input_df = pd.DataFrame([user_input])
    # One input row -> one output row holding the two target predictions.
    placement, domain = best_model.predict(input_df)[0]

    # Convert NumPy scalars so the result is plain, JSON-friendly Python data.
    return {
        "Placement Status": _as_python_scalar(placement),
        "Domain of Interest": _as_python_scalar(domain),
        "Best Model": best_model_name
    }

# ------------------- Gradio UI -------------------
# Offer only degree values that actually occur in the data. dropna() keeps a
# NaN entry out of the list and tolist() yields built-in Python values.
_degree_choices = df['undergrad_degree'].dropna().unique().tolist()

with gr.Blocks() as demo:
    gr.Markdown("# 🎯 Placement & Domain Predictor")

    with gr.Row():
        # Left column: one input widget per model feature, plus the button.
        with gr.Column():
            ssc_percentage = gr.Number(label="SSC Percentage", value=70)
            hsc_percentage = gr.Number(label="HSC Percentage", value=65)
            # Pre-select the first degree so an untouched form submits a
            # valid category rather than None.
            undergrad_degree = gr.Dropdown(
                choices=_degree_choices,
                value=_degree_choices[0] if _degree_choices else None,
                label="Undergrad Degree",
            )
            Graduate_degree_percentage = gr.Number(label="Graduate Degree %", value=60)
            emp_test_percentage = gr.Number(label="Employment Test %", value=50)
            Internship_Experience_Months = gr.Number(label="Internship Months", value=0)
            Certifications_Count = gr.Number(label="Certifications Count", value=1)
            Technical_Skills_Score = gr.Number(label="Technical Skills Score", value=60)
            Soft_Skills_Score = gr.Number(label="Soft Skills Score", value=60)
            Hackathons_Participated = gr.Number(label="Hackathons Participated", value=1)
            Resume_Score = gr.Number(label="Resume Score", value=50)
            Online_Course_Count = gr.Number(label="Online Course Count", value=2)
            Social_Media_Presence = gr.Number(label="Social Media Presence (0/1)", value=1)

            btn = gr.Button("Predict")

        # Right column: the prediction dict rendered as JSON.
        with gr.Column():
            output = gr.JSON(label="Prediction Result")

    # The inputs list must follow the parameter order of
    # predict_placement_and_domain, because values are passed positionally.
    btn.click(
        predict_placement_and_domain,
        inputs=[ssc_percentage, hsc_percentage, undergrad_degree, Graduate_degree_percentage,
                emp_test_percentage, Internship_Experience_Months, Certifications_Count,
                Technical_Skills_Score, Soft_Skills_Score, Hackathons_Participated,
                Resume_Score, Online_Course_Count, Social_Media_Presence],
        outputs=output
    )

demo.launch()