|
|
import gradio as gr |
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
|
|
|
from sklearn.model_selection import train_test_split |
|
|
from sklearn.preprocessing import StandardScaler, OneHotEncoder |
|
|
from sklearn.compose import ColumnTransformer |
|
|
from sklearn.pipeline import Pipeline |
|
|
from sklearn.multioutput import MultiOutputClassifier |
|
|
from sklearn.metrics import accuracy_score |
|
|
from sklearn.tree import DecisionTreeClassifier |
|
|
from sklearn.ensemble import RandomForestClassifier |
|
|
from sklearn.neighbors import KNeighborsClassifier |
|
|
from sklearn.linear_model import LogisticRegression |
|
|
from sklearn.svm import SVC |
|
|
|
|
|
|
|
|
# Load the placement dataset; the path is resolved relative to the working
# directory of the process.
df = pd.read_csv("Balanced_Placement_Data.csv")

# Input columns fed to every model. The order here is also the parameter order
# of predict_placement_and_domain and of the UI inputs.
features = [
    'ssc_percentage', 'hsc_percentage', 'undergrad_degree', 'Graduate_degree_percentage',
    'emp_test_percentage', 'Internship_Experience_Months', 'Certifications_Count',
    'Technical_Skills_Score', 'Soft_Skills_Score', 'Hackathons_Participated',
    'Resume_Score', 'Online_Course_Count', 'Social_Media_Presence',
]

# Both targets are predicted jointly; index 0 is placement, index 1 is domain.
target_columns = ['Placement_Status', 'Domain_of_Interest']

X = df[features]
y = df[target_columns]

categorical_features = ['undergrad_degree']

# Keep the declaration order of `features`. A set difference would yield a
# hash-dependent order, so the transformed column order (and with it the
# behaviour of the seeded tree models) could change from one run to the next.
numerical_features = [
    name for name in features if name not in categorical_features
]

# Scale the numeric columns and one-hot encode the degree column, dropping the
# first category of the encoded feature.
preprocessor = ColumnTransformer([
    ("num", StandardScaler(), numerical_features),
    ("cat", OneHotEncoder(drop="first"), categorical_features),
])

# Candidate estimators, keyed by the display name reported to the user.
models = {
    "Random Forest": RandomForestClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "KNN": KNeighborsClassifier(),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "SVM": SVC(probability=True),
}
|
|
|
|
|
|
|
|
def train_models():
    """Fit every candidate classifier and score it on a held-out split.

    The module-level ``X`` and ``y`` are split 80/20 with a fixed seed. Each
    estimator in ``models`` is wrapped in a ``MultiOutputClassifier`` so a
    single pipeline predicts both target columns.

    Returns:
        dict: Maps each model name to a dict with the keys
        ``"Placement Accuracy"``, ``"Domain Accuracy"`` (accuracy on the test
        split for the respective target) and ``"Model"`` (the fitted
        pipeline).
    """
    # Imported locally so this function does not depend on an extra
    # file-level import.
    from sklearn.base import clone

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    results = {}
    for name, clf in models.items():
        pipe = Pipeline([
            # Give every pipeline its own unfitted copy. Passing the shared
            # module-level transformer would make all returned pipelines
            # point at one object that each fit() call overwrites.
            ("preprocessor", clone(preprocessor)),
            ("classifier", MultiOutputClassifier(clf)),
        ])
        pipe.fit(X_train, y_train)
        y_pred = pipe.predict(X_test)

        # Prediction columns follow the order of the target columns:
        # column 0 is the placement status, column 1 the domain.
        placement_acc = accuracy_score(y_test["Placement_Status"], y_pred[:, 0])
        domain_acc = accuracy_score(y_test["Domain_of_Interest"], y_pred[:, 1])

        results[name] = {
            "Placement Accuracy": placement_acc,
            "Domain Accuracy": domain_acc,
            "Model": pipe,
        }
    return results
|
|
|
|
|
# Train all candidates once, when the module is loaded.
results = train_models()

# Rank the candidates by the sum of their two test accuracies. On a tie the
# model that appears first in `results` wins.
_combined_scores = {
    model_name: entry["Placement Accuracy"] + entry["Domain Accuracy"]
    for model_name, entry in results.items()
}
best_model_name = max(_combined_scores, key=_combined_scores.get)
best_model = results[best_model_name]["Model"]
|
|
|
|
|
|
|
|
def predict_placement_and_domain(
    ssc_percentage, hsc_percentage, undergrad_degree, Graduate_degree_percentage,
    emp_test_percentage, Internship_Experience_Months, Certifications_Count,
    Technical_Skills_Score, Soft_Skills_Score, Hackathons_Participated,
    Resume_Score, Online_Course_Count, Social_Media_Presence
):
    """Predict placement status and domain of interest for one candidate.

    Each argument is the value of the feature column with the same name. The
    values are assembled into a single-row DataFrame and passed to the
    module-level ``best_model``.

    Returns:
        dict: ``"Placement Status"`` and ``"Domain of Interest"`` hold the two
        predicted labels, ``"Best Model"`` the name of the model that
        produced them.

    Raises:
        ValueError: If any argument is ``None`` (for example a cleared number
            field or an unselected dropdown). The message lists the affected
            feature names.
    """
    user_input = {
        "ssc_percentage": ssc_percentage,
        "hsc_percentage": hsc_percentage,
        "undergrad_degree": undergrad_degree,
        "Graduate_degree_percentage": Graduate_degree_percentage,
        "emp_test_percentage": emp_test_percentage,
        "Internship_Experience_Months": Internship_Experience_Months,
        "Certifications_Count": Certifications_Count,
        "Technical_Skills_Score": Technical_Skills_Score,
        "Soft_Skills_Score": Soft_Skills_Score,
        "Hackathons_Participated": Hackathons_Participated,
        "Resume_Score": Resume_Score,
        "Online_Course_Count": Online_Course_Count,
        "Social_Media_Presence": Social_Media_Presence,
    }

    # Reject empty fields up front, before the pipeline sees them, so the
    # caller gets a message that names the offending inputs.
    missing = [name for name, value in user_input.items() if value is None]
    if missing:
        raise ValueError("Missing value for: " + ", ".join(missing))

    input_df = pd.DataFrame([user_input])
    prediction = best_model.predict(input_df)

    # Unwrap numpy scalars into built-in Python values so the JSON output
    # does not depend on the dtype of the predicted labels.
    placement = prediction[0][0]
    domain = prediction[0][1]
    if hasattr(placement, "item"):
        placement = placement.item()
    if hasattr(domain, "item"):
        domain = domain.item()

    return {
        "Placement Status": placement,
        "Domain of Interest": domain,
        "Best Model": best_model_name,
    }
|
|
|
|
|
|
|
|
# NOTE(review): the original indentation of this section was lost. The nesting
# below (button inside the input column, click handler inside the Blocks
# context, launch outside it) is a reconstruction - confirm the intended layout.

# Degree labels offered in the dropdown, taken from the dataset itself.
_degree_choices = list(df['undergrad_degree'].unique())

with gr.Blocks() as demo:
    gr.Markdown("# 🎯 Placement & Domain Predictor")

    with gr.Row():
        # Left column: one input per feature, followed by the submit button.
        with gr.Column():
            ssc_percentage = gr.Number(label="SSC Percentage", value=70)
            hsc_percentage = gr.Number(label="HSC Percentage", value=65)
            undergrad_degree = gr.Dropdown(
                choices=_degree_choices,
                # Preselect a degree; without a default the dropdown submits
                # None when the user presses "Predict" without choosing one.
                value=_degree_choices[0] if _degree_choices else None,
                label="Undergrad Degree",
            )
            Graduate_degree_percentage = gr.Number(label="Graduate Degree %", value=60)
            emp_test_percentage = gr.Number(label="Employment Test %", value=50)
            Internship_Experience_Months = gr.Number(label="Internship Months", value=0)
            Certifications_Count = gr.Number(label="Certifications Count", value=1)
            Technical_Skills_Score = gr.Number(label="Technical Skills Score", value=60)
            Soft_Skills_Score = gr.Number(label="Soft Skills Score", value=60)
            Hackathons_Participated = gr.Number(label="Hackathons Participated", value=1)
            Resume_Score = gr.Number(label="Resume Score", value=50)
            Online_Course_Count = gr.Number(label="Online Course Count", value=2)
            Social_Media_Presence = gr.Number(label="Social Media Presence (0/1)", value=1)

            btn = gr.Button("Predict")

        # Right column: the prediction rendered as JSON.
        with gr.Column():
            output = gr.JSON(label="Prediction Result")

    # The input order must match the parameter order of
    # predict_placement_and_domain, because values are passed positionally.
    btn.click(
        predict_placement_and_domain,
        inputs=[
            ssc_percentage, hsc_percentage, undergrad_degree, Graduate_degree_percentage,
            emp_test_percentage, Internship_Experience_Months, Certifications_Count,
            Technical_Skills_Score, Soft_Skills_Score, Hackathons_Participated,
            Resume_Score, Online_Course_Count, Social_Media_Presence,
        ],
        outputs=output,
    )

demo.launch()
|
|
|