"""Gradio app that predicts placement status and domain of interest.

On start-up the script loads ``Balanced_Placement_Data.csv``, trains several
multi-output classifiers, keeps the one with the highest combined hold-out
accuracy, and serves it through a small Gradio form.
"""

import gradio as gr
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# ------------------- Load Data -------------------
df = pd.read_csv("Balanced_Placement_Data.csv")

features = [
    'ssc_percentage',
    'hsc_percentage',
    'undergrad_degree',
    'Graduate_degree_percentage',
    'emp_test_percentage',
    'Internship_Experience_Months',
    'Certifications_Count',
    'Technical_Skills_Score',
    'Soft_Skills_Score',
    'Hackathons_Participated',
    'Resume_Score',
    'Online_Course_Count',
    'Social_Media_Presence',
]
target_columns = ['Placement_Status', 'Domain_of_Interest']

X = df[features]
y = df[target_columns]

categorical_features = ['undergrad_degree']
# Keep the order of `features`: a set difference would reorder the columns
# differently on every interpreter run (string hash randomization), which
# makes the seeded tree-based models non-reproducible.
numerical_features = [
    name for name in features if name not in categorical_features
]

# Template preprocessor; each pipeline gets its own clone in train_models().
preprocessor = ColumnTransformer([
    ("num", StandardScaler(), numerical_features),
    ("cat", OneHotEncoder(drop="first"), categorical_features),
])

models = {
    "Random Forest": RandomForestClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "KNN": KNeighborsClassifier(),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "SVM": SVC(probability=True),
}


# ------------------- Train Models -------------------
def train_models():
    """Fit every candidate in ``models`` and score it on a hold-out split.

    The data is split 80/20 with ``random_state=42``. Each classifier is
    wrapped in ``MultiOutputClassifier`` so that it predicts both target
    columns, and is placed behind its own copy of ``preprocessor``.

    Returns:
        dict: maps each model name to a dict with the keys
        ``"Placement Accuracy"``, ``"Domain Accuracy"`` (accuracy on the
        hold-out split for each target) and ``"Model"`` (the fitted
        pipeline).
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    results = {}
    for name, clf in models.items():
        pipe = Pipeline([
            # Pipeline does not clone its steps, so without clone() every
            # stored pipeline would share (and refit) one transformer object.
            ("preprocessor", clone(preprocessor)),
            ("classifier", MultiOutputClassifier(clf)),
        ])
        pipe.fit(X_train, y_train)
        y_pred = pipe.predict(X_test)

        # Prediction columns follow the order of `target_columns`.
        placement_acc = accuracy_score(
            y_test["Placement_Status"], y_pred[:, 0]
        )
        domain_acc = accuracy_score(
            y_test["Domain_of_Interest"], y_pred[:, 1]
        )

        results[name] = {
            "Placement Accuracy": placement_acc,
            "Domain Accuracy": domain_acc,
            "Model": pipe,
        }
    return results


results = train_models()
# The best model is the one with the highest sum of the two accuracies.
best_model_name = max(
    results,
    key=lambda m: results[m]["Placement Accuracy"]
    + results[m]["Domain Accuracy"],
)
best_model = results[best_model_name]["Model"]


# ------------------- Prediction Function -------------------
def _to_native(value):
    """Return the built-in Python equivalent of a NumPy scalar."""
    return value.item() if isinstance(value, np.generic) else value


def predict_placement_and_domain(
    ssc_percentage,
    hsc_percentage,
    undergrad_degree,
    Graduate_degree_percentage,
    emp_test_percentage,
    Internship_Experience_Months,
    Certifications_Count,
    Technical_Skills_Score,
    Soft_Skills_Score,
    Hackathons_Participated,
    Resume_Score,
    Online_Course_Count,
    Social_Media_Presence,
):
    """Predict both targets for one candidate using ``best_model``.

    The parameters correspond one-to-one, in order, to the entries of
    ``features``.

    Returns:
        dict: ``"Placement Status"`` and ``"Domain of Interest"`` (the two
        predicted labels) plus ``"Best Model"`` (name of the model used).

    Raises:
        gr.Error: if no undergrad degree has been selected.
    """
    if undergrad_degree is None:
        # Fail with a readable message instead of an opaque encoder error.
        raise gr.Error("Please select an undergrad degree.")

    user_input = {
        "ssc_percentage": ssc_percentage,
        "hsc_percentage": hsc_percentage,
        "undergrad_degree": undergrad_degree,
        "Graduate_degree_percentage": Graduate_degree_percentage,
        "emp_test_percentage": emp_test_percentage,
        "Internship_Experience_Months": Internship_Experience_Months,
        "Certifications_Count": Certifications_Count,
        "Technical_Skills_Score": Technical_Skills_Score,
        "Soft_Skills_Score": Soft_Skills_Score,
        "Hackathons_Participated": Hackathons_Participated,
        "Resume_Score": Resume_Score,
        "Online_Course_Count": Online_Course_Count,
        "Social_Media_Presence": Social_Media_Presence,
    }
    # A single-row frame with the same column layout used for training.
    input_df = pd.DataFrame([user_input], columns=features)

    placement, domain = best_model.predict(input_df)[0]

    # Convert NumPy scalars so the result is plain JSON-serializable data.
    return {
        "Placement Status": _to_native(placement),
        "Domain of Interest": _to_native(domain),
        "Best Model": best_model_name,
    }


# ------------------- Gradio UI -------------------
# Dropdown options are the degree values present in the data set.
degree_choices = df['undergrad_degree'].unique().tolist()

with gr.Blocks() as demo:
    gr.Markdown("# 🎯 Placement & Domain Predictor")

    with gr.Row():
        with gr.Column():
            ssc_percentage = gr.Number(label="SSC Percentage", value=70)
            hsc_percentage = gr.Number(label="HSC Percentage", value=65)
            undergrad_degree = gr.Dropdown(
                choices=degree_choices,
                # Preselect an option so the first click never sends None.
                value=degree_choices[0] if degree_choices else None,
                label="Undergrad Degree",
            )
            Graduate_degree_percentage = gr.Number(
                label="Graduate Degree %", value=60
            )
            emp_test_percentage = gr.Number(
                label="Employment Test %", value=50
            )
            Internship_Experience_Months = gr.Number(
                label="Internship Months", value=0
            )
            Certifications_Count = gr.Number(
                label="Certifications Count", value=1
            )
            Technical_Skills_Score = gr.Number(
                label="Technical Skills Score", value=60
            )
            Soft_Skills_Score = gr.Number(
                label="Soft Skills Score", value=60
            )
            Hackathons_Participated = gr.Number(
                label="Hackathons Participated", value=1
            )
            Resume_Score = gr.Number(label="Resume Score", value=50)
            Online_Course_Count = gr.Number(
                label="Online Course Count", value=2
            )
            Social_Media_Presence = gr.Number(
                label="Social Media Presence (0/1)", value=1
            )
            btn = gr.Button("Predict")

        with gr.Column():
            output = gr.JSON(label="Prediction Result")

    # Input order must match the parameter order of the prediction function.
    btn.click(
        predict_placement_and_domain,
        inputs=[
            ssc_percentage,
            hsc_percentage,
            undergrad_degree,
            Graduate_degree_percentage,
            emp_test_percentage,
            Internship_Experience_Months,
            Certifications_Count,
            Technical_Skills_Score,
            Soft_Skills_Score,
            Hackathons_Participated,
            Resume_Score,
            Online_Course_Count,
            Social_Media_Presence,
        ],
        outputs=output,
    )

# Only start the web server when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()