Spaces:

Bibek-Mukherjee
/

Syntax-Squad

Sleeping

App Files Files Community

Bibek Mukherjee committed on Apr 5, 2025

Commit

474ddf8

verified ·

1 Parent(s): f5af99d

Upload 3 files

Browse files

Files changed (3) hide show

app.py +227 -0
career_prediction_model.pkl +3 -0
requirements.txt +10 -0

app.py ADDED Viewed

	@@ -0,0 +1,227 @@

+import pandas as pd
+import numpy as np
+from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
+import matplotlib.pyplot as plt
+import seaborn as sns
+import pickle
+import gradio as gr
+import os
+# Load the model
+model_path = 'career_prediction_model.pkl'
+with open(model_path, 'rb') as f:
+    saved_data = pickle.load(f)
+model = saved_data['model']
+label_encoders = saved_data['label_encoders']
+target_encoder = saved_data['target_encoder']
+features = saved_data['features']
+target = 'What would you like to become when you grow up'
+# Function for individual prediction
+def predict_career(work_env, academic_perf, motivation, leadership, tech_savvy):
+    # Prepare input data
+    input_data = pd.DataFrame({
+        'Preferred Work Environment': [work_env],
+        'Academic Performance (CGPA/Percentage)': [float(academic_perf)],
+        'Motivation for Career Choice ': [motivation],  # Note the space at the end
+        'Leadership Experience': [leadership],
+        'Tech-Savviness': [tech_savvy]
+    })
+    # Encode categorical features
+    for feature in features:
+        if feature in label_encoders and input_data[feature].dtype == 'object':
+            try:
+                input_data[feature] = label_encoders[feature].transform(input_data[feature])
+            except ValueError:
+                # Handle unknown categories
+                print(f"Warning: Unknown category in {feature}. Using most frequent category.")
+                input_data[feature] = 0  # Default to first category
+    # Make prediction
+    prediction = model.predict(input_data)[0]
+    predicted_career = target_encoder.inverse_transform([int(prediction)])[0]
+    # Get probabilities for all classes
+    if hasattr(model, 'predict_proba'):
+        probabilities = model.predict_proba(input_data)[0]
+        class_probs = {target_encoder.inverse_transform([i])[0]: prob
+                      for i, prob in enumerate(probabilities)}
+        sorted_probs = dict(sorted(class_probs.items(), key=lambda x: x[1], reverse=True))
+        result = f"Predicted career: {predicted_career}\n\nProbabilities:\n"
+        for career, prob in sorted_probs.items():
+            result += f"{career}: {prob:.2f}\n"
+        return result
+    else:
+        return f"Predicted career: {predicted_career}"
+# Function for batch evaluation
+def evaluate_model_with_csv(csv_file):
+    try:
+        # Try different encodings
+        encodings = ['utf-8', 'latin1', 'ISO-8859-1', 'cp1252', 'utf-8-sig']
+        # Try each encoding until one works
+        for encoding in encodings:
+            try:
+                test_df = pd.read_csv(csv_file.name, encoding=encoding)
+                break
+            except UnicodeDecodeError:
+                if encoding == encodings[-1]:
+                    return ["Error: Could not decode the CSV file with any common encodings.", None]
+                continue
+            except Exception as e:
+                if encoding == encodings[-1]:
+                    return [f"Error reading CSV: {str(e)}", None]
+                continue
+        # Check if required columns exist
+        missing_cols = [col for col in features + [target] if col not in test_df.columns]
+        if missing_cols:
+            return [f"Error: The following required columns are missing in the CSV: {missing_cols}", None]
+        # Preprocess the test data
+        X_eval = test_df[features].copy()
+        # Handle missing values
+        X_eval = X_eval.fillna('Unknown')
+        # Convert Academic Performance to numeric
+        X_eval['Academic Performance (CGPA/Percentage)'] = pd.to_numeric(
+            X_eval['Academic Performance (CGPA/Percentage)'], errors='coerce')
+        X_eval['Academic Performance (CGPA/Percentage)'].fillna(
+            X_eval['Academic Performance (CGPA/Percentage)'].mean(), inplace=True)
+        # Encode categorical features
+        for feature in features:
+            if feature in label_encoders and X_eval[feature].dtype == 'object':
+                # Handle unknown categories by mapping them to 0
+                X_eval[feature] = X_eval[feature].apply(
+                    lambda x: label_encoders[feature].transform([x])[0]
+                    if x in label_encoders[feature].classes_ else 0
+                )
+        # Get the true labels
+        y_true = test_df[target].copy()
+        y_true = y_true.fillna('Corporate Employee')
+        # Encode the true labels
+        y_true_encoded = y_true.apply(
+            lambda x: target_encoder.transform([x])[0]
+            if x in target_encoder.classes_ else 0
+        ).values
+        # Make predictions
+        y_pred = model.predict(X_eval)
+        y_pred = np.array(y_pred).astype(int)
+        # Calculate accuracy
+        accuracy = accuracy_score(y_true_encoded, y_pred)
+        # Create a DataFrame with actual vs predicted values
+        results_df = pd.DataFrame({
+            'Actual Career': [target_encoder.classes_[i] for i in y_true_encoded],
+            'Predicted Career': [target_encoder.classes_[i] for i in y_pred]
+        })
+        # Count correct predictions
+        results_df['Correct'] = results_df['Actual Career'] == results_df['Predicted Career']
+        correct_count = results_df['Correct'].sum()
+        total_count = len(results_df)
+        # Create confusion matrix
+        plt.figure(figsize=(12, 10))
+        cm = pd.crosstab(results_df['Actual Career'], results_df['Predicted Career'])
+        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
+        plt.title('Confusion Matrix')
+        plt.ylabel('Actual Career')
+        plt.xlabel('Predicted Career')
+        plt.tight_layout()
+        # Save the confusion matrix
+        cm_path = 'confusion_matrix.png'
+        plt.savefig(cm_path)
+        # Prepare the results
+        result_text = f"Model Evaluation Results:\n\n"
+        result_text += f"Total samples: {total_count}\n"
+        result_text += f"Correct predictions: {correct_count}\n"
+        result_text += f"Accuracy: {accuracy:.4f}\n\n"
+        # Generate classification report
+        report = classification_report(y_true_encoded, y_pred,
+                                      target_names=target_encoder.classes_,
+                                      output_dict=True)
+        # Add class-wise metrics
+        result_text += "Class-wise Performance:\n"
+        for class_name in target_encoder.classes_:
+            if class_name in report:
+                result_text += f"\n{class_name}:\n"
+                result_text += f"  Precision: {report[class_name]['precision']:.4f}\n"
+                result_text += f"  Recall: {report[class_name]['recall']:.4f}\n"
+                result_text += f"  F1-score: {report[class_name]['f1-score']:.4f}\n"
+        return [result_text, cm_path]
+    except Exception as e:
+        import traceback
+        error_details = traceback.format_exc()
+        print(f"Error in evaluation: {str(e)}\n{error_details}")
+        # Create a simple error image
+        plt.figure(figsize=(6, 4))
+        plt.text(0.5, 0.5, f"Error: {str(e)}",
+                 horizontalalignment='center', verticalalignment='center', fontsize=12, color='red')
+        plt.axis('off')
+        error_path = 'error_image.png'
+        plt.savefig(error_path)
+        return [f"Error: {str(e)}", error_path]
+# Get unique values for dropdowns
+work_env_options = list(label_encoders['Preferred Work Environment'].classes_)
+motivation_options = list(label_encoders['Motivation for Career Choice '].classes_)
+leadership_options = list(label_encoders['Leadership Experience'].classes_)
+tech_savvy_options = list(label_encoders['Tech-Savviness'].classes_)
+# Create the Gradio interface
+iface = gr.Interface(
+    fn=predict_career,
+    inputs=[
+        gr.Dropdown(work_env_options, label="Preferred Work Environment"),
+        gr.Number(label="Academic Performance (CGPA/Percentage)", minimum=0, maximum=10),
+        gr.Dropdown(motivation_options, label="Motivation for Career Choice"),
+        gr.Dropdown(leadership_options, label="Leadership Experience"),
+        gr.Dropdown(tech_savvy_options, label="Tech-Savviness")
+    ],
+    outputs="text",
+    title="Career Prediction Model",
+    description="Enter your details to predict your future career path",
+    theme="huggingface"
+)
+# Create a separate interface for model evaluation
+eval_iface = gr.Interface(
+    fn=evaluate_model_with_csv,
+    inputs=gr.File(label="Upload Test CSV File"),
+    outputs=[
+        gr.Textbox(label="Evaluation Results"),
+        gr.Image(label="Confusion Matrix")
+    ],
+    title="Career Prediction Model Evaluation",
+    description="Upload a CSV file with test data to evaluate the model's performance",
+    theme="huggingface"
+)
+# Create a tabbed interface
+demo = gr.TabbedInterface(
+    [iface, eval_iface],
+    ["Individual Prediction", "Batch Evaluation"]
+)
+# Launch the interface
+demo.launch()

career_prediction_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ca2d2d50abdebdc3b64d365a7861ee745236482e9f9a3af3878fcedbf59b58be
+size 888869

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+pandas
+numpy
+scikit-learn
+xgboost
+lightgbm
+catboost
+matplotlib
+seaborn
+gradio