# 510app_rf / app.py
# Ryan
# update
# d289b60
import gradio as gr
import joblib
# Deserialize the persisted artifacts from paths relative to the current
# working directory. Load order: hiring model, hiring vectorizer, pay model,
# pay vectorizer.
rf_hire, vectorizer_hire, rf_pay, vectorizer_pay = map(
    joblib.load,
    (
        './hiring_model.pkl',
        './hire_vectorizer.pkl',
        './pay_model.pkl',
        './pay_vectorizer.pkl',
    ),
)
def predict(skills, experience, grades, projects, extra_activities, previous_offer):
    """Predict the hiring decision and pay label for one candidate.

    The candidate fields are rendered into a single pipe-separated text line.
    That line is vectorized separately for each task (hiring, pay) and passed
    to the matching classifier.

    Args:
        skills: Skills text, interpolated as-is.
        experience: Years of experience, interpolated as-is.
        grades: Academic performance percentage, interpolated as-is.
        projects: Number of completed projects, interpolated as-is.
        extra_activities: Number of extra activities, interpolated as-is.
        previous_offer: Truthy when the candidate has a previous offer;
            rendered as "Yes", otherwise "No".

    Returns:
        A multi-line string: the predicted hiring decision and pay label,
        followed by the per-class confidence percentages for each task.

    Raises:
        KeyError: If a classifier reports a class that has no entry in the
            corresponding label map.
    """
    input_text = (
        f"Skills: {skills} | "
        f"Experience: {experience} years | "
        f"Academic Performance: {grades}% | "
        f"Projects Completed: {projects} | "
        f"Extra Activities: {extra_activities} | "
        f"Has Previous Offer: {'Yes' if previous_offer else 'No'}"
    )

    # Class index -> human-readable label for each task.
    decision_map_hire = {0: "No", 1: "Interview", 2: "Yes"}
    decision_map_pay = {0: "NoPay", 1: "125k", 2: "150k"}

    hire_label, probs_hire = _label_and_confidences(
        rf_hire, vectorizer_hire, input_text, decision_map_hire
    )
    pay_label, probs_pay = _label_and_confidences(
        rf_pay, vectorizer_pay, input_text, decision_map_pay
    )

    lines = [
        "Prediction:",
        f"Hiring Decision: {hire_label}",
        f"Pay: {pay_label}",
        "",
        "Confidence Levels:",
        "Hiring Decision:",
        *(f"{decision}: {prob}" for decision, prob in probs_hire.items()),
        "",
        "Pay:",
        *(f"{decision}: {prob}" for decision, prob in probs_pay.items()),
    ]
    # Every line, including the last, is newline-terminated.
    return "\n".join(lines) + "\n"


def _label_and_confidences(model, vectorizer, text, label_map):
    """Classify *text* and return ``(label, {label: "NN.NN%"})`` for one task."""
    features = vectorizer.transform([text])
    predicted_label = label_map[model.predict(features)[0]]
    # predict_proba columns are ordered like model.classes_, so pair each
    # probability with its actual class instead of assuming column i is
    # class i (which mislabels results if a class is absent or reordered).
    confidences = {
        label_map[cls]: f"{prob*100:.2f}%"
        for cls, prob in zip(model.classes_, model.predict_proba(features)[0])
    }
    return predicted_label, confidences
# Static documentation tabs, in display order: (tab title, markdown body).
# The markdown bodies are kept flush-left so no leading whitespace ends up
# inside the rendered text.
_INFO_TABS = (
    ("Overview", """
## Project Overview
This project demonstrates a machine learning system for evaluating job candidates.
### What is Supervised Learning?
Supervised learning is a machine learning approach where the model learns from labeled training data.
In this project, we use historical hiring decisions to teach our models to make predictions about new candidates.
### Multi-Label Classification
This project involves multi-label classification, predicting two different outcomes:
- Hiring decision (No/Interview/Yes)
- Salary recommendation
"""),
    ("Dataset", """
## Dataset Information
The dataset contains information about job candidates including:
- Skills (semicolon-separated list)
- Years of experience
- Academic grades
- Number of completed projects
- Extra-curricular activities
- Previous job offers
### Labels
Each candidate has two labels:
1. Hiring decision: No (0), Interview (1), or Yes (2)
2. Salary category: NoPay, 125k, or 150k
"""),
    ("Models", """
## Model Architecture
### Original Random Forest Model
- Uses scikit-learn's RandomForestClassifier
- Skills processed using CountVectorizer
- Numerical features used directly
- Multi-output classification for both labels
### Transformer Model
- Based on DistilBERT architecture
- Custom WeightedHiringTrainer for class imbalance
- Features combined into text format
- Fine-tuned for 5 epochs with evaluation
"""),
    ("Comparison", """
## Model Comparison
### Random Forest Advantages
- Faster training time
- More interpretable decisions
- Handles numerical features naturally
- Less computational resources needed
### Transformer Advantages
- Better handling of text data (skills)
- Can capture complex patterns
- Weighted loss for class imbalance
- More sophisticated feature interactions
"""),
)

# Build the tabbed Gradio UI: the informational tabs first, then the
# interactive "Try It" form wired to predict().
with gr.Blocks() as demo:
    gr.Markdown("# Job Candidate Evaluation System")
    with gr.Tabs():
        for _tab_title, _tab_markdown in _INFO_TABS:
            with gr.Tab(_tab_title):
                gr.Markdown(_tab_markdown)

        with gr.Tab("Try It"):
            with gr.Row():
                # Left column: candidate inputs and the trigger button.
                with gr.Column():
                    skills = gr.Textbox(
                        label="Skills (semicolon-separated)",
                        placeholder="Python;Java;SQL",
                    )
                    experience = gr.Number(label="Years of Experience", minimum=0, maximum=5)
                    grades = gr.Number(label="Academic Grades (%)", minimum=0, maximum=100)
                    projects = gr.Number(label="Number of Projects", minimum=0, maximum=9)
                    extra = gr.Number(label="Extra Activities", minimum=0, maximum=4)
                    offer = gr.Checkbox(label="Has Previous Offer")
                    submit_btn = gr.Button("Predict")
                # Right column: textual prediction report.
                with gr.Column():
                    output = gr.Textbox(label="Prediction")

            # Input order must match predict()'s positional parameters.
            submit_btn.click(
                fn=predict,
                inputs=[skills, experience, grades, projects, extra, offer],
                outputs=[output],
            )

if __name__ == "__main__":
    demo.launch()