File size: 5,367 Bytes
4b6046e
bcb086a
 
 
 
 
 
8fdb1a1
4b6046e
bcb086a
8fdb1a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4b6046e
8fdb1a1
 
 
 
 
 
 
 
 
 
bcb086a
8fdb1a1
bcb086a
 
 
 
 
 
8fdb1a1
4b6046e
 
 
 
bcb086a
8fdb1a1
4b6046e
8fdb1a1
 
bcb086a
 
 
 
4b6046e
bcb086a
4b6046e
 
8fdb1a1
4b6046e
 
8fdb1a1
 
 
 
 
4b6046e
bcb086a
8fdb1a1
bcb086a
4b6046e
8fdb1a1
 
 
 
 
bcb086a
4b6046e
 
 
 
 
8fdb1a1
4b6046e
8fdb1a1
 
4b6046e
8fdb1a1
 
 
 
 
4b6046e
8fdb1a1
4b6046e
 
 
 
8fdb1a1
4b6046e
bcb086a
8fdb1a1
bcb086a
4b6046e
 
 
 
8fdb1a1
 
 
4b6046e
bcb086a
8fdb1a1
 
bcb086a
4b6046e
8fdb1a1
4b6046e
 
bcb086a
 
8fdb1a1
4b6046e
 
 
8fdb1a1
 
4b6046e
8fdb1a1
4b6046e
 
 
 
 
 
 
8fdb1a1
4b6046e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import pickle 
import pandas as pd
import shap
import gradio as gr
import numpy as np
import matplotlib.pyplot as plt

# Load the model
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# ship a model artifact that you produced yourself or otherwise trust.
# The context manager closes the file handle as soon as the model is read.
with open("salar_xgb_team.pkl", 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# SHAP setup
explainer = shap.Explainer(loaded_model)  # DO NOT CHANGE

# Education mapping: labels are listed from lowest to highest attainment and
# each one is assigned its 1-based position in that list.
education_map = {
    level: rank
    for rank, level in enumerate(
        (
            "Less than 1st grade",
            "1st–4th grade",
            "5th–6th grade",
            "7th–8th grade",
            "9th grade",
            "10th grade",
            "11th grade",
            "12th grade (no diploma)",
            "High School Grad",
            "Some College",
            "Associate's Degree (Voc)",
            "Associate's Degree (Acad)",
            "Bachelor's Degree",
            "Master's Degree",
            "Professional School",
            "Doctorate",
        ),
        start=1,
    )
}

# Main model logic
def main_func(age, education_level, sex, capital_gain, capital_loss, hours_per_week):
    """Predict the income bracket for one profile and explain the prediction.

    Args:
        age: Value for the ``age`` feature.
        education_level: A key of ``education_map`` (e.g. ``"Some College"``);
            it is translated to the numeric ``education-num`` feature.
        sex: ``"Female"`` is encoded as 1; any other value is encoded as 0.
        capital_gain: Value for the ``capital-gain`` feature.
        capital_loss: Value for the ``capital-loss`` feature.
        hours_per_week: Value for the ``hours-per-week`` feature.

    Returns:
        A ``(probabilities, figure)`` tuple. ``probabilities`` maps the two
        income labels to floats taken from ``predict_proba``; ``figure`` is
        the matplotlib figure holding the SHAP bar plot for this row.

    Raises:
        KeyError: If ``education_level`` is not a key of ``education_map``.
    """
    education_num = education_map[education_level]
    sex = 1 if sex == "Female" else 0

    # Build a single-row frame: the dict becomes one column indexed by feature
    # name, and the transpose turns those names into the columns.
    new_row = pd.DataFrame.from_dict({
        'age': age,
        'education-num': education_num,
        'sex': sex,
        'capital-gain': capital_gain,
        'capital-loss': capital_loss,
        'hours-per-week': hours_per_week
    }, orient='index').transpose()

    prob = loaded_model.predict_proba(new_row)

    shap_values = explainer(new_row)
    # show=False leaves the chart on the current pyplot figure instead of
    # displaying it; the call's return value is not needed.
    shap.plots.bar(shap_values[0], max_display=6, order=shap.Explanation.abs, show_data='auto', show=False)

    plt.tight_layout()
    local_plot = plt.gcf()
    # Close this specific figure in pyplot so figures do not pile up across
    # calls; the Figure object itself is still returned to the caller.
    plt.close(local_plot)

    # NOTE(review): assumes column 1 of predict_proba is the "> $50K" class
    # and column 0 the "≤ $50K" class — confirm against the trained model.
    return {
        "Chance of Earning > $50K": float(prob[0][1]),
        "Chance of Earning ≤ $50K": float(prob[0][0])
    }, local_plot

# Gradio UI: heading and introductory text shown at the top of the page.
title = "**Household Income Predictor** 💰"
description1 = (
    "This app uses your input to predict whether a household "
    "earns more or less than $50K per year."
)
description2 = (
    "Adjust the values below or select a sample profile, then click "
    "'Analyze' to see the prediction and feature impact."
)

# The Blocks `title` is the plain-text browser tab title, so the Markdown bold
# markers are stripped there; the on-page heading below still uses them.
with gr.Blocks(title=title.replace("*", "")) as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(description1)
    gr.Markdown("---")
    gr.Markdown(description2)
    gr.Markdown("---")

    # Sample profiles: dropdown label -> values for
    # [age, education_level, sex, capital_gain, capital_loss, hours_per_week].
    # Keeping the labels and their values in one table means the dropdown
    # choices and the autofill handler cannot drift apart.
    default_profile = [35, "Some College", "Male", 0, 0, 40]
    sample_profiles = {
        "👨‍💻 Young Tech Worker: 28 yrs, Bachelor's, 45 hrs/week":
            [28, "Bachelor's Degree", "Male", 0, 0, 45],
        "👵 Retired Part-Timer: 65 yrs, no college, 20 hrs/week":
            [65, "9th grade", "Female", 0, 0, 20],
        "👩‍🏫 Mid-Career Teacher: 42 yrs, Master's, 38 hrs/week":
            [42, "Master's Degree", "Female", 0, 0, 38],
        "👨‍🔧 Manual Laborer: 50 yrs, High School Grad, 60 hrs/week":
            [50, "High School Grad", "Male", 0, 0, 60],
    }

    # Sample profile dropdown
    scenario = gr.Dropdown(
        ["Select a Sample", *sample_profiles],
        label="📋 Choose a Sample Profile (optional — autofills values to explore common cases)"
    )

    # Inputs
    with gr.Row():
        age = gr.Number(label="🧓 Age", value=35)
        education_level = gr.Dropdown(
            list(education_map.keys()),
            label="🎓 Education Level",
            value="Some College"
        )
    with gr.Row():
        # Preselect a value so the first "Analyze" click never sends None;
        # "Male" matches the default profile used by fill_scenario.
        sex = gr.Radio(["Male", "Female"], label="🧍 Sex", value="Male")
        capital_gain = gr.Number(label="📈 Capital Gain", value=0)
        capital_loss = gr.Number(label="📉 Capital Loss", value=0)
        hours_per_week = gr.Number(label="⏱ Hours per Week", value=40)

    # Components fed to the model, in main_func's parameter order.
    model_inputs = [age, education_level, sex, capital_gain, capital_loss, hours_per_week]

    # Handle preset scenario changes
    def fill_scenario(scenario_choice):
        """Return the six input values for the chosen sample profile.

        Any choice that is not a known sample label (including the
        "Select a Sample" placeholder) yields the default profile.
        """
        # Return a copy so callers can never mutate the preset table.
        return list(sample_profiles.get(scenario_choice, default_profile))

    scenario.change(
        fn=fill_scenario,
        inputs=[scenario],
        outputs=model_inputs
    )

    # Outputs
    with gr.Column(visible=True) as output_col:
        label = gr.Label(label="🧠 Predicted Income")
        confidence = gr.Slider(0, 100, value=50, label="📊 Confidence in > $50K", interactive=False)
        local_plot = gr.Plot(label="🔍 Top SHAP Features")

    # Components updated by a prediction, in wrapped_main's return order.
    model_outputs = [label, confidence, local_plot]

    # Wrapped function for UI
    def wrapped_main(age, education_level, sex, capital_gain, capital_loss, hours_per_week):
        """Run main_func and adapt its result to the three output components.

        Returns the probability dict, the "> $50K" probability scaled to
        0-100 for the slider, and the SHAP figure.
        """
        result, shap_plot = main_func(age, education_level, sex, capital_gain, capital_loss, hours_per_week)
        return result, float(result["Chance of Earning > $50K"]) * 100, shap_plot

    # Button
    submit_btn = gr.Button("🔎 Analyze")
    submit_btn.click(
        fn=wrapped_main,
        inputs=model_inputs,
        outputs=model_outputs,
        api_name="Salary_Predictor"
    )

    # Examples
    gr.Markdown("### 🧪 Try Some Examples:")
    gr.Examples(
        examples=[
            [28, "Bachelor's Degree", "Male", 0, 0, 45],
            [60, "9th grade", "Female", 0, 0, 25]
        ],
        inputs=model_inputs,
        outputs=model_outputs,
        fn=wrapped_main,
        cache_examples=True
    )

demo.launch()