Anders-sonderby committed on
Commit
2ef5509
·
verified ·
1 Parent(s): 1a64aec

Upload 3 files

Browse files
Files changed (3) hide show
  1. app_test.py +142 -0
  2. requirements.txt +153 -3
  3. salary_model_cloud.pkl +3 -0
app_test.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # streamlit_salary_app.py
2
+
3
+ import streamlit as st
4
+ import pandas as pd
5
+ import numpy as np
6
+ import joblib
7
+ import os
8
+ import shap
9
+
# -------------------------
# 1️⃣ Load model pipeline safely
# -------------------------
import cloudpickle

# Resolve the model next to this script so the app does not depend on the
# current working directory.
script_dir = os.path.dirname(__file__)
model_path = os.path.join(script_dir, "salary_model_cloud.pkl")


@st.cache_resource(show_spinner=False)
def _load_model(path):
    """Deserialize and return the model pipeline stored at *path*.

    Streamlit re-executes the whole script on every widget interaction;
    caching the loaded object avoids re-reading and unpickling the file
    each time.

    Raises:
        FileNotFoundError: if *path* does not exist.
        Exception: anything raised by ``cloudpickle.load`` for a corrupt or
            incompatible file.
    """
    # SECURITY: unpickling executes arbitrary code. Only ship model files
    # that were produced by a trusted training run.
    with open(path, "rb") as f:
        return cloudpickle.load(f)


try:
    model_pipeline = _load_model(model_path)
except FileNotFoundError:
    st.error(f"Model file not found at {model_path}. Make sure it exists.")
    st.stop()
except Exception as e:
    st.error(f"Error loading the model: {e}")
    st.stop()
else:
    # Reported outside the ``try`` so a UI failure is never misreported as a
    # model-loading failure.
    st.success("Model loaded successfully!")
29
+
30
+
# -------------------------
# 2️⃣ App title and description
# -------------------------
# Introductory text rendered as Markdown directly below the page title.
APP_DESCRIPTION = """
Estimate a developer's expected salary based on their profile.
Adjust the inputs in the sidebar to see predictions and explanations.
"""

st.title("💰 Developer Salary Calculator")
st.markdown(APP_DESCRIPTION)
39
+
# -------------------------
# 3️⃣ Sidebar for user input
# -------------------------
# Choice lists are kept in named constants so the widget calls below stay
# short; the first entry of each list is the widget's initial selection.
REMOTE_WORK_OPTIONS = [
    "In-person",
    "Hybrid (some remote, some in-person)",
    "Remote",
]
ED_LEVEL_OPTIONS = [
    "Some college/university study without earning a degree",
    "Associate degree (A.A., A.S., etc.)",
    "Bachelor’s degree (B.A., B.S., B.Eng., etc.)",
    "Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",
    "Professional degree (JD, MD, Ph.D, Ed.D, etc.)",
]
DEV_TYPE_OPTIONS = [
    "Developer, back-end",
    "Developer, full-stack",
    "Developer, AI",
    "Engineering manager",
    "Data scientist or machine learning specialist",
]
ORG_SIZE_OPTIONS = [
    "1 to 9 employees",
    "10 to 19",
    "20 to 99",
    "100 to 499",
    "500 to 999",
    "1,000 to 4,999",
    "5,000 to 9,999",
    "10,000+",
]
COUNTRY_OPTIONS = [
    "Germany",
    "Netherlands",
    "Portugal",
    "Italy",
    "Croatia",
    "Belgium",
    "Austria",
]
YES_NO_OPTIONS = [True, False]

sidebar = st.sidebar
sidebar.header("Profile Inputs")

# Widgets are created in display order (top to bottom in the sidebar).
age_group = sidebar.number_input(
    "Age Group", min_value=0, max_value=10, value=1
)
years_code_pro = sidebar.number_input(
    "Years of Coding Experience", min_value=0, max_value=50, value=3
)
remote_work = sidebar.selectbox("Remote Work", REMOTE_WORK_OPTIONS)
ed_level = sidebar.selectbox("Education Level", ED_LEVEL_OPTIONS)
dev_type = sidebar.selectbox("Developer Type", DEV_TYPE_OPTIONS)
org_size = sidebar.selectbox("Organization Size", ORG_SIZE_OPTIONS)
country = sidebar.selectbox("Country", COUNTRY_OPTIONS)
so_account = sidebar.selectbox("Has Stack Overflow Account?", YES_NO_OPTIONS)
ai_select = sidebar.selectbox("Interested in AI?", YES_NO_OPTIONS)
71
+
# -------------------------
# 4️⃣ Prepare input DataFrame
# -------------------------
# Collect the sidebar selections into one record; the keys become the column
# names of the single-row frame handed to the pipeline.
profile = dict(
    age_group=age_group,
    years_code_pro=years_code_pro,
    remote_work=remote_work,
    ed_level=ed_level,
    dev_type=dev_type,
    org_size=org_size,
    country=country,
    so_account=so_account,
    ai_select=ai_select,
)
input_df = pd.DataFrame([profile])
86
+
# -------------------------
# 5️⃣ Make prediction
# -------------------------
predicted_salary = model_pipeline.predict(input_df)[0]

# Fallback range (±10% of the point estimate), used when the regressor does
# not expose individual estimators or the per-estimator pass fails.
fallback_range = (predicted_salary * 0.9, predicted_salary * 1.1)

# Approximate expected range for tree-based models: the 5th-95th percentile
# of the individual estimators' predictions.
try:
    regressor = model_pipeline.named_steps["regressor"]
    if hasattr(regressor, "estimators_"):
        # Transform once; the preprocessed row is the same for every tree.
        transformed_row = model_pipeline.named_steps["preprocessor"].transform(input_df)
        preds_per_tree = np.array(
            [tree.predict(transformed_row) for tree in regressor.estimators_]
        )
        lower, upper = np.percentile(preds_per_tree, [5, 95])
    else:
        lower, upper = fallback_range
except Exception:
    # Deliberately best-effort: any failure here degrades to the ±10% range.
    # ``Exception`` (not a bare ``except``) so KeyboardInterrupt/SystemExit
    # still propagate.
    lower, upper = fallback_range

st.subheader("Predicted Salary")
st.write(f"💶 Predicted salary: **€{predicted_salary:,.0f}**")
st.write(f"🔹 Approximate expected range: €{lower:,.0f} – €{upper:,.0f}")
107
+
# -------------------------
# 6️⃣ SHAP explanation table
# -------------------------
st.subheader("Feature Contribution (SHAP)")

prep = model_pipeline.named_steps["preprocessor"]
model = model_pipeline.named_steps.get("regressor")

# The preprocessor may return a SciPy sparse matrix (e.g. when it one-hot
# encodes categoricals); pandas cannot build a labelled frame from that
# directly, so densify first. A single row makes this cheap.
transformed = prep.transform(input_df)
if hasattr(transformed, "toarray"):
    transformed = transformed.toarray()
X_shap = pd.DataFrame(transformed, columns=prep.get_feature_names_out())

explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_shap)

# Reduce the SHAP output to the 1-D contribution vector for the single row.
if isinstance(shap_values, list):
    shap_vals_row = shap_values[0][0]  # list of per-output arrays: first output, first row
elif shap_values.ndim > 1:
    shap_vals_row = shap_values[0]
else:
    shap_vals_row = shap_values

# One line per transformed feature, largest absolute contribution first.
shap_df = pd.DataFrame({
    "feature": X_shap.columns,
    "feature_value": X_shap.iloc[0, :].values,
    "shap_value": shap_vals_row,
    "abs_shap": np.abs(shap_vals_row),
}).sort_values("abs_shap", ascending=False)

st.dataframe(shap_df)
135
+
# -------------------------
# 7️⃣ SHAP force plot
# -------------------------
st.subheader("SHAP Force Plot")

# ``shap.initjs()`` is intentionally not called: it only loads the JavaScript
# renderer for notebooks, and this plot is drawn with matplotlib.
#
# ``show=False`` is required: with the default ``show=True`` SHAP displays the
# figure itself and returns None, leaving ``st.pyplot`` nothing to render.
force_plot = shap.force_plot(
    explainer.expected_value,
    shap_vals_row,
    X_shap.iloc[0, :],
    matplotlib=True,
    show=False,
)
st.pyplot(force_plot)
requirements.txt CHANGED
@@ -1,3 +1,153 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adagio==0.2.6
2
+ aiohappyeyeballs==2.6.1
3
+ aiohttp==3.12.15
4
+ aiosignal==1.4.0
5
+ alembic==1.16.5
6
+ altair==5.5.0
7
+ annotated-types==0.7.0
8
+ anyio==4.11.0
9
+ appdirs==1.4.4
10
+ appnope==0.1.4
11
+ asttokens==3.0.0
12
+ async-timeout==5.0.1
13
+ attrs==25.3.0
14
+ blinker==1.9.0
15
+ branca==0.8.1
16
+ cachetools==6.2.0
17
+ certifi==2025.8.3
18
+ charset-normalizer==3.4.3
19
+ click==8.1.8
20
+ cloudpickle==3.1.1
21
+ cmdstanpy==1.2.5
22
+ colorlog==6.9.0
23
+ comm==0.2.3
24
+ contourpy==1.3.0
25
+ coreforecast==0.0.16
26
+ cycler==0.12.1
27
+ darts==0.37.1
28
+ debugpy==1.8.16
29
+ decorator==5.2.1
30
+ exceptiongroup==1.3.0
31
+ executing==2.2.1
32
+ fastapi==0.118.0
33
+ fastjsonschema==2.21.2
34
+ filelock==3.19.1
35
+ folium==0.20.0
36
+ fonttools==4.59.2
37
+ frozenlist==1.7.0
38
+ fs==2.4.16
39
+ fsspec==2025.9.0
40
+ fugue==0.9.1
41
+ geographiclib==2.1
42
+ geopy==2.4.1
43
+ gitdb==4.0.12
44
+ GitPython==3.1.45
45
+ handcalcs==1.9.0
46
+ holidays==0.81
47
+ idna==3.10
48
+ importlib_metadata==8.7.0
49
+ importlib_resources==6.5.2
50
+ innerscope==0.7.0
51
+ ipykernel==6.30.1
52
+ ipython==8.18.1
53
+ ipywidgets==8.1.7
54
+ jedi==0.19.2
55
+ Jinja2==3.1.6
56
+ joblib==1.5.2
57
+ jsonschema==4.25.1
58
+ jsonschema-specifications==2025.9.1
59
+ jupyter_client==8.6.3
60
+ jupyter_core==5.8.1
61
+ jupyterlab_widgets==3.0.15
62
+ kiwisolver==1.4.7
63
+ lightning-utilities==0.15.2
64
+ llvmlite==0.43.0
65
+ Mako==1.3.10
66
+ MarkupSafe==3.0.2
67
+ matplotlib==3.9.4
68
+ matplotlib-inline==0.1.7
69
+ mlxtend==0.23.4
70
+ more-itertools==10.8.0
71
+ mpmath==1.3.0
72
+ multidict==6.6.4
73
+ narwhals==2.4.0
74
+ nbformat==5.10.4
75
+ nest-asyncio==1.6.0
76
+ networkx==3.2.1
77
+ nfoursid==1.0.2
78
+ numba==0.60.0
79
+ numpy==2.0.2
80
+ optuna==4.5.0
81
+ packaging==25.0
82
+ pandas==2.3.2
83
+ parso==0.8.5
84
+ patsy==1.0.1
85
+ pexpect==4.9.0
86
+ pillow==11.3.0
87
+ platformdirs==4.4.0
88
+ plotly==6.3.0
89
+ prompt_toolkit==3.0.52
90
+ propcache==0.3.2
91
+ prophet==1.1.7
92
+ protobuf==6.32.0
93
+ psutil==7.0.0
94
+ ptyprocess==0.7.0
95
+ pure_eval==0.2.3
96
+ pyarrow==21.0.0
97
+ pydantic==2.11.9
98
+ pydantic_core==2.33.2
99
+ pydeck==0.9.1
100
+ Pygments==2.19.2
101
+ pynndescent==0.5.13
102
+ pyod==2.0.5
103
+ pyparsing==3.2.3
104
+ python-dateutil==2.9.0.post0
105
+ pytorch-lightning==2.5.2
106
+ pytz==2025.2
107
+ PyYAML==6.0.2
108
+ pyzmq==27.1.0
109
+ referencing==0.36.2
110
+ requests==2.32.5
111
+ rpds-py==0.27.1
112
+ scikit-learn==1.6.1
113
+ scipy==1.13.1
114
+ seaborn==0.13.2
115
+ shap==0.48.0
116
+ six==1.17.0
117
+ sklearn==0.0
118
+ slicer==0.0.8
119
+ smmap==5.0.2
120
+ sniffio==1.3.1
121
+ SQLAlchemy==2.0.43
122
+ stack-data==0.6.3
123
+ stanio==0.5.1
124
+ starlette==0.48.0
125
+ statsforecast==2.0.2
126
+ statsmodels==0.14.5
127
+ streamlit==1.49.1
128
+ sympy==1.14.0
129
+ tenacity==9.1.2
130
+ tensorboardX==2.6.4
131
+ threadpoolctl==3.6.0
132
+ toml==0.10.2
133
+ tomli==2.2.1
134
+ toolz==1.0.0
135
+ torch==2.8.0
136
+ torchmetrics==1.8.2
137
+ tornado==6.5.2
138
+ tqdm==4.67.1
139
+ traitlets==5.14.3
140
+ triad==0.9.8
141
+ typing-inspection==0.4.1
142
+ typing_extensions==4.15.0
143
+ tzdata==2025.2
144
+ umap-learn==0.5.9.post2
145
+ urllib3==2.5.0
146
+ utilsforecast==0.2.12
147
+ wcwidth==0.2.13
148
+ widgetsnbextension==4.0.14
149
+ xarray==2024.7.0
150
+ xgboost==2.1.4
151
+ xyzservices==2025.4.0
152
+ yarl==1.20.1
153
+ zipp==3.23.0
salary_model_cloud.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:441a5132ef5b8d64ee33983924d52ec1e0256dbede3fe130dea46ff7921232d3
3
+ size 20705117