Spaces:
Build error
Build error
Upload 3 files
Browse files- app_test.py +142 -0
- requirements.txt +153 -3
- salary_model_cloud.pkl +3 -0
app_test.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# streamlit_salary_app.py
|
| 2 |
+
|
| 3 |
+
import streamlit as st
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import numpy as np
|
| 6 |
+
import joblib
|
| 7 |
+
import os
|
| 8 |
+
import shap
|
| 9 |
+
|
| 10 |
+
# -------------------------
|
| 11 |
+
# 1️⃣ Load model pipeline safely
|
| 12 |
+
# -------------------------
|
| 13 |
+
import cloudpickle
|
| 14 |
+
import os
|
| 15 |
+
|
| 16 |
+
script_dir = os.path.dirname(__file__)  # folder where this script is
model_path = os.path.join(script_dir, "salary_model_cloud.pkl")  # use the new cloudpickle file


@st.cache_resource(show_spinner=False)
def _load_model_pipeline(path):
    """Unpickle and return the model pipeline stored at *path*.

    Streamlit re-executes the whole script on every widget interaction, so
    the load is wrapped in ``st.cache_resource``: the file is read and
    unpickled once per server process and the same object is reused on
    later reruns.

    NOTE(security): unpickling executes code embedded in the file. Only
    load model artifacts that ship with the app; never a user-supplied file.

    Raises:
        FileNotFoundError: if *path* does not exist.
        Exception: whatever ``cloudpickle.load`` raises for an unreadable
            or incompatible pickle.
    """
    with open(path, "rb") as f:
        return cloudpickle.load(f)


try:
    model_pipeline = _load_model_pipeline(model_path)
except FileNotFoundError:
    st.error(f"Model file not found at {model_path}. Make sure it exists.")
    st.stop()
except Exception as e:
    st.error(f"Error loading the model: {e}")
    st.stop()
else:
    # Reported outside the ``try`` body so that a failure in the UI call is
    # not misreported as a model-loading error.
    st.success("Model loaded successfully!")
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
# -------------------------
|
| 32 |
+
# 2️⃣ App title and description
|
| 33 |
+
# -------------------------
|
| 34 |
+
# Page heading followed by a short usage blurb rendered as Markdown.
app_heading = "💰 Developer Salary Calculator"
app_blurb = """
Estimate a developer's expected salary based on their profile.
Adjust the inputs in the sidebar to see predictions and explanations.
"""

st.title(app_heading)
st.markdown(app_blurb)
|
| 39 |
+
|
| 40 |
+
# -------------------------
|
| 41 |
+
# 3️⃣ Sidebar for user input
|
| 42 |
+
# -------------------------
|
| 43 |
+
# Choice lists for the categorical selectors.
# NOTE(review): these strings are presumably expected to match the category
# values the model pipeline was fitted on - confirm against the training data.
remote_work_choices = ["In-person", "Hybrid (some remote, some in-person)", "Remote"]
ed_level_choices = [
    "Some college/university study without earning a degree",
    "Associate degree (A.A., A.S., etc.)",
    "Bachelor’s degree (B.A., B.S., B.Eng., etc.)",
    "Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",
    "Professional degree (JD, MD, Ph.D, Ed.D, etc.)",
]
dev_type_choices = [
    "Developer, back-end",
    "Developer, full-stack",
    "Developer, AI",
    "Engineering manager",
    "Data scientist or machine learning specialist",
]
org_size_choices = [
    "1 to 9 employees", "10 to 19", "20 to 99", "100 to 499", "500 to 999",
    "1,000 to 4,999", "5,000 to 9,999", "10,000+",
]
country_choices = [
    "Germany", "Netherlands", "Portugal", "Italy", "Croatia", "Belgium", "Austria",
]
yes_no_choices = [True, False]

# All profile widgets live in the sidebar; they are created in display order.
sidebar = st.sidebar
sidebar.header("Profile Inputs")

age_group = sidebar.number_input("Age Group", min_value=0, max_value=10, value=1)
years_code_pro = sidebar.number_input(
    "Years of Coding Experience", min_value=0, max_value=50, value=3
)
remote_work = sidebar.selectbox("Remote Work", remote_work_choices)
ed_level = sidebar.selectbox("Education Level", ed_level_choices)
dev_type = sidebar.selectbox("Developer Type", dev_type_choices)
org_size = sidebar.selectbox("Organization Size", org_size_choices)
country = sidebar.selectbox("Country", country_choices)
so_account = sidebar.selectbox("Has Stack Overflow Account?", yes_no_choices)
ai_select = sidebar.selectbox("Interested in AI?", yes_no_choices)
|
| 71 |
+
|
| 72 |
+
# -------------------------
|
| 73 |
+
# 4️⃣ Prepare input DataFrame
|
| 74 |
+
# -------------------------
|
| 75 |
+
# Collect the sidebar selections into a single record; the key order below
# becomes the column order of the one-row frame passed to the pipeline.
profile_row = {
    "age_group": age_group,
    "years_code_pro": years_code_pro,
    "remote_work": remote_work,
    "ed_level": ed_level,
    "dev_type": dev_type,
    "org_size": org_size,
    "country": country,
    "so_account": so_account,
    "ai_select": ai_select,
}
input_df = pd.DataFrame([profile_row])
|
| 86 |
+
|
| 87 |
+
# -------------------------
|
| 88 |
+
# 5️⃣ Make prediction
|
| 89 |
+
# -------------------------
|
| 90 |
+
# Point estimate from the full pipeline (preprocessing + regressor).
predicted_salary = model_pipeline.predict(input_df)[0]

# Approximate expected range for tree-based models: when the regressor exposes
# per-estimator models via ``estimators_``, use the 5th-95th percentile of the
# individual predictions; otherwise fall back to +/-10% of the point estimate.
try:
    regressor = model_pipeline.named_steps['regressor']
    if hasattr(regressor, 'estimators_'):
        # Preprocess the input once rather than once per estimator.
        transformed_input = model_pipeline.named_steps['preprocessor'].transform(input_df)
        preds_per_tree = np.array([tree.predict(transformed_input)
                                   for tree in regressor.estimators_])
        lower = np.percentile(preds_per_tree, 5)
        upper = np.percentile(preds_per_tree, 95)
    else:
        lower, upper = predicted_salary*0.9, predicted_salary*1.1  # fallback ±10%
except Exception:
    # Best-effort: any failure while computing the spread (missing pipeline
    # step, estimators that cannot predict on this input, ...) falls back to
    # the +/-10% band. ``Exception`` rather than a bare ``except`` so that
    # KeyboardInterrupt/SystemExit are not swallowed.
    lower, upper = predicted_salary*0.9, predicted_salary*1.1

st.subheader("Predicted Salary")
st.write(f"💶 Predicted salary: **€{predicted_salary:,.0f}**")
st.write(f"🔹 Approximate expected range: €{lower:,.0f} – €{upper:,.0f}")
|
| 107 |
+
|
| 108 |
+
# -------------------------
|
| 109 |
+
# 6️⃣ SHAP explanation table
|
| 110 |
+
# -------------------------
|
| 111 |
+
st.subheader("Feature Contribution (SHAP)")

prep = model_pipeline.named_steps["preprocessor"]
model = model_pipeline.named_steps.get("regressor")

# Run only the preprocessing step so SHAP sees the features the regressor sees.
transformed = prep.transform(input_df)
if hasattr(transformed, "toarray"):
    # The preprocessor may return a SciPy sparse matrix (e.g. from one-hot
    # encoding); pandas cannot build a labelled frame from one directly, so
    # densify the single row first.
    transformed = transformed.toarray()
X_shap = pd.DataFrame(transformed, columns=prep.get_feature_names_out())

explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_shap)

# Get the correct SHAP row for regression
if isinstance(shap_values, list):
    shap_vals_row = shap_values[0][0]  # fallback if list
else:
    shap_vals_row = shap_values[0] if shap_values.ndim > 1 else shap_values

# One line per transformed feature, ordered by absolute contribution.
shap_df = pd.DataFrame({
    "feature": X_shap.columns,
    "feature_value": X_shap.iloc[0, :].values,
    "shap_value": shap_vals_row,
    "abs_shap": np.abs(shap_vals_row)
}).sort_values("abs_shap", ascending=False)

st.dataframe(shap_df)
|
| 135 |
+
|
| 136 |
+
# -------------------------
|
| 137 |
+
# 7️⃣ SHAP force plot
|
| 138 |
+
# -------------------------
|
| 139 |
+
st.subheader("SHAP Force Plot")
# NOTE(review): initjs() loads the JavaScript renderer; it looks unnecessary
# for the matplotlib backend used below - confirm before removing.
shap.initjs()
# show=False makes SHAP return the matplotlib figure instead of calling
# plt.show() and returning None, so st.pyplot receives an explicit figure.
force_plot = shap.force_plot(
    explainer.expected_value,
    shap_vals_row,
    X_shap.iloc[0, :],
    matplotlib=True,
    show=False,
)
st.pyplot(force_plot)
|
requirements.txt
CHANGED
|
@@ -1,3 +1,153 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
adagio==0.2.6
|
| 2 |
+
aiohappyeyeballs==2.6.1
|
| 3 |
+
aiohttp==3.12.15
|
| 4 |
+
aiosignal==1.4.0
|
| 5 |
+
alembic==1.16.5
|
| 6 |
+
altair==5.5.0
|
| 7 |
+
annotated-types==0.7.0
|
| 8 |
+
anyio==4.11.0
|
| 9 |
+
appdirs==1.4.4
|
| 10 |
+
appnope==0.1.4
|
| 11 |
+
asttokens==3.0.0
|
| 12 |
+
async-timeout==5.0.1
|
| 13 |
+
attrs==25.3.0
|
| 14 |
+
blinker==1.9.0
|
| 15 |
+
branca==0.8.1
|
| 16 |
+
cachetools==6.2.0
|
| 17 |
+
certifi==2025.8.3
|
| 18 |
+
charset-normalizer==3.4.3
|
| 19 |
+
click==8.1.8
|
| 20 |
+
cloudpickle==3.1.1
|
| 21 |
+
cmdstanpy==1.2.5
|
| 22 |
+
colorlog==6.9.0
|
| 23 |
+
comm==0.2.3
|
| 24 |
+
contourpy==1.3.0
|
| 25 |
+
coreforecast==0.0.16
|
| 26 |
+
cycler==0.12.1
|
| 27 |
+
darts==0.37.1
|
| 28 |
+
debugpy==1.8.16
|
| 29 |
+
decorator==5.2.1
|
| 30 |
+
exceptiongroup==1.3.0
|
| 31 |
+
executing==2.2.1
|
| 32 |
+
fastapi==0.118.0
|
| 33 |
+
fastjsonschema==2.21.2
|
| 34 |
+
filelock==3.19.1
|
| 35 |
+
folium==0.20.0
|
| 36 |
+
fonttools==4.59.2
|
| 37 |
+
frozenlist==1.7.0
|
| 38 |
+
fs==2.4.16
|
| 39 |
+
fsspec==2025.9.0
|
| 40 |
+
fugue==0.9.1
|
| 41 |
+
geographiclib==2.1
|
| 42 |
+
geopy==2.4.1
|
| 43 |
+
gitdb==4.0.12
|
| 44 |
+
GitPython==3.1.45
|
| 45 |
+
handcalcs==1.9.0
|
| 46 |
+
holidays==0.81
|
| 47 |
+
idna==3.10
|
| 48 |
+
importlib_metadata==8.7.0
|
| 49 |
+
importlib_resources==6.5.2
|
| 50 |
+
innerscope==0.7.0
|
| 51 |
+
ipykernel==6.30.1
|
| 52 |
+
ipython==8.18.1
|
| 53 |
+
ipywidgets==8.1.7
|
| 54 |
+
jedi==0.19.2
|
| 55 |
+
Jinja2==3.1.6
|
| 56 |
+
joblib==1.5.2
|
| 57 |
+
jsonschema==4.25.1
|
| 58 |
+
jsonschema-specifications==2025.9.1
|
| 59 |
+
jupyter_client==8.6.3
|
| 60 |
+
jupyter_core==5.8.1
|
| 61 |
+
jupyterlab_widgets==3.0.15
|
| 62 |
+
kiwisolver==1.4.7
|
| 63 |
+
lightning-utilities==0.15.2
|
| 64 |
+
llvmlite==0.43.0
|
| 65 |
+
Mako==1.3.10
|
| 66 |
+
MarkupSafe==3.0.2
|
| 67 |
+
matplotlib==3.9.4
|
| 68 |
+
matplotlib-inline==0.1.7
|
| 69 |
+
mlxtend==0.23.4
|
| 70 |
+
more-itertools==10.8.0
|
| 71 |
+
mpmath==1.3.0
|
| 72 |
+
multidict==6.6.4
|
| 73 |
+
narwhals==2.4.0
|
| 74 |
+
nbformat==5.10.4
|
| 75 |
+
nest-asyncio==1.6.0
|
| 76 |
+
networkx==3.2.1
|
| 77 |
+
nfoursid==1.0.2
|
| 78 |
+
numba==0.60.0
|
| 79 |
+
numpy==2.0.2
|
| 80 |
+
optuna==4.5.0
|
| 81 |
+
packaging==25.0
|
| 82 |
+
pandas==2.3.2
|
| 83 |
+
parso==0.8.5
|
| 84 |
+
patsy==1.0.1
|
| 85 |
+
pexpect==4.9.0
|
| 86 |
+
pillow==11.3.0
|
| 87 |
+
platformdirs==4.4.0
|
| 88 |
+
plotly==6.3.0
|
| 89 |
+
prompt_toolkit==3.0.52
|
| 90 |
+
propcache==0.3.2
|
| 91 |
+
prophet==1.1.7
|
| 92 |
+
protobuf==6.32.0
|
| 93 |
+
psutil==7.0.0
|
| 94 |
+
ptyprocess==0.7.0
|
| 95 |
+
pure_eval==0.2.3
|
| 96 |
+
pyarrow==21.0.0
|
| 97 |
+
pydantic==2.11.9
|
| 98 |
+
pydantic_core==2.33.2
|
| 99 |
+
pydeck==0.9.1
|
| 100 |
+
Pygments==2.19.2
|
| 101 |
+
pynndescent==0.5.13
|
| 102 |
+
pyod==2.0.5
|
| 103 |
+
pyparsing==3.2.3
|
| 104 |
+
python-dateutil==2.9.0.post0
|
| 105 |
+
pytorch-lightning==2.5.2
|
| 106 |
+
pytz==2025.2
|
| 107 |
+
PyYAML==6.0.2
|
| 108 |
+
pyzmq==27.1.0
|
| 109 |
+
referencing==0.36.2
|
| 110 |
+
requests==2.32.5
|
| 111 |
+
rpds-py==0.27.1
|
| 112 |
+
scikit-learn==1.6.1
|
| 113 |
+
scipy==1.13.1
|
| 114 |
+
seaborn==0.13.2
|
| 115 |
+
shap==0.48.0
|
| 116 |
+
six==1.17.0
|
| 117 |
+
sklearn==0.0
|
| 118 |
+
slicer==0.0.8
|
| 119 |
+
smmap==5.0.2
|
| 120 |
+
sniffio==1.3.1
|
| 121 |
+
SQLAlchemy==2.0.43
|
| 122 |
+
stack-data==0.6.3
|
| 123 |
+
stanio==0.5.1
|
| 124 |
+
starlette==0.48.0
|
| 125 |
+
statsforecast==2.0.2
|
| 126 |
+
statsmodels==0.14.5
|
| 127 |
+
streamlit==1.49.1
|
| 128 |
+
sympy==1.14.0
|
| 129 |
+
tenacity==9.1.2
|
| 130 |
+
tensorboardX==2.6.4
|
| 131 |
+
threadpoolctl==3.6.0
|
| 132 |
+
toml==0.10.2
|
| 133 |
+
tomli==2.2.1
|
| 134 |
+
toolz==1.0.0
|
| 135 |
+
torch==2.8.0
|
| 136 |
+
torchmetrics==1.8.2
|
| 137 |
+
tornado==6.5.2
|
| 138 |
+
tqdm==4.67.1
|
| 139 |
+
traitlets==5.14.3
|
| 140 |
+
triad==0.9.8
|
| 141 |
+
typing-inspection==0.4.1
|
| 142 |
+
typing_extensions==4.15.0
|
| 143 |
+
tzdata==2025.2
|
| 144 |
+
umap-learn==0.5.9.post2
|
| 145 |
+
urllib3==2.5.0
|
| 146 |
+
utilsforecast==0.2.12
|
| 147 |
+
wcwidth==0.2.13
|
| 148 |
+
widgetsnbextension==4.0.14
|
| 149 |
+
xarray==2024.7.0
|
| 150 |
+
xgboost==2.1.4
|
| 151 |
+
xyzservices==2025.4.0
|
| 152 |
+
yarl==1.20.1
|
| 153 |
+
zipp==3.23.0
|
salary_model_cloud.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:441a5132ef5b8d64ee33983924d52ec1e0256dbede3fe130dea46ff7921232d3
|
| 3 |
+
size 20705117
|