Spaces:

Anders-sonderby
/

developer_salary_test

Build error

App Files Files Community

Anders-sonderby committed on Oct 8, 2025

Commit

2ef5509

verified ·

1 Parent(s): 1a64aec

Upload 3 files

Browse files

Files changed (3) hide show

app_test.py +142 -0
requirements.txt +153 -3
salary_model_cloud.pkl +3 -0

app_test.py ADDED Viewed

	@@ -0,0 +1,142 @@

+# streamlit_salary_app.py
+import streamlit as st
+import pandas as pd
+import numpy as np
+import joblib
+import os
+import shap
+# -------------------------
+# 1️⃣ Load model pipeline safely
+# -------------------------
+import cloudpickle
+import os
+script_dir = os.path.dirname(__file__)  # folder where this script is
+model_path = os.path.join(script_dir, "salary_model_cloud.pkl")  # use the new cloudpickle file
+try:
+    with open(model_path, "rb") as f:
+        model_pipeline = cloudpickle.load(f)
+    st.success("Model loaded successfully!")
+except FileNotFoundError:
+    st.error(f"Model file not found at {model_path}. Make sure it exists.")
+    st.stop()
+except Exception as e:
+    st.error(f"Error loading the model: {e}")
+    st.stop()
+# -------------------------
+# 2️⃣ App title and description
+# -------------------------
+st.title("💰 Developer Salary Calculator")
+st.markdown("""
+Estimate a developer's expected salary based on their profile.
+Adjust the inputs in the sidebar to see predictions and explanations.
+""")
+# -------------------------
+# 3️⃣ Sidebar for user input
+# -------------------------
+st.sidebar.header("Profile Inputs")
+age_group = st.sidebar.number_input("Age Group", min_value=0, max_value=10, value=1)
+years_code_pro = st.sidebar.number_input("Years of Coding Experience", min_value=0, max_value=50, value=3)
+remote_work = st.sidebar.selectbox("Remote Work", ["In-person", "Hybrid (some remote, some in-person)", "Remote"])
+ed_level = st.sidebar.selectbox("Education Level", [
+    "Some college/university study without earning a degree",
+    "Associate degree (A.A., A.S., etc.)",
+    "Bachelor’s degree (B.A., B.S., B.Eng., etc.)",
+    "Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",
+    "Professional degree (JD, MD, Ph.D, Ed.D, etc.)"
+])
+dev_type = st.sidebar.selectbox("Developer Type", [
+    "Developer, back-end",
+    "Developer, full-stack",
+    "Developer, AI",
+    "Engineering manager",
+    "Data scientist or machine learning specialist"
+])
+org_size = st.sidebar.selectbox("Organization Size", [
+    "1 to 9 employees", "10 to 19", "20 to 99", "100 to 499", "500 to 999",
+    "1,000 to 4,999", "5,000 to 9,999", "10,000+"
+])
+country = st.sidebar.selectbox("Country", [
+    "Germany", "Netherlands", "Portugal", "Italy", "Croatia", "Belgium", "Austria"
+])
+so_account = st.sidebar.selectbox("Has Stack Overflow Account?", [True, False])
+ai_select = st.sidebar.selectbox("Interested in AI?", [True, False])
+# -------------------------
+# 4️⃣ Prepare input DataFrame
+# -------------------------
+input_df = pd.DataFrame([{
+    "age_group": age_group,
+    "years_code_pro": years_code_pro,
+    "remote_work": remote_work,
+    "ed_level": ed_level,
+    "dev_type": dev_type,
+    "org_size": org_size,
+    "country": country,
+    "so_account": so_account,
+    "ai_select": ai_select
+}])
+# -------------------------
+# 5️⃣ Make prediction
+# -------------------------
+predicted_salary = model_pipeline.predict(input_df)[0]
+# Approximate expected range for tree-based models
+try:
+    if hasattr(model_pipeline.named_steps['regressor'], 'estimators_'):
+        preds_per_tree = np.array([tree.predict(model_pipeline.named_steps['preprocessor'].transform(input_df))
+                                   for tree in model_pipeline.named_steps['regressor'].estimators_])
+        lower = np.percentile(preds_per_tree, 5)
+        upper = np.percentile(preds_per_tree, 95)
+    else:
+        lower, upper = predicted_salary*0.9, predicted_salary*1.1  # fallback ±10%
+except:
+    lower, upper = predicted_salary*0.9, predicted_salary*1.1
+st.subheader("Predicted Salary")
+st.write(f"💶 Predicted salary: **€{predicted_salary:,.0f}**")
+st.write(f"🔹 Approximate expected range: €{lower:,.0f} – €{upper:,.0f}")
+# -------------------------
+# 6️⃣ SHAP explanation table
+# -------------------------
+st.subheader("Feature Contribution (SHAP)")
+prep = model_pipeline.named_steps["preprocessor"]
+model = model_pipeline.named_steps.get("regressor")
+X_shap = pd.DataFrame(prep.transform(input_df), columns=prep.get_feature_names_out())
+explainer = shap.TreeExplainer(model)
+shap_values = explainer.shap_values(X_shap)
+# Get the correct SHAP row for regression
+if isinstance(shap_values, list):
+    shap_vals_row = shap_values[0][0]  # fallback if list
+else:
+    shap_vals_row = shap_values[0] if shap_values.ndim > 1 else shap_values
+shap_df = pd.DataFrame({
+    "feature": X_shap.columns,
+    "feature_value": X_shap.iloc[0, :].values,
+    "shap_value": shap_vals_row,
+    "abs_shap": np.abs(shap_vals_row)
+}).sort_values("abs_shap", ascending=False)
+st.dataframe(shap_df)
+# -------------------------
+# 7️⃣ SHAP force plot
+# -------------------------
+st.subheader("SHAP Force Plot")
+shap.initjs()
+force_plot = shap.force_plot(explainer.expected_value, shap_vals_row, X_shap.iloc[0, :], matplotlib=True)
+st.pyplot(force_plot)

requirements.txt CHANGED Viewed

@@ -1,3 +1,153 @@
-altair
-pandas
-streamlit

+adagio==0.2.6
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiosignal==1.4.0
+alembic==1.16.5
+altair==5.5.0
+annotated-types==0.7.0
+anyio==4.11.0
+appdirs==1.4.4
+appnope==0.1.4
+asttokens==3.0.0
+async-timeout==5.0.1
+attrs==25.3.0
+blinker==1.9.0
+branca==0.8.1
+cachetools==6.2.0
+certifi==2025.8.3
+charset-normalizer==3.4.3
+click==8.1.8
+cloudpickle==3.1.1
+cmdstanpy==1.2.5
+colorlog==6.9.0
+comm==0.2.3
+contourpy==1.3.0
+coreforecast==0.0.16
+cycler==0.12.1
+darts==0.37.1
+debugpy==1.8.16
+decorator==5.2.1
+exceptiongroup==1.3.0
+executing==2.2.1
+fastapi==0.118.0
+fastjsonschema==2.21.2
+filelock==3.19.1
+folium==0.20.0
+fonttools==4.59.2
+frozenlist==1.7.0
+fs==2.4.16
+fsspec==2025.9.0
+fugue==0.9.1
+geographiclib==2.1
+geopy==2.4.1
+gitdb==4.0.12
+GitPython==3.1.45
+handcalcs==1.9.0
+holidays==0.81
+idna==3.10
+importlib_metadata==8.7.0
+importlib_resources==6.5.2
+innerscope==0.7.0
+ipykernel==6.30.1
+ipython==8.18.1
+ipywidgets==8.1.7
+jedi==0.19.2
+Jinja2==3.1.6
+joblib==1.5.2
+jsonschema==4.25.1
+jsonschema-specifications==2025.9.1
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyterlab_widgets==3.0.15
+kiwisolver==1.4.7
+lightning-utilities==0.15.2
+llvmlite==0.43.0
+Mako==1.3.10
+MarkupSafe==3.0.2
+matplotlib==3.9.4
+matplotlib-inline==0.1.7
+mlxtend==0.23.4
+more-itertools==10.8.0
+mpmath==1.3.0
+multidict==6.6.4
+narwhals==2.4.0
+nbformat==5.10.4
+nest-asyncio==1.6.0
+networkx==3.2.1
+nfoursid==1.0.2
+numba==0.60.0
+numpy==2.0.2
+optuna==4.5.0
+packaging==25.0
+pandas==2.3.2
+parso==0.8.5
+patsy==1.0.1
+pexpect==4.9.0
+pillow==11.3.0
+platformdirs==4.4.0
+plotly==6.3.0
+prompt_toolkit==3.0.52
+propcache==0.3.2
+prophet==1.1.7
+protobuf==6.32.0
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pyarrow==21.0.0
+pydantic==2.11.9
+pydantic_core==2.33.2
+pydeck==0.9.1
+Pygments==2.19.2
+pynndescent==0.5.13
+pyod==2.0.5
+pyparsing==3.2.3
+python-dateutil==2.9.0.post0
+pytorch-lightning==2.5.2
+pytz==2025.2
+PyYAML==6.0.2
+pyzmq==27.1.0
+referencing==0.36.2
+requests==2.32.5
+rpds-py==0.27.1
+scikit-learn==1.6.1
+scipy==1.13.1
+seaborn==0.13.2
+shap==0.48.0
+six==1.17.0
+sklearn==0.0
+slicer==0.0.8
+smmap==5.0.2
+sniffio==1.3.1
+SQLAlchemy==2.0.43
+stack-data==0.6.3
+stanio==0.5.1
+starlette==0.48.0
+statsforecast==2.0.2
+statsmodels==0.14.5
+streamlit==1.49.1
+sympy==1.14.0
+tenacity==9.1.2
+tensorboardX==2.6.4
+threadpoolctl==3.6.0
+toml==0.10.2
+tomli==2.2.1
+toolz==1.0.0
+torch==2.8.0
+torchmetrics==1.8.2
+tornado==6.5.2
+tqdm==4.67.1
+traitlets==5.14.3
+triad==0.9.8
+typing-inspection==0.4.1
+typing_extensions==4.15.0
+tzdata==2025.2
+umap-learn==0.5.9.post2
+urllib3==2.5.0
+utilsforecast==0.2.12
+wcwidth==0.2.13
+widgetsnbextension==4.0.14
+xarray==2024.7.0
+xgboost==2.1.4
+xyzservices==2025.4.0
+yarl==1.20.1
+zipp==3.23.0

salary_model_cloud.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:441a5132ef5b8d64ee33983924d52ec1e0256dbede3fe130dea46ff7921232d3
+size 20705117