|
|
|
|
|
"""Untitled8.ipynb |
|
|
|
|
|
Automatically generated by Colab. |
|
|
|
|
|
Original file is located at |
|
|
https://colab.research.google.com/drive/1SnoorFAucvS1FXD1vzyJnJ-_hoZUfJ_u |
|
|
""" |
|
|
|
|
|
import streamlit as st |
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
import plotly.express as px |
|
|
|
|
|
|
|
|
# Configure the Streamlit page: title, icon and wide layout.
# NOTE(review): the icon was mojibake ("π»") in the source; the surviving
# characters correspond to the laptop emoji — confirm against the notebook.
st.set_page_config(
    page_title="Developer Salary Explorer",
    page_icon="💻",
    layout="wide",
)
|
|
|
|
|
@st.cache_data
def load_data():
    """Load and preprocess the Stack Overflow survey data.

    Reads ``stackoverflow_survey_single_response.txt`` (relative path) as CSV
    and then:

    * keeps rows whose ``converted_comp_yearly`` is present and above 1000;
    * coerces ``years_code``, ``years_code_pro`` and ``age`` to numbers and
      fills the gaps with each column's median;
    * derives ``experience_level`` by binning ``years_code_pro``;
    * collapses every country outside a fixed shortlist into ``'Other'``;
    * derives ``education_level`` from ``ed_level`` through a code-to-label
      mapping, using ``'Other'`` for unmapped values.

    Returns:
        pandas.DataFrame: the cleaned frame. If anything in the load or
        preprocessing raises, the error is shown with ``st.error`` and an
        empty DataFrame is returned instead.
    """
    try:
        df = pd.read_csv('stackoverflow_survey_single_response.txt')

        # Apply both salary filters in one mask and copy once, so the column
        # assignments below write to an independent frame rather than a slice.
        comp = df['converted_comp_yearly']
        df_clean = df[comp.notna() & (comp > 1000)].copy()

        for col in ['years_code', 'years_code_pro', 'age']:
            df_clean[col] = pd.to_numeric(df_clean[col], errors='coerce')
            df_clean[col] = df_clean[col].fillna(df_clean[col].median())

        # include_lowest puts 0 years into the first bin, and the open upper
        # edge keeps very long careers in the "10+" bin; otherwise both ends
        # would be left without an experience level.
        df_clean['experience_level'] = pd.cut(
            df_clean['years_code_pro'],
            bins=[0, 2, 5, 10, float('inf')],
            labels=['Junior (0-2 yrs)', 'Mid (3-5 yrs)', 'Senior (6-10 yrs)', 'Expert (10+ yrs)'],
            include_lowest=True,
        )

        top_countries = [
            'United States of America',
            'United Kingdom of Great Britain and Northern Ireland',
            'Germany', 'India', 'Canada', 'France', 'Australia',
        ]
        # Vectorised: keep shortlisted countries, label everything else
        # (including missing values) as 'Other'.
        df_clean['country'] = df_clean['country'].where(
            df_clean['country'].isin(top_countries), 'Other'
        )

        education_map = {
            1: 'Less than Bachelor',
            2: 'Bachelor\'s Degree',
            3: 'Master\'s Degree',
            4: 'Doctoral Degree',
            5: 'Professional Degree',
        }
        df_clean['education_level'] = (
            df_clean['ed_level'].map(education_map).fillna('Other')
        )

        return df_clean

    except Exception as e:
        # Top-level boundary: report the failure in the UI; callers treat an
        # empty frame as "no data".
        st.error(f"Error loading data: {str(e)}")
        return pd.DataFrame()
|
|
|
|
|
def _apply_sidebar_filters(df):
    """Render the sidebar filter widgets and return the rows of ``df`` they select."""
    # NOTE(review): the emoji in this file's labels were mojibake in the
    # source. The laptop and technologist icons are recoverable from the
    # surviving characters; the other icons are best-guess replacements —
    # confirm against the original notebook.
    st.sidebar.header("🔍 Filter Data")

    countries = sorted(df['country'].unique())
    selected_countries = st.sidebar.multiselect(
        "Select Countries:",
        options=countries,
        default=countries[:3],
    )

    education_levels = sorted(df['education_level'].unique())
    selected_education = st.sidebar.multiselect(
        "Select Education Levels:",
        options=education_levels,
        default=education_levels,
    )

    exp_floor = int(df['years_code_pro'].min())
    exp_ceiling = int(min(df['years_code_pro'].max(), 40))
    # The slider bounds come from the data, so clamp the preferred (0, 15)
    # default into them instead of passing a value outside the range.
    default_low = min(max(0, exp_floor), exp_ceiling)
    default_high = max(min(15, exp_ceiling), default_low)
    min_exp, max_exp = st.sidebar.slider(
        "Years of Professional Experience:",
        min_value=exp_floor,
        max_value=exp_ceiling,
        value=(default_low, default_high),
    )

    return df[
        df['country'].isin(selected_countries)
        & df['education_level'].isin(selected_education)
        & (df['years_code_pro'] >= min_exp)
        & (df['years_code_pro'] <= max_exp)
    ]


def _show_key_metrics(filtered_df):
    """Show median, mean, row count and min-max of the salary column in four columns."""
    salaries = filtered_df['converted_comp_yearly']

    st.header("📊 Key Metrics")
    col1, col2, col3, col4 = st.columns(4)

    with col1:
        st.metric("Median Salary", f"${salaries.median():,.0f}")

    with col2:
        st.metric("Average Salary", f"${salaries.mean():,.0f}")

    with col3:
        st.metric("Sample Size", f"{len(filtered_df):,}")

    with col4:
        st.metric("Salary Range", f"${salaries.min():,.0f} - ${salaries.max():,.0f}")


def _show_salary_charts(filtered_df):
    """Render the four salary charts: by country, education, experience and experience level."""
    st.header("📈 Salary Analysis")

    st.subheader("🌍 Salary by Country")
    country_stats = (
        filtered_df.groupby('country')['converted_comp_yearly']
        .median()
        .sort_values(ascending=False)
    )
    fig1 = px.bar(
        x=country_stats.index,
        y=country_stats.values,
        title="Median Salary by Country",
        labels={'x': 'Country', 'y': 'Median Salary (USD)'},
    )
    st.plotly_chart(fig1, use_container_width=True)

    st.subheader("🎓 Salary by Education Level")
    fig2 = px.box(
        filtered_df,
        x='education_level',
        y='converted_comp_yearly',
        title="Salary Distribution by Education Level",
    )
    st.plotly_chart(fig2, use_container_width=True)

    st.subheader("📅 Salary vs Experience")
    fig3 = px.scatter(
        filtered_df,
        x='years_code_pro',
        y='converted_comp_yearly',
        color='country',
        title="Salary Growth with Experience",
        trendline="lowess",
    )
    st.plotly_chart(fig3, use_container_width=True)

    st.subheader("👨‍💻 Salary by Experience Level")
    # experience_level is categorical; observed=False is stated explicitly so
    # every level keeps its slot on the axis regardless of the pandas default.
    exp_stats = (
        filtered_df.groupby('experience_level', observed=False)['converted_comp_yearly']
        .median()
    )
    fig4 = px.bar(
        x=exp_stats.index,
        y=exp_stats.values,
        title="Median Salary by Experience Level",
    )
    st.plotly_chart(fig4, use_container_width=True)


def _show_data_table(filtered_df):
    """Offer a checkbox that reveals the filtered rows, highest salary first."""
    st.header("📋 Detailed Data View")
    if st.checkbox("Show filtered data table"):
        display_cols = [
            'country', 'education_level', 'experience_level',
            'years_code_pro', 'converted_comp_yearly',
        ]
        st.dataframe(
            filtered_df[display_cols].sort_values('converted_comp_yearly', ascending=False),
            use_container_width=True,
        )


def main():
    """Render the Developer Salary Explorer page.

    Loads the survey data, applies the sidebar filters, and then shows key
    metrics, salary charts and an optional data table. Stops early with an
    error when no data could be loaded, and with a warning when the filters
    select no rows.
    """
    st.title("💻 Developer Salary Explorer")
    st.markdown("Explore how country, education, and experience influence developer salaries worldwide.")

    df = load_data()

    if df.empty:
        st.error("No data loaded. Please check your data file.")
        return

    filtered_df = _apply_sidebar_filters(df)

    # Check for an empty selection before computing any statistics, so the
    # metrics are never rendered as "$nan".
    if filtered_df.empty:
        st.warning("No data matches your filters. Please adjust your selection.")
        return

    _show_key_metrics(filtered_df)
    _show_salary_charts(filtered_df)
    _show_data_table(filtered_df)
|
|
|
|
|
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()