# File size: 6,314 Bytes
# 5f19f8d
# (file-viewer line-number gutter 1-194 removed)
# -*- coding: utf-8 -*-
"""Untitled8.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1SnoorFAucvS1FXD1vzyJnJ-_hoZUfJ_u
"""
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
# Page configuration: browser-tab title and icon, full-width layout.
# The icon literal had been mis-encoded ("π»"); it is restored here to the
# laptop emoji those bytes stand for.
st.set_page_config(
    page_title="Developer Salary Explorer",
    page_icon="💻",
    layout="wide",
)
@st.cache_data
def load_data():
    """Load and preprocess the Stack Overflow survey data.

    Reads ``stackoverflow_survey_single_response.txt`` as CSV, keeps only rows
    whose ``converted_comp_yearly`` is present and greater than 1000, and
    derives the columns the dashboard filters on.

    Returns:
        pandas.DataFrame: the cleaned rows, with

        * ``years_code``, ``years_code_pro`` and ``age`` coerced to numeric and
          missing values replaced by the column median,
        * ``experience_level`` (categorical) bucketed from ``years_code_pro``,
        * ``country`` collapsed to a short list of countries plus ``'Other'``,
        * ``education_level`` mapped from ``ed_level`` (unmapped -> ``'Other'``).

        If anything fails, the error is reported with ``st.error`` and an
        empty DataFrame is returned so the caller can bail out gracefully.
    """
    try:
        df = pd.read_csv('stackoverflow_survey_single_response.txt')

        # Filter only rows with usable compensation data. One combined mask and
        # a single copy, so the column assignments below operate on an
        # independent frame rather than on a slice of a slice.
        comp = df['converted_comp_yearly']
        df_clean = df[comp.notna() & (comp > 1000)].copy()

        # Handle missing / non-numeric values in numeric columns.
        for col in ['years_code', 'years_code_pro', 'age']:
            numeric = pd.to_numeric(df_clean[col], errors='coerce')
            df_clean[col] = numeric.fillna(numeric.median())

        # Create experience levels. include_lowest=True puts exactly 0 years in
        # the "Junior (0-2 yrs)" bucket, and the open-ended last edge keeps
        # values above 50 in "Expert (10+ yrs)"; both previously became NaN.
        df_clean['experience_level'] = pd.cut(
            df_clean['years_code_pro'],
            bins=[0, 2, 5, 10, float('inf')],
            labels=['Junior (0-2 yrs)', 'Mid (3-5 yrs)', 'Senior (6-10 yrs)', 'Expert (10+ yrs)'],
            include_lowest=True,
        )

        # Simplify country: keep the listed countries, everything else
        # (including missing values) becomes 'Other'.
        top_countries = [
            'United States of America',
            'United Kingdom of Great Britain and Northern Ireland',
            'Germany', 'India', 'Canada', 'France', 'Australia',
        ]
        country = df_clean['country']
        df_clean['country'] = country.where(country.isin(top_countries), 'Other')

        # Map education levels to readable names.
        # NOTE(review): assumes ed_level holds the integer codes 1-5; if it
        # holds text labels every row will fall through to 'Other' - confirm.
        education_map = {
            1: 'Less than Bachelor',
            2: 'Bachelor\'s Degree',
            3: 'Master\'s Degree',
            4: 'Doctoral Degree',
            5: 'Professional Degree',
        }
        df_clean['education_level'] = (
            df_clean['ed_level'].map(education_map).fillna('Other')
        )

        return df_clean
    except Exception as e:
        # Deliberate best-effort boundary: surface the problem in the UI and
        # hand back an empty frame instead of crashing the app.
        st.error(f"Error loading data: {str(e)}")
        return pd.DataFrame()
def _render_sidebar_filters(df):
    """Draw the sidebar filter widgets and return the rows of *df* they select."""
    st.sidebar.header("🔍 Filter Data")

    # Country filter
    countries = sorted(df['country'].unique())
    selected_countries = st.sidebar.multiselect(
        "Select Countries:",
        options=countries,
        default=countries[:3],  # Default to first 3 countries
    )

    # Education level filter
    education_levels = sorted(df['education_level'].unique())
    selected_education = st.sidebar.multiselect(
        "Select Education Levels:",
        options=education_levels,
        default=education_levels,
    )

    # Years of experience slider. The default (0, 15) is clamped into the
    # slider's own bounds, because a default outside [min_value, max_value]
    # is rejected by Streamlit.
    exp_floor = int(df['years_code_pro'].min())
    exp_ceiling = int(min(df['years_code_pro'].max(), 40))  # Cap at 40 for better UX
    default_low = max(exp_floor, min(0, exp_ceiling))
    default_high = max(exp_floor, min(15, exp_ceiling))
    min_exp, max_exp = st.sidebar.slider(
        "Years of Professional Experience:",
        min_value=exp_floor,
        max_value=exp_ceiling,
        value=(default_low, default_high),
    )

    # Apply filters.
    # NOTE(review): because the slider is capped at 40, rows with more than
    # 40 years of experience can never be selected - confirm this is intended.
    return df[
        (df['country'].isin(selected_countries)) &
        (df['education_level'].isin(selected_education)) &
        (df['years_code_pro'] >= min_exp) &
        (df['years_code_pro'] <= max_exp)
    ]


def _render_key_metrics(filtered_df):
    """Show median, mean, sample size and range of the filtered salaries."""
    salaries = filtered_df['converted_comp_yearly']

    st.header("📊 Key Metrics")
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Median Salary", f"${salaries.median():,.0f}")
    with col2:
        st.metric("Average Salary", f"${salaries.mean():,.0f}")
    with col3:
        st.metric("Sample Size", f"{len(filtered_df):,}")
    with col4:
        st.metric("Salary Range", f"${salaries.min():,.0f} - ${salaries.max():,.0f}")


def _render_charts(filtered_df):
    """Draw the four salary charts for the filtered rows."""
    st.header("📈 Salary Analysis")

    # 1. Salary by Country
    st.subheader("🌍 Salary by Country")
    country_stats = (
        filtered_df.groupby('country')['converted_comp_yearly']
        .median()
        .sort_values(ascending=False)
    )
    fig1 = px.bar(
        x=country_stats.index,
        y=country_stats.values,
        title="Median Salary by Country",
        labels={'x': 'Country', 'y': 'Median Salary (USD)'},
    )
    st.plotly_chart(fig1, use_container_width=True)

    # 2. Salary by Education Level
    st.subheader("🎓 Salary by Education Level")
    fig2 = px.box(
        filtered_df,
        x='education_level',
        y='converted_comp_yearly',
        title="Salary Distribution by Education Level",
    )
    st.plotly_chart(fig2, use_container_width=True)

    # 3. Salary by Experience
    st.subheader("📅 Salary vs Experience")
    fig3 = px.scatter(
        filtered_df,
        x='years_code_pro',
        y='converted_comp_yearly',
        color='country',
        title="Salary Growth with Experience",
        trendline="lowess",
    )
    st.plotly_chart(fig3, use_container_width=True)

    # 4. Experience Level Analysis. observed=False is spelled out so every
    # experience bucket stays on the axis even when it has no rows.
    st.subheader("👨‍💻 Salary by Experience Level")
    exp_stats = filtered_df.groupby(
        'experience_level', observed=False
    )['converted_comp_yearly'].median()
    fig4 = px.bar(
        x=exp_stats.index,
        y=exp_stats.values,
        title="Median Salary by Experience Level",
    )
    st.plotly_chart(fig4, use_container_width=True)


def _render_data_table(filtered_df):
    """Offer an optional table of the filtered rows, highest salary first."""
    st.header("📋 Detailed Data View")
    if st.checkbox("Show filtered data table"):
        display_cols = ['country', 'education_level', 'experience_level', 'years_code_pro', 'converted_comp_yearly']
        st.dataframe(
            filtered_df[display_cols].sort_values('converted_comp_yearly', ascending=False),
            use_container_width=True,
        )


def main():
    """Run the Developer Salary Explorer page.

    Loads the survey data, renders the sidebar filters, and then shows key
    metrics, four charts and an optional data table for the filtered rows.
    Returns early with a message when no data could be loaded or when the
    current filters match nothing; the latter check now happens before the
    metrics are computed, so an empty selection no longer displays "$nan".

    NOTE(review): the emoji in the headers were mis-encoded in the source.
    The laptop and technologist glyphs are recoverable from the surviving
    bytes; the remaining ones are best-effort reconstructions - confirm.
    """
    st.title("💻 Developer Salary Explorer")
    st.markdown("Explore how country, education, and experience influence developer salaries worldwide.")

    # Load data
    df = load_data()
    if df.empty:
        st.error("No data loaded. Please check your data file.")
        return

    filtered_df = _render_sidebar_filters(df)
    if filtered_df.empty:
        st.warning("No data matches your filters. Please adjust your selection.")
        return

    _render_key_metrics(filtered_df)
    _render_charts(filtered_df)
    _render_data_table(filtered_df)
# Script entry point: render the page when the file is run directly
# (e.g. via `streamlit run`).
if __name__ == "__main__":
    main()