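# NOTE(review): the four lines below are residue from the web page this file
# was copied from, not code; they are kept as comments so the module parses.
# Ham / Streamlit_app.py
# Hamna97's picture
# Upload 3 files
# 5f19f8d verified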
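# -*- coding: utf-8 -*-
"""Developer Salary Explorer: a Streamlit dashboard over Stack Overflow survey data.

Automatically generated by Colab from the notebook Untitled8.ipynb.

Original file is located at
    https://colab.research.google.com/drive/1SnoorFAucvS1FXD1vzyJnJ-_hoZUfJ_u
"""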
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
# Page configuration: browser-tab title and icon, wide layout.
# The icon literal was UTF-8 mojibake; it is restored to the laptop emoji.
st.set_page_config(
    page_title="Developer Salary Explorer",
    page_icon="💻",
    layout="wide",
)
@st.cache_data
def load_data():
    """Load and preprocess the Stack Overflow survey data.

    Reads ``stackoverflow_survey_single_response.txt`` as CSV, keeps only rows
    whose ``converted_comp_yearly`` is present and greater than 1000, coerces
    ``years_code``, ``years_code_pro`` and ``age`` to numbers (missing values
    are replaced by the column median), and derives the columns used by the
    dashboard.

    Returns:
        pandas.DataFrame: The cleaned rows with ``country`` collapsed to a
        short list of countries plus ``'Other'``, and the added columns
        ``experience_level`` and ``education_level``. An empty DataFrame is
        returned (after reporting the error with ``st.error``) if loading or
        preprocessing raises.
    """
    try:
        df = pd.read_csv('stackoverflow_survey_single_response.txt')

        # Filter only rows with usable compensation data.
        compensation = df['converted_comp_yearly']
        df_clean = df[compensation.notna() & (compensation > 1000)].copy()

        # Handle missing / non-numeric values in numeric columns.
        for col in ['years_code', 'years_code_pro', 'age']:
            df_clean[col] = pd.to_numeric(df_clean[col], errors='coerce')
            df_clean[col] = df_clean[col].fillna(df_clean[col].median())

        # Create experience levels.
        # include_lowest=True puts exactly 0 years into the "Junior (0-2 yrs)"
        # bucket (otherwise the first interval is (0, 2] and 0 becomes NaN);
        # the open upper edge keeps values above 50 in "Expert (10+ yrs)".
        df_clean['experience_level'] = pd.cut(
            df_clean['years_code_pro'],
            bins=[0, 2, 5, 10, np.inf],
            labels=['Junior (0-2 yrs)', 'Mid (3-5 yrs)', 'Senior (6-10 yrs)', 'Expert (10+ yrs)'],
            include_lowest=True,
        )

        # Simplify country to a few major countries; everything else
        # (including missing values) becomes 'Other'.
        top_countries = ['United States of America', 'United Kingdom of Great Britain and Northern Ireland',
                         'Germany', 'India', 'Canada', 'France', 'Australia']
        df_clean['country'] = df_clean['country'].where(
            df_clean['country'].isin(top_countries), 'Other'
        )

        # Map education levels to readable names.
        # NOTE(review): assumes ed_level holds integer codes 1-5 - confirm
        # against the data file; any unmapped value ends up as 'Other'.
        education_map = {
            1: 'Less than Bachelor',
            2: 'Bachelor\'s Degree',
            3: 'Master\'s Degree',
            4: 'Doctoral Degree',
            5: 'Professional Degree',
        }
        df_clean['education_level'] = df_clean['ed_level'].map(education_map)
        df_clean['education_level'] = df_clean['education_level'].fillna('Other')

        return df_clean
    except Exception as e:
        # UI boundary: report the problem in the page instead of crashing;
        # the caller treats an empty frame as "no data".
        st.error(f"Error loading data: {str(e)}")
        return pd.DataFrame()
def main():
    """Render the Developer Salary Explorer dashboard.

    Loads the cleaned survey data, builds the sidebar filters (country,
    education level, years of professional experience) and, for the rows
    matching those filters, renders summary metrics, four Plotly charts and
    an optional data table. Returns early with a message when no data could
    be loaded or when nothing matches the current filters.
    """
    st.title("💻 Developer Salary Explorer")
    st.markdown("Explore how country, education, and experience influence developer salaries worldwide.")

    # Load data
    df = load_data()
    if df.empty:
        st.error("No data loaded. Please check your data file.")
        return

    st.sidebar.header("🔍 Filter Data")

    # Country filter
    countries = sorted(df['country'].unique())
    selected_countries = st.sidebar.multiselect(
        "Select Countries:",
        options=countries,
        default=countries[:3],  # Default to first 3 countries
    )

    # Education level filter
    education_levels = sorted(df['education_level'].unique())
    selected_education = st.sidebar.multiselect(
        "Select Education Levels:",
        options=education_levels,
        default=education_levels,
    )

    # Years of experience slider
    exp_floor = int(df['years_code_pro'].min())
    exp_ceiling = int(min(df['years_code_pro'].max(), 40))  # Cap at 40 for better UX
    # Clamp the default (0, 15) selection to the data-derived bounds so the
    # initial range always lies inside [exp_floor, exp_ceiling].
    default_low = min(max(0, exp_floor), exp_ceiling)
    default_high = max(min(15, exp_ceiling), default_low)
    # NOTE(review): because of the cap, rows with more than 40 years of
    # experience can never be selected - confirm this is intended.
    min_exp, max_exp = st.sidebar.slider(
        "Years of Professional Experience:",
        min_value=exp_floor,
        max_value=exp_ceiling,
        value=(default_low, default_high),
    )

    # Apply filters
    filtered_df = df[
        (df['country'].isin(selected_countries)) &
        (df['education_level'].isin(selected_education)) &
        (df['years_code_pro'] >= min_exp) &
        (df['years_code_pro'] <= max_exp)
    ]

    # Bail out before rendering anything derived from the selection:
    # statistics of an empty frame are NaN and would show up as "$nan".
    sample_size = len(filtered_df)
    if sample_size == 0:
        st.warning("No data matches your filters. Please adjust your selection.")
        return

    # Display metrics
    st.header("📊 Key Metrics")
    salaries = filtered_df['converted_comp_yearly']
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Median Salary", f"${salaries.median():,.0f}")
    with col2:
        st.metric("Average Salary", f"${salaries.mean():,.0f}")
    with col3:
        st.metric("Sample Size", f"{sample_size:,}")
    with col4:
        salary_range = f"${salaries.min():,.0f} - ${salaries.max():,.0f}"
        st.metric("Salary Range", salary_range)

    # Visualizations
    st.header("📈 Salary Analysis")

    # 1. Salary by Country
    st.subheader("🌍 Salary by Country")
    country_stats = (
        filtered_df.groupby('country')['converted_comp_yearly']
        .median()
        .sort_values(ascending=False)
    )
    fig1 = px.bar(
        x=country_stats.index,
        y=country_stats.values,
        title="Median Salary by Country",
        labels={'x': 'Country', 'y': 'Median Salary (USD)'},
    )
    st.plotly_chart(fig1, use_container_width=True)

    # 2. Salary by Education Level
    st.subheader("🎓 Salary by Education Level")
    fig2 = px.box(
        filtered_df,
        x='education_level',
        y='converted_comp_yearly',
        title="Salary Distribution by Education Level",
    )
    st.plotly_chart(fig2, use_container_width=True)

    # 3. Salary by Experience
    st.subheader("📅 Salary vs Experience")
    scatter_kwargs = dict(
        x='years_code_pro',
        y='converted_comp_yearly',
        color='country',
        title="Salary Growth with Experience",
    )
    try:
        fig3 = px.scatter(filtered_df, trendline="lowess", **scatter_kwargs)
    except ImportError:
        # Plotly Express trendlines rely on the optional statsmodels package;
        # fall back to a plain scatter rather than failing the whole page.
        fig3 = px.scatter(filtered_df, **scatter_kwargs)
        st.caption("Trendline unavailable: install statsmodels to enable it.")
    st.plotly_chart(fig3, use_container_width=True)

    # 4. Experience Level Analysis
    st.subheader("👨‍💻 Salary by Experience Level")
    # experience_level is categorical; observed=False is passed explicitly so
    # every level keeps its slot (and order) in the chart even when the
    # current selection contains no rows for it.
    exp_stats = filtered_df.groupby('experience_level', observed=False)['converted_comp_yearly'].median()
    fig4 = px.bar(
        x=exp_stats.index,
        y=exp_stats.values,
        title="Median Salary by Experience Level",
    )
    st.plotly_chart(fig4, use_container_width=True)

    # Data Table
    st.header("📋 Detailed Data View")
    if st.checkbox("Show filtered data table"):
        display_cols = ['country', 'education_level', 'experience_level', 'years_code_pro', 'converted_comp_yearly']
        st.dataframe(
            filtered_df[display_cols].sort_values('converted_comp_yearly', ascending=False),
            use_container_width=True,
        )
# Script entry point: build the dashboard only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()