# -*- coding: utf-8 -*-
"""Untitled8.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1SnoorFAucvS1FXD1vzyJnJ-_hoZUfJ_u

Streamlit app that loads Stack Overflow survey responses from a local CSV
file and lets the user explore yearly compensation by country, education
level and years of professional experience.
"""

import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px

# Page configuration
st.set_page_config(
    page_title="Developer Salary Explorer",
    page_icon="💻",
    layout="wide"
)

# CSV file read by load_data(), resolved relative to the working directory.
DATA_FILE = 'stackoverflow_survey_single_response.txt'

# Rows at or below this yearly compensation are dropped during cleaning.
MIN_YEARLY_COMP = 1000

# Columns coerced to numbers; values that cannot be parsed become the median.
NUMERIC_COLUMNS = ['years_code', 'years_code_pro', 'age']

# Upper edge is open-ended so that every value above 10 lands in "Expert".
EXPERIENCE_BINS = [0, 2, 5, 10, np.inf]
EXPERIENCE_LABELS = [
    'Junior (0-2 yrs)',
    'Mid (3-5 yrs)',
    'Senior (6-10 yrs)',
    'Expert (10+ yrs)',
]

# Countries kept under their own name; every other value becomes 'Other'.
TOP_COUNTRIES = [
    'United States of America',
    'United Kingdom of Great Britain and Northern Ireland',
    'Germany',
    'India',
    'Canada',
    'France',
    'Australia',
]

# Numeric ed_level code -> readable label; unmapped codes become 'Other'.
EDUCATION_MAP = {
    1: 'Less than Bachelor',
    2: 'Bachelor\'s Degree',
    3: 'Master\'s Degree',
    4: 'Doctoral Degree',
    5: 'Professional Degree',
}

# The experience slider never offers more than this many years.
MAX_SLIDER_YEARS = 40

# Preferred default slider selection, clamped to the available range.
DEFAULT_EXPERIENCE_RANGE = (0, 15)


def _clean_survey_data(raw):
    """Return a cleaned copy of the raw survey frame with derived columns.

    Keeps only rows whose ``converted_comp_yearly`` is above
    ``MIN_YEARLY_COMP``, makes the numeric columns numeric (filling gaps
    with the column median) and adds/overwrites ``experience_level``,
    ``country`` and ``education_level``.
    """
    # A NaN compensation compares as False, so this single mask also drops
    # the rows without compensation data. The explicit copy keeps the
    # column assignments below from writing into a slice of ``raw``.
    cleaned = raw[raw['converted_comp_yearly'] > MIN_YEARLY_COMP].copy()

    # Handle missing values in numeric columns
    for col in NUMERIC_COLUMNS:
        numeric = pd.to_numeric(cleaned[col], errors='coerce')
        cleaned[col] = numeric.fillna(numeric.median())

    # include_lowest=True puts 0 years of experience into the first bucket
    # instead of leaving it without an experience level.
    cleaned['experience_level'] = pd.cut(
        cleaned['years_code_pro'],
        bins=EXPERIENCE_BINS,
        labels=EXPERIENCE_LABELS,
        include_lowest=True,
    )

    # Simplify country to major regions (missing values also become 'Other')
    is_top_country = cleaned['country'].isin(TOP_COUNTRIES)
    cleaned['country'] = cleaned['country'].where(is_top_country, 'Other')

    # Map education levels to readable names
    cleaned['education_level'] = (
        cleaned['ed_level'].map(EDUCATION_MAP).fillna('Other')
    )

    return cleaned


@st.cache_data
def _load_clean_data():
    """Read the survey CSV and clean it; raises if anything goes wrong.

    Errors propagate out of this cached function so that a failed load is
    not stored in the cache.
    """
    return _clean_survey_data(pd.read_csv(DATA_FILE))


def load_data():
    """Load and preprocess the Stack Overflow survey data.

    Returns:
        The cleaned DataFrame, or an empty DataFrame when loading or
        preprocessing fails (the error is reported with ``st.error``).
    """
    try:
        return _load_clean_data()
    except Exception as e:
        # Top-level boundary of the app: report the problem in the UI and
        # let main() stop on the empty frame instead of crashing the page.
        st.error(f"Error loading data: {str(e)}")
        return pd.DataFrame()


def _sidebar_filters(df):
    """Render the sidebar widgets and return the rows matching them."""
    st.sidebar.header("🔍 Filter Data")

    # Country filter
    countries = sorted(df['country'].unique())
    selected_countries = st.sidebar.multiselect(
        "Select Countries:",
        options=countries,
        default=countries[:3]  # Default to first 3 countries
    )

    # Education level filter
    education_levels = sorted(df['education_level'].unique())
    selected_education = st.sidebar.multiselect(
        "Select Education Levels:",
        options=education_levels,
        default=education_levels
    )

    # Years of experience slider
    # NOTE: because the slider is capped, rows above MAX_SLIDER_YEARS can
    # never be selected.
    years = df['years_code_pro']
    slider_min = int(years.min())
    slider_max = int(min(years.max(), MAX_SLIDER_YEARS))  # Cap for better UX

    # The default selection has to lie inside [slider_min, slider_max],
    # so clamp the preferred range to what the data offers.
    preferred_low, preferred_high = DEFAULT_EXPERIENCE_RANGE
    default_low = min(max(preferred_low, slider_min), slider_max)
    default_high = max(min(preferred_high, slider_max), default_low)

    min_exp, max_exp = st.sidebar.slider(
        "Years of Professional Experience:",
        min_value=slider_min,
        max_value=slider_max,
        value=(default_low, default_high)
    )

    # Apply filters
    mask = (
        df['country'].isin(selected_countries)
        & df['education_level'].isin(selected_education)
        & (df['years_code_pro'] >= min_exp)
        & (df['years_code_pro'] <= max_exp)
    )
    return df[mask]


def _show_metrics(filtered_df):
    """Render the four headline salary metrics for a non-empty selection."""
    st.header("📊 Key Metrics")
    salaries = filtered_df['converted_comp_yearly']
    col1, col2, col3, col4 = st.columns(4)

    with col1:
        st.metric("Median Salary", f"${salaries.median():,.0f}")

    with col2:
        st.metric("Average Salary", f"${salaries.mean():,.0f}")

    with col3:
        st.metric("Sample Size", f"{len(filtered_df):,}")

    with col4:
        salary_range = f"${salaries.min():,.0f} - ${salaries.max():,.0f}"
        st.metric("Salary Range", salary_range)


def _show_charts(filtered_df):
    """Render the four salary charts for a non-empty selection."""
    st.header("📈 Salary Analysis")

    # 1. Salary by Country
    st.subheader("🌍 Salary by Country")
    country_stats = (
        filtered_df.groupby('country')['converted_comp_yearly']
        .median()
        .sort_values(ascending=False)
    )
    fig1 = px.bar(
        x=country_stats.index,
        y=country_stats.values,
        title="Median Salary by Country",
        labels={'x': 'Country', 'y': 'Median Salary (USD)'}
    )
    st.plotly_chart(fig1, use_container_width=True)

    # 2. Salary by Education Level
    st.subheader("🎓 Salary by Education Level")
    fig2 = px.box(
        filtered_df,
        x='education_level',
        y='converted_comp_yearly',
        title="Salary Distribution by Education Level"
    )
    st.plotly_chart(fig2, use_container_width=True)

    # 3. Salary by Experience
    st.subheader("📅 Salary vs Experience")
    fig3 = px.scatter(
        filtered_df,
        x='years_code_pro',
        y='converted_comp_yearly',
        color='country',
        title="Salary Growth with Experience",
        trendline="lowess"
    )
    st.plotly_chart(fig3, use_container_width=True)

    # 4. Experience Level Analysis
    st.subheader("👨‍💻 Salary by Experience Level")
    # observed=False keeps all four experience levels on the axis (in
    # category order) and makes the grouping behaviour explicit.
    exp_stats = filtered_df.groupby(
        'experience_level', observed=False
    )['converted_comp_yearly'].median()
    fig4 = px.bar(
        x=exp_stats.index,
        y=exp_stats.values,
        title="Median Salary by Experience Level"
    )
    st.plotly_chart(fig4, use_container_width=True)


def main():
    """Build the page: load data, apply sidebar filters, render the results."""
    st.title("💻 Developer Salary Explorer")
    st.markdown("Explore how country, education, and experience influence developer salaries worldwide.")

    # Load data
    df = load_data()

    if df.empty:
        st.error("No data loaded. Please check your data file.")
        return

    filtered_df = _sidebar_filters(df)

    # Check for an empty selection before computing any statistics, so the
    # page shows the warning instead of "$nan" metrics.
    if filtered_df.empty:
        st.warning("No data matches your filters. Please adjust your selection.")
        return

    # Display metrics
    _show_metrics(filtered_df)

    # Visualizations
    _show_charts(filtered_df)

    # Data Table
    st.header("📋 Detailed Data View")
    if st.checkbox("Show filtered data table"):
        display_cols = ['country', 'education_level', 'experience_level', 'years_code_pro', 'converted_comp_yearly']
        st.dataframe(
            filtered_df[display_cols].sort_values('converted_comp_yearly', ascending=False),
            use_container_width=True
        )


if __name__ == "__main__":
    main()