Spaces:

Hamna97
/

Ham

Sleeping

App Files Files Community

Ham / Streamlit_app.py

Hamna97

Upload 3 files

5f19f8d verified 4 months ago

raw

history blame contribute delete

6.31 kB

	# -*- coding: utf-8 -*-
	"""Untitled8.ipynb

	Automatically generated by Colab.

	Original file is located at
	https://colab.research.google.com/drive/1SnoorFAucvS1FXD1vzyJnJ-_hoZUfJ_u
	"""

	import streamlit as st
	import pandas as pd
	import numpy as np
	import plotly.express as px

	# Page setup: title, icon, and wide layout
	st.set_page_config(page_title="Developer Salary Explorer", page_icon="💻", layout="wide")

	@st.cache_data
	def load_data():
	    """Load and preprocess the Stack Overflow survey data.

	    Reads ``stackoverflow_survey_single_response.txt`` as CSV, keeps rows whose
	    ``converted_comp_yearly`` is present and greater than 1000, coerces
	    ``years_code``, ``years_code_pro`` and ``age`` to numbers (filling gaps with
	    the column median), and derives three display columns:
	    ``experience_level``, a collapsed ``country`` and ``education_level``.

	    Returns:
	        The cleaned DataFrame, or an empty DataFrame if loading or
	        preprocessing raises (the error is reported through ``st.error``).
	    """
	    try:
	        df = pd.read_csv('stackoverflow_survey_single_response.txt')

	        # Keep only rows with usable compensation. Build one mask and copy once
	        # so the column assignments below write to an independent frame rather
	        # than to a slice of ``df`` (avoids SettingWithCopyWarning).
	        compensation = df['converted_comp_yearly']
	        df_clean = df[compensation.notna() & (compensation > 1000)].copy()

	        # Coerce numeric columns; unparseable entries become NaN and are then
	        # replaced by the column median.
	        for col in ['years_code', 'years_code_pro', 'age']:
	            numeric = pd.to_numeric(df_clean[col], errors='coerce')
	            df_clean[col] = numeric.fillna(numeric.median())

	        # Bucket professional experience. ``include_lowest=True`` puts exactly
	        # 0 years into the Junior bucket, and the open-ended top edge keeps
	        # values above 50 in the Expert bucket instead of leaving them NaN.
	        df_clean['experience_level'] = pd.cut(
	            df_clean['years_code_pro'],
	            bins=[0, 2, 5, 10, float('inf')],
	            labels=['Junior (0-2 yrs)', 'Mid (3-5 yrs)', 'Senior (6-10 yrs)', 'Expert (10+ yrs)'],
	            include_lowest=True,
	        )

	        # Collapse every country outside the short list (including missing
	        # values) into 'Other'.
	        top_countries = [
	            'United States of America',
	            'United Kingdom of Great Britain and Northern Ireland',
	            'Germany', 'India', 'Canada', 'France', 'Australia',
	        ]
	        df_clean['country'] = df_clean['country'].where(
	            df_clean['country'].isin(top_countries), 'Other'
	        )

	        # Map education codes to readable names; unmapped codes become 'Other'.
	        education_map = {
	            1: 'Less than Bachelor',
	            2: "Bachelor's Degree",
	            3: "Master's Degree",
	            4: 'Doctoral Degree',
	            5: 'Professional Degree',
	        }
	        df_clean['education_level'] = (
	            df_clean['ed_level'].map(education_map).fillna('Other')
	        )

	        return df_clean

	    except Exception as e:
	        # Top-level boundary for the app: surface the problem in the UI and
	        # let the caller handle the empty frame instead of crashing the page.
	        st.error(f"Error loading data: {str(e)}")
	        return pd.DataFrame()

	def main():
	    """Render the Developer Salary Explorer page.

	    Loads the survey data through ``load_data``, builds sidebar filters for
	    country, education level and years of professional experience, and then
	    shows summary metrics, four Plotly charts and an optional data table for
	    the filtered rows. Returns early with a message when no data is loaded or
	    when the filters match no rows.
	    """
	    st.title("💻 Developer Salary Explorer")
	    st.markdown("Explore how country, education, and experience influence developer salaries worldwide.")

	    # Load data
	    df = load_data()

	    if df.empty:
	        st.error("No data loaded. Please check your data file.")
	        return

	    st.sidebar.header("🔍 Filter Data")

	    # Country filter
	    countries = sorted(df['country'].unique())
	    selected_countries = st.sidebar.multiselect(
	        "Select Countries:",
	        options=countries,
	        default=countries[:3]  # Default to first 3 countries
	    )

	    # Education level filter
	    education_levels = sorted(df['education_level'].unique())
	    selected_education = st.sidebar.multiselect(
	        "Select Education Levels:",
	        options=education_levels,
	        default=education_levels
	    )

	    # Years of experience slider. The upper bound is capped at 40 for better
	    # UX, which also means rows above 40 years are never selected.
	    exp_floor = int(df['years_code_pro'].min())
	    exp_ceiling = max(int(min(df['years_code_pro'].max(), 40)), exp_floor)
	    if exp_floor < exp_ceiling:
	        # Clamp the preferred (0, 15) default into the data-derived bounds so
	        # the slider never receives a value outside min_value/max_value.
	        default_low = min(max(0, exp_floor), exp_ceiling)
	        default_high = max(min(15, exp_ceiling), default_low)
	        min_exp, max_exp = st.sidebar.slider(
	            "Years of Professional Experience:",
	            min_value=exp_floor,
	            max_value=exp_ceiling,
	            value=(default_low, default_high)
	        )
	    else:
	        # A range slider needs two distinct bounds; with a single value there
	        # is nothing to choose, so use that value directly.
	        min_exp = max_exp = exp_floor

	    # Apply filters
	    filtered_df = df[
	        (df['country'].isin(selected_countries)) &
	        (df['education_level'].isin(selected_education)) &
	        (df['years_code_pro'] >= min_exp) &
	        (df['years_code_pro'] <= max_exp)
	    ]

	    # Bail out before computing statistics: on an empty selection the
	    # median/mean/min/max are NaN and would be rendered as "$nan".
	    sample_size = len(filtered_df)
	    if sample_size == 0:
	        st.warning("No data matches your filters. Please adjust your selection.")
	        return

	    # Display metrics
	    st.header("📊 Key Metrics")

	    salaries = filtered_df['converted_comp_yearly']
	    col1, col2, col3, col4 = st.columns(4)

	    with col1:
	        st.metric("Median Salary", f"${salaries.median():,.0f}")

	    with col2:
	        st.metric("Average Salary", f"${salaries.mean():,.0f}")

	    with col3:
	        st.metric("Sample Size", f"{sample_size:,}")

	    with col4:
	        salary_range = f"${salaries.min():,.0f} - ${salaries.max():,.0f}"
	        st.metric("Salary Range", salary_range)

	    # Visualizations
	    st.header("📈 Salary Analysis")

	    # 1. Salary by Country
	    st.subheader("🌍 Salary by Country")
	    country_stats = filtered_df.groupby('country')['converted_comp_yearly'].median().sort_values(ascending=False)
	    fig1 = px.bar(
	        x=country_stats.index,
	        y=country_stats.values,
	        title="Median Salary by Country",
	        labels={'x': 'Country', 'y': 'Median Salary (USD)'}
	    )
	    st.plotly_chart(fig1, use_container_width=True)

	    # 2. Salary by Education Level
	    st.subheader("🎓 Salary by Education Level")
	    fig2 = px.box(
	        filtered_df,
	        x='education_level',
	        y='converted_comp_yearly',
	        title="Salary Distribution by Education Level"
	    )
	    st.plotly_chart(fig2, use_container_width=True)

	    # 3. Salary by Experience
	    st.subheader("📅 Salary vs Experience")
	    scatter_args = dict(
	        x='years_code_pro',
	        y='converted_comp_yearly',
	        color='country',
	        title="Salary Growth with Experience"
	    )
	    try:
	        fig3 = px.scatter(filtered_df, trendline="lowess", **scatter_args)
	    except ImportError:
	        # Plotly's trendlines depend on the optional statsmodels package;
	        # draw the plain scatter rather than failing the whole page.
	        fig3 = px.scatter(filtered_df, **scatter_args)
	    st.plotly_chart(fig3, use_container_width=True)

	    # 4. Experience Level Analysis
	    st.subheader("👨‍💻 Salary by Experience Level")
	    # experience_level is categorical; pass observed explicitly so every
	    # bucket keeps its slot on the axis regardless of the pandas default.
	    exp_stats = filtered_df.groupby('experience_level', observed=False)['converted_comp_yearly'].median()
	    fig4 = px.bar(
	        x=exp_stats.index,
	        y=exp_stats.values,
	        title="Median Salary by Experience Level"
	    )
	    st.plotly_chart(fig4, use_container_width=True)

	    # Data Table
	    st.header("📋 Detailed Data View")
	    if st.checkbox("Show filtered data table"):
	        display_cols = ['country', 'education_level', 'experience_level', 'years_code_pro', 'converted_comp_yearly']
	        st.dataframe(
	            filtered_df[display_cols].sort_values('converted_comp_yearly', ascending=False),
	            use_container_width=True
	        )

	# Run the app when this file is executed as a script
	if __name__ == "__main__":
	    main()