Hamna97 committed on
Commit
5f19f8d
·
verified ·
1 Parent(s): ed33803

Upload 3 files

Browse files
Streamlit_app.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Untitled8.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1SnoorFAucvS1FXD1vzyJnJ-_hoZUfJ_u
8
+ """
9
+
10
+ import streamlit as st
11
+ import pandas as pd
12
+ import numpy as np
13
+ import plotly.express as px
14
+
15
+ # Page configuration
16
# Configure the browser tab (title, icon) and use the full-width layout.
# The icon literal was mis-encoded ("πŸ’»"); it is the laptop emoji.
st.set_page_config(
    page_title="Developer Salary Explorer",
    page_icon="💻",
    layout="wide",
)
21
+
22
@st.cache_data
def load_data():
    """Load and preprocess the Stack Overflow survey data.

    Reads ``stackoverflow_survey_single_response.txt`` as CSV, keeps only rows
    whose ``converted_comp_yearly`` is present and greater than 1000, coerces
    ``years_code``, ``years_code_pro`` and ``age`` to numbers (filling gaps
    with the column median), and derives three columns used by the dashboard:
    ``experience_level``, a simplified ``country`` and ``education_level``.

    Returns:
        The cleaned DataFrame. If anything fails, the error is reported with
        ``st.error`` and an empty DataFrame is returned instead.
    """
    try:
        df = pd.read_csv('stackoverflow_survey_single_response.txt')

        # Keep only rows with compensation above 1000. A comparison against
        # NaN is False, so missing values are dropped by the same mask. Taking
        # one explicit copy avoids assigning columns on a filtered slice.
        has_salary = df['converted_comp_yearly'] > 1000
        df_clean = df[has_salary].copy()

        # Coerce numeric columns; unparseable entries become NaN and are then
        # replaced by the column median.
        for col in ['years_code', 'years_code_pro', 'age']:
            numeric = pd.to_numeric(df_clean[col], errors='coerce')
            df_clean[col] = numeric.fillna(numeric.median())

        # Bucket professional experience. include_lowest=True puts exactly
        # 0 years into the Junior bucket (the default right-closed bins would
        # leave it unlabelled), and the open-ended last bin keeps the
        # "10+ yrs" label true for any value above 10.
        df_clean['experience_level'] = pd.cut(
            df_clean['years_code_pro'],
            bins=[0, 2, 5, 10, float('inf')],
            labels=['Junior (0-2 yrs)', 'Mid (3-5 yrs)', 'Senior (6-10 yrs)', 'Expert (10+ yrs)'],
            include_lowest=True,
        )

        # Collapse every country outside this list into 'Other'.
        top_countries = [
            'United States of America',
            'United Kingdom of Great Britain and Northern Ireland',
            'Germany', 'India', 'Canada', 'France', 'Australia',
        ]
        df_clean['country'] = df_clean['country'].apply(
            lambda x: x if x in top_countries else 'Other'
        )

        # Map education codes to readable names; unmapped codes become 'Other'.
        # NOTE(review): assumes ed_level holds the integer codes 1-5 — confirm
        # against the data file.
        education_map = {
            1: 'Less than Bachelor',
            2: 'Bachelor\'s Degree',
            3: 'Master\'s Degree',
            4: 'Doctoral Degree',
            5: 'Professional Degree',
        }
        df_clean['education_level'] = df_clean['ed_level'].map(education_map).fillna('Other')

        return df_clean

    except Exception as e:
        # Best-effort boundary: surface the problem in the UI and let the
        # caller detect the failure through the empty frame.
        # NOTE(review): this function is wrapped in st.cache_data, so the
        # empty fallback is presumably cached as well — confirm whether a
        # retry should clear the cache.
        st.error(f"Error loading data: {e}")
        return pd.DataFrame()
67
+
68
def main():
    """Render the Developer Salary Explorer dashboard.

    Loads the survey data, draws the sidebar filters (country, education
    level, years of professional experience), and then shows key salary
    metrics, four charts and an optional table for the filtered rows.
    Returns early with a message when no data could be loaded or when the
    current filters match no rows.
    """
    st.title("💻 Developer Salary Explorer")
    st.markdown("Explore how country, education, and experience influence developer salaries worldwide.")

    # Load data
    df = load_data()
    if df.empty:
        st.error("No data loaded. Please check your data file.")
        return

    st.sidebar.header("🔍 Filter Data")

    # Country filter
    countries = sorted(df['country'].unique())
    selected_countries = st.sidebar.multiselect(
        "Select Countries:",
        options=countries,
        default=countries[:3],  # Default to first 3 countries
    )

    # Education level filter
    education_levels = sorted(df['education_level'].unique())
    selected_education = st.sidebar.multiselect(
        "Select Education Levels:",
        options=education_levels,
        default=education_levels,
    )

    # Years of experience slider. The upper bound is capped at 40 for better
    # UX; note that rows above the cap cannot be selected with this slider.
    exp_floor = int(df['years_code_pro'].min())
    exp_ceiling = int(min(df['years_code_pro'].max(), 40))
    # Clamp the default (0, 15) selection into the slider bounds so the
    # default stays valid when the data does not span that range.
    default_range = (
        min(max(0, exp_floor), exp_ceiling),
        max(min(15, exp_ceiling), exp_floor),
    )
    min_exp, max_exp = st.sidebar.slider(
        "Years of Professional Experience:",
        min_value=exp_floor,
        max_value=exp_ceiling,
        value=default_range,
    )

    # Apply filters
    filtered_df = df[
        df['country'].isin(selected_countries)
        & df['education_level'].isin(selected_education)
        & (df['years_code_pro'] >= min_exp)
        & (df['years_code_pro'] <= max_exp)
    ]

    # Bail out before computing metrics: on an empty selection the median,
    # mean, min and max are all NaN and would be displayed as "$nan".
    sample_size = len(filtered_df)
    if sample_size == 0:
        st.warning("No data matches your filters. Please adjust your selection.")
        return

    salaries = filtered_df['converted_comp_yearly']

    # Display metrics
    st.header("📊 Key Metrics")
    col1, col2, col3, col4 = st.columns(4)

    with col1:
        st.metric("Median Salary", f"${salaries.median():,.0f}")

    with col2:
        st.metric("Average Salary", f"${salaries.mean():,.0f}")

    with col3:
        st.metric("Sample Size", f"{sample_size:,}")

    with col4:
        salary_range = f"${salaries.min():,.0f} - ${salaries.max():,.0f}"
        st.metric("Salary Range", salary_range)

    # Visualizations
    st.header("📈 Salary Analysis")

    # 1. Salary by Country
    st.subheader("🌍 Salary by Country")
    country_stats = (
        filtered_df.groupby('country')['converted_comp_yearly']
        .median()
        .sort_values(ascending=False)
    )
    fig1 = px.bar(
        x=country_stats.index,
        y=country_stats.values,
        title="Median Salary by Country",
        labels={'x': 'Country', 'y': 'Median Salary (USD)'},
    )
    st.plotly_chart(fig1, use_container_width=True)

    # 2. Salary by Education Level
    st.subheader("🎓 Salary by Education Level")
    fig2 = px.box(
        filtered_df,
        x='education_level',
        y='converted_comp_yearly',
        title="Salary Distribution by Education Level",
    )
    st.plotly_chart(fig2, use_container_width=True)

    # 3. Salary by Experience
    # NOTE(review): Plotly's "lowess" trendline presumably needs statsmodels
    # installed — confirm it is listed in requirements.txt.
    st.subheader("📅 Salary vs Experience")
    fig3 = px.scatter(
        filtered_df,
        x='years_code_pro',
        y='converted_comp_yearly',
        color='country',
        title="Salary Growth with Experience",
        trendline="lowess",
    )
    st.plotly_chart(fig3, use_container_width=True)

    # 4. Experience Level Analysis
    st.subheader("👨‍💻 Salary by Experience Level")
    # experience_level is categorical; observed=False keeps every bucket in
    # the result (empty ones as NaN) and makes the grouping mode explicit.
    exp_stats = filtered_df.groupby('experience_level', observed=False)['converted_comp_yearly'].median()
    fig4 = px.bar(
        x=exp_stats.index,
        y=exp_stats.values,
        title="Median Salary by Experience Level",
    )
    st.plotly_chart(fig4, use_container_width=True)

    # Data Table
    st.header("📋 Detailed Data View")
    if st.checkbox("Show filtered data table"):
        display_cols = ['country', 'education_level', 'experience_level', 'years_code_pro', 'converted_comp_yearly']
        st.dataframe(
            filtered_df[display_cols].sort_values('converted_comp_yearly', ascending=False),
            use_container_width=True,
        )


if __name__ == "__main__":
    main()
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
stackoverflow_survey_single_response.txt ADDED
The diff for this file is too large to render. See raw diff