Spaces:

Peter512
/

developer-salary-predictor

Sleeping

App Files Community

Peter512 committed on Oct 9, 2025

Commit

5ef0dc4

verified ·

1 Parent(s): b8ffbce

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -31

app.py CHANGED Viewed

@@ -349,42 +349,66 @@ if st.session_state.get('has_prediction', False):
         with col1:
             st.subheader("📈 Top Contributing Factors")
-            # Create feature mapping for better readability WITH emojis
-            def clean_feature_name_with_emoji(feature):
-                """Convert technical feature names to user-friendly labels WITH emojis"""
-                # Store original for fallback
-                original = feature
                 # Remove prefixes
                 feature = feature.replace('cat__', '').replace('num__', '').replace('remainder__', '')
-                # Simple mappings with emojis
                 simple_map = {
-                    'years_code_pro': '⏱️ Years of Experience',
-                    'age_group': '👤 Age Group',
-                    'so_account': '📚 Stack Overflow Account',
-                    'ai_select': '🤖 Uses AI Tools'
                 }
                 if feature in simple_map:
                     return simple_map[feature]
-                # Handle categorical variables with prefixes and emojis
                 replacements = {
-                    'country_': ('🌍 Country: ', ''),
-                    'remote_work_': ('🏠 Work: ', ''),
-                    'dev_type_': ('💻 Role: ', ''),
-                    'org_size_': ('🏢 Company Size: ', ''),
-                    'ed_level_': ('🎓 Education: ', '')
                 }
-                for prefix, (emoji_label, _) in replacements.items():
                     if prefix in feature:
-                        return emoji_label + feature.replace(prefix, '').replace('_', ' ')
-                # Fallback: just clean up underscores and return
                 return feature.replace('_', ' ').title()
             shap_df = pd.DataFrame({
                 'Feature': feature_names,
                 'SHAP Value': shap_values[0],
@@ -393,18 +417,13 @@ if st.session_state.get('has_prediction', False):
             shap_df['Abs SHAP'] = shap_df['SHAP Value'].abs()
             shap_df = shap_df.sort_values('Abs SHAP', ascending=False).head(10)
-            # Clean feature names with emojis
             shap_df['Feature_Clean_Emoji'] = shap_df['Feature'].apply(clean_feature_name_with_emoji)
             # Create visualization with improved styling
             fig, ax = plt.subplots(figsize=(10, 6))
-            # Set font that supports emojis (try different options for compatibility)
-            try:
-                plt.rcParams['font.family'] = 'DejaVu Sans'
-            except:
-                pass
             # Modern color scheme
             colors = ['#10b981' if x > 0 else '#ef4444' for x in shap_df['SHAP Value']]
@@ -436,9 +455,9 @@ if st.session_state.get('has_prediction', False):
                     fontweight='bold', fontsize=10,
                     color=color)
-            # Set labels with cleaned names WITH EMOJIS
             ax.set_yticks(range(len(shap_df)))
-            ax.set_yticklabels(shap_df['Feature_Clean_Emoji'], fontsize=10)
             ax.set_xlabel('Impact on Salary (EUR)', fontsize=11, fontweight='bold')
             ax.set_title('How Different Factors Affect Your Salary',
                         fontsize=13, fontweight='bold', pad=20)
@@ -465,12 +484,13 @@ if st.session_state.get('has_prediction', False):
             plt.tight_layout()
             st.pyplot(fig)
-            # Add explanation box
             st.info("""
             **💡 How to read this chart:**
             - **Green bars** pointing right → These factors *increase* your salary
             - **Red bars** pointing left → These factors *decrease* your salary
-            - Longer bars = Bigger impact on your predicted salary
             """)
         with col2:

         with col1:
             st.subheader("📈 Top Contributing Factors")
+            # Create feature mapping WITHOUT emojis for chart
+            def clean_feature_name(feature):
+                """Convert technical feature names to user-friendly labels WITHOUT emojis"""
                 # Remove prefixes
                 feature = feature.replace('cat__', '').replace('num__', '').replace('remainder__', '')
+                # Simple mappings
                 simple_map = {
+                    'years_code_pro': 'Years of Experience',
+                    'age_group': 'Age Group',
+                    'so_account': 'Stack Overflow Account',
+                    'ai_select': 'Uses AI Tools'
                 }
                 if feature in simple_map:
                     return simple_map[feature]
+                # Handle categorical variables
                 replacements = {
+                    'country_': 'Country: ',
+                    'remote_work_': 'Work: ',
+                    'dev_type_': 'Role: ',
+                    'org_size_': 'Company Size: ',
+                    'ed_level_': 'Education: '
                 }
+                for prefix, label in replacements.items():
                     if prefix in feature:
+                        return label + feature.replace(prefix, '').replace('_', ' ')
+                # Fallback
                 return feature.replace('_', ' ').title()
+            # Create emoji version for the table only
+            def clean_feature_name_with_emoji(feature):
+                """Convert technical feature names to user-friendly labels WITH emojis"""
+                base_name = clean_feature_name(feature)
+                # Add emojis based on content
+                if 'Years of Experience' in base_name:
+                    return '⏱️ ' + base_name
+                elif 'Age Group' in base_name:
+                    return '👤 ' + base_name
+                elif 'Country:' in base_name:
+                    return '🌍 ' + base_name
+                elif 'Work:' in base_name:
+                    return '🏠 ' + base_name
+                elif 'Role:' in base_name:
+                    return '💻 ' + base_name
+                elif 'Company Size:' in base_name:
+                    return '🏢 ' + base_name
+                elif 'Education:' in base_name:
+                    return '🎓 ' + base_name
+                elif 'Stack Overflow' in base_name:
+                    return '📚 ' + base_name
+                elif 'AI Tools' in base_name:
+                    return '🤖 ' + base_name
+                return base_name
             shap_df = pd.DataFrame({
                 'Feature': feature_names,
                 'SHAP Value': shap_values[0],
             shap_df['Abs SHAP'] = shap_df['SHAP Value'].abs()
             shap_df = shap_df.sort_values('Abs SHAP', ascending=False).head(10)
+            # Clean feature names - NO emojis for chart, WITH emojis for table
+            shap_df['Feature_Clean'] = shap_df['Feature'].apply(clean_feature_name)
             shap_df['Feature_Clean_Emoji'] = shap_df['Feature'].apply(clean_feature_name_with_emoji)
             # Create visualization with improved styling
             fig, ax = plt.subplots(figsize=(10, 6))
             # Modern color scheme
             colors = ['#10b981' if x > 0 else '#ef4444' for x in shap_df['SHAP Value']]
                     fontweight='bold', fontsize=10,
                     color=color)
+            # Set labels with cleaned names WITHOUT EMOJIS
             ax.set_yticks(range(len(shap_df)))
+            ax.set_yticklabels(shap_df['Feature_Clean'], fontsize=10)
             ax.set_xlabel('Impact on Salary (EUR)', fontsize=11, fontweight='bold')
             ax.set_title('How Different Factors Affect Your Salary',
                         fontsize=13, fontweight='bold', pad=20)
             plt.tight_layout()
             st.pyplot(fig)
+            # Add explanation box with improved text
             st.info("""
             **💡 How to read this chart:**
             - **Green bars** pointing right → These factors *increase* your salary
             - **Red bars** pointing left → These factors *decrease* your salary
+            - **Longer bars** = Bigger impact on your predicted salary
+            - **Why do I see other countries/categories I didn't select?** The chart shows the top 10 most impactful features for your prediction. For categories you *didn't* select (like other countries), a negative value means "not having this characteristic decreases your salary" — in other words, if you had selected that option instead, your salary would be higher. For example, "Country: Germany = -€1,059" means being from Germany would have added €1,059 to your salary.
             """)
         with col2: