Spaces:

Peter512
/

developer-salary-predictor

Sleeping

App Files Files Community

Peter512 committed on Oct 9, 2025

Commit

ee536ca

verified ·

1 Parent(s): 1add186

Update app.py

Browse files

Files changed (1) hide show

app.py +87 -13

app.py CHANGED Viewed

@@ -345,10 +345,37 @@ if st.session_state.get('has_prediction', False):
         # Feature contribution table
         col1, col2 = st.columns([2, 1])
         with col1:
             st.subheader("📈 Top Contributing Factors")
             shap_df = pd.DataFrame({
                 'Feature': feature_names,
                 'SHAP Value': shap_values[0],
@@ -357,25 +384,72 @@ if st.session_state.get('has_prediction', False):
             shap_df['Abs SHAP'] = shap_df['SHAP Value'].abs()
             shap_df = shap_df.sort_values('Abs SHAP', ascending=False).head(10)
-            # Create visualization
             fig, ax = plt.subplots(figsize=(10, 6))
-            colors = ['#2ecc71' if x > 0 else '#e74c3c' for x in shap_df['SHAP Value']]
-            ax.barh(range(len(shap_df)), shap_df['SHAP Value'], color=colors, alpha=0.8)
             ax.set_yticks(range(len(shap_df)))
-            ax.set_yticklabels(shap_df['Feature'], fontsize=10)
             ax.set_xlabel('Impact on Salary (EUR)', fontsize=11, fontweight='bold')
-            ax.set_title('Top 10 Feature Impacts', fontsize=13, fontweight='bold', pad=20)
-            ax.axvline(x=0, color='black', linestyle='-', linewidth=1)
-            ax.grid(axis='x', alpha=0.3)
             plt.tight_layout()
             st.pyplot(fig)
         with col2:
             st.subheader("📋 Impact Details")
-            # Format the table
-            display_df = shap_df[['Feature', 'SHAP Value', 'Impact']].copy()
-            display_df['SHAP Value'] = display_df['SHAP Value'].apply(lambda x: f"€{x:,.0f}")
             display_df = display_df.reset_index(drop=True)
             st.dataframe(
@@ -586,7 +660,7 @@ else:
         - **Sample**: 7,000+ European professional developers
         - **Algorithm**: Optimized Random Forest
         - **Accuracy**: RMSE ~€18,600
-        - **Last Updated**: 2024
         """)
     with col2:

         # Feature contribution table
         col1, col2 = st.columns([2, 1])
         with col1:
             st.subheader("📈 Top Contributing Factors")
+            # Create feature mapping for better readability
+            def clean_feature_name(feature):
+                """Convert technical feature names to user-friendly labels"""
+                # Remove prefixes
+                feature = feature.replace('cat__', '').replace('num__', '')
+                # Add emojis and format nicely
+                emoji_map = {
+                    'years_code_pro': '⏱️ Years of Experience',
+                    'age_group': '👤 Age Group',
+                    'country_': '🌍 Country: ',
+                    'remote_work_': '🏠 Work: ',
+                    'dev_type_': '💻 Role: ',
+                    'org_size_': '🏢 Company: ',
+                    'ed_level_': '🎓 Education: ',
+                    'so_account': '📚 Stack Overflow',
+                    'ai_select': '🤖 AI Tools'
+                }
+                for key, label in emoji_map.items():
+                    if feature.startswith(key):
+                        return label + feature.replace(key, '')
+                    elif feature == key:
+                        return label.rstrip(': ')
+                return feature
             shap_df = pd.DataFrame({
                 'Feature': feature_names,
                 'SHAP Value': shap_values[0],
             shap_df['Abs SHAP'] = shap_df['SHAP Value'].abs()
             shap_df = shap_df.sort_values('Abs SHAP', ascending=False).head(10)
+            # Clean feature names
+            shap_df['Feature_Clean'] = shap_df['Feature'].apply(clean_feature_name)
+            # Create visualization with improved styling
             fig, ax = plt.subplots(figsize=(10, 6))
+            # Modern color scheme
+            colors = ['#10b981' if x > 0 else '#ef4444' for x in shap_df['SHAP Value']]
+            # Create bars
+            bars = ax.barh(range(len(shap_df)), shap_df['SHAP Value'], color=colors, alpha=0.85, height=0.7)
+            # Add value labels on bars
+            for i, (bar, value) in enumerate(zip(bars, shap_df['SHAP Value'])):
+                x_pos = value + (500 if value > 0 else -500)
+                ax.text(x_pos, i, f'€{abs(value):,.0f}',
+                    ha='left' if value > 0 else 'right',
+                    va='center', fontweight='bold', fontsize=10,
+                    color='#10b981' if value > 0 else '#ef4444')
+            # Set labels with cleaned names
             ax.set_yticks(range(len(shap_df)))
+            ax.set_yticklabels(shap_df['Feature_Clean'], fontsize=10)
             ax.set_xlabel('Impact on Salary (EUR)', fontsize=11, fontweight='bold')
+            ax.set_title('How Different Factors Affect Your Salary',
+                        fontsize=13, fontweight='bold', pad=20)
+            # Add zero line
+            ax.axvline(x=0, color='#64748b', linestyle='-', linewidth=2, alpha=0.5)
+            # Add legend
+            from matplotlib.patches import Patch
+            legend_elements = [
+                Patch(facecolor='#10b981', alpha=0.85, label='Increases Salary'),
+                Patch(facecolor='#ef4444', alpha=0.85, label='Decreases Salary')
+            ]
+            ax.legend(handles=legend_elements, loc='lower right', frameon=True,
+                    fancybox=True, shadow=True, fontsize=10)
+            # Styling
+            ax.grid(axis='x', alpha=0.2, linestyle='--')
+            ax.set_facecolor('#f8fafc')
+            fig.patch.set_facecolor('white')
+            ax.spines['top'].set_visible(False)
+            ax.spines['right'].set_visible(False)
             plt.tight_layout()
             st.pyplot(fig)
+            # Add explanation box
+            st.info("""
+            **💡 How to read this chart:**
+            - **Green bars** pointing right → These factors *increase* your salary
+            - **Red bars** pointing left → These factors *decrease* your salary
+            - Longer bars = Bigger impact on your predicted salary
+            """)
         with col2:
             st.subheader("📋 Impact Details")
+            # Format the table with cleaned names
+            display_df = shap_df[['Feature_Clean', 'SHAP Value', 'Impact']].copy()
+            display_df.columns = ['Factor', 'Impact Amount', 'Direction']
+            display_df['Impact Amount'] = display_df['Impact Amount'].apply(
+                lambda x: f"€{abs(x):,.0f}"
+            )
             display_df = display_df.reset_index(drop=True)
             st.dataframe(
         - **Sample**: 7,000+ European professional developers
         - **Algorithm**: Optimized Random Forest
         - **Accuracy**: RMSE ~€18,600
+        - **Last Updated**: 2025
         """)
     with col2: