Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -345,10 +345,37 @@ if st.session_state.get('has_prediction', False):
|
|
| 345 |
|
| 346 |
# Feature contribution table
|
| 347 |
col1, col2 = st.columns([2, 1])
|
| 348 |
-
|
| 349 |
with col1:
|
| 350 |
st.subheader("📈 Top Contributing Factors")
|
| 351 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
shap_df = pd.DataFrame({
|
| 353 |
'Feature': feature_names,
|
| 354 |
'SHAP Value': shap_values[0],
|
|
@@ -357,25 +384,72 @@ if st.session_state.get('has_prediction', False):
|
|
| 357 |
shap_df['Abs SHAP'] = shap_df['SHAP Value'].abs()
|
| 358 |
shap_df = shap_df.sort_values('Abs SHAP', ascending=False).head(10)
|
| 359 |
|
| 360 |
-
#
|
|
|
|
|
|
|
|
|
|
| 361 |
fig, ax = plt.subplots(figsize=(10, 6))
|
| 362 |
-
|
| 363 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 364 |
ax.set_yticks(range(len(shap_df)))
|
| 365 |
-
ax.set_yticklabels(shap_df['
|
| 366 |
ax.set_xlabel('Impact on Salary (EUR)', fontsize=11, fontweight='bold')
|
| 367 |
-
ax.set_title('
|
| 368 |
-
|
| 369 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
plt.tight_layout()
|
| 371 |
st.pyplot(fig)
|
| 372 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 373 |
with col2:
|
| 374 |
st.subheader("📋 Impact Details")
|
| 375 |
|
| 376 |
-
# Format the table
|
| 377 |
-
display_df = shap_df[['
|
| 378 |
-
display_df
|
|
|
|
|
|
|
|
|
|
| 379 |
display_df = display_df.reset_index(drop=True)
|
| 380 |
|
| 381 |
st.dataframe(
|
|
@@ -586,7 +660,7 @@ else:
|
|
| 586 |
- **Sample**: 7,000+ European professional developers
|
| 587 |
- **Algorithm**: Optimized Random Forest
|
| 588 |
- **Accuracy**: RMSE ~€18,600
|
| 589 |
-
- **Last Updated**:
|
| 590 |
""")
|
| 591 |
|
| 592 |
with col2:
|
|
|
|
| 345 |
|
| 346 |
# Feature contribution table
|
| 347 |
col1, col2 = st.columns([2, 1])
|
| 348 |
+
|
| 349 |
with col1:
|
| 350 |
st.subheader("📈 Top Contributing Factors")
|
| 351 |
|
| 352 |
+
# Create feature mapping for better readability
|
| 353 |
+
def clean_feature_name(feature):
    """Convert a technical feature name into a user-friendly label.

    A leading ``cat__`` / ``num__`` prefix is removed, then the name is
    matched against a table of known feature prefixes.  The first matching
    entry decides the label:

    * exact match               -> the label without any trailing ``": "``
    * label ending in ``": "``  -> label followed by the rest of the name
    * other label + ``_value``  -> ``"<label>: <value>"``

    Args:
        feature: Feature name as a string, e.g. ``"cat__country_Germany"``.

    Returns:
        The readable label, or the prefix-stripped name unchanged when no
        table entry matches.
    """
    # Strip the prefix only at the start of the name, so that a value that
    # happens to contain the same text is left intact.
    for prefix in ('cat__', 'num__'):
        if feature.startswith(prefix):
            feature = feature[len(prefix):]

    # Known feature prefixes and their display labels.  Labels ending in
    # ': ' expect a value to follow.
    emoji_map = {
        'years_code_pro': '⏱️ Years of Experience',
        'age_group': '👤 Age Group',
        'country_': '🌍 Country: ',
        'remote_work_': '🏠 Work: ',
        'dev_type_': '💻 Role: ',
        'org_size_': '🏢 Company: ',
        'ed_level_': '🎓 Education: ',
        'so_account': '📚 Stack Overflow',
        'ai_select': '🤖 AI Tools',
    }

    for key, label in emoji_map.items():
        if not feature.startswith(key):
            continue

        # Slice off the leading key only; str.replace would also delete any
        # later occurrence of the key inside the value.
        remainder = feature[len(key):]

        if not remainder:
            # Bare feature name: never show a dangling ': ' separator.
            return label.rstrip(': ')
        if label.endswith(': '):
            return label + remainder
        if remainder.startswith('_'):
            # Value appended to a key that has no trailing underscore,
            # e.g. 'age_group_25-34' -> 'Age Group: 25-34'.
            value = remainder.lstrip('_')
            return f'{label}: {value}' if value else label
        return label + remainder

    return feature
|
| 378 |
+
|
| 379 |
shap_df = pd.DataFrame({
|
| 380 |
'Feature': feature_names,
|
| 381 |
'SHAP Value': shap_values[0],
|
|
|
|
| 384 |
shap_df['Abs SHAP'] = shap_df['SHAP Value'].abs()
|
| 385 |
shap_df = shap_df.sort_values('Abs SHAP', ascending=False).head(10)
|
| 386 |
|
| 387 |
+
# Clean feature names
|
| 388 |
+
shap_df['Feature_Clean'] = shap_df['Feature'].apply(clean_feature_name)
|
| 389 |
+
|
| 390 |
+
# Create visualization with improved styling
|
| 391 |
fig, ax = plt.subplots(figsize=(10, 6))
|
| 392 |
+
|
| 393 |
+
# Modern color scheme
|
| 394 |
+
colors = ['#10b981' if x > 0 else '#ef4444' for x in shap_df['SHAP Value']]
|
| 395 |
+
|
| 396 |
+
# Create bars
|
| 397 |
+
bars = ax.barh(range(len(shap_df)), shap_df['SHAP Value'], color=colors, alpha=0.85, height=0.7)
|
| 398 |
+
|
| 399 |
+
# Add value labels on bars
|
| 400 |
+
for i, (bar, value) in enumerate(zip(bars, shap_df['SHAP Value'])):
|
| 401 |
+
x_pos = value + (500 if value > 0 else -500)
|
| 402 |
+
ax.text(x_pos, i, f'€{abs(value):,.0f}',
|
| 403 |
+
ha='left' if value > 0 else 'right',
|
| 404 |
+
va='center', fontweight='bold', fontsize=10,
|
| 405 |
+
color='#10b981' if value > 0 else '#ef4444')
|
| 406 |
+
|
| 407 |
+
# Set labels with cleaned names
|
| 408 |
ax.set_yticks(range(len(shap_df)))
|
| 409 |
+
ax.set_yticklabels(shap_df['Feature_Clean'], fontsize=10)
|
| 410 |
ax.set_xlabel('Impact on Salary (EUR)', fontsize=11, fontweight='bold')
|
| 411 |
+
ax.set_title('How Different Factors Affect Your Salary',
|
| 412 |
+
fontsize=13, fontweight='bold', pad=20)
|
| 413 |
+
|
| 414 |
+
# Add zero line
|
| 415 |
+
ax.axvline(x=0, color='#64748b', linestyle='-', linewidth=2, alpha=0.5)
|
| 416 |
+
|
| 417 |
+
# Add legend
|
| 418 |
+
from matplotlib.patches import Patch
|
| 419 |
+
legend_elements = [
|
| 420 |
+
Patch(facecolor='#10b981', alpha=0.85, label='Increases Salary'),
|
| 421 |
+
Patch(facecolor='#ef4444', alpha=0.85, label='Decreases Salary')
|
| 422 |
+
]
|
| 423 |
+
ax.legend(handles=legend_elements, loc='lower right', frameon=True,
|
| 424 |
+
fancybox=True, shadow=True, fontsize=10)
|
| 425 |
+
|
| 426 |
+
# Styling
|
| 427 |
+
ax.grid(axis='x', alpha=0.2, linestyle='--')
|
| 428 |
+
ax.set_facecolor('#f8fafc')
|
| 429 |
+
fig.patch.set_facecolor('white')
|
| 430 |
+
ax.spines['top'].set_visible(False)
|
| 431 |
+
ax.spines['right'].set_visible(False)
|
| 432 |
+
|
| 433 |
plt.tight_layout()
|
| 434 |
st.pyplot(fig)
|
| 435 |
+
|
| 436 |
+
# Add explanation box
|
| 437 |
+
st.info("""
|
| 438 |
+
**💡 How to read this chart:**
|
| 439 |
+
- **Green bars** pointing right → These factors *increase* your salary
|
| 440 |
+
- **Red bars** pointing left → These factors *decrease* your salary
|
| 441 |
+
- Longer bars = Bigger impact on your predicted salary
|
| 442 |
+
""")
|
| 443 |
+
|
| 444 |
with col2:
|
| 445 |
st.subheader("📋 Impact Details")
|
| 446 |
|
| 447 |
+
# Format the table with cleaned names
|
| 448 |
+
display_df = shap_df[['Feature_Clean', 'SHAP Value', 'Impact']].copy()
|
| 449 |
+
display_df.columns = ['Factor', 'Impact Amount', 'Direction']
|
| 450 |
+
display_df['Impact Amount'] = display_df['Impact Amount'].apply(
|
| 451 |
+
lambda x: f"€{abs(x):,.0f}"
|
| 452 |
+
)
|
| 453 |
display_df = display_df.reset_index(drop=True)
|
| 454 |
|
| 455 |
st.dataframe(
|
|
|
|
| 660 |
- **Sample**: 7,000+ European professional developers
|
| 661 |
- **Algorithm**: Optimized Random Forest
|
| 662 |
- **Accuracy**: RMSE ~€18,600
|
| 663 |
+
- **Last Updated**: 2025
|
| 664 |
""")
|
| 665 |
|
| 666 |
with col2:
|