Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -349,70 +349,42 @@ if st.session_state.get('has_prediction', False):
|
|
| 349 |
with col1:
|
| 350 |
st.subheader("📈 Top Contributing Factors")
|
| 351 |
|
| 352 |
-
# Create feature mapping for better readability
|
| 353 |
-
def clean_feature_name(feature):
|
| 354 |
-
"""Convert technical feature names to user-friendly labels"""
|
| 355 |
# Store original for fallback
|
| 356 |
original = feature
|
| 357 |
|
| 358 |
# Remove prefixes
|
| 359 |
feature = feature.replace('cat__', '').replace('num__', '').replace('remainder__', '')
|
| 360 |
|
| 361 |
-
# Simple mappings
|
| 362 |
simple_map = {
|
| 363 |
-
'years_code_pro': 'Years of Experience',
|
| 364 |
-
'age_group': 'Age Group',
|
| 365 |
-
'so_account': 'Stack Overflow Account',
|
| 366 |
-
'ai_select': 'Uses AI Tools'
|
| 367 |
}
|
| 368 |
|
| 369 |
if feature in simple_map:
|
| 370 |
return simple_map[feature]
|
| 371 |
|
| 372 |
-
# Handle categorical variables with prefixes
|
| 373 |
replacements = {
|
| 374 |
-
'country_': 'Country: ',
|
| 375 |
-
'remote_work_': 'Work: ',
|
| 376 |
-
'dev_type_': 'Role: ',
|
| 377 |
-
'org_size_': 'Company: ',
|
| 378 |
-
'ed_level_': 'Education: '
|
| 379 |
}
|
| 380 |
|
| 381 |
-
for prefix,
|
| 382 |
if prefix in feature:
|
| 383 |
-
return
|
| 384 |
|
| 385 |
# Fallback: just clean up underscores and return
|
| 386 |
return feature.replace('_', ' ').title()
|
| 387 |
|
| 388 |
-
# Create emoji version for the table only
|
| 389 |
-
def clean_feature_name_with_emoji(feature):
    """Return the cleaned feature label, prefixed with an emoji when one applies.

    The label comes from ``clean_feature_name(feature)``. The markers below are
    checked in order and the first one found inside the label decides the
    emoji; when none matches, the plain label is returned unchanged.
    """
    label = clean_feature_name(feature)

    # Ordered (marker, emoji prefix) pairs; order matters because the first
    # marker contained in the label wins.
    emoji_by_marker = (
        ('Years of Experience', '⏱️ '),
        ('Age Group', '👤 '),
        ('Country:', '🌍 '),
        ('Work:', '🏠 '),
        ('Role:', '💻 '),
        ('Company:', '🏢 '),
        ('Education:', '🎓 '),
        ('Stack Overflow', '📚 '),
        ('AI Tools', '🤖 '),
    )

    for marker, emoji_prefix in emoji_by_marker:
        if marker in label:
            return emoji_prefix + label

    return label
|
| 415 |
-
|
| 416 |
shap_df = pd.DataFrame({
|
| 417 |
'Feature': feature_names,
|
| 418 |
'SHAP Value': shap_values[0],
|
|
@@ -421,16 +393,18 @@ if st.session_state.get('has_prediction', False):
|
|
| 421 |
shap_df['Abs SHAP'] = shap_df['SHAP Value'].abs()
|
| 422 |
shap_df = shap_df.sort_values('Abs SHAP', ascending=False).head(10)
|
| 423 |
|
| 424 |
-
# Clean feature names
|
| 425 |
-
shap_df['Feature_Clean'] = shap_df['Feature'].apply(clean_feature_name)
|
| 426 |
shap_df['Feature_Clean_Emoji'] = shap_df['Feature'].apply(clean_feature_name_with_emoji)
|
| 427 |
|
| 428 |
-
# Debug: Print to verify (optional - remove in production)
|
| 429 |
-
# st.write("Debug - Feature names:", list(zip(shap_df['Feature'], shap_df['Feature_Clean'])))
|
| 430 |
-
|
| 431 |
# Create visualization with improved styling
|
| 432 |
fig, ax = plt.subplots(figsize=(10, 6))
|
| 433 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 434 |
# Modern color scheme
|
| 435 |
colors = ['#10b981' if x > 0 else '#ef4444' for x in shap_df['SHAP Value']]
|
| 436 |
|
|
@@ -462,9 +436,9 @@ if st.session_state.get('has_prediction', False):
|
|
| 462 |
fontweight='bold', fontsize=10,
|
| 463 |
color=color)
|
| 464 |
|
| 465 |
-
# Set labels with cleaned names
|
| 466 |
ax.set_yticks(range(len(shap_df)))
|
| 467 |
-
ax.set_yticklabels(shap_df['Feature_Clean'], fontsize=10)
|
| 468 |
ax.set_xlabel('Impact on Salary (EUR)', fontsize=11, fontweight='bold')
|
| 469 |
ax.set_title('How Different Factors Affect Your Salary',
|
| 470 |
fontsize=13, fontweight='bold', pad=20)
|
|
|
|
| 349 |
with col1:
|
| 350 |
st.subheader("📈 Top Contributing Factors")
|
| 351 |
|
| 352 |
+
# Create feature mapping for better readability WITH emojis
|
| 353 |
+
def clean_feature_name_with_emoji(feature):
    """Convert a technical feature name into a user-friendly label WITH an emoji.

    Steps, in order:
      1. Strip the transformer markers 'cat__', 'num__' and 'remainder__'.
      2. If the remaining name is one of the known scalar features, return
         its fixed label.
      3. If the remaining name starts with a known categorical prefix
         (e.g. 'country_'), return '<emoji> <Group>: <value>' where the value
         is the rest of the name with underscores turned into spaces.
      4. Otherwise fall back to the name with underscores replaced by spaces,
         title-cased (no emoji).

    Args:
        feature: Raw feature name as a string.

    Returns:
        The display label as a string.
    """
    # Remove prefixes
    feature = feature.replace('cat__', '').replace('num__', '').replace('remainder__', '')

    # Simple mappings with emojis
    simple_map = {
        'years_code_pro': '⏱️ Years of Experience',
        'age_group': '👤 Age Group',
        'so_account': '📚 Stack Overflow Account',
        'ai_select': '🤖 Uses AI Tools',
    }

    if feature in simple_map:
        return simple_map[feature]

    # Categorical variables: name prefix -> emoji label
    category_labels = {
        'country_': '🌍 Country: ',
        'remote_work_': '🏠 Work: ',
        'dev_type_': '💻 Role: ',
        'org_size_': '🏢 Company Size: ',
        'ed_level_': '🎓 Education: ',
    }

    for prefix, emoji_label in category_labels.items():
        # Anchor the match at the start of the name and strip only that
        # leading prefix, so the same text appearing later in the name
        # (e.g. inside the category value) is neither matched nor removed.
        if feature.startswith(prefix):
            return emoji_label + feature[len(prefix):].replace('_', ' ')

    # Fallback: just clean up underscores and return
    return feature.replace('_', ' ').title()
|
| 387 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 388 |
shap_df = pd.DataFrame({
|
| 389 |
'Feature': feature_names,
|
| 390 |
'SHAP Value': shap_values[0],
|
|
|
|
| 393 |
shap_df['Abs SHAP'] = shap_df['SHAP Value'].abs()
|
| 394 |
shap_df = shap_df.sort_values('Abs SHAP', ascending=False).head(10)
|
| 395 |
|
| 396 |
+
# Clean feature names with emojis
|
|
|
|
| 397 |
shap_df['Feature_Clean_Emoji'] = shap_df['Feature'].apply(clean_feature_name_with_emoji)
|
| 398 |
|
|
|
|
|
|
|
|
|
|
| 399 |
# Create visualization with improved styling
|
| 400 |
fig, ax = plt.subplots(figsize=(10, 6))
|
| 401 |
|
| 402 |
+
# Set font that supports emojis (try different options for compatibility)
|
| 403 |
+
try:
    plt.rcParams['font.family'] = 'DejaVu Sans'
except (KeyError, ValueError):
    # Best effort only: if this matplotlib build rejects the setting, keep
    # the default font rather than failing the chart. Only the errors that
    # an rcParams assignment raises (unknown key / invalid value) are
    # ignored, so unrelated problems are no longer silently hidden.
    pass
|
| 407 |
+
|
| 408 |
# Modern color scheme
|
| 409 |
colors = ['#10b981' if x > 0 else '#ef4444' for x in shap_df['SHAP Value']]
|
| 410 |
|
|
|
|
| 436 |
fontweight='bold', fontsize=10,
|
| 437 |
color=color)
|
| 438 |
|
| 439 |
+
# Set labels with cleaned names WITH EMOJIS
|
| 440 |
ax.set_yticks(range(len(shap_df)))
|
| 441 |
+
ax.set_yticklabels(shap_df['Feature_Clean_Emoji'], fontsize=10)
|
| 442 |
ax.set_xlabel('Impact on Salary (EUR)', fontsize=11, fontweight='bold')
|
| 443 |
ax.set_title('How Different Factors Affect Your Salary',
|
| 444 |
fontsize=13, fontweight='bold', pad=20)
|