Peter512 committed on
Commit
5ef0dc4
·
verified ·
1 Parent(s): b8ffbce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -31
app.py CHANGED
@@ -349,42 +349,66 @@ if st.session_state.get('has_prediction', False):
349
  with col1:
350
  st.subheader("📈 Top Contributing Factors")
351
 
352
- # Create feature mapping for better readability WITH emojis
353
def clean_feature_name_with_emoji(feature):
    """Convert a technical feature name to a user-friendly label WITH emojis.

    Args:
        feature: Raw feature name, e.g. ``'cat__country_Germany'``.

    Returns:
        The mapped label for the known simple features, an emoji-prefixed
        ``"<Category>: <value>"`` label when the name starts with a known
        category prefix, or otherwise the name with underscores replaced
        by spaces and title-cased.
    """
    # Remove prefixes (presumably added by the preprocessing pipeline -
    # TODO confirm against the transformer that produces feature_names).
    feature = feature.replace('cat__', '').replace('num__', '').replace('remainder__', '')

    # Simple mappings with emojis
    simple_map = {
        'years_code_pro': '⏱️ Years of Experience',
        'age_group': '👤 Age Group',
        'so_account': '📚 Stack Overflow Account',
        'ai_select': '🤖 Uses AI Tools',
    }
    if feature in simple_map:
        return simple_map[feature]

    # Handle categorical variables with prefixes and emojis
    replacements = {
        'country_': '🌍 Country: ',
        'remote_work_': '🏠 Work: ',
        'dev_type_': '💻 Role: ',
        'org_size_': '🏢 Company Size: ',
        'ed_level_': '🎓 Education: ',
    }
    for prefix, emoji_label in replacements.items():
        # Anchor the match at the start of the name: a substring test would
        # let a category value that merely contains e.g. "country_" be
        # labelled as a country, and str.replace would also delete the
        # prefix text from inside the value.
        if feature.startswith(prefix):
            return emoji_label + feature[len(prefix):].replace('_', ' ')

    # Fallback: just clean up underscores and return
    return feature.replace('_', ' ').title()
387
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388
  shap_df = pd.DataFrame({
389
  'Feature': feature_names,
390
  'SHAP Value': shap_values[0],
@@ -393,18 +417,13 @@ if st.session_state.get('has_prediction', False):
393
  shap_df['Abs SHAP'] = shap_df['SHAP Value'].abs()
394
  shap_df = shap_df.sort_values('Abs SHAP', ascending=False).head(10)
395
 
396
- # Clean feature names with emojis
 
397
  shap_df['Feature_Clean_Emoji'] = shap_df['Feature'].apply(clean_feature_name_with_emoji)
398
 
399
  # Create visualization with improved styling
400
  fig, ax = plt.subplots(figsize=(10, 6))
401
 
402
- # Set font that supports emojis (try different options for compatibility)
403
- try:
404
- plt.rcParams['font.family'] = 'DejaVu Sans'
405
- except:
406
- pass
407
-
408
  # Modern color scheme
409
  colors = ['#10b981' if x > 0 else '#ef4444' for x in shap_df['SHAP Value']]
410
 
@@ -436,9 +455,9 @@ if st.session_state.get('has_prediction', False):
436
  fontweight='bold', fontsize=10,
437
  color=color)
438
 
439
- # Set labels with cleaned names WITH EMOJIS
440
  ax.set_yticks(range(len(shap_df)))
441
- ax.set_yticklabels(shap_df['Feature_Clean_Emoji'], fontsize=10)
442
  ax.set_xlabel('Impact on Salary (EUR)', fontsize=11, fontweight='bold')
443
  ax.set_title('How Different Factors Affect Your Salary',
444
  fontsize=13, fontweight='bold', pad=20)
@@ -465,12 +484,13 @@ if st.session_state.get('has_prediction', False):
465
  plt.tight_layout()
466
  st.pyplot(fig)
467
 
468
- # Add explanation box
469
  st.info("""
470
  **💡 How to read this chart:**
471
  - **Green bars** pointing right → These factors *increase* your salary
472
  - **Red bars** pointing left → These factors *decrease* your salary
473
- - Longer bars = Bigger impact on your predicted salary
 
474
  """)
475
 
476
  with col2:
 
349
  with col1:
350
  st.subheader("📈 Top Contributing Factors")
351
 
352
+ # Create feature mapping WITHOUT emojis for chart
353
def clean_feature_name(feature):
    """Convert a technical feature name to a user-friendly label WITHOUT emojis.

    Args:
        feature: Raw feature name, e.g. ``'cat__country_Germany'``.

    Returns:
        The mapped label for the known simple features, a
        ``"<Category>: <value>"`` label when the name starts with a known
        category prefix, or otherwise the name with underscores replaced
        by spaces and title-cased.
    """
    # Remove prefixes (presumably added by the preprocessing pipeline -
    # TODO confirm against the transformer that produces feature_names).
    feature = feature.replace('cat__', '').replace('num__', '').replace('remainder__', '')

    # Simple mappings
    simple_map = {
        'years_code_pro': 'Years of Experience',
        'age_group': 'Age Group',
        'so_account': 'Stack Overflow Account',
        'ai_select': 'Uses AI Tools',
    }
    if feature in simple_map:
        return simple_map[feature]

    # Handle categorical variables
    replacements = {
        'country_': 'Country: ',
        'remote_work_': 'Work: ',
        'dev_type_': 'Role: ',
        'org_size_': 'Company Size: ',
        'ed_level_': 'Education: ',
    }
    for prefix, label in replacements.items():
        # Anchor the match at the start of the name: a substring test would
        # let a category value that merely contains e.g. "country_" be
        # labelled as a country, and str.replace would also delete the
        # prefix text from inside the value.
        if feature.startswith(prefix):
            return label + feature[len(prefix):].replace('_', ' ')

    # Fallback
    return feature.replace('_', ' ').title()
384
 
385
+ # Create emoji version for the table only
386
def clean_feature_name_with_emoji(feature):
    """Convert a technical feature name to a user-friendly label WITH emojis.

    The plain label comes from ``clean_feature_name``; an emoji is then
    prepended according to the first marker text found in that label.
    Labels matching no marker are returned unchanged.
    """
    label = clean_feature_name(feature)

    # Ordered (marker, emoji) pairs; the first marker contained in the
    # label wins, so the order here is significant.
    emoji_by_marker = (
        ('Years of Experience', '⏱️ '),
        ('Age Group', '👤 '),
        ('Country:', '🌍 '),
        ('Work:', '🏠 '),
        ('Role:', '💻 '),
        ('Company Size:', '🏢 '),
        ('Education:', '🎓 '),
        ('Stack Overflow', '📚 '),
        ('AI Tools', '🤖 '),
    )
    for marker, emoji in emoji_by_marker:
        if marker in label:
            return emoji + label

    return label
411
+
412
  shap_df = pd.DataFrame({
413
  'Feature': feature_names,
414
  'SHAP Value': shap_values[0],
 
417
  shap_df['Abs SHAP'] = shap_df['SHAP Value'].abs()
418
  shap_df = shap_df.sort_values('Abs SHAP', ascending=False).head(10)
419
 
420
+ # Clean feature names - NO emojis for chart, WITH emojis for table
421
+ shap_df['Feature_Clean'] = shap_df['Feature'].apply(clean_feature_name)
422
  shap_df['Feature_Clean_Emoji'] = shap_df['Feature'].apply(clean_feature_name_with_emoji)
423
 
424
  # Create visualization with improved styling
425
  fig, ax = plt.subplots(figsize=(10, 6))
426
 
 
 
 
 
 
 
427
  # Modern color scheme
428
  colors = ['#10b981' if x > 0 else '#ef4444' for x in shap_df['SHAP Value']]
429
 
 
455
  fontweight='bold', fontsize=10,
456
  color=color)
457
 
458
+ # Set labels with cleaned names WITHOUT EMOJIS
459
  ax.set_yticks(range(len(shap_df)))
460
+ ax.set_yticklabels(shap_df['Feature_Clean'], fontsize=10)
461
  ax.set_xlabel('Impact on Salary (EUR)', fontsize=11, fontweight='bold')
462
  ax.set_title('How Different Factors Affect Your Salary',
463
  fontsize=13, fontweight='bold', pad=20)
 
484
  plt.tight_layout()
485
  st.pyplot(fig)
486
 
487
+ # Add explanation box with improved text
488
  st.info("""
489
  **💡 How to read this chart:**
490
  - **Green bars** pointing right → These factors *increase* your salary
491
  - **Red bars** pointing left → These factors *decrease* your salary
492
+ - **Longer bars** = Bigger impact on your predicted salary
493
+ - **Why do I see other countries/categories I didn't select?** The chart shows the top 10 most impactful features for your prediction. For categories you *didn't* select (like other countries), a negative value means "not having this characteristic decreases your salary" — in other words, if you had selected that option instead, your salary would be higher. For example, "Country: Germany = -€1,059" means being from Germany would have added €1,059 to your salary.
494
  """)
495
 
496
  with col2: