Peter512 committed on
Commit
b8ffbce
·
verified ·
1 Parent(s): a7a06f6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -51
app.py CHANGED
@@ -349,70 +349,42 @@ if st.session_state.get('has_prediction', False):
349
  with col1:
350
  st.subheader("📈 Top Contributing Factors")
351
 
352
- # Create feature mapping for better readability
353
def clean_feature_name(feature):
    """Convert a technical feature name to a user-friendly label.

    Steps, in order:
      1. Strip any leading 'cat__', 'num__' or 'remainder__' prefix
         (presumably added by a scikit-learn ColumnTransformer -- verify
         against the pipeline that produces ``feature_names``).
      2. Return a fixed label for the handful of known scalar features.
      3. For names that START with a known category prefix (e.g.
         'country_'), return '<Label>: <value>' with the value's
         underscores turned into spaces.
      4. Otherwise fall back to the name with underscores replaced by
         spaces, title-cased.

    Args:
        feature: Raw feature name (str).

    Returns:
        A human-readable label (str).
    """
    # Only strip transformer prefixes at the start of the name; a plain
    # str.replace would also delete the same text inside a category value.
    transformer_prefixes = ('cat__', 'num__', 'remainder__')
    stripped = True
    while stripped:  # loop so stacked prefixes are all removed
        stripped = False
        for prefix in transformer_prefixes:
            if feature.startswith(prefix):
                feature = feature[len(prefix):]
                stripped = True

    # Simple mappings
    simple_map = {
        'years_code_pro': 'Years of Experience',
        'age_group': 'Age Group',
        'so_account': 'Stack Overflow Account',
        'ai_select': 'Uses AI Tools',
    }
    if feature in simple_map:
        return simple_map[feature]

    # Handle categorical variables with prefixes
    replacements = {
        'country_': 'Country: ',
        'remote_work_': 'Work: ',
        'dev_type_': 'Role: ',
        'org_size_': 'Company: ',
        'ed_level_': 'Education: ',
    }
    for prefix, label in replacements.items():
        # Anchor the match at the start: a substring test would let e.g.
        # 'remote_work_' hijack 'dev_type_remote_work_x' and mangle the value.
        if feature.startswith(prefix):
            return label + feature[len(prefix):].replace('_', ' ')

    # Fallback: just clean up underscores and return
    return feature.replace('_', ' ').title()
387
 
388
- # Create emoji version for the table only
389
def clean_feature_name_with_emoji(feature):
    """Return the cleaned label for ``feature`` with a leading emoji.

    The label comes from ``clean_feature_name``. The first marker below
    that occurs in the label decides the emoji; if no marker occurs, the
    label is returned without an emoji.
    """
    label = clean_feature_name(feature)

    # Checked top to bottom; order matters because the first hit wins.
    emoji_by_marker = (
        ('Years of Experience', '⏱️ '),
        ('Age Group', '👤 '),
        ('Country:', '🌍 '),
        ('Work:', '🏠 '),
        ('Role:', '💻 '),
        ('Company:', '🏢 '),
        ('Education:', '🎓 '),
        ('Stack Overflow', '📚 '),
        ('AI Tools', '🤖 '),
    )
    for marker, emoji in emoji_by_marker:
        if marker in label:
            return emoji + label

    return label
415
-
416
  shap_df = pd.DataFrame({
417
  'Feature': feature_names,
418
  'SHAP Value': shap_values[0],
@@ -421,16 +393,18 @@ if st.session_state.get('has_prediction', False):
421
  shap_df['Abs SHAP'] = shap_df['SHAP Value'].abs()
422
  shap_df = shap_df.sort_values('Abs SHAP', ascending=False).head(10)
423
 
424
- # Clean feature names
425
- shap_df['Feature_Clean'] = shap_df['Feature'].apply(clean_feature_name)
426
  shap_df['Feature_Clean_Emoji'] = shap_df['Feature'].apply(clean_feature_name_with_emoji)
427
 
428
- # Debug: Print to verify (optional - remove in production)
429
- # st.write("Debug - Feature names:", list(zip(shap_df['Feature'], shap_df['Feature_Clean'])))
430
-
431
  # Create visualization with improved styling
432
  fig, ax = plt.subplots(figsize=(10, 6))
433
 
 
 
 
 
 
 
434
  # Modern color scheme
435
  colors = ['#10b981' if x > 0 else '#ef4444' for x in shap_df['SHAP Value']]
436
 
@@ -462,9 +436,9 @@ if st.session_state.get('has_prediction', False):
462
  fontweight='bold', fontsize=10,
463
  color=color)
464
 
465
- # Set labels with cleaned names (no emojis)
466
  ax.set_yticks(range(len(shap_df)))
467
- ax.set_yticklabels(shap_df['Feature_Clean'], fontsize=10)
468
  ax.set_xlabel('Impact on Salary (EUR)', fontsize=11, fontweight='bold')
469
  ax.set_title('How Different Factors Affect Your Salary',
470
  fontsize=13, fontweight='bold', pad=20)
 
349
  with col1:
350
  st.subheader("📈 Top Contributing Factors")
351
 
352
+ # Create feature mapping for better readability WITH emojis
353
def clean_feature_name_with_emoji(feature):
    """Convert a technical feature name to a user-friendly label WITH an emoji.

    Steps, in order:
      1. Strip any leading 'cat__', 'num__' or 'remainder__' prefix
         (presumably added by a scikit-learn ColumnTransformer -- verify
         against the pipeline that produces ``feature_names``).
      2. Return a fixed emoji label for the handful of known scalar features.
      3. For names that START with a known category prefix (e.g.
         'country_'), return '<emoji> <Label>: <value>' with the value's
         underscores turned into spaces.
      4. Otherwise fall back to the name with underscores replaced by
         spaces, title-cased (no emoji).

    Args:
        feature: Raw feature name (str).

    Returns:
        A human-readable label (str).
    """
    # Only strip transformer prefixes at the start of the name; a plain
    # str.replace would also delete the same text inside a category value.
    transformer_prefixes = ('cat__', 'num__', 'remainder__')
    stripped = True
    while stripped:  # loop so stacked prefixes are all removed
        stripped = False
        for prefix in transformer_prefixes:
            if feature.startswith(prefix):
                feature = feature[len(prefix):]
                stripped = True

    # Simple mappings with emojis
    simple_map = {
        'years_code_pro': '⏱️ Years of Experience',
        'age_group': '👤 Age Group',
        'so_account': '📚 Stack Overflow Account',
        'ai_select': '🤖 Uses AI Tools',
    }
    if feature in simple_map:
        return simple_map[feature]

    # Handle categorical variables with prefixes and emojis
    replacements = {
        'country_': '🌍 Country: ',
        'remote_work_': '🏠 Work: ',
        'dev_type_': '💻 Role: ',
        'org_size_': '🏢 Company Size: ',
        'ed_level_': '🎓 Education: ',
    }
    for prefix, emoji_label in replacements.items():
        # Anchor the match at the start: a substring test would let e.g.
        # 'remote_work_' hijack 'dev_type_remote_work_x' and mangle the value.
        if feature.startswith(prefix):
            return emoji_label + feature[len(prefix):].replace('_', ' ')

    # Fallback: just clean up underscores and return
    return feature.replace('_', ' ').title()
387
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388
  shap_df = pd.DataFrame({
389
  'Feature': feature_names,
390
  'SHAP Value': shap_values[0],
 
393
  shap_df['Abs SHAP'] = shap_df['SHAP Value'].abs()
394
  shap_df = shap_df.sort_values('Abs SHAP', ascending=False).head(10)
395
 
396
+ # Clean feature names with emojis
 
397
  shap_df['Feature_Clean_Emoji'] = shap_df['Feature'].apply(clean_feature_name_with_emoji)
398
 
 
 
 
399
  # Create visualization with improved styling
400
  fig, ax = plt.subplots(figsize=(10, 6))
401
 
402
+ # Set font that supports emojis (try different options for compatibility)
403
+ try:
404
+ plt.rcParams['font.family'] = 'DejaVu Sans'
405
+ except:
406
+ pass
407
+
408
  # Modern color scheme
409
  colors = ['#10b981' if x > 0 else '#ef4444' for x in shap_df['SHAP Value']]
410
 
 
436
  fontweight='bold', fontsize=10,
437
  color=color)
438
 
439
+ # Set labels with cleaned names WITH EMOJIS
440
  ax.set_yticks(range(len(shap_df)))
441
+ ax.set_yticklabels(shap_df['Feature_Clean_Emoji'], fontsize=10)
442
  ax.set_xlabel('Impact on Salary (EUR)', fontsize=11, fontweight='bold')
443
  ax.set_title('How Different Factors Affect Your Salary',
444
  fontsize=13, fontweight='bold', pad=20)