Peter512 committed on
Commit
ee536ca
·
verified ·
1 Parent(s): 1add186

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -13
app.py CHANGED
@@ -345,10 +345,37 @@ if st.session_state.get('has_prediction', False):
345
 
346
  # Feature contribution table
347
  col1, col2 = st.columns([2, 1])
348
-
349
  with col1:
350
  st.subheader("📈 Top Contributing Factors")
351
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
352
  shap_df = pd.DataFrame({
353
  'Feature': feature_names,
354
  'SHAP Value': shap_values[0],
@@ -357,25 +384,72 @@ if st.session_state.get('has_prediction', False):
357
  shap_df['Abs SHAP'] = shap_df['SHAP Value'].abs()
358
  shap_df = shap_df.sort_values('Abs SHAP', ascending=False).head(10)
359
 
360
- # Create visualization
 
 
 
361
  fig, ax = plt.subplots(figsize=(10, 6))
362
- colors = ['#2ecc71' if x > 0 else '#e74c3c' for x in shap_df['SHAP Value']]
363
- ax.barh(range(len(shap_df)), shap_df['SHAP Value'], color=colors, alpha=0.8)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
364
  ax.set_yticks(range(len(shap_df)))
365
- ax.set_yticklabels(shap_df['Feature'], fontsize=10)
366
  ax.set_xlabel('Impact on Salary (EUR)', fontsize=11, fontweight='bold')
367
- ax.set_title('Top 10 Feature Impacts', fontsize=13, fontweight='bold', pad=20)
368
- ax.axvline(x=0, color='black', linestyle='-', linewidth=1)
369
- ax.grid(axis='x', alpha=0.3)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
  plt.tight_layout()
371
  st.pyplot(fig)
372
-
 
 
 
 
 
 
 
 
373
  with col2:
374
  st.subheader("📋 Impact Details")
375
 
376
- # Format the table
377
- display_df = shap_df[['Feature', 'SHAP Value', 'Impact']].copy()
378
- display_df['SHAP Value'] = display_df['SHAP Value'].apply(lambda x: f"€{x:,.0f}")
 
 
 
379
  display_df = display_df.reset_index(drop=True)
380
 
381
  st.dataframe(
@@ -586,7 +660,7 @@ else:
586
  - **Sample**: 7,000+ European professional developers
587
  - **Algorithm**: Optimized Random Forest
588
  - **Accuracy**: RMSE ~€18,600
589
- - **Last Updated**: 2024
590
  """)
591
 
592
  with col2:
 
345
 
346
  # Feature contribution table
347
  col1, col2 = st.columns([2, 1])
348
+
349
  with col1:
350
  st.subheader("📈 Top Contributing Factors")
351
 
352
+ # Create feature mapping for better readability
353
def clean_feature_name(feature):
    """Convert a technical feature name to a user-friendly label.

    The ``cat__`` / ``num__`` markers are removed first (presumably the
    prefixes a preprocessing pipeline adds to column names -- confirm
    against the model code). The remainder is then matched against a table
    of known feature keys:

    * an exact match returns the label without any trailing ``": "``;
    * a prefix match returns the label followed by the rest of the name
      (e.g. the category value of a one-hot encoded column);
    * anything else is returned unchanged (minus the markers).

    Args:
        feature: Raw feature name, e.g. ``"cat__country_Germany"``.

    Returns:
        A display label such as ``"🌍 Country: Germany"``.
    """
    # Remove prefixes
    feature = feature.replace('cat__', '').replace('num__', '')

    # Add emojis and format nicely
    emoji_map = {
        'years_code_pro': '⏱️ Years of Experience',
        'age_group': '👤 Age Group',
        'country_': '🌍 Country: ',
        'remote_work_': '🏠 Work: ',
        'dev_type_': '💻 Role: ',
        'org_size_': '🏢 Company: ',
        'ed_level_': '🎓 Education: ',
        'so_account': '📚 Stack Overflow',
        'ai_select': '🤖 AI Tools',
    }

    for key, label in emoji_map.items():
        # Exact match has to be tested before the prefix test: every string
        # starts with itself, so the other order makes this branch dead code
        # and leaves a dangling ": " on the label.
        if feature == key:
            return label.rstrip(': ')
        if feature.startswith(key):
            # Drop only the leading key; str.replace would also delete any
            # later occurrence of the key inside the category value.
            return label + feature[len(key):]

    return feature
378
+
379
  shap_df = pd.DataFrame({
380
  'Feature': feature_names,
381
  'SHAP Value': shap_values[0],
 
384
  shap_df['Abs SHAP'] = shap_df['SHAP Value'].abs()
385
  shap_df = shap_df.sort_values('Abs SHAP', ascending=False).head(10)
386
 
387
+ # Clean feature names
388
+ shap_df['Feature_Clean'] = shap_df['Feature'].apply(clean_feature_name)
389
+
390
+ # Create visualization with improved styling
391
  fig, ax = plt.subplots(figsize=(10, 6))
392
+
393
+ # Modern color scheme
394
+ colors = ['#10b981' if x > 0 else '#ef4444' for x in shap_df['SHAP Value']]
395
+
396
+ # Create bars
397
+ bars = ax.barh(range(len(shap_df)), shap_df['SHAP Value'], color=colors, alpha=0.85, height=0.7)
398
+
399
+ # Add value labels on bars
400
+ for i, (bar, value) in enumerate(zip(bars, shap_df['SHAP Value'])):
401
+ x_pos = value + (500 if value > 0 else -500)
402
+ ax.text(x_pos, i, f'€{abs(value):,.0f}',
403
+ ha='left' if value > 0 else 'right',
404
+ va='center', fontweight='bold', fontsize=10,
405
+ color='#10b981' if value > 0 else '#ef4444')
406
+
407
+ # Set labels with cleaned names
408
  ax.set_yticks(range(len(shap_df)))
409
+ ax.set_yticklabels(shap_df['Feature_Clean'], fontsize=10)
410
  ax.set_xlabel('Impact on Salary (EUR)', fontsize=11, fontweight='bold')
411
+ ax.set_title('How Different Factors Affect Your Salary',
412
+ fontsize=13, fontweight='bold', pad=20)
413
+
414
+ # Add zero line
415
+ ax.axvline(x=0, color='#64748b', linestyle='-', linewidth=2, alpha=0.5)
416
+
417
+ # Add legend
418
+ from matplotlib.patches import Patch
419
+ legend_elements = [
420
+ Patch(facecolor='#10b981', alpha=0.85, label='Increases Salary'),
421
+ Patch(facecolor='#ef4444', alpha=0.85, label='Decreases Salary')
422
+ ]
423
+ ax.legend(handles=legend_elements, loc='lower right', frameon=True,
424
+ fancybox=True, shadow=True, fontsize=10)
425
+
426
+ # Styling
427
+ ax.grid(axis='x', alpha=0.2, linestyle='--')
428
+ ax.set_facecolor('#f8fafc')
429
+ fig.patch.set_facecolor('white')
430
+ ax.spines['top'].set_visible(False)
431
+ ax.spines['right'].set_visible(False)
432
+
433
  plt.tight_layout()
434
  st.pyplot(fig)
435
+
436
+ # Add explanation box
437
+ st.info("""
438
+ **💡 How to read this chart:**
439
+ - **Green bars** pointing right → These factors *increase* your salary
440
+ - **Red bars** pointing left → These factors *decrease* your salary
441
+ - Longer bars = Bigger impact on your predicted salary
442
+ """)
443
+
444
  with col2:
445
  st.subheader("📋 Impact Details")
446
 
447
+ # Format the table with cleaned names
448
+ display_df = shap_df[['Feature_Clean', 'SHAP Value', 'Impact']].copy()
449
+ display_df.columns = ['Factor', 'Impact Amount', 'Direction']
450
+ display_df['Impact Amount'] = display_df['Impact Amount'].apply(
451
+ lambda x: f"€{abs(x):,.0f}"
452
+ )
453
  display_df = display_df.reset_index(drop=True)
454
 
455
  st.dataframe(
 
660
  - **Sample**: 7,000+ European professional developers
661
  - **Algorithm**: Optimized Random Forest
662
  - **Accuracy**: RMSE ~€18,600
663
+ - **Last Updated**: 2025
664
  """)
665
 
666
  with col2: