Spaces:

Zirok05
/

CreditScoringSystemSimulation

Running

File size: 8,451 Bytes

bd19734

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

FEATURE_DESCRIPTIONS = { ... }

def get_feature_display_name(feature_name):
    if feature_name in FEATURE_DESCRIPTIONS:
        return FEATURE_DESCRIPTIONS[feature_name]
    name = feature_name.replace('_', ' ').title()
    name = name.replace('Over', '>')
    name = name.replace('Loans', 'Кредитов')
    return name


def interpret_lr(features, lr_model, feature_names):
    """Интерпретация логистической регрессии"""
    if isinstance(features, np.ndarray):
        features = pd.DataFrame(features, columns=feature_names)
    coefficients = lr_model.coef_[0]
    intercept = lr_model.intercept_[0]

    importance_df = pd.DataFrame({
        'feature': feature_names,
        'coefficient': coefficients,
        'value': features.iloc[0].values
    })
    importance_df['logit_contribution'] = importance_df['coefficient'] * importance_df['value']
    importance_df['abs_logit'] = abs(importance_df['logit_contribution'])
    importance_df = importance_df.sort_values('abs_logit', ascending=False)

    base_proba = lr_model.predict_proba(features)[0, 1]
    marginal_effects = []
    features_array = features.values

    for i, feature in enumerate(feature_names):
        features_zero = features_array.copy()
        features_zero[0, i] = 0
        zero_proba = lr_model.predict_proba(features_zero)[0, 1]
        marginal_effect = base_proba - zero_proba
        marginal_effects.append({
            'feature': feature,
            'marginal_effect': marginal_effect,
            'abs_marginal': abs(marginal_effect)
        })

    marginal_df = pd.DataFrame(marginal_effects).sort_values('abs_marginal', ascending=False)

    logit = intercept + importance_df['logit_contribution'].sum()
    proba = 1 / (1 + np.exp(-logit))

    return {
        'logit_contributions': importance_df,
        'marginal_effects': marginal_df,
        'probability': proba,
        'logit': logit,
        'intercept': intercept
    }

def plot_feature_importance_sns(importance_df, value_col='logit_contribution', title="Вклад признаков в логит"):
    df = importance_df.head(10).copy()
    df = df.sort_values(value_col, ascending=True)

    fig, ax = plt.subplots(figsize=(10, 6), facecolor='#f8f9fa')
    ax.set_facecolor('#f8f9fa')

    colors = ['#d7191c' if x > 0 else '#1a9641' if x < 0 else '#ffffbf' for x in df[value_col]]
    bars = ax.barh(df['feature'], df[value_col], color=colors, edgecolor='white', linewidth=1.5, alpha=0.9)

    for bar, val in zip(bars, df[value_col]):
        if abs(val) > 0.02:
            x_pos = val - 0.02 if val > 0 else val + 0.02
            ha = 'right' if val > 0 else 'left'
            ax.text(x_pos, bar.get_y() + bar.get_height() / 2, f'{val:.3f}', ha=ha, va='center', fontsize=9)

    ax.axvline(x=0, color='#495057', linestyle='-', linewidth=1, alpha=0.3)
    ax.grid(axis='x', alpha=0.15, linestyle='--', color='#adb5bd')
    ax.set_axisbelow(True)
    ax.set_xlabel('Вклад в логит', fontsize=11)
    ax.set_ylabel('')
    ax.set_title(title, fontsize=12, fontweight='bold', pad=15)
    ax.set_yticklabels([get_feature_display_name(x) for x in df['feature']], fontsize=10)
    ax.set_yticklabels([get_feature_display_name(x) for x in df['feature']], fontsize=10)
    sns.despine(top=True, right=True, left=False, bottom=False)
    plt.tight_layout()
    return fig

def plot_marginal_effects_sns(marginal_df, title="Влияние на вероятность дефолта"):
    df = marginal_df.head(10).copy()
    df = df.sort_values('marginal_effect', ascending=True)

    fig, ax = plt.subplots(figsize=(10, 6), facecolor='#f8f9fa')
    ax.set_facecolor('#f8f9fa')

    colors = ['#d7191c' if x > 0 else '#1a9641' if x < 0 else '#ffffbf' for x in df['marginal_effect']]
    bars = ax.barh(df['feature'], df['marginal_effect'], color=colors, edgecolor='white', linewidth=1.5, alpha=0.9)

    for bar, val in zip(bars, df['marginal_effect']):
        if abs(val) > 0.01:
            x_pos = val - 0.01 if val > 0 else val + 0.01
            ha = 'right' if val > 0 else 'left'
            ax.text(x_pos, bar.get_y() + bar.get_height() / 2, f'{val:.1%}', ha=ha, va='center', fontsize=9)

    ax.axvline(x=0, color='#495057', linestyle='-', linewidth=1, alpha=0.3)
    ax.grid(axis='x', alpha=0.15, linestyle='--', color='#adb5bd')
    ax.set_axisbelow(True)
    ax.set_xlabel('Изменение вероятности', fontsize=11)
    ax.set_ylabel('')
    ax.set_title(title, fontsize=12, fontweight='bold', pad=15)
    ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{x:.0%}'))
    ax.set_yticklabels([get_feature_display_name(x) for x in df['feature']], fontsize=10)
    sns.despine(top=True, right=True, left=False, bottom=False)
    plt.tight_layout()
    return fig


def plot_shap_analysis(second_model, processed_scaled, feature_names, second_model_name):
    """Отображение SHAP анализа для tree-based моделей"""
    import streamlit as st
    st.markdown("---")
    st.subheader(f"⚡ Детальный анализ: {second_model_name} (SHAP)")

    with st.spinner("🔄 Рассчитываем SHAP значения..."):
        try:
            import shap

            # Создаем explainer и считаем SHAP
            explainer = shap.TreeExplainer(second_model)
            shap_values = explainer.shap_values(processed_scaled)

            # Для бинарной классификации
            if isinstance(shap_values, list):
                shap_values = shap_values[1]

            # 1. Waterfall plot
            fig, ax = plt.subplots(figsize=(12, 7))
            shap.waterfall_plot(
                shap.Explanation(
                    values=shap_values[0],
                    base_values=explainer.expected_value,
                    data=processed_scaled.iloc[0].values,
                    feature_names=feature_names
                ),
                show=False,
            )
            plt.tight_layout()
            st.pyplot(fig)

            # 2. Объяснение как читать график
            with st.expander("📋 Как читать SHAP график?"):
                st.markdown("""
                - **f(x)** = итоговое предсказание модели
                - **base value** = среднее предсказание по всем клиентам
                - 🔴 Красное → признаки, повышающие риск
                - 🔵 Синее → признаки, снижающие риск
                """)

            # 3. Таблица с SHAP значениями
            shap_df = pd.DataFrame({
                'feature': feature_names,
                'shap_value': shap_values[0],
                'abs_shap': abs(shap_values[0])
            }).sort_values('abs_shap', ascending=False)

            shap_df['description'] = shap_df['feature'].apply(get_feature_display_name)

            st.markdown("### 📋 Факторы, влияющие на решение:")

            col1, col2 = st.columns(2)

            with col1:
                pos = shap_df[shap_df['shap_value'] > 0].head(5)
                if len(pos) > 0:
                    st.markdown("**🔴 Повышают риск:**")
                    for _, row in pos.iterrows():
                        st.markdown(f"- {row['description']}: +{row['shap_value']:.3f}")

            with col2:
                neg = shap_df[shap_df['shap_value'] < 0].head(5)
                if len(neg) > 0:
                    st.markdown("**🟢 Снижают риск:**")
                    for _, row in neg.iterrows():
                        st.markdown(f"- {row['description']}: {row['shap_value']:.3f}")

            with st.expander("📋 Все SHAP значения"):
                display_df = shap_df[['feature', 'description', 'shap_value']].copy()
                display_df.columns = ['Признак', 'Описание', 'SHAP']
                display_df['SHAP'] = display_df['SHAP'].round(3)
                st.dataframe(display_df.sort_values('SHAP', ascending=False), width='stretch')

        except Exception as e:
            st.error(f"❌ Ошибка SHAP: {e}")
            st.info("Установите shap: `pip install shap`")