Spaces:

Zirok05
/

CreditScoringSystemSimulation

Running

App Files Files Community

Artem Zmailov committed on Feb 23

Commit

af457d2

1 Parent(s): a81fc0a

Final cleanup: all heavy files moved to LFS

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.DS_Store +0 -0
.gitattributes +1 -0
app/.DS_Store +0 -0
app/.streamlit/config.toml +2 -0
app/__init__.py +0 -0
app/__pycache__/__init__.cpython-311.pyc +0 -0
app/main.py +33 -0
app/models/__pycache__/escalation.cpython-311.pyc +0 -0
app/models/__pycache__/interpretation.cpython-311.pyc +0 -0
app/models/escalation.py +267 -0
app/models/interpretation.py +194 -0
app/pages/__pycache__/application.cpython-311.pyc +0 -0
app/pages/__pycache__/simulation.cpython-311.pyc +0 -0
app/pages/application.py +329 -0
app/pages/simulation.py +345 -0
app/simulation/.DS_Store +0 -0
app/simulation/__init__.py +0 -0
app/simulation/__pycache__/__init__.cpython-311.pyc +0 -0
app/simulation/controllers/__init__.py +0 -0
app/simulation/controllers/__pycache__/__init__.cpython-311.pyc +0 -0
app/simulation/controllers/__pycache__/base.cpython-311.pyc +0 -0
app/simulation/controllers/__pycache__/pid.cpython-311.pyc +0 -0
app/simulation/controllers/base.py +28 -0
app/simulation/controllers/pid.py +129 -0
app/simulation/core/__init__.py +0 -0
app/simulation/core/__pycache__/__init__.cpython-311.pyc +0 -0
app/simulation/core/__pycache__/processor.cpython-311.pyc +0 -0
app/simulation/core/__pycache__/traffic_generator.cpython-311.pyc +0 -0
app/simulation/core/processor.py +339 -0
app/simulation/core/traffic_generator.py +234 -0
app/simulation/visualization/__init__.py +0 -0
app/simulation/visualization/__pycache__/__init__.cpython-311.pyc +0 -0
app/simulation/visualization/__pycache__/animation.cpython-311.pyc +0 -0
app/simulation/visualization/__pycache__/plots.cpython-311.pyc +0 -0
app/simulation/visualization/animation.py +246 -0
app/simulation/visualization/plots.py +374 -0
app/simulation/visualization/simulation_20:11.gif +0 -0
app/simulation/visualization/simulation_20:19.gif +0 -0
app/simulation/visualization/simulation_20:25.gif +0 -0
app/simulation/visualization/simulation_20:30.gif +0 -0
app/utils/__pycache__/credit_preprocessor.cpython-311.pyc +0 -0
app/utils/__pycache__/data_loader.cpython-311.pyc +0 -0
app/utils/credit_preprocessor.py +329 -0
app/utils/data_loader.py +26 -0
catboost_info/catboost_training.json +104 -0
catboost_info/learn/events.out.tfevents +3 -0
catboost_info/learn_error.tsv +101 -0
catboost_info/time_left.tsv +101 -0
catboost_info/tmp/cat_feature_index.17f2f383-11eeddeb-4a42fe68-9d5bbcbb.tmp +0 -0
datasets/.DS_Store +0 -0

.DS_Store ADDED Viewed

Binary file (12.3 kB). View file

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.csv filter=lfs diff=lfs merge=lfs -text

app/.DS_Store ADDED Viewed

Binary file (10.2 kB). View file

app/.streamlit/config.toml ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ [client]
2	+ showSidebarNavigation = false

app/__init__.py ADDED Viewed

File without changes

app/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (169 Bytes). View file

app/main.py ADDED Viewed

	@@ -0,0 +1,33 @@

+import streamlit as st
+import os
+import sys
+# Entry page of the Streamlit app: configures the page and offers two buttons
+# that navigate to the questionnaire page and to the simulation page.
+# Make the parent directory of this file's directory importable.
+sys.path.append(os.path.dirname(os.path.dirname(__file__)))
+st.set_page_config(
+    page_title="GiveMeSomeCredit",
+    page_icon="🏦",
+    layout="wide",
+    initial_sidebar_state="collapsed"  # ← collapses the sidebar by default
+)
+st.title("🏦 GiveMeSomeCredit - Кредитный скоринг")
+st.markdown("---")
+# Two side-by-side columns, one per destination page.
+col1, col2 = st.columns(2)
+with col1:
+    st.subheader("📝 Анкета")
+    if st.button("Перейти к анкете"):
+        st.switch_page("pages/application.py")  # ← original note: this will call main() — not verifiable from this script
+with col2:
+    st.subheader("📊 Симуляция")
+    if st.button("Перейти к симуляции"):
+        st.switch_page("pages/simulation.py")  # ← original note: this will call main() — not verifiable from this script
+st.markdown("---")
+# Launch command: streamlit run app/main.py

app/models/__pycache__/escalation.cpython-311.pyc ADDED Viewed

Binary file (10.9 kB). View file

app/models/__pycache__/interpretation.cpython-311.pyc ADDED Viewed

Binary file (14.9 kB). View file

app/models/escalation.py ADDED Viewed

	@@ -0,0 +1,267 @@

+import numpy as np
+def check_business_rules(df):
+    """
+    Батчевая проверка бизнес-правил
+    Возвращает:
+    - manual_mask: булев массив (True = в ручной разбор)
+    - auto_reject_mask: булев массив (True = сразу отказ)
+    - messages: массив сообщений
+    - auto_decisions: массив решений для auto_reject_mask (всегда 1 - отказ)
+    """
+    n = len(df)
+    manual_mask = np.zeros(n, dtype=bool)
+    auto_reject_mask = np.zeros(n, dtype=bool)
+    messages = [''] * n
+    auto_decisions = np.zeros(n, dtype=int)
+    # Извлекаем колонки
+    age = df['age'].fillna(0).values
+    monthly_income = df['MonthlyIncome'].fillna(0).values
+    debt_ratio = df['DebtRatio'].fillna(0).values
+    monthly_debt = np.where(monthly_income > 0,
+                            debt_ratio * monthly_income,
+                            debt_ratio)
+    late_90 = df['NumberOfTimes90DaysLate'].fillna(0).values
+    late_60_89 = df['NumberOfTime60-89DaysPastDueNotWorse'].fillna(0).values
+    late_30_59 = df['NumberOfTime30-59DaysPastDueNotWorse'].fillna(0).values
+    real_estate = df['NumberRealEstateLoansOrLines'].fillna(0).values
+    utilization = df['RevolvingUtilizationOfUnsecuredLines'].fillna(0).values
+    # 1. КРИТИЧЕСКИЕ ПРАВИЛА - сразу отказ
+    mask = (age < 18)
+    auto_reject_mask[mask] = True
+    auto_decisions[mask] = 1
+    messages = np.where(mask, 'Возраст менее 18 лет - кредит не выдаётся', messages)
+    # 2. СПЕЦИАЛЬНЫЕ БАНКОВСКИЕ КОДЫ - сразу ручной разбор
+    mask = (late_90 == 98) | (late_60_89 == 98) | (late_30_59 == 98)
+    manual_mask[mask] = True
+    messages = np.where(mask, 'Код 98: Списание долга как безнадежного', messages)
+    mask = (late_90 == 96) | (late_60_89 == 96) | (late_30_59 == 96)
+    manual_mask[mask] = True
+    messages = np.where(mask, 'Код 96: Изъятие залога или реализация имущества', messages)
+    # 3. КРИТИЧЕСКИЕ ПРАВИЛА - сразу ручной разбор
+    mask = (age > 80)
+    manual_mask[mask] = True
+    messages = np.where(mask, 'Возраст > 80 лет - требуется ручной разбор (индивидуальные условия)', messages)
+    mask = (monthly_income > 1000000)
+    manual_mask[mask] = True
+    messages = np.where(mask, 'Доход свыше 1,000,000 $ - требуется ручной разбор', messages)
+    mask = (monthly_debt > 1000000)
+    manual_mask[mask] = True
+    messages = np.where(mask, 'Платежи свыше 1,000,000 $ - требуется ручной разбор', messages)
+    mask = (utilization > 2)
+    manual_mask[mask] = True
+    messages = np.where(mask, 'Использование кредитных средств превышает 200%', messages)
+    mask = (real_estate > 20)
+    manual_mask[mask] = True
+    messages = np.where(mask, 'Количество кредитов под залог недвижимости слишком велико - ручной разбор', messages)
+    # print(f"age: min={age.min()}, max={age.max()}")
+    # print(f"income: max={monthly_income.max()}")
+    # print(f"late_90: values 96/98: {np.sum((late_90 == 96) | (late_90 == 98))}")
+    # print(f"utilization: max={utilization.max()}")
+    # print(f"real_estate: max={real_estate.max()}")
+    return manual_mask, auto_reject_mask, messages, auto_decisions
+def escalation_decision(applications_df, lr_model, second_model, second_model_name,
+                        threshold=0.5, lr_margins=[0.35], second_margins=[0.4],
+                        preprocessor=None, scaler=None):
+    """
+    Универсальная эскалационная логика
+    1. Бизнес-правила:
+       - часть заявок сразу в ручной разбор
+       - часть заявок сразу отказ
+    2. Оставшиеся -> LR
+    3. Если LR неуверена -> вторая модель
+    """
+    n = len(applications_df)
+    decisions = [None] * n
+    manual_mask = np.zeros(n, dtype=bool)
+    # СЧЁТЧИКИ
+    stats = {
+        'business_manual': 0,  # ручной разбор по бизнес-правилам
+        'business_auto': 0,  # авто отказ по бизнес-правилам
+        'lr_confident': 0,  # уверенно решены LR
+        'second_confident': 0,  # уверенно решены второй моделью
+        'second_uncertain': 0,  # неуверенность второй модели → ручной
+        'total': n
+    }
+    # 1. Бизнес-правила
+    bus_manual_mask, bus_reject_mask, bus_messages, bus_decisions = check_business_rules(applications_df)
+    # После check_business_rules
+    #print(f"Бизнес-правила: manual={bus_manual_mask.sum()}, auto_reject={bus_reject_mask.sum()}")
+    # Обрабатываем сразу отказ
+    for i in range(n):
+        if bus_reject_mask[i]:
+            stats['business_auto'] += 1
+            decisions[i] = {
+                'final_decision': 1,
+                'model_used': 'Business Rules',
+                'needs_review': False,
+                'probability': 1.0,
+                'message': bus_messages[i],
+                'lr_proba': None,
+                'second_proba': None,
+                'decision_path': [f"❌ Бизнес-правила: {bus_messages[i]}"]
+            }
+    # Обрабатываем сразу ручной разбор
+    for i in range(n):
+        if bus_manual_mask[i]:
+            stats['business_manual'] += 1
+            manual_mask[i] = True
+            decisions[i] = {
+                'final_decision': None,
+                'model_used': 'Business Rules',
+                'needs_review': True,
+                'probability': None,
+                'message': bus_messages[i],
+                'lr_proba': None,
+                'second_proba': None,
+                'decision_path': [f"⚠️ Бизнес-правила: {bus_messages[i]}"]
+            }
+    # 2. Заявки, которые идут к моделям (не отсеялись бизнес-правилами)
+    model_indices = [i for i in range(n) if decisions[i] is None]
+    if not model_indices:
+        return decisions, manual_mask, stats
+    # 3. Обработка моделями
+    df_models = applications_df.iloc[model_indices]
+    # Препроцессинг
+    processed = preprocessor.transform(df_models)
+    processed_scaled = scaler.transform(processed)
+    # LR предсказания (батч)
+    lr_probas = lr_model.predict_proba(processed_scaled)[:, 1]
+    # Определяем отступы для LR
+    if len(lr_margins) == 1:
+        lr_low = lr_high = lr_margins[0]
+    else:
+        lr_low, lr_high = lr_margins[0], lr_margins[1]
+    # Проверяем уверенность LR
+    lr_confident = np.zeros(len(model_indices), dtype=bool)
+    lr_margin_values = np.zeros(len(model_indices))
+    for j, proba in enumerate(lr_probas):
+        if proba < threshold:
+            margin = threshold - proba
+            lr_confident[j] = margin >= lr_low
+        else:
+            margin = proba - threshold
+            lr_confident[j] = margin >= lr_high
+        lr_margin_values[j] = margin
+    # Обрабатываем уверенные LR
+    for j, idx in enumerate(model_indices):
+        if lr_confident[j]:
+            stats['lr_confident'] += 1
+            decisions[idx] = {
+                'final_decision': int(lr_probas[j] >= threshold),
+                'probability': lr_probas[j],
+                'model_used': 'Logistic Regression',
+                'needs_review': False,
+                'lr_proba': lr_probas[j],
+                'second_proba': None,
+                'lr_margin': lr_margin_values[j],
+                'lr_confident': True,
+                'second_used': False,
+                'decision_path': [
+                    f"1️⃣ Logistic Regression: {lr_probas[j]:.1%} (отступ: {lr_margin_values[j]:.1%})",
+                    f"   ✅ LR уверена - финальное решение"
+                ]
+            }
+    # Неуверенные LR - идут ко второй модели
+    uncertain_indices = [model_indices[j] for j in range(len(model_indices)) if not lr_confident[j]]
+    if uncertain_indices:
+        # Находим позиции неуверенных заявок
+        uncertain_positions = [j for j in range(len(model_indices)) if not lr_confident[j]]
+        processed_uncertain_scaled = processed_scaled.iloc[uncertain_positions]
+        # Вторая модель (батч)
+        second_probas = second_model.predict_proba(processed_uncertain_scaled)[:, 1]
+        # Определяем отступы для второй модели
+        if len(second_margins) == 1:
+            second_low = second_high = second_margins[0]
+        else:
+            second_low, second_high = second_margins[0], second_margins[1]
+        # Проверяем уверенность второй модели
+        for k, idx in enumerate(uncertain_indices):
+            proba = second_probas[k]
+            if proba < threshold:
+                second_margin = threshold - proba
+                second_confident = second_margin >= second_low
+            else:
+                second_margin = proba - threshold
+                second_confident = second_margin >= second_high
+            # Формируем decision_path
+            path = [
+                f"1️⃣ Logistic Regression: {lr_probas[uncertain_positions[k]]:.1%} (отступ: {lr_margin_values[uncertain_positions[k]]:.1%})",
+                f"   ⚠️ LR не уверена → вызываем {second_model_name}",
+                f"2️⃣ {second_model_name}: {proba:.1%} (отступ: {second_margin:.1%})"
+            ]
+            if second_confident:
+                stats['second_confident'] += 1
+                path.append(f"   ✅ {second_model_name} уверен - финальное решение")
+                decisions[idx] = {
+                    'final_decision': int(proba >= threshold),
+                    'probability': proba,
+                    'model_used': second_model_name,
+                    'needs_review': False,
+                    'lr_proba': lr_probas[uncertain_positions[k]],
+                    'second_proba': proba,
+                    'lr_margin': lr_margin_values[uncertain_positions[k]],
+                    'second_margin': second_margin,
+                    'lr_confident': False,
+                    'second_confident': True,
+                    'second_used': True,
+                    'decision_path': path
+                }
+            else:
+                stats['second_uncertain'] += 1
+                path.append(f"   ⚠️ {second_model_name} не уверен → ручной разбор")
+                manual_mask[idx] = True
+                decisions[idx] = {
+                    'final_decision': None,
+                    'probability': proba,
+                    'model_used': 'Manual Review',
+                    'needs_review': True,
+                    'lr_proba': lr_probas[uncertain_positions[k]],
+                    'second_proba': proba,
+                    'lr_margin': lr_margin_values[uncertain_positions[k]],
+                    'second_margin': second_margin,
+                    'lr_confident': False,
+                    'second_confident': False,
+                    'second_used': True,
+                    'message': 'Модели не уверены в решении',
+                    'decision_path': path
+                }
+    return decisions, manual_mask, stats

app/models/interpretation.py ADDED Viewed

	@@ -0,0 +1,194 @@

+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+FEATURE_DESCRIPTIONS = { ... }
+def get_feature_display_name(feature_name):
+    if feature_name in FEATURE_DESCRIPTIONS:
+        return FEATURE_DESCRIPTIONS[feature_name]
+    name = feature_name.replace('_', ' ').title()
+    name = name.replace('Over', '>')
+    name = name.replace('Loans', 'Кредитов')
+    return name
+def interpret_lr(features, lr_model, feature_names):
+    """Интерпретация логистической регрессии"""
+    if isinstance(features, np.ndarray):
+        features = pd.DataFrame(features, columns=feature_names)
+    coefficients = lr_model.coef_[0]
+    intercept = lr_model.intercept_[0]
+    importance_df = pd.DataFrame({
+        'feature': feature_names,
+        'coefficient': coefficients,
+        'value': features.iloc[0].values
+    })
+    importance_df['logit_contribution'] = importance_df['coefficient'] * importance_df['value']
+    importance_df['abs_logit'] = abs(importance_df['logit_contribution'])
+    importance_df = importance_df.sort_values('abs_logit', ascending=False)
+    base_proba = lr_model.predict_proba(features)[0, 1]
+    marginal_effects = []
+    features_array = features.values
+    for i, feature in enumerate(feature_names):
+        features_zero = features_array.copy()
+        features_zero[0, i] = 0
+        zero_proba = lr_model.predict_proba(features_zero)[0, 1]
+        marginal_effect = base_proba - zero_proba
+        marginal_effects.append({
+            'feature': feature,
+            'marginal_effect': marginal_effect,
+            'abs_marginal': abs(marginal_effect)
+        })
+    marginal_df = pd.DataFrame(marginal_effects).sort_values('abs_marginal', ascending=False)
+    logit = intercept + importance_df['logit_contribution'].sum()
+    proba = 1 / (1 + np.exp(-logit))
+    return {
+        'logit_contributions': importance_df,
+        'marginal_effects': marginal_df,
+        'probability': proba,
+        'logit': logit,
+        'intercept': intercept
+    }
+def plot_feature_importance_sns(importance_df, value_col='logit_contribution', title="Вклад признаков в логит"):
+    df = importance_df.head(10).copy()
+    df = df.sort_values(value_col, ascending=True)
+    fig, ax = plt.subplots(figsize=(10, 6), facecolor='#f8f9fa')
+    ax.set_facecolor('#f8f9fa')
+    colors = ['#d7191c' if x > 0 else '#1a9641' if x < 0 else '#ffffbf' for x in df[value_col]]
+    bars = ax.barh(df['feature'], df[value_col], color=colors, edgecolor='white', linewidth=1.5, alpha=0.9)
+    for bar, val in zip(bars, df[value_col]):
+        if abs(val) > 0.02:
+            x_pos = val - 0.02 if val > 0 else val + 0.02
+            ha = 'right' if val > 0 else 'left'
+            ax.text(x_pos, bar.get_y() + bar.get_height() / 2, f'{val:.3f}', ha=ha, va='center', fontsize=9)
+    ax.axvline(x=0, color='#495057', linestyle='-', linewidth=1, alpha=0.3)
+    ax.grid(axis='x', alpha=0.15, linestyle='--', color='#adb5bd')
+    ax.set_axisbelow(True)
+    ax.set_xlabel('Вклад в логит', fontsize=11)
+    ax.set_ylabel('')
+    ax.set_title(title, fontsize=12, fontweight='bold', pad=15)
+    ax.set_yticklabels([get_feature_display_name(x) for x in df['feature']], fontsize=10)
+    ax.set_yticklabels([get_feature_display_name(x) for x in df['feature']], fontsize=10)
+    sns.despine(top=True, right=True, left=False, bottom=False)
+    plt.tight_layout()
+    return fig
+def plot_marginal_effects_sns(marginal_df, title="Влияние на вероятность дефолта"):
+    df = marginal_df.head(10).copy()
+    df = df.sort_values('marginal_effect', ascending=True)
+    fig, ax = plt.subplots(figsize=(10, 6), facecolor='#f8f9fa')
+    ax.set_facecolor('#f8f9fa')
+    colors = ['#d7191c' if x > 0 else '#1a9641' if x < 0 else '#ffffbf' for x in df['marginal_effect']]
+    bars = ax.barh(df['feature'], df['marginal_effect'], color=colors, edgecolor='white', linewidth=1.5, alpha=0.9)
+    for bar, val in zip(bars, df['marginal_effect']):
+        if abs(val) > 0.01:
+            x_pos = val - 0.01 if val > 0 else val + 0.01
+            ha = 'right' if val > 0 else 'left'
+            ax.text(x_pos, bar.get_y() + bar.get_height() / 2, f'{val:.1%}', ha=ha, va='center', fontsize=9)
+    ax.axvline(x=0, color='#495057', linestyle='-', linewidth=1, alpha=0.3)
+    ax.grid(axis='x', alpha=0.15, linestyle='--', color='#adb5bd')
+    ax.set_axisbelow(True)
+    ax.set_xlabel('Изменение вероятности', fontsize=11)
+    ax.set_ylabel('')
+    ax.set_title(title, fontsize=12, fontweight='bold', pad=15)
+    ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{x:.0%}'))
+    ax.set_yticklabels([get_feature_display_name(x) for x in df['feature']], fontsize=10)
+    sns.despine(top=True, right=True, left=False, bottom=False)
+    plt.tight_layout()
+    return fig
+def plot_shap_analysis(second_model, processed_scaled, feature_names, second_model_name):
+    """Отображение SHAP анализа для tree-based моделей"""
+    import streamlit as st
+    st.markdown("---")
+    st.subheader(f"⚡ Детальный анализ: {second_model_name} (SHAP)")
+    with st.spinner("🔄 Рассчитываем SHAP значения..."):
+        try:
+            import shap
+            # Создаем explainer и считаем SHAP
+            explainer = shap.TreeExplainer(second_model)
+            shap_values = explainer.shap_values(processed_scaled)
+            # Для бинарной классификации
+            if isinstance(shap_values, list):
+                shap_values = shap_values[1]
+            # 1. Waterfall plot
+            fig, ax = plt.subplots(figsize=(12, 7))
+            shap.waterfall_plot(
+                shap.Explanation(
+                    values=shap_values[0],
+                    base_values=explainer.expected_value,
+                    data=processed_scaled.iloc[0].values,
+                    feature_names=feature_names
+                ),
+                show=False,
+            )
+            plt.tight_layout()
+            st.pyplot(fig)
+            # 2. Объяснение как читать график
+            with st.expander("📋 Как читать SHAP график?"):
+                st.markdown("""
+                - **f(x)** = итоговое предсказание модели
+                - **base value** = среднее предсказание по всем клиентам
+                - 🔴 Красное → признаки, повышающие риск
+                - 🔵 Синее → признаки, снижающие риск
+                """)
+            # 3. Таблица с SHAP значениями
+            shap_df = pd.DataFrame({
+                'feature': feature_names,
+                'shap_value': shap_values[0],
+                'abs_shap': abs(shap_values[0])
+            }).sort_values('abs_shap', ascending=False)
+            shap_df['description'] = shap_df['feature'].apply(get_feature_display_name)
+            st.markdown("### 📋 Факторы, влияющие на решение:")
+            col1, col2 = st.columns(2)
+            with col1:
+                pos = shap_df[shap_df['shap_value'] > 0].head(5)
+                if len(pos) > 0:
+                    st.markdown("**🔴 Повышают риск:**")
+                    for _, row in pos.iterrows():
+                        st.markdown(f"- {row['description']}: +{row['shap_value']:.3f}")
+            with col2:
+                neg = shap_df[shap_df['shap_value'] < 0].head(5)
+                if len(neg) > 0:
+                    st.markdown("**🟢 Снижают риск:**")
+                    for _, row in neg.iterrows():
+                        st.markdown(f"- {row['description']}: {row['shap_value']:.3f}")
+            with st.expander("📋 Все SHAP значения"):
+                display_df = shap_df[['feature', 'description', 'shap_value']].copy()
+                display_df.columns = ['Признак', 'Описание', 'SHAP']
+                display_df['SHAP'] = display_df['SHAP'].round(3)
+                st.dataframe(display_df.sort_values('SHAP', ascending=False), width='stretch')
+        except Exception as e:
+            st.error(f"❌ Ошибка SHAP: {e}")
+            st.info("Установите shap: `pip install shap`")

app/pages/__pycache__/application.cpython-311.pyc ADDED Viewed

Binary file (22.7 kB). View file

app/pages/__pycache__/simulation.cpython-311.pyc ADDED Viewed

Binary file (8.93 kB). View file

app/pages/application.py ADDED Viewed

	@@ -0,0 +1,329 @@

+import streamlit as st
+import pandas as pd
+import os
+from app.utils.data_loader import load_artifacts
+from app.models.escalation import escalation_decision
+from app.models.interpretation import (
+    interpret_lr, plot_feature_importance_sns,
+    plot_marginal_effects_sns, plot_shap_analysis,
+    get_feature_display_name
+)
+from app.utils.credit_preprocessor import check_business_rules
+# Пути
+PROJECT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
+MODELS_PATH = os.path.join(PROJECT_PATH, 'models/best/train_150/')
+PREPROCESSOR_PATH = os.path.join(PROJECT_PATH, 'preprocessors/')
+def main():
+    st.title("🏦 Кредитный скоринг - Анкета")
+    # Загрузка артефактов
+    preprocessor, scaler, models = load_artifacts(MODELS_PATH, PREPROCESSOR_PATH)
+    # Инициализация статистики
+    if 'stats' not in st.session_state:
+        st.session_state.stats = {
+            'total': 0,
+            'manual': 0,
+            'lr_confident': 0,
+            'second_used': 0,
+            'second_confident': 0,
+            'approved': 0,
+            'declined': 0
+        }
+    if 'step' not in st.session_state:
+        st.session_state.step = 'input'
+    # ВВОД ДАННЫХ
+    if st.session_state.step == 'input':
+        st.header("📋 Анкета заемщика")
+        with st.form("credit_form"):
+            st.subheader("👤 Личная информация")
+            col1, col2 = st.columns(2)
+            with col1:
+                age = st.number_input("Возраст", 0, 150, 35)
+            with col2:
+                dependents = st.number_input("Иждивенцы", 0, 20, 0)
+            st.subheader("💰 Ежемесячный доход")
+            income_method = st.radio("Способ указания дохода", ["Слайдер (до 20,000$)", "Точное значение"],
+                                     horizontal=True)
+            st.subheader("💳 Ежемесячные платежи")
+            debt_method = st.radio("Способ указания платежей", ["Слайдер (до 10,000$)", "Точное значение"],
+                                   horizontal=True)
+            st.subheader("📊 Кредитная история")
+            credit_lines = st.number_input("Открытых кредитов и карт", 0, 100, 5)
+            real_estate = st.number_input("Кредитов под залог недвижимости", 0, 100, 1)
+            st.subheader("📈 Использование лимитов")
+            util_method = st.radio("Уровень использования",
+                                   ["Норма (0-100%)", "Овердрафт (100-200%)", "Экстремальный (>200%)"], horizontal=True)
+            st.subheader("⏱️ Просрочки за последние 2 года")
+            col1, col2, col3 = st.columns(3)
+            with col1:
+                late_30_59 = st.number_input("30-59 дней", 0, 100, 0)
+            with col2:
+                late_60_89 = st.number_input("60-89 дней", 0, 100, 0)
+            with col3:
+                late_90 = st.number_input("90+ дней", 0, 100, 0)
+            submitted = st.form_submit_button("➡️ Далее: указать точные значения")
+        if submitted:
+            st.session_state.update({
+                'age': age, 'dependents': dependents, 'income_method': income_method,
+                'debt_method': debt_method, 'credit_lines': credit_lines,
+                'real_estate': real_estate, 'util_method': util_method,
+                'late_30_59': late_30_59, 'late_60_89': late_60_89, 'late_90': late_90
+            })
+            st.session_state.step = 'values'
+            st.rerun()
+    # ВВОД ТОЧНЫХ ЗНАЧЕНИЙ
+    elif st.session_state.step == 'values':
+        st.header("💰 Укажите точные значения")
+        with st.form("values_form"):
+            col1, col2 = st.columns(2)
+            with col1:
+                st.subheader("Доход")
+                if st.session_state.income_method == "Слайдер (до 20,000$)":
+                    monthly_income = st.slider("Ежемесячный доход ($)", 0, 20000, 5000)
+                else:
+                    monthly_income = st.number_input("Ежемесячный доход ($)", 0, 1000000, 5000)
+            with col2:
+                st.subheader("Платежи")
+                if st.session_state.debt_method == "Слайдер (до 10,000$)":
+                    monthly_debt = st.slider("Ежемесячные платежи ($)", 0, 10000, 1500)
+                else:
+                    monthly_debt = st.number_input("Ежемесячные платежи ($)", 0, 1000000, 1500)
+            st.subheader("📈 Использование лимитов")
+            if st.session_state.util_method == "Норма (0-100%)":
+                util_value = st.slider("Процент использования", 0, 100, 20)
+                utilization = util_value / 100
+            elif st.session_state.util_method == "Овердрафт (100-200%)":
+                util_value = st.slider("Процент использования", 100, 200, 120)
+                utilization = util_value / 100
+            else:
+                st.warning("Экстремальное использование (>200%) - автоматический ручной разбор")
+                utilization = st.number_input("Процент использования", 200, 1000, 200) / 100
+            submitted = st.form_submit_button("✅ Получить решение")
+            # САЙДБАР
+            with st.sidebar:
+                st.markdown("---")
+                st.subheader("⚙️ Настройки")
+                with st.expander("🎯 Пороги уверенности", expanded=False):
+                    threshold = st.slider("Порог одобрения", 0.3, 0.7, 0.5, 0.05)
+                    lr_margin = st.slider("Отступ LR", 0.2, 0.5, 0.35, 0.05)
+                    second_margin = st.slider("Отступ второй модели", 0.2, 0.5, 0.4, 0.05)
+                with st.expander("🤖 Выбор модели", expanded=False):
+                    available_models = [name for name in models.keys() if name != 'Logistic Regression']
+                    second_model_name = st.selectbox("Модель для эскалации", available_models)
+                with st.expander("📊 Статистика", expanded=False):
+                    stats = st.session_state.stats
+                    if stats['total'] > 0:
+                        st.metric("Всего заявок", stats['total'])
+                        st.metric("Ручной разбор", f"{stats['manual'] / stats['total']:.1%}")
+                        st.metric("LR уверена", f"{stats['lr_confident'] / stats['total']:.1%}")
+                        if stats['second_used'] > 0:
+                            st.metric("Вторая модель уверена",
+                                      f"{stats['second_confident'] / stats['second_used']:.1%}")
+                        if st.button("🔄 Сброс"):
+                            st.session_state.stats = {'total': 0, 'manual': 0, 'lr_confident': 0,
+                                                      'second_used': 0, 'second_confident': 0,
+                                                      'approved': 0, 'declined': 0}
+                            st.rerun()
+                    else:
+                        st.info("Нет данных")
+                with st.expander("ℹ️ О проекте", expanded=False):
+                    st.markdown(f"""
+                        **Модели:**
+                        - Logistic Regression
+                        - {', '.join(available_models)}
+                        **AUC:** 0.8578 (LR), ~0.87 (остальные)
+                    """)
+            st.session_state.threshold = threshold
+            st.session_state.lr_margin = lr_margin
+            st.session_state.second_margin = second_margin
+            st.session_state.second_model_name = second_model_name
+        if submitted:
+            debt_ratio = monthly_debt / monthly_income if monthly_income > 0 else monthly_debt
+            # Подготовка данных (ОДИН РАЗ)
+            input_data = pd.DataFrame([{
+                'RevolvingUtilizationOfUnsecuredLines': utilization,
+                'age': st.session_state.age,
+                'NumberOfTime30-59DaysPastDueNotWorse': st.session_state.late_30_59,
+                'DebtRatio': debt_ratio,
+                'MonthlyIncome': monthly_income,
+                'NumberOfOpenCreditLinesAndLoans': st.session_state.credit_lines,
+                'NumberOfTimes90DaysLate': st.session_state.late_90,
+                'NumberRealEstateLoansOrLines': st.session_state.real_estate,
+                'NumberOfTime60-89DaysPastDueNotWorse': st.session_state.late_60_89,
+                'NumberOfDependents': st.session_state.dependents
+            }])
+            st.markdown("---")
+            with st.spinner("🔄 Анализ заявки..."):
+                lr_model = models['Logistic Regression']
+                second_model = models[second_model_name]
+                # Единый вызов эскалации (включает бизнес-правила)
+                decisions, manual_mask, task = escalation_decision(
+                    input_data,
+                    lr_model,
+                    second_model,
+                    second_model_name,
+                    threshold=st.session_state.threshold,
+                    lr_margins=[st.session_state.lr_margin],
+                    second_margins=[st.session_state.second_margin],
+                    preprocessor=preprocessor,
+                    scaler=scaler
+                )
+                decision = decisions[0]
+                # Для интерпретации LR нужны обработанные данные
+                processed = preprocessor.transform(input_data)
+                processed_scaled = scaler.transform(processed)
+                # Обновление статистики
+                st.session_state.stats['total'] += 1
+                if decision['needs_review']:
+                    st.session_state.stats['manual'] += 1
+                else:
+                    if decision['final_decision'] == 0:
+                        st.session_state.stats['approved'] += 1
+                    else:
+                        st.session_state.stats['declined'] += 1
+                if decision.get('lr_confident', False):
+                    st.session_state.stats['lr_confident'] += 1
+                if decision.get('second_used', False):
+                    st.session_state.stats['second_used'] += 1
+                    if decision.get('second_confident', False):
+                        st.session_state.stats['second_confident'] += 1
+                # ОТОБРАЖЕНИЕ РЕЗУЛЬТАТОВ
+                st.subheader("🔄 Цепочка принятия решения")
+                for step in decision['decision_path']:
+                    st.write(step)
+                col1, col2 = st.columns(2)
+                with col1:
+                    st.markdown("**🏦 Logistic Regression**")
+                    st.metric("Вероятность", f"{decision['lr_proba']:.1%}")
+                    st.write(f"Отступ: {decision['lr_margin']:.1%}")
+                    if decision['lr_confident']:
+                        st.success("✅ Уверена")
+                    else:
+                        st.warning("⚠️ Не уверена")
+                with col2:
+                    st.markdown(f"**⚡ {second_model_name}**")
+                    if decision['second_used']:
+                        st.metric("Вероятность", f"{decision['second_proba']:.1%}")
+                        st.write(f"Отступ: {decision['second_margin']:.1%}")
+                        if decision['second_confident']:
+                            st.success("✅ Уверен")
+                        else:
+                            st.warning("⚠️ Не уверен")
+                    else:
+                        st.info("⏳ Не вызывался")
+                st.markdown("---")
+                if decision['needs_review']:
+                    st.warning("👨‍💼 **РУЧНОЙ РАЗБОР**")
+                    st.info("Модели не уверены - требуется проверка специалистом")
+                else:
+                    col1, col2 = st.columns(2)
+                    with col1:
+                        if decision['final_decision'] == 0:
+                            st.success("✅ **КРЕДИТ ОДОБРЕН**")
+                        else:
+                            st.error("❌ **КРЕДИТ НЕ ОДОБРЕН**")
+                    with col2:
+                        st.metric("Модель", decision['model_used'])
+                # ДЕТАЛЬНЫЙ АНАЛИЗ LR
+                st.markdown("---")
+                st.subheader("🔍 Детальный анализ: Logistic Regression")
+                feature_names = processed_scaled.columns.tolist()
+                interpretation = interpret_lr(processed_scaled, lr_model, feature_names)
+                tab1, tab2 = st.tabs(["📊 Вклад в логит", "📈 Влияние на вероятность"])
+                with tab1:
+                    st.markdown("🔴 Положительный вклад = ↑ риск, 🟢 Отрицательный = ↓ риск")
+                    fig1 = plot_feature_importance_sns(interpretation['logit_contributions'])
+                    st.pyplot(fig1)
+                    with st.expander("📋 Все вклады"):
+                        display_df = interpretation['logit_contributions'][
+                            ['feature', 'value', 'coefficient', 'logit_contribution']].copy()
+                        display_df['Описание'] = display_df['feature'].apply(get_feature_display_name)
+                        display_df = display_df[['Описание', 'value', 'coefficient', 'logit_contribution']]
+                        display_df.columns = ['Признак', 'Значение', 'Коэф', 'Вклад']
+                        display_df = display_df.round(3)
+                        st.dataframe(display_df)
+                with tab2:
+                    st.markdown("🔴 Положительное = фактор ↑ риск, 🟢 Отрицательное = ↓ риск")
+                    fig2 = plot_marginal_effects_sns(interpretation['marginal_effects'])
+                    st.pyplot(fig2)
+                    with st.expander("📋 Все эффекты"):
+                        display_df = interpretation['marginal_effects'][['feature', 'marginal_effect']].copy()
+                        display_df['Описание'] = display_df['feature'].apply(get_feature_display_name)
+                        display_df = display_df[['Описание', 'marginal_effect']]
+                        display_df.columns = ['Признак', 'Влияние']
+                        display_df['Влияние'] = display_df['Влияние'].map('{:.1%}'.format)
+                        st.dataframe(display_df)
+                st.info(f"Итоговая вероятность дефолта (LR): {interpretation['probability']:.1%}")
+                # ДЕТАЛЬНЫЙ АНАЛИЗ ВТОРОЙ МОДЕЛИ (SHAP для tree-based)
+                if decision['second_used'] and second_model_name in ['XGBoost', 'LightGBM', 'Random Forest', 'CatBoost']:
+                    plot_shap_analysis(second_model, processed_scaled, feature_names, second_model_name)
+        # КНОПКА НАЗАД
+        if st.button("◀️ Вернуться к выбору способов"):
+            st.session_state.step = 'input'
+            st.rerun()
+    st.markdown("---")
+    col1, col2, col3 = st.columns([1, 2, 1])
+    with col2:
+        if st.button("🏠 На главную", use_container_width=True):
+            st.switch_page("main.py")
+    st.markdown("---")
+    st.caption("🏦 GiveMeSomeCredit - Интерпретируемый кредитный скоринг | Модели: Logistic Regression + выбор")
+if __name__ == "__main__":
+    main()

app/pages/simulation.py ADDED Viewed

	@@ -0,0 +1,345 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import os
+import sys
+import tempfile
+import time
+from datetime import datetime
+from PIL import Image
+import matplotlib.pyplot as plt
+# Остальные импорты...
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from app.utils.data_loader import load_artifacts
+from app.simulation.core.traffic_generator import TrafficGenerator
+from app.simulation.core.processor import ApplicationProcessor
+from app.simulation.controllers.pid import PIDController
+from app.simulation.visualization.plots import (
+    plot_queue_dynamics,
+    plot_specialist_load,
+    plot_inflow,
+    plot_parameters_history,
+    plot_detailed_decisions
+)
+# ============================================================================
+# БЛОК АНИМАЦИИ: Импорт функций для визуализации
+# ============================================================================
+from app.simulation.visualization.animation import create_simulation_video
+# ============================================================================
+def minutes_to_time(minutes, start_time="00:00"):
+    """Преобразует минуты от старта в строку времени ЧЧ:ММ"""
+    start_hour, start_min = map(int, start_time.split(':'))
+    total_minutes = start_hour * 60 + start_min + minutes
+    hour = (total_minutes // 60) % 24
+    minute = total_minutes % 60
+    return f"{hour:02d}:{minute:02d}"
+def main():
+    st.title("📊 Симуляция работы системы")
+    # Загрузка артефактов
+    PROJECT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
+    MODELS_PATH = os.path.join(PROJECT_PATH, 'models/best/train_150/')
+    PREPROCESSOR_PATH = os.path.join(PROJECT_PATH, 'preprocessors/')
+    TEST_DATA_PATH = os.path.join(PROJECT_PATH, 'datasets/cs-test.csv')
+    preprocessor, scaler, models = load_artifacts(MODELS_PATH, PREPROCESSOR_PATH)
+    available_models = [name for name in models.keys() if name != 'Logistic Regression']
+    # В сайдбаре добавляем выбор
+    st.sidebar.subheader("🤖 Выбор модели")
+    second_model_name = st.sidebar.selectbox(
+        "Вторая модель для эскалации",
+        available_models,
+        index=0
+    )
+    # Параметры симуляции
+    st.sidebar.header("⚙️ Параметры")
+    # ============================================================================
+    # БЛОК АНИМАЦИИ: Ограничение количества специалистов до 400 для таблицы 20x20
+    # ============================================================================
+    specialists_count = st.sidebar.slider("Количество специалистов (модели)", 10, 400, 100, 10)
+    # ============================================================================
+    business_specialists_count = st.sidebar.slider("Количество экспертов (бизнес-правила)", 1, 100, 30, 1)
+    business_time = st.sidebar.slider("Время обработки бизнес правил(мин)", 5, 30, 15, 5)
+    base_time = st.sidebar.slider("Базовое время обработки (мин)", 2, 15, 5)
+    target_load = st.sidebar.slider(
+        "Целевая загрузка специалистов", 0.5, 1.0, 0.8, 0.05,
+        help="0.8 = 80% - оставляем запас на пики")
+    st.sidebar.subheader("🎯 Порог одобрения")
+    fixed_threshold = st.sidebar.slider(
+        "Порог (фиксированный)",
+        0.3, 0.7, 0.5, 0.05,
+        help="Порог одобрения - стратегический параметр, не меняется PID"
+    )
+    st.sidebar.subheader("🎯 Начальные отступы (%)")
+    lr_low_pct = st.sidebar.slider("LR нижний отступ (% от порога)", 0, 100, 20, 5,
+                                   help="% от расстояния между 0 и порогом")
+    lr_high_pct = st.sidebar.slider("LR верхний отступ (% от 1-порога)", 0, 100, 20, 5,
+                                    help="% от расстояния между порогом и 1")
+    second_low_pct = st.sidebar.slider("Вторая модель нижний (%)", 0, 100, 20, 5)
+    second_high_pct = st.sidebar.slider("Вторая модель верхний (%)", 0, 100, 20, 5)
+    # Преобразуем проценты в абсолютные значения
+    init_lr_low = fixed_threshold * lr_low_pct / 100
+    init_lr_high = (1 - fixed_threshold) * lr_high_pct / 100
+    init_second_low = fixed_threshold * second_low_pct / 100
+    init_second_high = (1 - fixed_threshold) * second_high_pct / 100
+    # Параметры PID
+    st.sidebar.subheader("🎛️ PID регулятор")
+    use_pid = st.sidebar.checkbox("Включить PID", value=True)
+    # ============================================================================
+    # БЛОК АНИМАЦИИ: Переключатель для создания GIF
+    # ============================================================================
+    st.sidebar.subheader("🎬 Анимация")
+    create_gif = st.sidebar.checkbox("Создать GIF после симуляции", value=False)
+    gif_fps = st.sidebar.slider("FPS для GIF", 5, 30, 10, 5)
+    # ============================================================================
+    if use_pid:
+        kp = st.sidebar.slider("P (пропорциональный)", 0.0, 1.0, 0.33)
+        ki = st.sidebar.slider("I (интегральный)", 0.0, 1.0, 0.03)
+        kd = st.sidebar.slider("D (дифференциальный)", 0.0, 1.0, 0.22)
+        w_load = st.sidebar.slider("Вес загрузки", 0.0, 1.0, 0.3)
+    # Кнопка запуска
+    if st.button("🎬 Запустить симуляцию 24 часа"):
+        with st.spinner(f"Загрузка данных и симуляция..."):
+            # 1. Загружаем тестовый датасет
+            test_df = pd.read_csv(TEST_DATA_PATH)
+            if 'SeriousDlqin2yrs' in test_df.columns:
+                test_df = test_df.drop(columns=['SeriousDlqin2yrs'])
+            test_pool = test_df.to_dict('records')
+            # 2. Генерируем распределение заявок по минутам
+            current_time = datetime.now()
+            start_hour = current_time.hour
+            start_minute = current_time.minute
+            gen = TrafficGenerator(total_applications=len(test_pool))
+            minute_counts = gen.generate_minute_counts(start_hour=start_hour, start_minute=start_minute)
+            # Сохраняем для графиков
+            st.session_state.start_time = f"{start_hour:02d}:{start_minute:02d}"
+            st.session_state.minute_counts = minute_counts
+            # 3. Создаём процессор
+            processor = ApplicationProcessor(
+                lr_model=models['Logistic Regression'],
+                second_model=models[second_model_name],
+                second_model_name=second_model_name,
+                specialists_count=specialists_count,
+                business_specialists_count=business_specialists_count,
+                base_processing_time=base_time,
+                business_processing_time=business_time
+            )
+            # 4. Создаём PID если нужно
+            if use_pid:
+                pid = PIDController(
+                    init_threshold=fixed_threshold,
+                    kp_load=kp, ki_load=ki, kd_load=kd,
+                    load_weight=w_load,
+                    init_lr_low=init_lr_low,
+                    init_lr_high=init_lr_high,
+                    init_second_low=init_second_low,
+                    init_second_high=init_second_high,
+                    target_load=target_load
+                )
+            else:
+                pid = None
+            # 5. Симуляция по минутам
+            pool_copy = test_pool.copy()
+            idx = 0
+            progress_bar = st.progress(0)
+            n_steps = len(minute_counts)
+            # ============================================================================
+            # БЛОК АНИМАЦИИ: Сбор данных для кадров
+            # ============================================================================
+            animation_frames = []  # список для хранения кадров анимации
+            # ============================================================================
+            for step, n_apps in enumerate(minute_counts):
+                # Берём заявки из пула
+                batch = pool_copy[idx:idx + n_apps]
+                idx += n_apps
+                # Получаем текущие параметры
+                if pid:
+                    margins = pid.get_margins()
+                    lr_margins = [margins['lr_low'], margins['lr_high']]
+                    second_margins = [margins['second_low'], margins['second_high']]
+                    threshold = fixed_threshold
+                else:
+                    lr_margins = [0.35]
+                    second_margins = [0.4]
+                    threshold = fixed_threshold
+                # Обрабатываем батч
+                result = processor.process_batch(
+                    batch, preprocessor, scaler,
+                    threshold=threshold,
+                    lr_margins=lr_margins,
+                    second_margins=second_margins,
+                    current_time=step
+                )
+                # Обновляем PID
+                if pid:
+                    load = result['specialists_busy'] / specialists_count
+                    pid.update(load)
+                # ============================================================================
+                # БЛОК АНИМАЦИИ: Сохраняем кадр каждые 10 минут (чтобы не было 1440 кадров)
+                # ============================================================================
+                # --- Внутри цикла симуляции в simulation.py ---
+                # Записываем КАЖДУЮ минуту для плавности
+                if step % 1 == 0 or step == n_steps - 1:
+                    specialist_states = processor.specialists.copy()
+                    frame_data = {
+                        'time': step,
+                        'step': step,  # Добавь это поле для совместимости с кодом видео
+                        'time_str': minutes_to_time(step, st.session_state.start_time),
+                        'inflow': n_apps,
+                        'inflow_history': st.session_state.minute_counts[:step + 1],
+                        'load_history': [v / specialists_count for v in processor.stats['specialist_busy'][:step + 1]],
+                        'queue': result['queue_size'],
+                        'business_queue': result.get('business_queue_size', 0),
+                        'load': load if pid else 0,
+                        'specialist_states': specialist_states,
+                        'cumulative': {
+                            'total_processed': processor.stats['total_processed'],
+                            'auto_approved': processor.stats['auto_approved'],
+                            'auto_declined': processor.stats['auto_declined'],
+                            'manual_processed': processor.stats['manual_processed'],
+                            'business_manual_processed': processor.stats.get('business_manual_processed', 0)
+                        }
+                    }
+                    animation_frames.append(frame_data)
+                # ============================================================================
+                # Обновляем прогресс
+                progress_bar.progress((step + 1) / n_steps)
+            # 6. Сохраняем результаты
+            st.session_state.processor = processor
+            st.session_state.pid_history = pid.get_history() if pid else None
+            st.session_state.simulation_done = True
+            st.session_state.batch_stats = processor.batch_stats
+            # ============================================================================
+            # БЛОК АНИМАЦИИ: Сохраняем кадры в session_state
+            # ============================================================================
+            st.session_state.animation_frames = animation_frames
+            # ============================================================================
+    # Отображение результатов
+    if st.session_state.get('simulation_done', False):
+        st.success("✅ Симуляция завершена!")
+        stats = st.session_state.processor.stats
+        # Быстрая статистика
+        col1, col2, col3, col4, col5 = st.columns(5)
+        col1.metric("Всего заявок", stats['total_processed'])
+        col2.metric("Одобрено авто", stats['auto_approved'])
+        col3.metric("Отказ авто", stats['auto_declined'])
+        col4.metric("Ручной разбор", stats['manual_processed'])
+        manual_rate = stats['manual_sent'] / stats['total_processed'] * 100 if stats['total_processed'] > 0 else 0
+        col5.metric("Ручной разбор %", f"{manual_rate:.1f}%")
+        # Графики - ТОЛЬКО ВЫЗОВЫ ФУНКЦИЙ ИЗ plots.py
+        st.subheader("📈 Графики")
+        # Очереди
+        st.pyplot(plot_queue_dynamics(
+            queue_history=stats['queue_history'],
+            business_queue_history=stats.get('business_queue_history'),
+            start_time=st.session_state.get('start_time', '00:00')
+        ))
+        plt.close()
+        # Загрузка специалистов
+        st.pyplot(plot_specialist_load(
+            specialist_busy_history=stats['specialist_busy'],
+            specialists_count=specialists_count,
+            start_time=st.session_state.get('start_time', '00:00')
+        ))
+        plt.close()
+        st.pyplot(plot_inflow(
+            minute_counts=st.session_state.minute_counts,
+            start_time=st.session_state.get('start_time', '00:00')
+        ))
+        plt.close()
+        # Детальный анализ решений
+        st.pyplot(plot_detailed_decisions(
+            batch_stats=st.session_state.batch_stats,
+            second_model_name=second_model_name,
+            start_time=st.session_state.get('start_time', '00:00')
+        ))
+        plt.close()
+        # Параметры PID
+        st.pyplot(plot_parameters_history(
+            pid_history=st.session_state.pid_history,
+            second_model_name=second_model_name,
+            start_time=st.session_state.get('start_time', '00:00')
+        ))
+        plt.close()
+        # ============================================================================
+        # НОВЫЙ БЛОК: Генерация видео (Стратегия для HuggingFace)
+        # ============================================================================
+        if st.session_state.get('animation_frames'):
+            st.divider()
+            st.subheader("🎥 Настройки видео-отчета")
+            col_v1, col_v2 = st.columns(2)
+            with col_v1:
+                # Слайдер для шага кадров (среза)
+                v_step = st.slider("Шаг кадров (1 = каждая минута)", 1, 30, 1,
+                                   help="Чем меньше шаг, тем плавнее видео, но дольше рендеринг")
+            with col_v2:
+                # Слайдер для FPS
+                v_fps = st.slider("Скорость видео (FPS)", 10, 60, 24,
+                                  help="Количество кадров в секунду")
+            if st.button("🎬 Сгенерировать видео", type="primary", use_container_width=True):
+                with st.spinner("Рендеринг видео..."):
+                    from app.simulation.visualization.animation import create_simulation_video
+                    # Используем выбранные в слайдерах параметры
+                    video_path = create_simulation_video(
+                        st.session_state.animation_frames[::v_step],
+                        specialists_count,
+                        second_model_name,
+                        fps=v_fps  # Передаем FPS в функцию
+                    )
+                    st.video(video_path)
+                    st.success("✅ Видео готово! Вы можете его скачать или перематывать.")
+        # --- ВОТ ЭТОТ БЛОК У ТЕБЯ УЖЕ ЕСТЬ В КОНЦЕ ФАЙЛА ---
+        st.write("")
+        col1, col2, col3 = st.columns([1, 2, 1])
+        with col2:
+            if st.button("🏠 На главную", use_container_width=True):
+                st.switch_page("main.py")
+if __name__ == "__main__":
+    main()

app/simulation/.DS_Store ADDED Viewed

Binary file (8.2 kB). View file

app/simulation/__init__.py ADDED Viewed

File without changes

app/simulation/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (180 Bytes). View file

app/simulation/controllers/__init__.py ADDED Viewed

File without changes

app/simulation/controllers/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (192 Bytes). View file

app/simulation/controllers/__pycache__/base.cpython-311.pyc ADDED Viewed

Binary file (1.72 kB). View file

app/simulation/controllers/__pycache__/pid.cpython-311.pyc ADDED Viewed

Binary file (5.3 kB). View file

app/simulation/controllers/base.py ADDED Viewed

	@@ -0,0 +1,28 @@

+from abc import ABC, abstractmethod
+class BaseController(ABC):
+    """Базовый класс для всех контроллеров"""
+    def __init__(self, name="Base"):
+        self.name = name
+        self.history = []
+    @abstractmethod
+    def update(self, current_state, target_state, dt=1.0):
+        """
+        Рассчитывает новые параметры управления
+        Параметры:
+        - current_state: текущее состояние системы (очередь, загрузка)
+        - target_state: целевое состояние
+        - dt: шаг времени
+        Возвращает:
+        - новые пороги и отступы
+        """
+        pass
+    def get_margins(self, hour=None):
+        """Возвращает текущие отступы для LR и второй модели"""
+        pass

app/simulation/controllers/pid.py ADDED Viewed

	@@ -0,0 +1,129 @@

+import numpy as np
+import pandas as pd
+from .base import BaseController
+class PIDController(BaseController):
+    """PID-регулятор для управления отступами на основе загрузки специалистов"""
+    def __init__(self, name="PID",
+                 kp_load=0.1, ki_load=0.01, kd_load=0.05,
+                 load_weight=1.0,
+                 # Начальные значения параметров
+                 init_threshold=0.5,
+                 init_lr_low=0.3, init_lr_high=0.4,
+                 init_second_low=0.35, init_second_high=0.45,
+                 target_load=0.8):
+        super().__init__(name)
+        # Коэффициенты PID для загрузки
+        self.kp_load = kp_load
+        self.ki_load = ki_load
+        self.kd_load = kd_load
+        self.load_weight = load_weight
+        self.target_load = target_load
+        # Состояния PID
+        self.prev_error_load = 0
+        self.integral_load = 0
+        # Начальные параметры
+        self.init_threshold = init_threshold
+        self.init_lr_low = init_lr_low
+        self.init_lr_high = init_lr_high
+        self.init_second_low = init_second_low
+        self.init_second_high = init_second_high
+        # Текущие параметры (отступы)
+        self.threshold = init_threshold
+        self.lr_low = init_lr_low
+        self.lr_high = init_lr_high
+        self.second_low = init_second_low
+        self.second_high = init_second_high
+        # Границы отступов
+        self.bounds = {
+            'lr_low': (0.05, self.threshold - 0.05),
+            'lr_high': (0.05, 1 - self.threshold - 0.05),
+            'second_low': (0.05, self.threshold - 0.05),
+            'second_high': (0.05, 1 - self.threshold - 0.05)
+        }
+        # Ограничение интеграла
+        self.integral_limit = 1.0
+    def update(self, current_load):
+        """
+        current_load: текущая загрузка специалистов (0-1)
+        Остальные параметры оставлены для совместимости, но не используются
+        """
+        # Ошибка по загрузке
+        error_load = self.target_load - current_load
+        # PID для загрузки
+        P_load = self.kp_load * error_load
+        self.integral_load += error_load
+        self.integral_load = np.clip(self.integral_load, -self.integral_limit, self.integral_limit)
+        I_load = self.ki_load * self.integral_load
+        D_load = self.kd_load * (error_load - self.prev_error_load)
+        self.prev_error_load = error_load
+        # Выход регулятора
+        output_load = P_load + I_load + D_load
+        output = self.load_weight * output_load
+        # Адаптируем отступы
+        self._update_parameters(output)
+        # Сохраняем историю
+        self.history.append({
+            'time': len(self.history),
+            'error_load': error_load,
+            'output': output,
+            'threshold': self.threshold,
+            'lr_low': self.lr_low,
+            'lr_high': self.lr_high,
+            'second_low': self.second_low,
+            'second_high': self.second_high,
+            'load': current_load,
+        })
+        return self.get_margins()
+    def _update_parameters(self, output):
+        """Обновляет отступы на основе выхода регулятора"""
+        delta = output * 0.1
+        self.lr_low = np.clip(
+            self.lr_low + delta,
+            self.bounds['lr_low'][0],
+            self.bounds['lr_low'][1]
+        )
+        self.lr_high = np.clip(
+            self.lr_high + delta,
+            self.bounds['lr_high'][0],
+            self.bounds['lr_high'][1]
+        )
+        self.second_low = np.clip(
+            self.second_low + delta,
+            self.bounds['second_low'][0],
+            self.bounds['second_low'][1]
+        )
+        self.second_high = np.clip(
+            self.second_high + delta,
+            self.bounds['second_high'][0],
+            self.bounds['second_high'][1]
+        )
+    def get_margins(self, hour=None):
+        """Возвращает текущие отступы"""
+        return {
+            'lr_low': self.lr_low,
+            'lr_high': self.lr_high,
+            'second_low': self.second_low,
+            'second_high': self.second_high
+        }
+    def get_history(self):
+        """Возвращает историю для визуализации"""
+        return pd.DataFrame(self.history)

app/simulation/core/__init__.py ADDED Viewed

File without changes

app/simulation/core/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (185 Bytes). View file

app/simulation/core/__pycache__/processor.cpython-311.pyc ADDED Viewed

Binary file (13.8 kB). View file

app/simulation/core/__pycache__/traffic_generator.cpython-311.pyc ADDED Viewed

Binary file (13.4 kB). View file

app/simulation/core/processor.py ADDED Viewed

	@@ -0,0 +1,339 @@

+import numpy as np
+import pandas as pd
+from app.models.escalation import escalation_decision
+from app.models.escalation import check_business_rules
+def processing_time_function(lr_proba, second_proba, threshold=0.5, base_time=5,
+                             lr_weight=1.0, second_weight=1.5):
+    """
+    Генерирует время обработки для заявок, попавших в ручной разбор
+    """
+    total_weight = lr_weight + second_weight
+    proba = (lr_proba * lr_weight + second_proba * second_weight) / total_weight
+    margin = abs(proba - threshold)
+    max_margin = max(threshold, 1 - threshold)
+    uncertainty = 1 - (margin / max_margin)
+    mean_time = base_time * (1 + 3 * uncertainty)
+    processing_time = np.random.exponential(scale=mean_time)
+    return max(1, processing_time)
+class ApplicationProcessor:
+    def __init__(self, lr_model, second_model, second_model_name,
+                 specialists_count=5,  # основные специалисты (модели)
+                 business_specialists_count=2,  # эксперты (бизнес-правила)
+                 base_processing_time=5,
+                 business_processing_time=10,  # эксперты дольше копаются
+                 lr_weight=1.0, second_weight=1.5):
+        self.lr_model = lr_model
+        self.second_model = second_model
+        self.second_model_name = second_model_name
+        self.specialists_count = specialists_count
+        self.business_specialists_count = business_specialists_count
+        self.base_processing_time = base_processing_time
+        self.business_processing_time = business_processing_time
+        self.lr_weight = lr_weight
+        self.second_weight = second_weight
+        self.specialists = [0] * specialists_count
+        self.business_specialists = [0] * business_specialists_count  # отдельный пул
+        self.manual_queue = []  # очередь от моделей
+        self.business_queue = []  # очередь от бизнес-правил
+        self.stats = {
+            'total_processed': 0,
+            'auto_approved': 0,
+            'auto_declined': 0,
+            'manual_sent': 0,
+            'manual_processed': 0,
+            'business_manual_sent': 0,
+            'business_manual_processed': 0,
+            'queue_history': [],
+            'business_queue_history': [],
+            'wait_times': [],
+            'business_wait_times': [],
+            'specialist_busy': [],
+            'business_specialist_busy': [],
+            'business_rules_manual': 0,
+            'business_rules_auto': 0
+        }
+        self.batch_stats = []
+    def process_batch(self, applications_batch, preprocessor, scaler,
+                      threshold, lr_margins, second_margins, current_time):
+        """
+        Обрабатывает батч заявок за текущую минуту (батчевая версия)
+        """
+        minute_results = {
+            'new_apps': len(applications_batch),
+            'auto_decisions': [],
+            'new_manual': 0,
+            'new_business_manual': 0,
+            'processed_manual': 0,
+            'processed_business_manual': 0,
+            'queue_size': 0,
+            'business_queue_size': 0,
+            'specialists_busy': sum(1 for s in self.specialists if s > 0),
+            'business_specialists_busy': sum(1 for s in self.business_specialists if s > 0),
+            'business_rules': 0
+        }
+        # 1. Уменьшаем время работы специалистов
+        self.specialists = [max(0, s - 1) for s in self.specialists]
+        self.business_specialists = [max(0, s - 1) for s in self.business_specialists]
+        if not applications_batch:
+            minute_results['queue_size'] = len(self.manual_queue)
+            minute_results['business_queue_size'] = len(self.business_queue)
+            self.stats['queue_history'].append(len(self.manual_queue))
+            self.stats['business_queue_history'].append(len(self.business_queue))
+            self.stats['specialist_busy'].append(minute_results['specialists_busy'])
+            self.stats['business_specialist_busy'].append(minute_results['business_specialists_busy'])
+            return minute_results
+        # 2. Превращаем батч в DataFrame для удобства
+        df = pd.DataFrame(applications_batch)
+        # 3. Применяем бизнес-правила ко всем заявкам (БАТЧЕВО)
+        manual_mask, auto_reject_mask, messages, auto_decisions = check_business_rules(df)
+        # Сохраняем статистику по бизнес-правилам
+        business_manual_count = manual_mask.sum()
+        business_auto_count = auto_reject_mask.sum()
+        # Инициализируем
+        n = len(applications_batch)
+        model_indices = []
+        # 4. Обрабатываем результаты бизнес-правил
+        for idx in range(n):
+            if manual_mask[idx]:
+                # Ручной разбор по бизнес-правилам - в отдельную очередь
+                self.business_queue.append({
+                    'app': applications_batch[idx],
+                    'arrival_time': current_time,
+                    'reason': 'business_rules',
+                    'message': messages[idx],
+                    'lr_proba': None,
+                    'second_proba': None
+                })
+                minute_results['new_business_manual'] += 1
+                minute_results['business_rules'] += 1
+                self.stats['business_rules_manual'] += 1
+                self.stats['business_manual_sent'] += 1
+            elif auto_reject_mask[idx]:
+                # Автоматический отказ по бизнес-правилам
+                decision = {
+                    'final_decision': auto_decisions[idx],  # всегда 1
+                    'model_used': 'Business Rules',
+                    'probability': 1.0,
+                    'needs_review': False,
+                    'message': messages[idx]
+                }
+                minute_results['auto_decisions'].append(decision)
+                self.stats['auto_declined'] += 1
+                self.stats['business_rules_auto'] += 1
+                self.stats['total_processed'] += 1
+            else:
+                # Заявка идет в модели
+                model_indices.append(idx)
+        # Инициализируем переменные для статистики моделей
+        lr_confident_count = 0
+        second_confident_count = 0
+        second_uncertain_count = 0
+        # 5. Батчевая обработка моделей
+        if model_indices:
+            # Берём только заявки, которые прошли бизнес-правила
+            df_models = df.iloc[model_indices].copy()
+            # Формируем DataFrame для моделей
+            model_df = pd.DataFrame({
+                'RevolvingUtilizationOfUnsecuredLines': df_models['RevolvingUtilizationOfUnsecuredLines'],
+                'age': df_models['age'],
+                'NumberOfTime30-59DaysPastDueNotWorse': df_models['NumberOfTime30-59DaysPastDueNotWorse'],
+                'DebtRatio': df_models['DebtRatio'].fillna(0),
+                'MonthlyIncome': df_models['MonthlyIncome'].fillna(0),
+                'NumberOfOpenCreditLinesAndLoans': df_models['NumberOfOpenCreditLinesAndLoans'],
+                'NumberOfTimes90DaysLate': df_models['NumberOfTimes90DaysLate'],
+                'NumberRealEstateLoansOrLines': df_models['NumberRealEstateLoansOrLines'],
+                'NumberOfTime60-89DaysPastDueNotWorse': df_models['NumberOfTime60-89DaysPastDueNotWorse'],
+                'NumberOfDependents': df_models['NumberOfDependents'].fillna(0)
+            })
+            # Вызываем escalation_decision для всего батча
+            batch_decisions, batch_manual_mask, stats = escalation_decision(
+                model_df,
+                self.lr_model,
+                self.second_model,
+                self.second_model_name,
+                threshold=threshold,
+                lr_margins=lr_margins,
+                second_margins=second_margins,
+                preprocessor=preprocessor,
+                scaler=scaler
+            )
+            # Сохраняем статистику из escalation_decision
+            lr_confident_count = stats['lr_confident']
+            second_confident_count = stats['second_confident']
+            second_uncertain_count = stats['second_uncertain']
+            # print(f"Статистика батча: бизнес-ручной={business_manual_count}, "
+            #       f"бизнес-отказ={business_auto_count}, "
+            #       f"LR уверен={lr_confident_count}, "
+            #       f"вторая уверен={second_confident_count}, "
+            #       f"вторая не уверен={second_uncertain_count}")
+            # Распределяем результаты по исходным индексам
+            for local_idx, orig_idx in enumerate(model_indices):
+                decision = batch_decisions[local_idx]
+                if decision['needs_review']:
+                    self.manual_queue.append({
+                        'app': applications_batch[orig_idx],
+                        'arrival_time': current_time,
+                        'reason': 'model_uncertainty',
+                        'decision': decision,
+                        'lr_proba': decision.get('lr_proba'),
+                        'second_proba': decision.get('second_proba')
+                    })
+                    minute_results['new_manual'] += 1
+                    self.stats['manual_sent'] += 1
+                else:
+                    minute_results['auto_decisions'].append(decision)
+                    if decision['final_decision'] == 0:
+                        self.stats['auto_approved'] += 1
+                    else:
+                        self.stats['auto_declined'] += 1
+                self.stats['total_processed'] += 1
+        # Сохраняем общую статистику батча
+        self.batch_stats.append({
+            'time': current_time,
+            'business_manual': business_manual_count,
+            'business_auto': business_auto_count,
+            'lr_confident': lr_confident_count,
+            'second_confident': second_confident_count,
+            'second_uncertain': second_uncertain_count,
+            'total_in_batch': len(applications_batch),
+            'new_manual': minute_results['new_manual'],
+            'new_business_manual': minute_results['new_business_manual'],
+            'auto_total': len(minute_results['auto_decisions'])
+        })
+        # 6. Распределяем заявки из бизнес-очереди по свободным экспертам
+        for i in range(self.business_specialists_count):
+            if self.business_specialists[i] <= 0 and self.business_queue:
+                next_app = self.business_queue.pop(0)
+                wait_time = current_time - next_app['arrival_time']
+                self.stats['business_wait_times'].append(wait_time)
+                # Эксперты обрабатывают бизнес-правила
+                proc_time = self.business_processing_time
+                self.business_specialists[i] = proc_time
+                minute_results['processed_business_manual'] += 1
+                self.stats['business_manual_processed'] += 1
+        # 7. Распределяем заявки из основной очереди по свободным специалистам
+        for i in range(self.specialists_count):
+            if self.specialists[i] <= 0 and self.manual_queue:
+                next_app = self.manual_queue.pop(0)
+                wait_time = current_time - next_app['arrival_time']
+                self.stats['wait_times'].append(wait_time)
+                if next_app['reason'] == 'business_rules':
+                    proc_time = self.business_processing_time
+                else:
+                    # Используем функцию processing_time_function
+                    proc_time = processing_time_function(
+                        lr_proba=next_app.get('lr_proba', 0.5),
+                        second_proba=next_app.get('second_proba', 0.5),
+                        threshold=threshold,
+                        base_time=self.base_processing_time,
+                        lr_weight=self.lr_weight,
+                        second_weight=self.second_weight
+                    )
+                self.specialists[i] = proc_time
+                minute_results['processed_manual'] += 1
+                self.stats['manual_processed'] += 1
+        minute_results['queue_size'] = len(self.manual_queue)
+        minute_results['business_queue_size'] = len(self.business_queue)
+        self.stats['queue_history'].append(len(self.manual_queue))
+        self.stats['business_queue_history'].append(len(self.business_queue))
+        self.stats['specialist_busy'].append(minute_results['specialists_busy'])
+        self.stats['business_specialist_busy'].append(minute_results['business_specialists_busy'])
+        return minute_results
+    def load_test_dataset(self, filepath):
+        df = pd.read_csv(filepath)
+        if 'SeriousDlqin2yrs' in df.columns:
+            df = df.drop(columns=['SeriousDlqin2yrs'])
+        return df.to_dict('records')
+    def get_queue_stats(self):
+        if self.stats['wait_times']:
+            avg_wait = np.mean(self.stats['wait_times'])
+            max_wait = np.max(self.stats['wait_times'])
+        else:
+            avg_wait = max_wait = 0
+        if self.stats['business_wait_times']:
+            avg_business_wait = np.mean(self.stats['business_wait_times'])
+            max_business_wait = np.max(self.stats['business_wait_times'])
+        else:
+            avg_business_wait = max_business_wait = 0
+        return {
+            'current_queue': len(self.manual_queue),
+            'current_business_queue': len(self.business_queue),
+            'avg_wait_minutes': avg_wait,
+            'max_wait_minutes': max_wait,
+            'avg_business_wait_minutes': avg_business_wait,
+            'max_business_wait_minutes': max_business_wait,
+            'queue_history': self.stats['queue_history'],
+            'business_queue_history': self.stats['business_queue_history'],
+            'specialist_busy': self.stats['specialist_busy'],
+            'business_specialist_busy': self.stats['business_specialist_busy'],
+            'business_rules_split': {
+                'manual': self.stats['business_rules_manual'],
+                'auto': self.stats['business_rules_auto']
+            }
+        }
+    # def reset(self):
+    #     self.specialists = [0] * self.specialists_count
+    #     self.business_specialists = [0] * self.business_specialists_count
+    #     self.manual_queue = []
+    #     self.business_queue = []
+    #     self.stats = {
+    #         'total_processed': 0,
+    #         'auto_approved': 0,
+    #         'auto_declined': 0,
+    #         'manual_sent': 0,
+    #         'manual_processed': 0,
+    #         'business_manual_sent': 0,
+    #         'business_manual_processed': 0,
+    #         'queue_history': [],
+    #         'business_queue_history': [],
+    #         'wait_times': [],
+    #         'business_wait_times': [],
+    #         'specialist_busy': [],
+    #         'business_specialist_busy': [],
+    #         'business_rules_manual': 0,
+    #         'business_rules_auto': 0
+    #     }

app/simulation/core/traffic_generator.py ADDED Viewed

	@@ -0,0 +1,234 @@

+import numpy as np
+import matplotlib.pyplot as plt
+from datetime import datetime
+class TrafficGenerator:
+    def __init__(self, total_applications=101503, random_seed=42):
+        self.total = total_applications
+        np.random.seed(random_seed)
+        # Параметры интенсивности с провалом после обеда
+        self.intensity_params = {
+            'background': 0.1,
+            'day_center': 13, 'day_amplitude': 0.9, 'day_width': 2.5,  # день поуже
+            'evening_center': 19.5, 'evening_amplitude': 1.3, 'evening_width': 2.2,  # вечер пораньше и пошире
+            'afternoon_dip_center': 15.5, 'afternoon_dip_strength': 0.3, 'afternoon_dip_width': 1.5,
+            # провал после обеда
+            'noise_level': 0.1
+        }
+    def _time_to_hours(self, time_tuple):
+        """Переводит (часы, минуты) в часы с дробной частью"""
+        return time_tuple[0] + time_tuple[1] / 60
+    def loan_intensity_periodic(self, t, impulses=None):
+        """
+        Функция интенсивности с провалом после обеда
+        t: время в часах (может быть дробным)
+        impulses: список словарей вида
+            [{'time': (16, 37), 'strength': 2.0}, ...]  # время как (часы, минуты)
+        """
+        t_cycle = t % 24
+        bg = self.intensity_params['background']
+        # Утренне-дневной пик (13:00)
+        day = self.intensity_params['day_amplitude'] * np.exp(
+            -(t_cycle - self.intensity_params['day_center']) ** 2 /
+            (2 * self.intensity_params['day_width'] ** 2)
+        )
+        # Вечерний пик (19:30)
+        evening_diff = np.minimum(
+            np.abs(t_cycle - self.intensity_params['evening_center']),
+            np.abs(t_cycle - self.intensity_params['evening_center'] + 24)
+        )
+        evening = self.intensity_params['evening_amplitude'] * np.exp(
+            -(evening_diff) ** 2 / (2 * self.intensity_params['evening_width'] ** 2)
+        )
+        # Провал после обеда (15:30)
+        dip_diff = np.minimum(
+            np.abs(t_cycle - self.intensity_params['afternoon_dip_center']),
+            np.abs(t_cycle - self.intensity_params['afternoon_dip_center'] + 24)
+        )
+        dip = -self.intensity_params['afternoon_dip_strength'] * np.exp(
+            -(dip_diff) ** 2 / (2 * self.intensity_params['afternoon_dip_width'] ** 2)
+        )
+        intensity = bg + day + evening + dip
+        intensity = np.maximum(intensity, 0.05)  # не ниже минимума
+        # Шум
+        if self.intensity_params['noise_level'] > 0:
+            noise = 1.0 + np.random.uniform(
+                -self.intensity_params['noise_level'],
+                self.intensity_params['noise_level']
+            )
+            intensity *= noise
+        # Импульсы
+        if impulses:
+            for imp in impulses:
+                imp_time = self._time_to_hours(imp['time']) % 24
+                # Используем гауссиану для плавного импульса (ширина ~30 минут)
+                imp_diff = np.minimum(
+                    np.abs(t_cycle - imp_time),
+                    np.abs(t_cycle - imp_time + 24)
+                )
+                imp_factor = 1.0 + imp['strength'] * np.exp(-(imp_diff) ** 2 / (2 * 0.25 ** 2))
+                intensity *= imp_factor
+        return intensity
+    def generate_minute_counts(self, start_hour=None, start_minute=0, impulses=None):
+        """
+        Возвращает массив количества заявок на каждую минуту (1440 значений)
+        start_hour: час старта (по умолчанию текущий)
+        start_minute: минута старта
+        impulses: список импульсов, например:
+            [{'time': (5, 30), 'strength': 2.0}, ...]  # импульс в 5:30 силой 2.0
+        """
+        if start_hour is None:
+            now = datetime.now()
+            start_hour = now.hour
+            start_minute = now.minute
+        start_time = start_hour + start_minute / 60
+        # Массив минут (от start_time до start_time + 24)
+        minutes = np.arange(0, 24, 1 / 60)
+        intensity_values = np.array([
+            self.loan_intensity_periodic(start_time + m, impulses)
+            for m in minutes
+        ])
+        total_intensity = np.sum(intensity_values)
+        scale_factor = self.total / total_intensity
+        minute_counts = np.floor(intensity_values * scale_factor).astype(int)
+        # Распределяем остаток (чтоб точно сошлось общее число)
+        total_assigned = np.sum(minute_counts)
+        if total_assigned < self.total:
+            remainder = self.total - total_assigned
+            top_minutes = np.argsort(intensity_values)[-remainder:]
+            minute_counts[top_minutes] += 1
+        return minute_counts
+    def generate_hourly_counts(self, start_hour=None, start_minute=0, impulses=None):
+        """
+        Возвращает массив количества заявок по часам (24 значения)
+        """
+        minute_counts = self.generate_minute_counts(start_hour, start_minute, impulses)
+        hourly_counts = [np.sum(minute_counts[i * 60:(i + 1) * 60]) for i in range(24)]
+        return hourly_counts
+    def generate_random_impulses(self, n_impulses=1, min_strength=1.5, max_strength=3.0):
+        """
+        Генерирует случайные импульсы
+        """
+        impulses = []
+        for _ in range(n_impulses):
+            hour = np.random.randint(0, 24)
+            minute = np.random.randint(0, 60)
+            strength = np.random.uniform(min_strength, max_strength)
+            impulses.append({'time': (hour, minute), 'strength': strength})
+        return impulses
+    def plot_distribution(self, start_hour=None, start_minute=0, impulses=None):
+        """Строит график распределения заявок по часам"""
+        hourly_counts = self.generate_hourly_counts(start_hour, start_minute, impulses)
+        if start_hour is None:
+            start_hour = datetime.now().hour
+        hours = [(start_hour + i) % 24 for i in range(24)]
+        sorted_pairs = sorted(zip(hours, hourly_counts))
+        hours_sorted, counts_sorted = zip(*sorted_pairs)
+        plt.figure(figsize=(14, 6))
+        # Цвета в зависимости от времени суток
+        colors = []
+        for h in hours_sorted:
+            if 0 <= h <= 5:
+                colors.append('#2c3e50')  # ночь
+            elif 6 <= h <= 11:
+                colors.append('#3498db')  # утро
+            elif 12 <= h <= 16:
+                colors.append('#f39c12')  # день (с провалом)
+            else:
+                colors.append('#e67e22')  # вечер
+        bars = plt.bar([str(h) for h in hours_sorted], counts_sorted,
+                       alpha=0.8, color=colors, edgecolor='black', linewidth=1)
+        # Средняя линия
+        mean_val = np.mean(counts_sorted)
+        plt.axhline(y=mean_val, color='red', linestyle='--',
+                    alpha=0.7, linewidth=2, label=f'Среднее: {mean_val:.0f}')
+        # Отметим импульсы на графике
+        if impulses:
+            for imp in impulses:
+                imp_hours = self._time_to_hours(imp['time']) % 24
+                # Найдём ближайший час
+                closest_hour = min(hours_sorted, key=lambda x: abs(x - imp_hours))
+                idx = list(hours_sorted).index(closest_hour)
+                plt.plot(idx, counts_sorted[idx], 'g*', markersize=15,
+                         label=f'Импульс {imp["strength"]:.1f}x' if idx == 0 else '')
+        # Отметим провал после обеда
+        dip_idx = [i for i, h in enumerate(hours_sorted) if 14 <= h <= 16]
+        if dip_idx:
+            plt.axvspan(dip_idx[0] - 0.4, dip_idx[-1] + 0.4, alpha=0.2, color='gray',
+                        label='Послеобеденный спад')
+        plt.xlabel('Час', fontsize=12)
+        plt.ylabel('Количество заявок', fontsize=12)
+        plt.title(f'Распределение заявок по часам (старт в {start_hour:02d}:{start_minute:02d})',
+                  fontsize=14, fontweight='bold')
+        plt.grid(True, alpha=0.3, axis='y')
+        plt.legend(loc='upper right')
+        plt.xticks(rotation=45)
+        plt.tight_layout()
+        plt.show()
+        # Статистика
+        print("\n📊 Статистика распределения:")
+        print(f"   Всего заявок: {sum(counts_sorted)}")
+        print(f"   Среднее: {mean_val:.0f} заявок/час")
+        print(f"   Максимум: {max(counts_sorted)} заявок")
+        print(f"   Минимум: {min(counts_sorted)} заявок")
+        return hours_sorted, counts_sorted
+# Пример использования
+# if __name__ == "__main__":
+#     # Создаём генератор
+#     gen = TrafficGenerator(total_applications=110000)
+#
+#     # 1. Без импульсов
+#     print("Без импульсов:")
+#     counts = gen.generate_minute_counts(start_hour=17)
+#     print(f"Всего минут: {len(counts)}")
+#     print(f"Всего заявок: {sum(counts)}")
+#
+#     # 2. С импульсом в 5:30 утра
+#     impulses = [{'time': (5, 30), 'strength': 2.0}]
+#     print("\nС импульсом в 5:30:")
+#     counts = gen.generate_minute_counts(start_hour=17, impulses=impulses)
+#
+#     # 3. Построить график
+#     gen.plot_distribution(start_hour=17, impulses=impulses)
+#
+#     # 4. Случайные импульсы
+#     random_impulses = gen.generate_random_impulses(n_impulses=2)
+#     print("\nСлучайные импульсы:", random_impulses)
+#     gen.plot_distribution(start_hour=17, impulses=random_impulses)

app/simulation/visualization/__init__.py ADDED Viewed

File without changes

app/simulation/visualization/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (194 Bytes). View file

app/simulation/visualization/__pycache__/animation.cpython-311.pyc ADDED Viewed

Binary file (15.7 kB). View file

app/simulation/visualization/__pycache__/plots.cpython-311.pyc ADDED Viewed

Binary file (20.8 kB). View file

app/simulation/visualization/animation.py ADDED Viewed

	@@ -0,0 +1,246 @@

+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+import numpy as np
+def minutes_to_time(minutes, start_time="00:00"):
+    start_hour, start_min = map(int, start_time.split(':'))
+    total_minutes = start_hour * 60 + start_min + minutes
+    hour = (total_minutes // 60) % 24
+    minute = total_minutes % 60
+    return f"{hour:02d}:{minute:02d}"
+def create_animation_frame_plotly(frame_data, specialists_count, second_model_name="XGBoost"):
+    # Фиксированная ось X для графиков
+    time_ticks = list(range(0, 1441, 180))
+    time_labels = [minutes_to_time(t, "00:00") for t in time_ticks]
+    fig = make_subplots(
+        rows=3, cols=2,
+        subplot_titles=('📈 Динамика входящего потока', '⚙️ Загрузка специалистов (%)',
+                        '👥 МОНИТОРИНГ РАБОТЫ СПЕЦИАЛИСТОВ', '',
+                        '📊 Сводная статистика обработки', '🎯 Оперативные показатели'),
+        specs=[
+            [{'type': 'scatter'}, {'type': 'scatter'}],
+            [{'type': 'heatmap', 'colspan': 2}, None],
+            [{'type': 'table'}, {'type': 'scatter'}]
+        ],
+        row_heights=[0.25, 0.40, 0.35],
+        vertical_spacing=0.1,
+    )
+    # --- РЯД 1: ГРАФИКИ ---
+    inflow_h = frame_data.get('inflow_history', [])
+    load_h = frame_data.get('load_history', [])
+    fig.add_trace(go.Scatter(y=inflow_h, fill='tozeroy', line=dict(color='#4361ee', width=2)), row=1, col=1)
+    fig.add_trace(go.Scatter(y=[l * 100 for l in load_h], fill='tozeroy', line=dict(color='#4cc9f0', width=2)), row=1,
+                  col=2)
+    for col in [1, 2]:
+        fig.update_xaxes(range=[0, 1440], tickvals=time_ticks, ticktext=time_labels, row=1, col=col)
+        fig.update_yaxes(rangemode="tozero", row=1, col=col)
+    # --- РЯД 2: HEATMAP (Строго 20 ячеек в ширину) ---
+    states = np.array(frame_data['specialist_states'])
+    cols = 20
+    rows = int(np.ceil(specialists_count / cols))
+    # Создаем матрицу, заполненную None (или NaN), чтобы пустые места не красились
+    z_matrix = np.full((rows, cols), np.nan)
+    for i, val in enumerate(states):
+        r, c = divmod(i, cols)
+        # Мапим значения: 0 -> 0.1 (голубой), 1-3 -> 0.4 (зеленый) и т.д.
+        if val == 0:
+            z_matrix[r, c] = 0.1
+        elif val <= 3:
+            z_matrix[r, c] = 0.4
+        elif val <= 7:
+            z_matrix[r, c] = 0.7
+        else:
+            z_matrix[r, c] = 1.0
+    # Настраиваем цвета: NaN будет прозрачным/фоновым
+    colorscale = [
+        [0.0, '#66ccff'],  # Свободен (0)
+        [0.4, '#4ade80'],  # 1-3 мин
+        [0.7, '#facc15'],  # 4-7 мин
+        [1.0, '#f87171']  # 8+ мин
+    ]
+    fig.add_trace(go.Heatmap(
+        z=z_matrix, colorscale=colorscale, showscale=False,
+        xgap=2, ygap=2, zmin=0, zmax=1, hoverinfo='none'
+    ), row=2, col=1)
+    # Легенда над хитмапом
+    free = sum(1 for t in states if t <= 0)
+    legend = (f"Свободно: <b>{free}</b> | <span style='color:#66ccff'>■</span> Свободен "
+              f"<span style='color:#4ade80'>■</span> 1-3м <span style='color:#facc15'>■</span> 4-7м "
+              f"<span style='color:#f87171'>■</span> 8м+")
+    fig.add_annotation(text=legend, xref="paper", yref="paper", x=0.5, y=0.70, showarrow=False, font=dict(size=14))
+    # --- РЯД 3: ТАБЛИЦА (Формальная) ---
+    cum = frame_data['cumulative']
+    fig.add_trace(go.Table(
+        header=dict(values=['Параметр', 'Значение'], fill_color='#1e293b', font=dict(color='white', size=15),
+                    height=35),
+        cells=dict(values=[
+            ['✅ Авто-одобрено', '❌ Авто-отказы', '👤 На рассмотрении (Manual)', '<b>ИТОГО ОБРАБОТАНО</b>'],
+            [cum['auto_approved'], cum['auto_declined'],
+             cum['manual_processed'] + cum['business_manual_processed'], f"<b>{cum['total_processed']}</b>"]
+        ], align='left', font=dict(size=14), height=35, fill_color='#f8f9fa')
+    ), row=3, col=1)
+    # --- ОПЕРАТИВНЫЕ ПОКАЗАТЕЛИ (Крупный заголовок) ---
+    q_models = frame_data['queue']  # Очередь к спецам
+    q_business = frame_data.get('business_queue', 0)  # Бизнес-очередь
+    # Расчет ожидания только для очереди моделей (как на левом графике)
+    avg_w = frame_data.get('avg_wait', 0)
+    status_card = (
+        f"<span style='font-size:22px; font-weight:bold;'>МОНИТОРИНГ</span><br><br>"
+        f"<span style='background-color:#dcfce7; color:#166534; padding:8px; border-radius:5px;'>"
+        f"<b>👤 ОЧЕРЕДЬ (СПЕЦ): {q_models}</b></span><br><br>"
+        f"<span style='font-size:18px; color:#666;'>"
+        f"⚙️ Бизнес-правила: {q_business}</span><br><br>"
+        f"🕒 Время: <b>{frame_data['time_str']}</b><br>"
+        f"⏳ Ожидание: <b>{avg_w:.1f} мин</b>"
+    )
+    fig.add_trace(go.Scatter(x=[0], y=[0], mode='text', text=[status_card], textfont=dict(size=16)), row=3, col=2)
+    # Очистка осей
+    fig.update_xaxes(visible=False, row=2, col=1);
+    fig.update_yaxes(visible=False, row=2, col=1)
+    fig.update_xaxes(visible=False, row=3, col=2);
+    fig.update_yaxes(visible=False, row=3, col=2)
+    # Фиксируем оси, чтобы график не "дышал" (это главная причина мерцания)
+    fig.update_yaxes(range=[0, 60], row=1, col=1)  # Замени 60 на твой макс. поток
+    fig.update_yaxes(range=[0, 105], row=1, col=2)  # Загрузка всегда до 100%
+    fig.update_layout(
+        height=950,
+        margin=dict(t=80, b=40, l=50, r=50),
+        template="plotly_white",
+        showlegend=False,
+        # ОТКЛЮЧАЕМ анимации переходов, которые создают эффект мигания
+        transition_duration=0,
+        hovermode=False
+    )
+    # Это заставит Plotly обновлять только данные, не перерисовывая всё полотно
+    fig.layout.datarevision = frame_data['time']
+    return fig
+from matplotlib.animation import FFMpegWriter
+import matplotlib.pyplot as plt
+import matplotlib.animation as animation
+import tempfile
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib.animation as animation
+import tempfile
+import numpy as np
+import os
+# Note: apply pending changes to the create_simulation_video function in animation.py
+def create_simulation_video(frames, specialists_count, second_model_name, fps=24):
+    if not frames:
+        return None
+    # Настройка стиля
+    plt.style.use('seaborn-v0_8-whitegrid')
+    fig, axes = plt.subplots(2, 2, figsize=(16, 10), facecolor='#f8f9fa')
+    plt.subplots_adjust(hspace=0.4, wspace=0.25)
+    plt.close()
+    def update(i):
+        data = frames[i]
+        for ax in axes.flatten():
+            ax.clear()
+            ax.set_facecolor('white')
+        # 1. ДИНАМИКА ПОТОКА (Локализация)
+        y_inflow = data['inflow_history']
+        axes[0, 0].fill_between(range(len(y_inflow)), y_inflow, color='#4361ee', alpha=0.3)
+        axes[0, 0].plot(range(len(y_inflow)), y_inflow, color='#4361ee', linewidth=2)
+        axes[0, 0].set_xlim(0, 1440)  # Фиксация оси времени
+        axes[0, 0].set_title("ДИНАМИКА ПОТОКА (заявок/мин)", fontsize=12, fontweight='bold')
+        axes[0, 0].set_xlabel("Минуты симуляции")
+        # 2. ЗАГРУЗКА СИСТЕМЫ
+        y_load = [v * 100 for v in data['load_history']]
+        axes[0, 1].fill_between(range(len(y_load)), y_load, color='#4cc9f0', alpha=0.3)
+        axes[0, 1].plot(range(len(y_load)), y_load, color='#4cc9f0', linewidth=2)
+        axes[0, 1].axhline(y=80, color='#f72585', linestyle='--', alpha=0.6)
+        axes[0, 1].set_xlim(0, 1440)
+        axes[0, 1].set_ylim(0, 110)
+        axes[0, 1].set_title(f"ЗАГРУЖЕННОСТЬ СПЕЦИАЛИСТОВ %: {y_load[-1]:.1f}%", fontsize=12, fontweight='bold')
+        # 3. HEATMAP И ЛЕГЕНДА (Возвращаем информативность)
+        states = np.array(data['specialist_states'])
+        cols = 20
+        rows = int(np.ceil(specialists_count / cols))
+        z = np.zeros((rows, cols))
+        for idx, val in enumerate(states[:rows * cols]):
+            z[idx // cols, idx % cols] = val
+        im = axes[1, 0].imshow(z, cmap='RdYlGn_r', aspect='auto', vmin=0, vmax=10)
+        axes[1, 0].set_title(f"МОНИТОРИНГ: {specialists_count} СПЕЦИАЛИСТОВ", fontsize=12, fontweight='bold')
+        axes[1, 0].axis('off')
+        # Добавляем текстовую легенду под хитмапом
+        legend_text = "Цвета: Зеленый (Свободен) → Желтый (3-5 мин) → Красный (8+ мин)"
+        axes[1, 0].text(0.5, -0.1, legend_text, ha='center', transform=axes[1, 0].transAxes, fontsize=10)
+        # --- 4. РАЗДЕЛЕННЫЕ ОЧЕРЕДИ И СТАТИСТИКА ---
+        ax_stat = axes[1, 1]
+        ax_stat.clear()
+        ax_stat.axis('off')
+        # Цвета для очередей (краснеют, если очередь > 50)
+        q_mod_color = '#991b1b' if data['queue'] > 50 else '#166534'
+        q_biz_color = '#991b1b' if data.get('business_queue', 0) > 50 else '#1e293b'
+        # Две надписи очередей сверху
+        ax_stat.text(0.25, 0.9, "ОЧЕРЕДЬ\n(МОДЕЛИ)", fontsize=10, ha='center', fontweight='bold')
+        ax_stat.text(0.25, 0.78, f"{data['queue']}", fontsize=26, ha='center', fontweight='bold', color=q_mod_color)
+        ax_stat.text(0.75, 0.9, "ОЧЕРЕДЬ\n(БИЗНЕС ПРАВИЛА)", fontsize=10, ha='center', fontweight='bold')
+        ax_stat.text(0.75, 0.78, f"{data.get('business_queue', 0)}", fontsize=26, ha='center', fontweight='bold',
+                     color=q_biz_color)
+        # Сводная таблица ниже
+        cum = data['cumulative']
+        stats_text = (
+            f"Итоговые показатели к {data['time_str']}\n"
+            f"--------------------------------------\n"
+            f"ОБРАБОТАНО ВСЕГО:                 {cum['total_processed']}\n"
+            f"Авто-одобрено:                    {cum['auto_approved']}\n"
+            f"Авто-отказы:                      {cum['auto_declined']}\n"
+            f"Ручной разбор (модель):           {cum['manual_processed']}\n"
+            f"Ручной разбор (бизнес правила):   {cum['business_manual_processed']}\n"
+            f"--------------------------------------\n"
+            f"Используемая модель: {second_model_name}"
+        )
+        ax_stat.text(0.5, 0.3, stats_text, fontsize=10, fontfamily='monospace',
+                     ha='center', va='center', transform=ax_stat.transAxes,
+                     bbox=dict(facecolor='#f8f9fa', alpha=1, boxstyle='round,pad=1', edgecolor='#dee2e6'))
+        return axes.flatten()
+    ani = animation.FuncAnimation(fig, update, frames=len(frames), interval=1000 / fps)
+    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
+    writer = animation.FFMpegWriter(fps=fps, bitrate=2000, extra_args=['-vcodec', 'libx264', '-pix_fmt', 'yuv420p'])
+    ani.save(tmp_file.name, writer=writer)
+    return tmp_file.name

app/simulation/visualization/plots.py ADDED Viewed

	@@ -0,0 +1,374 @@

+import matplotlib.pyplot as plt
+import matplotlib.animation as animation
+import numpy as np
+def minutes_to_time(minutes, start_time="00:00"):
+    """Преобразует минуты от старта в строку времени ЧЧ:ММ"""
+    start_hour, start_min = map(int, start_time.split(':'))
+    total_minutes = start_hour * 60 + start_min + minutes
+    hour = (total_minutes // 60) % 24
+    minute = total_minutes % 60
+    return f"{hour:02d}:{minute:02d}"
+def plot_queue_dynamics(queue_history, business_queue_history=None, start_time="00:00"):
+    """
+    Два отдельных графика для очередей с временной шкалой ЧЧ:ММ
+    """
+    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
+    # Создаем метки времени для каждого часа
+    total_minutes = len(queue_history)
+    hours = range(0, total_minutes, 60)  # каждый час
+    hour_labels = [minutes_to_time(m, start_time) for m in hours]
+    # График 1: Очередь моделей
+    ax1.plot(range(total_minutes), queue_history, 'b-', linewidth=1.5)
+    ax1.set_xticks(hours)
+    ax1.set_xticklabels(hour_labels, rotation=45)
+    ax1.set_xlabel('Время')
+    ax1.set_ylabel('Размер очереди')
+    ax1.set_title('Очередь моделей')
+    ax1.grid(True, alpha=0.3)
+    # График 2: Очередь бизнес-правил
+    if business_queue_history and len(business_queue_history) > 0:
+        ax2.plot(range(total_minutes), business_queue_history, 'orange', linewidth=1.5)
+        ax2.set_xticks(hours)
+        ax2.set_xticklabels(hour_labels, rotation=45)
+        ax2.set_xlabel('Время')
+        ax2.set_ylabel('Размер очереди')
+        ax2.set_title('Очередь бизнес-правил')
+        ax2.grid(True, alpha=0.3)
+    else:
+        ax2.text(0.5, 0.5, 'Нет данных', ha='center', va='center', transform=ax2.transAxes)
+        ax2.set_title('Очередь бизнес-правил')
+        ax2.set_xlabel('Время')
+    plt.tight_layout()
+    return plt
+def plot_specialist_load(specialist_busy_history, specialists_count, start_time="00:00"):
+    """График загрузки специалистов с временной шкалой ЧЧ:ММ"""
+    load_percent = [busy / specialists_count * 100 for busy in specialist_busy_history]
+    fig, ax = plt.subplots(figsize=(10, 4))
+    total_minutes = len(load_percent)
+    hours = range(0, total_minutes, 60)  # каждый час
+    hour_labels = [minutes_to_time(m, start_time) for m in hours]
+    ax.plot(range(total_minutes), load_percent, 'g-', linewidth=1.5)
+    ax.axhline(y=100, color='r', linestyle='--', alpha=0.5, label='Максимум')
+    ax.axhline(y=80, color='b', linestyle='--', alpha=0.5, label='Цель 80%')
+    ax.set_xticks(hours)
+    ax.set_xticklabels(hour_labels, rotation=45)
+    ax.set_xlabel('Время')
+    ax.set_ylabel('Загрузка (%)')
+    ax.set_title('Загрузка специалистов')
+    ax.legend()
+    ax.grid(True, alpha=0.3)
+    ax.set_ylim(0, 110)
+    plt.tight_layout()
+    return plt
+def plot_inflow(minute_counts, start_time="00:00"):
+    """
+    График входящего потока заявок с заливкой под кривой
+    """
+    fig, ax = plt.subplots(figsize=(14, 5))
+    total_minutes = len(minute_counts)
+    minutes = range(total_minutes)
+    # Заливка под кривой (area plot)
+    ax.fill_between(minutes, minute_counts, alpha=0.3, color='blue', label='Общий поток')
+    # Основной график (линия поверх заливки)
+    ax.plot(minutes, minute_counts, 'b-', linewidth=1.5, alpha=0.8)
+    # Скользящее среднее
+    window = 30
+    if total_minutes > window:
+        smoothed = np.convolve(minute_counts, np.ones(window) / window, mode='valid')
+        ax.plot(range(window - 1, total_minutes), smoothed,
+                'r-', linewidth=2.5, label=f'Среднее за 30 мин')
+        # Можно добавить заливку и для среднего (опционально)
+        # ax.fill_between(range(window - 1, total_minutes), smoothed, alpha=0.2, color='red')
+    # Метки времени
+    hours = range(0, total_minutes, 60)
+    hour_labels = [minutes_to_time(m, start_time) for m in hours]
+    ax.set_xticks(hours)
+    ax.set_xticklabels(hour_labels, rotation=45)
+    ax.set_xlabel('Время')
+    ax.set_ylabel('Количество заявок')
+    ax.set_title('Входящий поток заявок')
+    ax.legend()
+    ax.grid(True, alpha=0.3)
+    # Добавим горизонтальную линию среднего
+    mean_value = np.mean(minute_counts)
+    ax.axhline(y=mean_value, color='gray', linestyle='--', alpha=0.7,
+               label=f'Среднее: {mean_value:.1f}')
+    plt.tight_layout()
+    return plt
+def minutes_to_time(minutes, start_time="00:00"):
+    """Преобразует минуты от старта в строку времени ЧЧ:ММ"""
+    start_hour, start_min = map(int, start_time.split(':'))
+    total_minutes = start_hour * 60 + start_min + minutes
+    hour = (total_minutes // 60) % 24
+    minute = total_minutes % 60
+    return f"{hour:02d}:{minute:02d}"
+def plot_detailed_decisions(batch_stats, second_model_name="XGBoost", start_time="00:00"):
+    """
+    Набор графиков для каждого типа решений отдельно с временной шкалой ЧЧ:ММ
+    """
+    if not batch_stats:
+        return None
+    fig, axes = plt.subplots(3, 2, figsize=(14, 10))
+    times = [stat['time'] for stat in batch_stats]  # минуты
+    total_minutes = max(times) if times else 0
+    # Метки времени каждый час
+    hours = range(0, total_minutes + 60, 60)
+    hour_labels = [minutes_to_time(m, start_time) for m in hours]
+    # 1. Бизнес-правила (ручной разбор)
+    axes[0, 0].plot(times, [stat['business_manual'] for stat in batch_stats],
+                    'r-', linewidth=1.5)
+    axes[0, 0].fill_between(times, 0, [stat['business_manual'] for stat in batch_stats],
+                            alpha=0.2, color='red')
+    axes[0, 0].set_title('Ручной разбор: бизнес-правила', fontweight='bold')
+    axes[0, 0].set_xticks(hours)
+    axes[0, 0].set_xticklabels(hour_labels, rotation=45)
+    axes[0, 0].set_xlabel('Время')
+    axes[0, 0].set_ylabel('Заявок')
+    axes[0, 0].grid(True, alpha=0.3)
+    # 2. Бизнес-правила (авто отказ)
+    axes[0, 1].plot(times, [stat['business_auto'] for stat in batch_stats],
+                    'darkred', linewidth=1.5)
+    axes[0, 1].fill_between(times, 0, [stat['business_auto'] for stat in batch_stats],
+                            alpha=0.2, color='darkred')
+    axes[0, 1].set_title('Авто отказ: бизнес-правила', fontweight='bold')
+    axes[0, 1].set_xticks(hours)
+    axes[0, 1].set_xticklabels(hour_labels, rotation=45)
+    axes[0, 1].set_xlabel('Время')
+    axes[0, 1].set_ylabel('Заявок')
+    axes[0, 1].grid(True, alpha=0.3)
+    # 3. LR уверенные решения
+    axes[1, 0].plot(times, [stat['lr_confident'] for stat in batch_stats],
+                    'blue', linewidth=1.5)
+    axes[1, 0].fill_between(times, 0, [stat['lr_confident'] for stat in batch_stats],
+                            alpha=0.2, color='blue')
+    axes[1, 0].set_title('Уверенные решения: Logistic Regression', fontweight='bold')
+    axes[1, 0].set_xticks(hours)
+    axes[1, 0].set_xticklabels(hour_labels, rotation=45)
+    axes[1, 0].set_xlabel('Время')
+    axes[1, 0].set_ylabel('Заявок')
+    axes[1, 0].grid(True, alpha=0.3)
+    # 4. Вторая модель уверенные решения
+    axes[1, 1].plot(times, [stat['second_confident'] for stat in batch_stats],
+                    'green', linewidth=1.5)
+    axes[1, 1].fill_between(times, 0, [stat['second_confident'] for stat in batch_stats],
+                            alpha=0.2, color='green')
+    axes[1, 1].set_title(f'Уверенные решения: {second_model_name}', fontweight='bold')
+    axes[1, 1].set_xticks(hours)
+    axes[1, 1].set_xticklabels(hour_labels, rotation=45)
+    axes[1, 1].set_xlabel('Время')
+    axes[1, 1].set_ylabel('Заявок')
+    axes[1, 1].grid(True, alpha=0.3)
+    # 5. Ручной разбор от моделей
+    axes[2, 0].plot(times, [stat['second_uncertain'] for stat in batch_stats],
+                    'orange', linewidth=1.5)
+    axes[2, 0].fill_between(times, 0, [stat['second_uncertain'] for stat in batch_stats],
+                            alpha=0.2, color='orange')
+    axes[2, 0].set_title('Ручной разбор: модели неуверенны', fontweight='bold')
+    axes[2, 0].set_xticks(hours)
+    axes[2, 0].set_xticklabels(hour_labels, rotation=45)
+    axes[2, 0].set_xlabel('Время')
+    axes[2, 0].set_ylabel('Заявок')
+    axes[2, 0].grid(True, alpha=0.3)
+    # 6. Сравнительный график
+    axes[2, 1].plot(times, [stat['business_manual'] for stat in batch_stats],
+                    'r-', linewidth=1.5, label='Бизнес-правила', alpha=0.7)
+    axes[2, 1].plot(times, [stat['second_uncertain'] for stat in batch_stats],
+                    'orange', linewidth=1.5, label='Модели неуверенны', alpha=0.7)
+    axes[2, 1].set_title('Сравнение источников ручного разбора', fontweight='bold')
+    axes[2, 1].set_xticks(hours)
+    axes[2, 1].set_xticklabels(hour_labels, rotation=45)
+    axes[2, 1].set_xlabel('Время')
+    axes[2, 1].set_ylabel('Заявок')
+    axes[2, 1].legend()
+    axes[2, 1].grid(True, alpha=0.3)
+    plt.suptitle('Детальный анализ решений', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    return plt
+def plot_parameters_history(pid_history, second_model_name="XGBoost", start_time="00:00"):
+    """График изменения параметров регулятора"""
+    if pid_history is None or pid_history.empty:
+        return None
+    fig, axes = plt.subplots(3, 1, figsize=(12, 12))
+    total_minutes = len(pid_history)
+    times = range(total_minutes)
+    # Метки времени
+    hours = range(0, total_minutes, 60)
+    hour_labels = [minutes_to_time(m, start_time) for m in hours]
+    # 1. Отступы LR
+    axes[0].plot(times, pid_history['lr_low'], 'g-', linewidth=2, label='LR Low')
+    axes[0].plot(times, pid_history['lr_high'], 'r-', linewidth=2, label='LR High')
+    axes[0].set_ylabel('Отступ')
+    axes[0].set_title('Отступы Logistic Regression')
+    axes[0].legend()
+    axes[0].grid(True, alpha=0.3)
+    axes[0].set_xticks(hours)
+    axes[0].set_xticklabels(hour_labels, rotation=45)
+    # 2. Отступы второй модели (с именем из параметра)
+    axes[1].plot(times, pid_history['second_low'], 'g-', linewidth=2, label=f'{second_model_name} Low')
+    axes[1].plot(times, pid_history['second_high'], 'r-', linewidth=2, label=f'{second_model_name} High')
+    axes[1].set_ylabel('Отступ')
+    axes[1].set_title(f'Отступы {second_model_name}')
+    axes[1].legend()
+    axes[1].grid(True, alpha=0.3)
+    axes[1].set_xticks(hours)
+    axes[1].set_xticklabels(hour_labels, rotation=45)
+    # 3. Ошибка загрузки и выход регулятора
+    axes[2].plot(times, pid_history['error_load'], 'b-', label='Error load', alpha=0.7, linewidth=1.5)
+    axes[2].plot(times, pid_history['output'], 'r-', label='Output', linewidth=2, alpha=0.7)
+    axes[2].axhline(y=0, color='black', linestyle='-', linewidth=0.5)
+    axes[2].set_xlabel('Время')
+    axes[2].set_ylabel('Значение')
+    axes[2].set_title('Ошибка загрузки и выход регулятора')
+    axes[2].legend()
+    axes[2].grid(True, alpha=0.3)
+    axes[2].set_xticks(hours)
+    axes[2].set_xticklabels(hour_labels, rotation=45)
+    plt.tight_layout()
+    return plt
+# def plot_summary(processor):
+#     """Сводный дашборд"""
+#     fig, axes = plt.subplots(2, 3, figsize=(15, 10))
+#
+#     stats = processor.stats
+#
+#     # 1. Динамика очередей
+#     axes[0, 0].plot(stats['queue_history'], 'b-', linewidth=1.5, label='Очередь моделей')
+#     if 'business_queue_history' in stats:
+#         axes[0, 0].plot(stats['business_queue_history'], 'orange', linewidth=1.5, label='Очередь бизнес-правил')
+#     axes[0, 0].set_title('Динамика очередей')
+#     axes[0, 0].set_xlabel('Минута')
+#     axes[0, 0].set_ylabel('Заявок')
+#     axes[0, 0].legend()
+#     axes[0, 0].grid(True, alpha=0.3)
+#
+#     # 2. Загрузка специалистов (модели)
+#     load = [b / processor.specialists_count * 100 for b in stats['specialist_busy']]
+#     axes[0, 1].plot(load, 'g-', linewidth=1.5, label='Основные специалисты')
+#     axes[0, 1].axhline(y=100, color='r', linestyle='--', alpha=0.5, label='Максимум')
+#     if hasattr(processor, 'target_load'):
+#         axes[0, 1].axhline(y=processor.target_load * 100, color='b', linestyle='--',
+#                            alpha=0.5, label=f'Цель {processor.target_load * 100:.0f}%')
+#     axes[0, 1].set_title('Загрузка специалистов (модели)')
+#     axes[0, 1].set_xlabel('Минута')
+#     axes[0, 1].set_ylabel('%')
+#     axes[0, 1].legend()
+#     axes[0, 1].grid(True, alpha=0.3)
+#
+#     # 3. Загрузка экспертов
+#     if 'business_specialist_busy' in stats and stats['business_specialist_busy']:
+#         business_load = [b / processor.business_specialists_count * 100 for b in stats['business_specialist_busy']]
+#         axes[1, 0].plot(business_load, 'orange', linewidth=1.5, label='Эксперты')
+#         axes[1, 0].axhline(y=100, color='r', linestyle='--', alpha=0.5, label='Максимум')
+#         axes[1, 0].set_title('Загрузка экспертов (бизнес-правила)')
+#         axes[1, 0].set_xlabel('Минута')
+#         axes[1, 0].set_ylabel('%')
+#         axes[1, 0].legend()
+#         axes[1, 0].grid(True, alpha=0.3)
+#     else:
+#         axes[1, 0].text(0.5, 0.5, 'Нет данных по экспертам', ha='center', va='center')
+#         axes[1, 0].set_title('Загрузка экспертов')
+#
+#     # 4. Распределение решений
+#     sizes = [
+#         stats['auto_approved'],
+#         stats['auto_declined'],
+#         stats['manual_processed'],
+#         stats.get('business_manual_processed', 0)
+#     ]
+#     labels = ['Одобрено авто', 'Отказ авто', 'Ручной (модели)', 'Ручной (бизнес)']
+#     colors = ['#2ecc71', '#e74c3c', '#3498db', '#f39c12']
+#
+#     if sum(sizes) > 0:
+#         wedges, texts, autotexts = axes[1, 1].pie(sizes, labels=labels, autopct='%1.1f%%',
+#                                                   colors=colors, startangle=90)
+#         for autotext in autotexts:
+#             autotext.set_color('white')
+#             autotext.set_fontweight('bold')
+#     axes[1, 1].set_title('Итоговые решения')
+#
+#     # 5. Ключевые метрики (освободилось место)
+#     total = stats['total_processed']
+#     if total > 0:
+#         avg_wait = np.mean(stats['wait_times']) if stats['wait_times'] else 0
+#         avg_business_wait = np.mean(stats.get('business_wait_times', [0])) if stats.get('business_wait_times') else 0
+#
+#         metrics_text = f"""
+#         Всего заявок: {total:,}
+#         Одобрено авто: {stats['auto_approved']:,} ({stats['auto_approved'] / total * 100:.1f}%)
+#         Отказ авто: {stats['auto_declined']:,} ({stats['auto_declined'] / total * 100:.1f}%)
+#
+#         Ручной разбор (модели): {stats['manual_processed']:,} ({stats['manual_processed'] / total * 100:.1f}%)
+#         Ручной разбор (бизнес): {stats.get('business_manual_processed', 0):,}
+#
+#         Среднее время ожидания (модели): {avg_wait:.1f} мин
+#         Среднее время ожидания (бизнес): {avg_business_wait:.1f} мин
+#
+#         Средняя загрузка специалистов: {np.mean(load):.1f}%
+#         """
+#     else:
+#         metrics_text = "Нет данных"
+#
+#     axes[0, 2].text(0.1, 0.5, metrics_text, transform=axes[0, 2].transAxes,
+#                     fontsize=10, verticalalignment='center', fontfamily='monospace')
+#     axes[0, 2].axis('off')
+#     axes[0, 2].set_title('Ключевые метрики')
+#
+#     # 6. Пустой график или можно что-то еще
+#     axes[1, 2].axis('off')
+#
+#     plt.suptitle('Сводная статистика симуляции', fontsize=14, fontweight='bold')
+#     plt.tight_layout()
+#     return plt

app/simulation/visualization/simulation_20:11.gif ADDED Viewed

app/simulation/visualization/simulation_20:19.gif ADDED Viewed

app/simulation/visualization/simulation_20:25.gif ADDED Viewed

app/simulation/visualization/simulation_20:30.gif ADDED Viewed

app/utils/__pycache__/credit_preprocessor.cpython-311.pyc ADDED Viewed

Binary file (14.5 kB). View file

app/utils/__pycache__/data_loader.cpython-311.pyc ADDED Viewed

Binary file (1.51 kB). View file

app/utils/credit_preprocessor.py ADDED Viewed

	@@ -0,0 +1,329 @@

+import pandas as pd
+import numpy as np
+import joblib
+from sklearn.base import BaseEstimator, TransformerMixin
+class CreditDataPreprocessor(BaseEstimator, TransformerMixin):
+    # Полный препроцессинг данных
+    def __init__(self,
+                 NumberOfDependents_fill_value=0,
+                 NumberOfDependents_up_threshold=10,
+                 MonthlyIncome_fill_value=0,
+                 RevolvingUtilizationOfUnsecuredLines_drop_threshold=2,
+                 age_low_drop_threshold=18,
+                 age_up_drop_threshold=80,
+                 DebtRatio_up_threshold=5,
+                 PastDueRiskScore_weights=[1.0, 1.2, 1.3],
+                 NumberRealEstateLoansOrLines_drop_threshold=20,
+                 drop_special_codes=False):
+        self.NumberOfDependents_fill_value = NumberOfDependents_fill_value
+        self.NumberOfDependents_up_threshold = NumberOfDependents_up_threshold
+        self.MonthlyIncome_fill_value = MonthlyIncome_fill_value
+        self.RevolvingUtilizationOfUnsecuredLines_drop_threshold = RevolvingUtilizationOfUnsecuredLines_drop_threshold
+        self.age_low_drop_threshold = age_low_drop_threshold
+        self.age_up_drop_threshold = age_up_drop_threshold
+        self.DebtRatio_up_threshold = DebtRatio_up_threshold
+        self.PastDueRiskScore_weights = PastDueRiskScore_weights
+        self.NumberRealEstateLoansOrLines_drop_threshold = NumberRealEstateLoansOrLines_drop_threshold
+        self.drop_special_codes = drop_special_codes
+    def fit(self, X, y=None):
+        return self
+    def transform(self, X):
+        X_copy = X.copy()
+        X_copy['NumberOfDependents'] = X_copy['NumberOfDependents'].fillna(value=self.NumberOfDependents_fill_value)
+        X_copy['NumberOfDependents'] = X_copy['NumberOfDependents'].clip(0, self.NumberOfDependents_up_threshold).copy()
+        X_copy['MonthlyIncomeIsMissing'] = 0
+        X_copy.loc[X_copy['MonthlyIncome'].isna(), 'MonthlyIncomeIsMissing'] = 1
+        X_copy['MonthlyIncome'] = X['MonthlyIncome'].fillna(value=self.MonthlyIncome_fill_value)
+        X_copy['RevolvingUtilizationOverOne'] = 0.0
+        X_copy.loc[X_copy['RevolvingUtilizationOfUnsecuredLines'] > 1, 'RevolvingUtilizationOverOne'] = 1.0
+        X_copy['RevolvingUtilizationOfUnsecuredLines'] = X_copy['RevolvingUtilizationOfUnsecuredLines'].clip(0,
+                                                                                                             1).copy()
+        X_copy['DebtPayments'] = 0.0
+        X_copy.loc[X_copy['MonthlyIncome'] == 0, 'DebtPayments'] = X_copy.loc[X_copy['MonthlyIncome'] == 0, 'DebtRatio']
+        X_copy.loc[X_copy['MonthlyIncome'] != 0, 'DebtPayments'] = X_copy.loc[
+                                                                       X_copy['MonthlyIncome'] != 0, 'DebtRatio'] * \
+                                                                   X_copy.loc[
+                                                                       X_copy['MonthlyIncome'] != 0, 'MonthlyIncome']
+        X_copy['DebtRatio'] = X_copy['DebtRatio'].clip(0, self.DebtRatio_up_threshold).copy()
+        X_copy['DebtPayments_over_10k'] = 0.0
+        X_copy.loc[X_copy['DebtPayments'] > 10000, 'DebtPayments_over_10k'] = 1.0
+        X_copy['DebtPayments'] = X_copy['DebtPayments'].clip(0, 10000).copy()
+        X_copy['MonthlyIncome_over_20k'] = 0.0
+        X_copy.loc[X_copy['MonthlyIncome'] >= 20000, 'MonthlyIncome_over_20k'] = 1.0
+        X_copy['MonthlyIncome'] = X_copy['MonthlyIncome'].clip(0, 20000)
+        X_copy['Code96'] = 0.0
+        X_copy['Code98'] = 0.0
+        X_copy.loc[X_copy['NumberOfTime30-59DaysPastDueNotWorse'] == 96, 'Code96'] = 1.0
+        X_copy.loc[X_copy['NumberOfTime30-59DaysPastDueNotWorse'] == 98, 'Code98'] = 1.0
+        X_copy['PastDueRiskScore'] = (
+                self.PastDueRiskScore_weights[0] * X_copy['NumberOfTime30-59DaysPastDueNotWorse'] +
+                self.PastDueRiskScore_weights[1] * X_copy['NumberOfTime60-89DaysPastDueNotWorse'] +
+                self.PastDueRiskScore_weights[2] * X_copy['NumberOfTimes90DaysLate'])
+        X_copy.loc[X_copy['NumberOfTime30-59DaysPastDueNotWorse'] == 96, 'PastDueRiskScore'] = 96
+        X_copy.loc[X_copy['NumberOfTime30-59DaysPastDueNotWorse'] == 98, 'PastDueRiskScore'] = 98
+        X_copy = X_copy.drop(columns=['NumberOfTime30-59DaysPastDueNotWorse', 'NumberOfTime60-89DaysPastDueNotWorse',
+                                      'NumberOfTimes90DaysLate'])
+        X_copy['NumberOfOpenCreditLinesAndLoans_over_30'] = 0.0
+        X_copy.loc[X_copy['NumberOfOpenCreditLinesAndLoans'] > 30, 'NumberOfOpenCreditLinesAndLoans_over_30'] = 1.0
+        X_copy['NumberOfOpenCreditLinesAndLoans'] = X_copy['NumberOfOpenCreditLinesAndLoans'].clip(0, 30).copy()
+        X_copy['NumberRealEstateLoansOrLines_over_5'] = 0.0
+        X_copy.loc[X_copy['NumberRealEstateLoansOrLines'] > 5, 'NumberRealEstateLoansOrLines_over_5'] = 1.0
+        X_copy['NumberRealEstateLoansOrLines'] = X_copy['NumberRealEstateLoansOrLines'].clip(0, 5).copy()
+        X_copy['ConsumerCredit_Group'] = pd.cut(X_copy['NumberOfOpenCreditLinesAndLoans'],
+                                                bins=[0, 1, 2, 6, 15, 31],
+                                                labels=[
+                                                    '0_loans',
+                                                    '1_loans',
+                                                    '2-5_loans',
+                                                    '6-14_loans',
+                                                    '16-30_loans'
+                                                ])
+        consumer_dummy = pd.get_dummies(X_copy['ConsumerCredit_Group'], prefix='Consumer', drop_first=False).astype(
+            'float')
+        X_copy['RealEstateLoans_Group'] = pd.cut(X_copy['NumberRealEstateLoansOrLines'],
+                                                 bins=[-1, 0, 3, 100],
+                                                 labels=[
+                                                     '0_loans',
+                                                     '1-3_loans',
+                                                     '4+_loans',
+                                                 ])
+        estate_dummy = pd.get_dummies(X_copy['RealEstateLoans_Group'], prefix='RealEstateLoans',
+                                      drop_first=False).astype('float')
+        X_copy = pd.concat([X_copy, consumer_dummy, estate_dummy], axis=1).copy()
+        X_copy = X_copy.drop(columns=['ConsumerCredit_Group',
+                                      'RealEstateLoans_Group']).copy()
+        X_copy = X_copy.drop(columns=['Consumer_6-14_loans',
+                                      'RealEstateLoans_0_loans']).copy()
+        X_copy = X_copy.drop(columns=['NumberOfOpenCreditLinesAndLoans',
+                                      'NumberRealEstateLoansOrLines',
+                                      'MonthlyIncomeIsMissing',
+                                      'MonthlyIncome_over_20k',
+                                      'Consumer_0_loans',
+                                      'NumberOfOpenCreditLinesAndLoans_over_30']).copy()
+        if self.drop_special_codes:
+            X_copy = X_copy.drop(columns=['Code96', 'Code98'])
+        return X_copy
+    def fit_transform(self, X, y=None):
+        return self.fit(X, y).transform(X)
+    def clean_train(self, X, y=None):
+        mask = (
+                (X[
+                     'RevolvingUtilizationOfUnsecuredLines'] <= self.RevolvingUtilizationOfUnsecuredLines_drop_threshold) &
+                (X['age'] >= self.age_low_drop_threshold) &
+                (X['age'] <= self.age_up_drop_threshold) &
+                (X['NumberRealEstateLoansOrLines'] <= self.NumberRealEstateLoansOrLines_drop_threshold)
+        )
+        X_clean = X[mask].copy()
+        if y is not None:
+            y_clean = y[mask].copy()
+            return X_clean, y_clean
+        return X_clean
+from sklearn.base import BaseEstimator, TransformerMixin
+from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler, MaxAbsScaler
+class CreditScaler(BaseEstimator, TransformerMixin):
+    """
+    Масштабирует только не-булевые колонки.
+    Можно задать различные способы масштабирования
+    """
+    def __init__(self, scaler_type='standard'):
+        """
+        Параметр scaler_type - тип scaler'а.
+        Доступные типы:
+        - 'standard': StandardScaler (среднее=0, дисперсия=1)
+        - 'robust': RobustScaler (устойчив к выбросам)
+        - 'minmax': MinMaxScaler (приводит к [0, 1])
+        - 'maxabs': MaxAbsScaler (приводит к [-1, 1])
+        """
+        self.boolean_columns = [
+            'RevolvingUtilizationOverOne',
+            'DebtPayments_over_10k',
+            'Code96',
+            'Code98',
+            'NumberRealEstateLoansOrLines_over_5',
+            'Consumer_1_loans',
+            'Consumer_2-5_loans',
+            'Consumer_16-30_loans',
+            'RealEstateLoans_1-3_loans',
+            'RealEstateLoans_4+_loans'
+        ]
+        self.scaler_type = scaler_type
+        self._create_scaler()
+        # Эти переменные заполнятся во время fit
+        self.columns_to_scale_ = None
+        self.n_features_in_ = None
+        self.feature_names_in_ = None
+    def _create_scaler(self):
+        """Создает scaler по типу"""
+        if self.scaler_type == 'standard':
+            self.scaler = StandardScaler()
+        elif self.scaler_type == 'robust':
+            self.scaler = RobustScaler()
+        elif self.scaler_type == 'minmax':
+            self.scaler = MinMaxScaler()
+        elif self.scaler_type == 'maxabs':
+            self.scaler = MaxAbsScaler()
+        else:
+            raise ValueError(
+                f"Unknown scaler_type: {self.scaler_type}. "
+                f"Available: standard, robust, minmax, maxabs"
+            )
+    def fit(self, X, y=None):
+        """
+        Определяет колонки для масштабирования (все, кроме булевых)
+        и обучает scaler.
+        """
+        self.feature_names_in_ = X.columns.tolist()
+        self.n_features_in_ = len(self.feature_names_in_)
+        self.columns_to_scale_ = [
+            col for col in self.feature_names_in_
+            if col not in self.boolean_columns
+        ]
+        self.scaler.fit(X[self.columns_to_scale_])
+        return self
+    def transform(self, X, y=None):
+        """
+        Масштабирует только не-булевы колонки.
+        """
+        X_copy = X.copy()
+        X_copy[self.columns_to_scale_] = self.scaler.transform(X_copy[self.columns_to_scale_])
+        return X_copy
+    def fit_transform(self, X, y=None):
+        return self.fit(X, y).transform(X, y)
+    def get_feature_names_out(self, input_features=None):
+        """Для совместимости с sklearn"""
+        if input_features is not None:
+            return input_features
+        return self.feature_names_in_ if self.feature_names_in_ is not None else []
+    def set_params(self, **params):
+        """Для совместимости с GridSearchCV"""
+        if 'scaler_type' in params and params['scaler_type'] != self.scaler_type:
+            self.scaler_type = params['scaler_type']
+            self._create_scaler()
+        return super().set_params(**params)
+def check_business_rules(age, monthly_income, monthly_debt, debt_ratio,
+                         late_90, late_60_89, late_30_59, credit_lines,
+                         real_estate, utilization, dependents):
+    # КРИТИЧЕСКИЕ ПРАВИЛА - сразу отказ
+    if age < 18:
+        return {
+            'needs_manual': False,
+            'message': 'Возраст менее 18 лет - кредит не выдаётся',
+            'decision': 1  # отказ
+        }
+    # СПЕЦИАЛЬНЫЕ БАНКОВСКИЕ КОДЫ - сразу ручной разбор
+    if (late_90 == 98) or (late_60_89 == 98) or (late_30_59 == 98):
+        return {
+            'needs_manual': True,
+            'message': 'Код 98: Списание долга как безнадежного',
+            'decision': None
+        }
+    if (late_90 == 96) or (late_60_89 == 96) or (late_30_59 == 96):
+        return {
+            'needs_manual': True,
+            'message': 'Код 96: Изъятие залога или реализация имущества',
+            'decision': None
+        }
+    # КРИТИЧЕСКИЕ ПРАВИЛА - сразу ручной разбор
+    if age > 80:
+        return {
+            'needs_manual': True,
+            'message': 'Возраст > 80 лет - требуется ручной разбор (индивидуальные условия)',
+            'decision': None
+        }
+    if monthly_income > 1000000:
+        return {
+            'needs_manual': True,
+            'message': 'Доход свыше 1,000,000 $ - требуется ручной разбор',
+            'decision': None
+        }
+    if monthly_debt > 1000000:
+        return {
+            'needs_manual': True,
+            'message': 'Платежи свыше 1,000,000 $ - требуется ручной разбор',
+            'decision': None
+        }
+    if utilization > 2:
+        return {
+            'needs_manual': True,
+            'message': 'Использование кредитных средств превышает 200%',
+            'decision': None
+        }
+    if real_estate > 20:
+        return {
+            'needs_manual': True,
+            'message': 'Количество кредитов под залог недвижимости слишком велико - ручной разбор',
+            'decision': None
+        }
+    # 4. ВСЕ ПРОВЕРКИ ПРОЙДЕНЫ - допуск к авторазбору моделью
+    return {
+        'needs_manual': False,
+        'decision': None,
+    }

app/utils/data_loader.py ADDED Viewed

	@@ -0,0 +1,26 @@

+import streamlit as st
+import joblib
+import os
+@st.cache_resource
+def load_artifacts(models_path, preprocessor_path):
+    """Загрузка препроцессоров и моделей"""
+    preprocessor = joblib.load(os.path.join(preprocessor_path, 'preprocessor_150.pkl'))
+    scaler = joblib.load(os.path.join(preprocessor_path, 'scaler_150.pkl'))
+    models = {}
+    model_files = {
+        'Logistic Regression': 'logreg_150_model.pkl',
+        'XGBoost': 'xgb_150_model.pkl',
+        'LightGBM': 'lgbm_150_model.pkl',
+        'CatBoost': 'catboost_150_model.pkl',
+        'Random Forest': 'rfc_150_model.pkl'
+    }
+    for name, filename in model_files.items():
+        path = os.path.join(models_path, filename)
+        if os.path.exists(path):
+            models[name] = joblib.load(path)
+    return preprocessor, scaler, models

catboost_info/catboost_training.json ADDED Viewed

	@@ -0,0 +1,104 @@

+{
+"meta":{"test_sets":[],"test_metrics":[],"learn_metrics":[{"best_value":"Min","name":"Logloss"}],"launch_mode":"Train","parameters":"","iteration_count":100,"learn_sets":["learn"],"name":"experiment"},
+"iterations":[
+{"learn":[0.6582255385],"iteration":0,"passed_time":0.1064302509,"remaining_time":10.53659484},
+{"learn":[0.628929721],"iteration":1,"passed_time":0.1525804859,"remaining_time":7.476443808},
+{"learn":[0.6055823656],"iteration":2,"passed_time":0.1890582018,"remaining_time":6.112881857},
+{"learn":[0.585745295],"iteration":3,"passed_time":0.2252067115,"remaining_time":5.404961076},
+{"learn":[0.5691497866],"iteration":4,"passed_time":0.2406276923,"remaining_time":4.571926155},
+{"learn":[0.5553995801],"iteration":5,"passed_time":0.2564603788,"remaining_time":4.017879268},
+{"learn":[0.5431466822],"iteration":6,"passed_time":0.2927427626,"remaining_time":3.889296704},
+{"learn":[0.5321745596],"iteration":7,"passed_time":0.326945827,"remaining_time":3.75987701},
+{"learn":[0.5230197248],"iteration":8,"passed_time":0.3622708005,"remaining_time":3.662960316},
+{"learn":[0.5150673326],"iteration":9,"passed_time":0.3964141569,"remaining_time":3.567727412},
+{"learn":[0.5085723427],"iteration":10,"passed_time":0.4321486694,"remaining_time":3.496475598},
+{"learn":[0.5029521178],"iteration":11,"passed_time":0.4666833149,"remaining_time":3.422344309},
+{"learn":[0.4982952699],"iteration":12,"passed_time":0.4887040018,"remaining_time":3.270557551},
+{"learn":[0.4940193081],"iteration":13,"passed_time":0.5244568892,"remaining_time":3.221663748},
+{"learn":[0.4903079864],"iteration":14,"passed_time":0.5588645355,"remaining_time":3.166899035},
+{"learn":[0.4877126951],"iteration":15,"passed_time":0.5737366867,"remaining_time":3.012117605},
+{"learn":[0.4849442288],"iteration":16,"passed_time":0.594951629,"remaining_time":2.904763836},
+{"learn":[0.4823937275],"iteration":17,"passed_time":0.6301627283,"remaining_time":2.870741318},
+{"learn":[0.4803765605],"iteration":18,"passed_time":0.6663060298,"remaining_time":2.840567811},
+{"learn":[0.4781523185],"iteration":19,"passed_time":0.687587555,"remaining_time":2.75035022},
+{"learn":[0.4767059201],"iteration":20,"passed_time":0.7219467433,"remaining_time":2.715894891},
+{"learn":[0.475163465],"iteration":21,"passed_time":0.7562504736,"remaining_time":2.681251679},
+{"learn":[0.4741219727],"iteration":22,"passed_time":0.777308792,"remaining_time":2.602294651},
+{"learn":[0.473008657],"iteration":23,"passed_time":0.8123926421,"remaining_time":2.5725767},
+{"learn":[0.4722618181],"iteration":24,"passed_time":0.8479381142,"remaining_time":2.543814343},
+{"learn":[0.4714753648],"iteration":25,"passed_time":0.8643292969,"remaining_time":2.460014153},
+{"learn":[0.470390758],"iteration":26,"passed_time":0.904391114,"remaining_time":2.445205605},
+{"learn":[0.4696201438],"iteration":27,"passed_time":0.9384147212,"remaining_time":2.413066426},
+{"learn":[0.46859249],"iteration":28,"passed_time":0.9729809498,"remaining_time":2.382125774},
+{"learn":[0.4677104182],"iteration":29,"passed_time":1.007459887,"remaining_time":2.350739737},
+{"learn":[0.4673133885],"iteration":30,"passed_time":1.019293767,"remaining_time":2.268750643},
+{"learn":[0.4667841252],"iteration":31,"passed_time":1.031243396,"remaining_time":2.191392216},
+{"learn":[0.4664536674],"iteration":32,"passed_time":1.066954367,"remaining_time":2.166240684},
+{"learn":[0.4662622609],"iteration":33,"passed_time":1.076521387,"remaining_time":2.089717986},
+{"learn":[0.466086662],"iteration":34,"passed_time":1.088162018,"remaining_time":2.020872318},
+{"learn":[0.4657380808],"iteration":35,"passed_time":1.122871037,"remaining_time":1.996215177},
+{"learn":[0.4651284039],"iteration":36,"passed_time":1.157755597,"remaining_time":1.971313584},
+{"learn":[0.4646531445],"iteration":37,"passed_time":1.195417263,"remaining_time":1.95041764},
+{"learn":[0.4641257326],"iteration":38,"passed_time":1.230054783,"remaining_time":1.92393184},
+{"learn":[0.4637898175],"iteration":39,"passed_time":1.246455715,"remaining_time":1.869683573},
+{"learn":[0.4633285186],"iteration":40,"passed_time":1.280777487,"remaining_time":1.843070043},
+{"learn":[0.4628428368],"iteration":41,"passed_time":1.315556006,"remaining_time":1.816720199},
+{"learn":[0.4626124403],"iteration":42,"passed_time":1.349455031,"remaining_time":1.788812483},
+{"learn":[0.4623858706],"iteration":43,"passed_time":1.362016739,"remaining_time":1.73347585},
+{"learn":[0.4621286714],"iteration":44,"passed_time":1.398965243,"remaining_time":1.709846409},
+{"learn":[0.4617974001],"iteration":45,"passed_time":1.433543555,"remaining_time":1.682855478},
+{"learn":[0.4614775166],"iteration":46,"passed_time":1.466964625,"remaining_time":1.654236705},
+{"learn":[0.46142579],"iteration":47,"passed_time":1.479586041,"remaining_time":1.602884878},
+{"learn":[0.4614011205],"iteration":48,"passed_time":1.486797243,"remaining_time":1.547482845},
+{"learn":[0.4611845342],"iteration":49,"passed_time":1.521749302,"remaining_time":1.521749302},
+{"learn":[0.4609852804],"iteration":50,"passed_time":1.557818146,"remaining_time":1.496727238},
+{"learn":[0.4604321277],"iteration":51,"passed_time":1.596752012,"remaining_time":1.473924934},
+{"learn":[0.4601645791],"iteration":52,"passed_time":1.646375224,"remaining_time":1.459993123},
+{"learn":[0.459804458],"iteration":53,"passed_time":1.683746309,"remaining_time":1.434302411},
+{"learn":[0.4592589475],"iteration":54,"passed_time":1.71827358,"remaining_time":1.405860201},
+{"learn":[0.4589643366],"iteration":55,"passed_time":1.752749892,"remaining_time":1.377160629},
+{"learn":[0.4585201818],"iteration":56,"passed_time":1.787086539,"remaining_time":1.348153003},
+{"learn":[0.4582657803],"iteration":57,"passed_time":1.822507553,"remaining_time":1.319746849},
+{"learn":[0.4580557799],"iteration":58,"passed_time":1.844325617,"remaining_time":1.281650005},
+{"learn":[0.457864554],"iteration":59,"passed_time":1.877865186,"remaining_time":1.251910124},
+{"learn":[0.4576167412],"iteration":60,"passed_time":1.915169063,"remaining_time":1.224452351},
+{"learn":[0.4575874936],"iteration":61,"passed_time":1.924591958,"remaining_time":1.17958862},
+{"learn":[0.457362279],"iteration":62,"passed_time":1.960633385,"remaining_time":1.151483099},
+{"learn":[0.4572946663],"iteration":63,"passed_time":1.972484848,"remaining_time":1.109522727},
+{"learn":[0.4569830294],"iteration":64,"passed_time":2.007666031,"remaining_time":1.08105094},
+{"learn":[0.456610445],"iteration":65,"passed_time":2.042187927,"remaining_time":1.052036205},
+{"learn":[0.4560918865],"iteration":66,"passed_time":2.076725989,"remaining_time":1.022865039},
+{"learn":[0.4558479503],"iteration":67,"passed_time":2.110631805,"remaining_time":0.9932384965},
+{"learn":[0.455740418],"iteration":68,"passed_time":2.124730753,"remaining_time":0.9545891789},
+{"learn":[0.455501269],"iteration":69,"passed_time":2.159214566,"remaining_time":0.9253776709},
+{"learn":[0.4554787935],"iteration":70,"passed_time":2.168732127,"remaining_time":0.8858201647},
+{"learn":[0.4552744806],"iteration":71,"passed_time":2.205001553,"remaining_time":0.8575006039},
+{"learn":[0.455234248],"iteration":72,"passed_time":2.220812656,"remaining_time":0.8213964619},
+{"learn":[0.455137986],"iteration":73,"passed_time":2.255120136,"remaining_time":0.7923395074},
+{"learn":[0.4549484305],"iteration":74,"passed_time":2.28992153,"remaining_time":0.7633071767},
+{"learn":[0.4548062199],"iteration":75,"passed_time":2.324904798,"remaining_time":0.7341804624},
+{"learn":[0.4546474797],"iteration":76,"passed_time":2.360039856,"remaining_time":0.7049469699},
+{"learn":[0.4545581835],"iteration":77,"passed_time":2.372090859,"remaining_time":0.6690512679},
+{"learn":[0.4544265313],"iteration":78,"passed_time":2.40573901,"remaining_time":0.6395002433},
+{"learn":[0.4544030978],"iteration":79,"passed_time":2.413539667,"remaining_time":0.6033849167},
+{"learn":[0.4543650724],"iteration":80,"passed_time":2.422935896,"remaining_time":0.568342988},
+{"learn":[0.4542698101],"iteration":81,"passed_time":2.455982135,"remaining_time":0.5391180296},
+{"learn":[0.4540294101],"iteration":82,"passed_time":2.490015159,"remaining_time":0.5100031048},
+{"learn":[0.4539463005],"iteration":83,"passed_time":2.501245001,"remaining_time":0.4764276192},
+{"learn":[0.4537784829],"iteration":84,"passed_time":2.53640835,"remaining_time":0.4476014736},
+{"learn":[0.4536943889],"iteration":85,"passed_time":2.572798317,"remaining_time":0.4188276329},
+{"learn":[0.4536386999],"iteration":86,"passed_time":2.608432288,"remaining_time":0.3897657442},
+{"learn":[0.4533342039],"iteration":87,"passed_time":2.644785463,"remaining_time":0.3606525632},
+{"learn":[0.4531946585],"iteration":88,"passed_time":2.679509774,"remaining_time":0.3311753653},
+{"learn":[0.4529846134],"iteration":89,"passed_time":2.713719672,"remaining_time":0.301524408},
+{"learn":[0.4529583581],"iteration":90,"passed_time":2.728664447,"remaining_time":0.2698679124},
+{"learn":[0.4528171854],"iteration":91,"passed_time":2.765771076,"remaining_time":0.2405018327},
+{"learn":[0.4526575987],"iteration":92,"passed_time":2.800900134,"remaining_time":0.2108204402},
+{"learn":[0.4526170824],"iteration":93,"passed_time":2.822734239,"remaining_time":0.1801745259},
+{"learn":[0.4525149982],"iteration":94,"passed_time":2.857135885,"remaining_time":0.1503755729},
+{"learn":[0.4524663385],"iteration":95,"passed_time":2.892015112,"remaining_time":0.1205006297},
+{"learn":[0.4524315166],"iteration":96,"passed_time":2.908353878,"remaining_time":0.08994908901},
+{"learn":[0.4523241677],"iteration":97,"passed_time":2.943535644,"remaining_time":0.060072156},
+{"learn":[0.4523010903],"iteration":98,"passed_time":2.964563379,"remaining_time":0.02994508464},
+{"learn":[0.4522791181],"iteration":99,"passed_time":2.980352983,"remaining_time":0}
+]}

catboost_info/learn/events.out.tfevents ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f1fe5092567732e395a3420a1205c4190f0e63d50edc678509bd4104fc34a503
+size 5398

catboost_info/learn_error.tsv ADDED Viewed

	@@ -0,0 +1,101 @@

+iter	Logloss
+0	0.6582255385
+1	0.628929721
+2	0.6055823656
+3	0.585745295
+4	0.5691497866
+5	0.5553995801
+6	0.5431466822
+7	0.5321745596
+8	0.5230197248
+9	0.5150673326
+10	0.5085723427
+11	0.5029521178
+12	0.4982952699
+13	0.4940193081
+14	0.4903079864
+15	0.4877126951
+16	0.4849442288
+17	0.4823937275
+18	0.4803765605
+19	0.4781523185
+20	0.4767059201
+21	0.475163465
+22	0.4741219727
+23	0.473008657
+24	0.4722618181
+25	0.4714753648
+26	0.470390758
+27	0.4696201438
+28	0.46859249
+29	0.4677104182
+30	0.4673133885
+31	0.4667841252
+32	0.4664536674
+33	0.4662622609
+34	0.466086662
+35	0.4657380808
+36	0.4651284039
+37	0.4646531445
+38	0.4641257326
+39	0.4637898175
+40	0.4633285186
+41	0.4628428368
+42	0.4626124403
+43	0.4623858706
+44	0.4621286714
+45	0.4617974001
+46	0.4614775166
+47	0.46142579
+48	0.4614011205
+49	0.4611845342
+50	0.4609852804
+51	0.4604321277
+52	0.4601645791
+53	0.459804458
+54	0.4592589475
+55	0.4589643366
+56	0.4585201818
+57	0.4582657803
+58	0.4580557799
+59	0.457864554
+60	0.4576167412
+61	0.4575874936
+62	0.457362279
+63	0.4572946663
+64	0.4569830294
+65	0.456610445
+66	0.4560918865
+67	0.4558479503
+68	0.455740418
+69	0.455501269
+70	0.4554787935
+71	0.4552744806
+72	0.455234248
+73	0.455137986
+74	0.4549484305
+75	0.4548062199
+76	0.4546474797
+77	0.4545581835
+78	0.4544265313
+79	0.4544030978
+80	0.4543650724
+81	0.4542698101
+82	0.4540294101
+83	0.4539463005
+84	0.4537784829
+85	0.4536943889
+86	0.4536386999
+87	0.4533342039
+88	0.4531946585
+89	0.4529846134
+90	0.4529583581
+91	0.4528171854
+92	0.4526575987
+93	0.4526170824
+94	0.4525149982
+95	0.4524663385
+96	0.4524315166
+97	0.4523241677
+98	0.4523010903
+99	0.4522791181

catboost_info/time_left.tsv ADDED Viewed

	@@ -0,0 +1,101 @@

+iter	Passed	Remaining
+0	106	10536
+1	152	7476
+2	189	6112
+3	225	5404
+4	240	4571
+5	256	4017
+6	292	3889
+7	326	3759
+8	362	3662
+9	396	3567
+10	432	3496
+11	466	3422
+12	488	3270
+13	524	3221
+14	558	3166
+15	573	3012
+16	594	2904
+17	630	2870
+18	666	2840
+19	687	2750
+20	721	2715
+21	756	2681
+22	777	2602
+23	812	2572
+24	847	2543
+25	864	2460
+26	904	2445
+27	938	2413
+28	972	2382
+29	1007	2350
+30	1019	2268
+31	1031	2191
+32	1066	2166
+33	1076	2089
+34	1088	2020
+35	1122	1996
+36	1157	1971
+37	1195	1950
+38	1230	1923
+39	1246	1869
+40	1280	1843
+41	1315	1816
+42	1349	1788
+43	1362	1733
+44	1398	1709
+45	1433	1682
+46	1466	1654
+47	1479	1602
+48	1486	1547
+49	1521	1521
+50	1557	1496
+51	1596	1473
+52	1646	1459
+53	1683	1434
+54	1718	1405
+55	1752	1377
+56	1787	1348
+57	1822	1319
+58	1844	1281
+59	1877	1251
+60	1915	1224
+61	1924	1179
+62	1960	1151
+63	1972	1109
+64	2007	1081
+65	2042	1052
+66	2076	1022
+67	2110	993
+68	2124	954
+69	2159	925
+70	2168	885
+71	2205	857
+72	2220	821
+73	2255	792
+74	2289	763
+75	2324	734
+76	2360	704
+77	2372	669
+78	2405	639
+79	2413	603
+80	2422	568
+81	2455	539
+82	2490	510
+83	2501	476
+84	2536	447
+85	2572	418
+86	2608	389
+87	2644	360
+88	2679	331
+89	2713	301
+90	2728	269
+91	2765	240
+92	2800	210
+93	2822	180
+94	2857	150
+95	2892	120
+96	2908	89
+97	2943	60
+98	2964	29
+99	2980	0

catboost_info/tmp/cat_feature_index.17f2f383-11eeddeb-4a42fe68-9d5bbcbb.tmp ADDED Viewed

Binary file (4 Bytes). View file

datasets/.DS_Store ADDED Viewed

Binary file (8.2 kB). View file