diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..88bdef86a8c42930379763c2c05afc1d7267557f
Binary files /dev/null and b/.DS_Store differ
diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..e79f24b4ad453e7c4cb335dd8bc1e06a1f4a4e68 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.csv filter=lfs diff=lfs merge=lfs -text
diff --git a/app/.DS_Store b/app/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..4a65af87771625b57ec3f9f8fd7b627b996ba558
Binary files /dev/null and b/app/.DS_Store differ
diff --git a/app/.streamlit/config.toml b/app/.streamlit/config.toml
new file mode 100644
index 0000000000000000000000000000000000000000..74812cd03e02b4f059f0fdb02781b9d0a16cb546
--- /dev/null
+++ b/app/.streamlit/config.toml
@@ -0,0 +1,2 @@
+[client]
+showSidebarNavigation = false
\ No newline at end of file
diff --git a/app/__init__.py b/app/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/app/__pycache__/__init__.cpython-311.pyc b/app/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7028699e2e7028011cd037f3ab691bffb3de06d4
Binary files /dev/null and b/app/__pycache__/__init__.cpython-311.pyc differ
diff --git a/app/main.py b/app/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..a33732bb7043f95c64c7bcd5802d022b4c3cb7d2
--- /dev/null
+++ b/app/main.py
@@ -0,0 +1,33 @@
+import streamlit as st
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(__file__)))  # put the parent of app/ on sys.path
+
+st.set_page_config(
+ page_title="GiveMeSomeCredit",
+ page_icon="🏦",
+ layout="wide",
+ initial_sidebar_state="collapsed" # ← collapses the sidebar by default
+)
+
+
+st.title("🏦 GiveMeSomeCredit - Кредитный скоринг")
+st.markdown("---")
+
+col1, col2 = st.columns(2)
+
+with col1:
+ st.subheader("📝 Анкета")
+ if st.button("Перейти к анкете"):
+ st.switch_page("pages/application.py") # ← will invoke the page's main()
+
+with col2:
+ st.subheader("📊 Симуляция")
+ if st.button("Перейти к симуляции"):
+ st.switch_page("pages/simulation.py") # ← will invoke the page's main()
+
+st.markdown("---")
+
+# Launch with: streamlit run app/main.py
+
diff --git a/app/models/__pycache__/escalation.cpython-311.pyc b/app/models/__pycache__/escalation.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..55007ca35959fbf9ed3e85f6fbd4e953abcda134
Binary files /dev/null and b/app/models/__pycache__/escalation.cpython-311.pyc differ
diff --git a/app/models/__pycache__/interpretation.cpython-311.pyc b/app/models/__pycache__/interpretation.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0625ac46ec9b574f94902d48903e82f8a904b997
Binary files /dev/null and b/app/models/__pycache__/interpretation.cpython-311.pyc differ
diff --git a/app/models/escalation.py b/app/models/escalation.py
new file mode 100644
index 0000000000000000000000000000000000000000..aeab70b6b3f56fad41a1297608eed5e54eceab2e
--- /dev/null
+++ b/app/models/escalation.py
@@ -0,0 +1,267 @@
+import numpy as np
+
+
+def check_business_rules(df):
+    """
+    Evaluate the hard business rules for a batch of applications.
+
+    Auto-reject rules take precedence: a row that is auto-rejected is never
+    also sent to manual review and keeps its rejection message. Among the
+    manual-review rules the last matching rule supplies the message.
+
+    Args:
+        df: DataFrame with the columns 'age', 'MonthlyIncome', 'DebtRatio',
+            'NumberOfTimes90DaysLate', 'NumberOfTime60-89DaysPastDueNotWorse',
+            'NumberOfTime30-59DaysPastDueNotWorse',
+            'NumberRealEstateLoansOrLines' and
+            'RevolvingUtilizationOfUnsecuredLines'. NaN is treated as 0.
+
+    Returns:
+        Tuple ``(manual_mask, auto_reject_mask, messages, auto_decisions)``:
+        - manual_mask: bool array, True = send to manual review
+        - auto_reject_mask: bool array, True = reject immediately
+        - messages: str array with the reason ('' when no rule fired)
+        - auto_decisions: int array, 1 where auto_reject_mask is True, else 0
+    """
+    n = len(df)
+
+    def column(name):
+        # NaN becomes 0, so e.g. a missing age falls under the "< 18" rule.
+        return df[name].fillna(0).values
+
+    age = column('age')
+    monthly_income = column('MonthlyIncome')
+    debt_ratio = column('DebtRatio')
+    # Without a positive income DebtRatio is taken as the payment amount itself.
+    monthly_debt = np.where(monthly_income > 0,
+                            debt_ratio * monthly_income,
+                            debt_ratio)
+    late_columns = [
+        column('NumberOfTimes90DaysLate'),
+        column('NumberOfTime60-89DaysPastDueNotWorse'),
+        column('NumberOfTime30-59DaysPastDueNotWorse'),
+    ]
+    real_estate = column('NumberRealEstateLoansOrLines')
+    utilization = column('RevolvingUtilizationOfUnsecuredLines')
+
+    def has_late_code(code):
+        # True where any of the three late-payment counters equals `code`.
+        return np.logical_or.reduce([late == code for late in late_columns])
+
+    # Critical rules: immediate rejection.
+    reject_rules = [
+        (age < 18, 'Возраст менее 18 лет - кредит не выдаётся'),
+    ]
+    # Special bank codes and out-of-range values: manual review.
+    manual_rules = [
+        (has_late_code(98), 'Код 98: Списание долга как безнадежного'),
+        (has_late_code(96), 'Код 96: Изъятие залога или реализация имущества'),
+        (age > 80, 'Возраст > 80 лет - требуется ручной разбор (индивидуальные условия)'),
+        (monthly_income > 1000000, 'Доход свыше 1,000,000 $ - требуется ручной разбор'),
+        (monthly_debt > 1000000, 'Платежи свыше 1,000,000 $ - требуется ручной разбор'),
+        (utilization > 2, 'Использование кредитных средств превышает 200%'),
+        (real_estate > 20, 'Количество кредитов под залог недвижимости слишком велико - ручной разбор'),
+    ]
+
+    messages = np.full(n, '', dtype=object)
+    manual_mask = np.zeros(n, dtype=bool)
+    auto_reject_mask = np.zeros(n, dtype=bool)
+    # Rejections are applied last so that their message and outcome win.
+    for target, rules in ((manual_mask, manual_rules), (auto_reject_mask, reject_rules)):
+        for hit, text in rules:
+            hit = np.asarray(hit, dtype=bool)
+            target |= hit
+            messages[hit] = text
+    manual_mask &= ~auto_reject_mask
+    auto_decisions = auto_reject_mask.astype(int)
+    return manual_mask, auto_reject_mask, messages.astype(str), auto_decisions
+
+
+def escalation_decision(applications_df, lr_model, second_model, second_model_name,
+                        threshold=0.5, lr_margins=(0.35,), second_margins=(0.4,),
+                        preprocessor=None, scaler=None):
+    """
+    Decide a batch of applications with the escalation cascade.
+
+    1. Business rules: some applications are rejected immediately, some go
+       straight to manual review (a rejection wins if both apply).
+    2. The remaining applications are scored by the logistic regression.
+    3. Where the LR is not confident the second model is asked; where that
+       one is not confident either, the application goes to manual review.
+
+    A model is confident when its probability is at least the margin away
+    from ``threshold``.
+
+    Args:
+        applications_df: raw applications, one row per application.
+        lr_model: classifier exposing ``predict_proba``.
+        second_model: fallback classifier exposing ``predict_proba``.
+        second_model_name: label of the second model used in the output.
+        threshold: probability at or above which ``final_decision`` is 1.
+        lr_margins: one margin for both sides of the threshold, or a
+            ``(below, above)`` pair, for the LR.
+        second_margins: same as ``lr_margins`` for the second model.
+        preprocessor: object whose ``transform`` prepares the raw rows.
+        scaler: object whose ``transform`` scales the preprocessed rows.
+
+    Returns:
+        ``(decisions, manual_mask, stats)``: one decision dict per
+        application, a bool array marking manual-review rows and a dict of
+        counters per decision route.
+        Business-rule decisions carry a 'message'; model decisions
+        carry the probabilities and margins of the models that ran.
+
+    Raises:
+        ValueError: if rows must be scored but ``preprocessor`` or
+            ``scaler`` is None.
+    """
+    def margin_and_confidence(probas, margins):
+        # One margin applies to both sides; two mean (below, above) the threshold.
+        low, high = (margins[0], margins[0]) if len(margins) == 1 else (margins[0], margins[1])
+        distance = np.abs(probas - threshold)
+        return distance, distance >= np.where(probas < threshold, low, high)
+
+    n = len(applications_df)
+    decisions = [None] * n
+    manual_mask = np.zeros(n, dtype=bool)
+
+    # Counters per decision route
+    stats = {
+        'business_manual': 0,  # manual review by business rules
+        'business_auto': 0,  # automatic rejection by business rules
+        'lr_confident': 0,  # decided confidently by the LR
+        'second_confident': 0,  # decided confidently by the second model
+        'second_uncertain': 0,  # second model not confident -> manual review
+        'total': n
+    }
+
+    # 1. Business rules
+    bus_manual_mask, bus_reject_mask, bus_messages, _ = check_business_rules(applications_df)
+    for i in range(n):
+        if bus_reject_mask[i]:
+            # Immediate rejection
+            stats['business_auto'] += 1
+            decisions[i] = {
+                'final_decision': 1,
+                'model_used': 'Business Rules',
+                'needs_review': False,
+                'probability': 1.0,
+                'message': bus_messages[i],
+                'lr_proba': None,
+                'second_proba': None,
+                'decision_path': [f"❌ Бизнес-правила: {bus_messages[i]}"]
+            }
+        elif bus_manual_mask[i]:
+            # Immediate manual review; `elif` keeps a rejected application from
+            # being overwritten and counted a second time.
+            stats['business_manual'] += 1
+            manual_mask[i] = True
+            decisions[i] = {
+                'final_decision': None,
+                'model_used': 'Business Rules',
+                'needs_review': True,
+                'probability': None,
+                'message': bus_messages[i],
+                'lr_proba': None,
+                'second_proba': None,
+                'decision_path': [f"⚠️ Бизнес-правила: {bus_messages[i]}"]
+            }
+
+    # 2. Applications not settled by the business rules go to the models
+    model_indices = [i for i, decision in enumerate(decisions) if decision is None]
+    if not model_indices:
+        return decisions, manual_mask, stats
+    if preprocessor is None or scaler is None:
+        raise ValueError("preprocessor and scaler are required to score applications")
+
+    # 3. Model scoring: preprocessing and LR predictions in one batch
+    df_models = applications_df.iloc[model_indices]
+    processed_scaled = scaler.transform(preprocessor.transform(df_models))
+    lr_probas = lr_model.predict_proba(processed_scaled)[:, 1]
+    lr_margin_values, lr_confident = margin_and_confidence(lr_probas, lr_margins)
+
+    # Confident LR rows are final; the rest is collected for the second model
+    uncertain_positions = []
+    for j, idx in enumerate(model_indices):
+        if not lr_confident[j]:
+            uncertain_positions.append(j)
+            continue
+        stats['lr_confident'] += 1
+        decisions[idx] = {
+            'final_decision': int(lr_probas[j] >= threshold),
+            'probability': lr_probas[j],
+            'model_used': 'Logistic Regression',
+            'needs_review': False,
+            'lr_proba': lr_probas[j],
+            'second_proba': None,
+            'lr_margin': lr_margin_values[j],
+            'lr_confident': True,
+            'second_used': False,
+            'decision_path': [
+                f"1️⃣ Logistic Regression: {lr_probas[j]:.1%} (отступ: {lr_margin_values[j]:.1%})",
+                f" ✅ LR уверена - финальное решение"
+            ]
+        }
+
+    if not uncertain_positions:
+        return decisions, manual_mask, stats
+
+    # Second model on the uncertain rows only (batch); the scaler output may
+    # be a DataFrame or a plain array
+    if hasattr(processed_scaled, 'iloc'):
+        processed_uncertain_scaled = processed_scaled.iloc[uncertain_positions]
+    else:
+        processed_uncertain_scaled = processed_scaled[uncertain_positions]
+    second_probas = second_model.predict_proba(processed_uncertain_scaled)[:, 1]
+    second_margin_values, second_confident = margin_and_confidence(second_probas, second_margins)
+
+    for k, j in enumerate(uncertain_positions):
+        idx = model_indices[j]
+        proba = second_probas[k]
+        second_margin = second_margin_values[k]
+
+        # Build the decision path
+        path = [
+            f"1️⃣ Logistic Regression: {lr_probas[j]:.1%} (отступ: {lr_margin_values[j]:.1%})",
+            f" ⚠️ LR не уверена → вызываем {second_model_name}",
+            f"2️⃣ {second_model_name}: {proba:.1%} (отступ: {second_margin:.1%})"
+        ]
+
+        if second_confident[k]:
+            stats['second_confident'] += 1
+            path.append(f" ✅ {second_model_name} уверен - финальное решение")
+            decisions[idx] = {
+                'final_decision': int(proba >= threshold),
+                'probability': proba,
+                'model_used': second_model_name,
+                'needs_review': False,
+                'lr_proba': lr_probas[j],
+                'second_proba': proba,
+                'lr_margin': lr_margin_values[j],
+                'second_margin': second_margin,
+                'lr_confident': False,
+                'second_confident': True,
+                'second_used': True,
+                'decision_path': path
+            }
+        else:
+            stats['second_uncertain'] += 1
+            path.append(f" ⚠️ {second_model_name} не уверен → ручной разбор")
+            manual_mask[idx] = True
+            decisions[idx] = {
+                'final_decision': None,
+                'probability': proba,
+                'model_used': 'Manual Review',
+                'needs_review': True,
+                'lr_proba': lr_probas[j],
+                'second_proba': proba,
+                'lr_margin': lr_margin_values[j],
+                'second_margin': second_margin,
+                'lr_confident': False,
+                'second_confident': False,
+                'second_used': True,
+                'message': 'Модели не уверены в решении',
+                'decision_path': path
+            }
+
+    return decisions, manual_mask, stats
\ No newline at end of file
diff --git a/app/models/interpretation.py b/app/models/interpretation.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d5e086144cf3235724ea44e5206e6a46ce1b35f
--- /dev/null
+++ b/app/models/interpretation.py
@@ -0,0 +1,194 @@
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+FEATURE_DESCRIPTIONS = { ... }
+
+def get_feature_display_name(feature_name):
+    """Return the mapped description of a feature, or a prettified version of its name."""
+    try:
+        return FEATURE_DESCRIPTIONS[feature_name]
+    except KeyError:
+        pretty = feature_name.replace('_', ' ').title()
+        return pretty.replace('Over', '>').replace('Loans', 'Кредитов')
+
+
+def interpret_lr(features, lr_model, feature_names):
+    """Explain the logistic-regression prediction for the first row of ``features``.
+
+    Returns a dict with the per-feature logit contributions (coefficient * value),
+    the marginal effects (change in probability when a feature is set to 0),
+    the probability and logit rebuilt from the contributions, and the intercept.
+    """
+    if isinstance(features, np.ndarray):
+        features = pd.DataFrame(features, columns=feature_names)
+    intercept = lr_model.intercept_[0]
+
+    # Contribution of every feature to the logit, strongest first
+    contributions = pd.DataFrame({
+        'feature': feature_names,
+        'coefficient': lr_model.coef_[0],
+        'value': features.iloc[0].values
+    })
+    contributions['logit_contribution'] = contributions['coefficient'] * contributions['value']
+    contributions['abs_logit'] = abs(contributions['logit_contribution'])
+    contributions = contributions.sort_values('abs_logit', ascending=False)
+
+    # Marginal effect: probability change when one feature of row 0 is zeroed
+    base_proba = lr_model.predict_proba(features)[0, 1]
+    raw_values = features.values
+
+    def effect_of(position, feature):
+        zeroed = raw_values.copy()
+        zeroed[0, position] = 0
+        effect = base_proba - lr_model.predict_proba(zeroed)[0, 1]
+        return {'feature': feature, 'marginal_effect': effect, 'abs_marginal': abs(effect)}
+
+    effects = [effect_of(i, name) for i, name in enumerate(feature_names)]
+    marginal_df = pd.DataFrame(effects).sort_values('abs_marginal', ascending=False)
+
+    # Probability rebuilt from the intercept and the summed contributions
+    logit = intercept + contributions['logit_contribution'].sum()
+    return {
+        'logit_contributions': contributions,
+        'marginal_effects': marginal_df,
+        'probability': 1 / (1 + np.exp(-logit)),
+        'logit': logit,
+        'intercept': intercept
+    }
+
+def plot_feature_importance_sns(importance_df, value_col='logit_contribution', title="Вклад признаков в логит", top_n=10):
+    """Draw the first ``top_n`` rows of ``importance_df`` as horizontal bars sorted by ``value_col``; return the figure."""
+    df = importance_df.head(top_n).sort_values(value_col, ascending=True)
+    positions = list(range(len(df)))
+
+    fig, ax = plt.subplots(figsize=(10, 6), facecolor='#f8f9fa')
+    ax.set_facecolor('#f8f9fa')
+    colors = ['#d7191c' if x > 0 else '#1a9641' if x < 0 else '#ffffbf' for x in df[value_col]]
+    bars = ax.barh(positions, df[value_col], color=colors, edgecolor='white', linewidth=1.5, alpha=0.9)
+
+    for bar, val in zip(bars, df[value_col]):
+        if abs(val) > 0.02:  # tiny bars get no value label
+            x_pos = val - 0.02 if val > 0 else val + 0.02
+            ha = 'right' if val > 0 else 'left'
+            ax.text(x_pos, bar.get_y() + bar.get_height() / 2, f'{val:.3f}', ha=ha, va='center', fontsize=9)
+
+    ax.axvline(x=0, color='#495057', linestyle='-', linewidth=1, alpha=0.3)
+    ax.grid(axis='x', alpha=0.15, linestyle='--', color='#adb5bd')
+    ax.set_axisbelow(True)
+    ax.set_xlabel('Вклад в логит', fontsize=11)
+    ax.set_ylabel('')
+    ax.set_title(title, fontsize=12, fontweight='bold', pad=15)
+    ax.set_yticks(positions)  # fix the tick positions before labelling them
+    ax.set_yticklabels([get_feature_display_name(x) for x in df['feature']], fontsize=10)
+    sns.despine(top=True, right=True, left=False, bottom=False)
+    plt.tight_layout()
+    return fig
+
+def plot_marginal_effects_sns(marginal_df, title="Влияние на вероятность дефолта", top_n=10):
+    """Draw the first ``top_n`` marginal effects as horizontal bars (percent axis) and return the figure."""
+    df = marginal_df.head(top_n).sort_values('marginal_effect', ascending=True)
+    positions = list(range(len(df)))
+    fig, ax = plt.subplots(figsize=(10, 6), facecolor='#f8f9fa')
+    ax.set_facecolor('#f8f9fa')
+    colors = ['#d7191c' if x > 0 else '#1a9641' if x < 0 else '#ffffbf' for x in df['marginal_effect']]
+    bars = ax.barh(positions, df['marginal_effect'], color=colors, edgecolor='white', linewidth=1.5, alpha=0.9)
+
+    for bar, val in zip(bars, df['marginal_effect']):
+        if abs(val) > 0.01:  # tiny bars get no value label
+            x_pos = val - 0.01 if val > 0 else val + 0.01
+            ha = 'right' if val > 0 else 'left'
+            ax.text(x_pos, bar.get_y() + bar.get_height() / 2, f'{val:.1%}', ha=ha, va='center', fontsize=9)
+
+    ax.axvline(x=0, color='#495057', linestyle='-', linewidth=1, alpha=0.3)
+    ax.grid(axis='x', alpha=0.15, linestyle='--', color='#adb5bd')
+    ax.set_axisbelow(True)
+    ax.set_xlabel('Изменение вероятности', fontsize=11)
+    ax.set_ylabel('')
+    ax.set_title(title, fontsize=12, fontweight='bold', pad=15)
+    ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{x:.0%}'))
+    ax.set_yticks(positions)  # fix the tick positions before labelling them
+    ax.set_yticklabels([get_feature_display_name(x) for x in df['feature']], fontsize=10)
+    sns.despine(top=True, right=True, left=False, bottom=False)
+    plt.tight_layout()
+    return fig
+
+
+def plot_shap_analysis(second_model, processed_scaled, feature_names, second_model_name):
+    """Show the SHAP analysis (waterfall plot and value tables) of the first row in Streamlit."""
+    import streamlit as st
+    st.markdown("---")
+    st.subheader(f"⚡ Детальный анализ: {second_model_name} (SHAP)")
+    with st.spinner("🔄 Рассчитываем SHAP значения..."):
+        try:
+            import shap
+        except ImportError as e:  # only a missing package warrants the install hint
+            st.error(f"❌ Ошибка SHAP: {e}")
+            st.info("Установите shap: `pip install shap`")
+            return
+        try:
+            explainer = shap.TreeExplainer(second_model)
+            shap_values = explainer.shap_values(processed_scaled)
+            base_value = explainer.expected_value
+            # Per-class output (list or 3-D array): keep the positive class for values and base value
+            if isinstance(shap_values, list):
+                shap_values = shap_values[1]
+            elif np.ndim(shap_values) == 3:
+                shap_values = shap_values[:, :, 1]
+            if np.ndim(base_value) > 0:
+                base_value = base_value[1] if len(base_value) > 1 else base_value[0]
+            row_values = shap_values[0]
+
+            # 1. Waterfall plot
+            fig, ax = plt.subplots(figsize=(12, 7))
+            shap.waterfall_plot(
+                shap.Explanation(
+                    values=row_values,
+                    base_values=base_value,
+                    data=processed_scaled.iloc[0].values,
+                    feature_names=feature_names
+                ),
+                show=False,
+            )
+            plt.tight_layout()
+            st.pyplot(fig)
+
+            # 2. How to read the plot
+            with st.expander("📋 Как читать SHAP график?"):
+                st.markdown("""
+                - **f(x)** = итоговое предсказание модели
+                - **base value** = среднее предсказание по всем клиентам
+                - 🔴 Красное → признаки, повышающие риск
+                - 🔵 Синее → признаки, снижающие риск
+                """)
+
+            # 3. Table of SHAP values, largest magnitude first
+            shap_df = pd.DataFrame({
+                'feature': feature_names,
+                'shap_value': row_values,
+                'abs_shap': abs(row_values)
+            }).sort_values('abs_shap', ascending=False)
+            shap_df['description'] = shap_df['feature'].apply(get_feature_display_name)
+            st.markdown("### 📋 Факторы, влияющие на решение:")
+            col1, col2 = st.columns(2)
+            with col1:
+                pos = shap_df[shap_df['shap_value'] > 0].head(5)
+                if len(pos) > 0:
+                    st.markdown("**🔴 Повышают риск:**")
+                    for _, row in pos.iterrows():
+                        st.markdown(f"- {row['description']}: +{row['shap_value']:.3f}")
+            with col2:
+                neg = shap_df[shap_df['shap_value'] < 0].head(5)
+                if len(neg) > 0:
+                    st.markdown("**🟢 Снижают риск:**")
+                    for _, row in neg.iterrows():
+                        st.markdown(f"- {row['description']}: {row['shap_value']:.3f}")
+
+            with st.expander("📋 Все SHAP значения"):
+                display_df = shap_df[['feature', 'description', 'shap_value']].copy()
+                display_df.columns = ['Признак', 'Описание', 'SHAP']
+                display_df['SHAP'] = display_df['SHAP'].round(3)
+                st.dataframe(display_df.sort_values('SHAP', ascending=False), width='stretch')
+        except Exception as e:  # UI boundary: show the failure instead of crashing the page
+            st.error(f"❌ Ошибка SHAP: {e}")
diff --git a/app/pages/__pycache__/application.cpython-311.pyc b/app/pages/__pycache__/application.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e95c5298464fb902ab504f6ecba15da1d4f5669e
Binary files /dev/null and b/app/pages/__pycache__/application.cpython-311.pyc differ
diff --git a/app/pages/__pycache__/simulation.cpython-311.pyc b/app/pages/__pycache__/simulation.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..dc46ab9369af7a640d1a6b23d191fa04a3ddfded
Binary files /dev/null and b/app/pages/__pycache__/simulation.cpython-311.pyc differ
diff --git a/app/pages/application.py b/app/pages/application.py
new file mode 100644
index 0000000000000000000000000000000000000000..a51c17673e808bdff903387dcea96e6f594c571e
--- /dev/null
+++ b/app/pages/application.py
@@ -0,0 +1,329 @@
+import streamlit as st
+import pandas as pd
+import os
+from app.utils.data_loader import load_artifacts
+from app.models.escalation import escalation_decision
+from app.models.interpretation import (
+ interpret_lr, plot_feature_importance_sns,
+ plot_marginal_effects_sns, plot_shap_analysis,
+ get_feature_display_name
+)
+from app.utils.credit_preprocessor import check_business_rules
+
+# Paths: project root (three directory levels above this file), model artifacts, preprocessors
+PROJECT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
+MODELS_PATH = os.path.join(PROJECT_PATH, 'models/best/train_150/')
+PREPROCESSOR_PATH = os.path.join(PROJECT_PATH, 'preprocessors/')
+
+
+def main():  # Streamlit page: two-step application form ('input' -> 'values') followed by the scoring result
+ st.title("🏦 Кредитный скоринг - Анкета")
+
+ # Load the artifacts
+ preprocessor, scaler, models = load_artifacts(MODELS_PATH, PREPROCESSOR_PATH)
+
+ # Initialise the statistics
+ if 'stats' not in st.session_state:
+ st.session_state.stats = {
+ 'total': 0,
+ 'manual': 0,
+ 'lr_confident': 0,
+ 'second_used': 0,
+ 'second_confident': 0,
+ 'approved': 0,
+ 'declined': 0
+ }
+
+ if 'step' not in st.session_state:
+ st.session_state.step = 'input'
+
+ # DATA INPUT
+
+ if st.session_state.step == 'input':
+ st.header("📋 Анкета заемщика")
+
+ with st.form("credit_form"):
+ st.subheader("👤 Личная информация")
+ col1, col2 = st.columns(2)
+ with col1:
+ age = st.number_input("Возраст", 0, 150, 35)
+ with col2:
+ dependents = st.number_input("Иждивенцы", 0, 20, 0)
+
+ st.subheader("💰 Ежемесячный доход")
+ income_method = st.radio("Способ указания дохода", ["Слайдер (до 20,000$)", "Точное значение"],
+ horizontal=True)
+
+ st.subheader("💳 Ежемесячные платежи")
+ debt_method = st.radio("Способ указания платежей", ["Слайдер (до 10,000$)", "Точное значение"],
+ horizontal=True)
+
+ st.subheader("📊 Кредитная история")
+ credit_lines = st.number_input("Открытых кредитов и карт", 0, 100, 5)
+ real_estate = st.number_input("Кредитов под залог недвижимости", 0, 100, 1)
+
+ st.subheader("📈 Использование лимитов")
+ util_method = st.radio("Уровень использования",
+ ["Норма (0-100%)", "Овердрафт (100-200%)", "Экстремальный (>200%)"], horizontal=True)
+
+ st.subheader("⏱️ Просрочки за последние 2 года")
+ col1, col2, col3 = st.columns(3)
+ with col1:
+ late_30_59 = st.number_input("30-59 дней", 0, 100, 0)
+ with col2:
+ late_60_89 = st.number_input("60-89 дней", 0, 100, 0)
+ with col3:
+ late_90 = st.number_input("90+ дней", 0, 100, 0)
+
+ submitted = st.form_submit_button("➡️ Далее: указать точные значения")
+
+ if submitted:
+ st.session_state.update({
+ 'age': age, 'dependents': dependents, 'income_method': income_method,
+ 'debt_method': debt_method, 'credit_lines': credit_lines,
+ 'real_estate': real_estate, 'util_method': util_method,
+ 'late_30_59': late_30_59, 'late_60_89': late_60_89, 'late_90': late_90
+ })
+ st.session_state.step = 'values'
+ st.rerun()
+
+
+ # ENTERING THE EXACT VALUES
+
+ elif st.session_state.step == 'values':
+ st.header("💰 Укажите точные значения")
+
+ with st.form("values_form"):
+ col1, col2 = st.columns(2)
+ with col1:
+ st.subheader("Доход")
+ if st.session_state.income_method == "Слайдер (до 20,000$)":
+ monthly_income = st.slider("Ежемесячный доход ($)", 0, 20000, 5000)
+ else:
+ monthly_income = st.number_input("Ежемесячный доход ($)", 0, 1000000, 5000)
+
+ with col2:
+ st.subheader("Платежи")
+ if st.session_state.debt_method == "Слайдер (до 10,000$)":
+ monthly_debt = st.slider("Ежемесячные платежи ($)", 0, 10000, 1500)
+ else:
+ monthly_debt = st.number_input("Ежемесячные платежи ($)", 0, 1000000, 1500)
+
+ st.subheader("📈 Использование лимитов")
+ if st.session_state.util_method == "Норма (0-100%)":
+ util_value = st.slider("Процент использования", 0, 100, 20)
+ utilization = util_value / 100
+ elif st.session_state.util_method == "Овердрафт (100-200%)":
+ util_value = st.slider("Процент использования", 100, 200, 120)
+ utilization = util_value / 100
+ else:
+ st.warning("Экстремальное использование (>200%) - автоматический ручной разбор")
+ utilization = st.number_input("Процент использования", 200, 1000, 200) / 100
+
+ submitted = st.form_submit_button("✅ Получить решение")
+
+ # SIDEBAR
+ with st.sidebar:
+ st.markdown("---")
+ st.subheader("⚙️ Настройки")
+
+ with st.expander("🎯 Пороги уверенности", expanded=False):
+ threshold = st.slider("Порог одобрения", 0.3, 0.7, 0.5, 0.05)
+ lr_margin = st.slider("Отступ LR", 0.2, 0.5, 0.35, 0.05)
+ second_margin = st.slider("Отступ второй модели", 0.2, 0.5, 0.4, 0.05)
+
+ with st.expander("🤖 Выбор модели", expanded=False):
+ available_models = [name for name in models.keys() if name != 'Logistic Regression']
+ second_model_name = st.selectbox("Модель для эскалации", available_models)
+
+ with st.expander("📊 Статистика", expanded=False):
+ stats = st.session_state.stats
+ if stats['total'] > 0:
+ st.metric("Всего заявок", stats['total'])
+ st.metric("Ручной разбор", f"{stats['manual'] / stats['total']:.1%}")
+ st.metric("LR уверена", f"{stats['lr_confident'] / stats['total']:.1%}")
+ if stats['second_used'] > 0:
+ st.metric("Вторая модель уверена",
+ f"{stats['second_confident'] / stats['second_used']:.1%}")
+
+ if st.button("🔄 Сброс"):
+ st.session_state.stats = {'total': 0, 'manual': 0, 'lr_confident': 0,
+ 'second_used': 0, 'second_confident': 0,
+ 'approved': 0, 'declined': 0}
+ st.rerun()
+ else:
+ st.info("Нет данных")
+
+ with st.expander("ℹ️ О проекте", expanded=False):
+ st.markdown(f"""
+ **Модели:**
+ - Logistic Regression
+ - {', '.join(available_models)}
+
+ **AUC:** 0.8578 (LR), ~0.87 (остальные)
+ """)
+
+ st.session_state.threshold = threshold
+ st.session_state.lr_margin = lr_margin
+ st.session_state.second_margin = second_margin
+ st.session_state.second_model_name = second_model_name
+
+ if submitted:
+ debt_ratio = monthly_debt / monthly_income if monthly_income > 0 else monthly_debt  # without income the payment amount itself is used
+
+ # Prepare the data (ONCE)
+ input_data = pd.DataFrame([{
+ 'RevolvingUtilizationOfUnsecuredLines': utilization,
+ 'age': st.session_state.age,
+ 'NumberOfTime30-59DaysPastDueNotWorse': st.session_state.late_30_59,
+ 'DebtRatio': debt_ratio,
+ 'MonthlyIncome': monthly_income,
+ 'NumberOfOpenCreditLinesAndLoans': st.session_state.credit_lines,
+ 'NumberOfTimes90DaysLate': st.session_state.late_90,
+ 'NumberRealEstateLoansOrLines': st.session_state.real_estate,
+ 'NumberOfTime60-89DaysPastDueNotWorse': st.session_state.late_60_89,
+ 'NumberOfDependents': st.session_state.dependents
+ }])
+
+ st.markdown("---")
+
+ with st.spinner("🔄 Анализ заявки..."):
+ lr_model = models['Logistic Regression']
+ second_model = models[second_model_name]
+
+ # Single escalation call (includes the business rules)
+ decisions, manual_mask, task = escalation_decision(  # third value is the stats dict; not used below
+ input_data,
+ lr_model,
+ second_model,
+ second_model_name,
+ threshold=st.session_state.threshold,
+ lr_margins=[st.session_state.lr_margin],
+ second_margins=[st.session_state.second_margin],
+ preprocessor=preprocessor,
+ scaler=scaler
+ )
+ decision = decisions[0]
+
+ # The LR interpretation needs the processed data
+ processed = preprocessor.transform(input_data)
+ processed_scaled = scaler.transform(processed)
+
+ # Update the statistics
+ st.session_state.stats['total'] += 1
+ if decision['needs_review']:
+ st.session_state.stats['manual'] += 1
+ else:
+ if decision['final_decision'] == 0:
+ st.session_state.stats['approved'] += 1
+ else:
+ st.session_state.stats['declined'] += 1
+
+ if decision.get('lr_confident', False):
+ st.session_state.stats['lr_confident'] += 1
+
+ if decision.get('second_used', False):
+ st.session_state.stats['second_used'] += 1
+ if decision.get('second_confident', False):
+ st.session_state.stats['second_confident'] += 1
+
+ # DISPLAY THE RESULTS
+ st.subheader("🔄 Цепочка принятия решения")
+ for step in decision['decision_path']:
+ st.write(step)
+
+ col1, col2 = st.columns(2)
+ with col1:
+ st.markdown("**🏦 Logistic Regression**")
+ st.metric("Вероятность", f"{decision['lr_proba']:.1%}")  # NOTE(review): business-rule decisions have lr_proba=None and no 'lr_margin'/'lr_confident'/'second_used' keys - this block looks like it fails for them; confirm
+ st.write(f"Отступ: {decision['lr_margin']:.1%}")
+ if decision['lr_confident']:
+ st.success("✅ Уверена")
+ else:
+ st.warning("⚠️ Не уверена")
+
+ with col2:
+ st.markdown(f"**⚡ {second_model_name}**")
+ if decision['second_used']:
+ st.metric("Вероятность", f"{decision['second_proba']:.1%}")
+ st.write(f"Отступ: {decision['second_margin']:.1%}")
+ if decision['second_confident']:
+ st.success("✅ Уверен")
+ else:
+ st.warning("⚠️ Не уверен")
+ else:
+ st.info("⏳ Не вызывался")
+
+ st.markdown("---")
+ if decision['needs_review']:
+ st.warning("👨💼 **РУЧНОЙ РАЗБОР**")
+ st.info("Модели не уверены - требуется проверка специалистом")  # NOTE(review): needs_review is also set by the business rules, where decision['message'] holds the actual reason
+ else:
+ col1, col2 = st.columns(2)
+ with col1:
+ if decision['final_decision'] == 0:
+ st.success("✅ **КРЕДИТ ОДОБРЕН**")
+ else:
+ st.error("❌ **КРЕДИТ НЕ ОДОБРЕН**")
+ with col2:
+ st.metric("Модель", decision['model_used'])
+
+ # DETAILED ANALYSIS: LR
+ st.markdown("---")
+ st.subheader("🔍 Детальный анализ: Logistic Regression")
+
+ feature_names = processed_scaled.columns.tolist()
+ interpretation = interpret_lr(processed_scaled, lr_model, feature_names)
+
+ tab1, tab2 = st.tabs(["📊 Вклад в логит", "📈 Влияние на вероятность"])
+
+ with tab1:
+ st.markdown("🔴 Положительный вклад = ↑ риск, 🟢 Отрицательный = ↓ риск")
+ fig1 = plot_feature_importance_sns(interpretation['logit_contributions'])
+ st.pyplot(fig1)
+
+ with st.expander("📋 Все вклады"):
+ display_df = interpretation['logit_contributions'][
+ ['feature', 'value', 'coefficient', 'logit_contribution']].copy()
+ display_df['Описание'] = display_df['feature'].apply(get_feature_display_name)
+ display_df = display_df[['Описание', 'value', 'coefficient', 'logit_contribution']]
+ display_df.columns = ['Признак', 'Значение', 'Коэф', 'Вклад']
+ display_df = display_df.round(3)
+ st.dataframe(display_df)
+
+ with tab2:
+ st.markdown("🔴 Положительное = фактор ↑ риск, 🟢 Отрицательное = ↓ риск")
+ fig2 = plot_marginal_effects_sns(interpretation['marginal_effects'])
+ st.pyplot(fig2)
+
+ with st.expander("📋 Все эффекты"):
+ display_df = interpretation['marginal_effects'][['feature', 'marginal_effect']].copy()
+ display_df['Описание'] = display_df['feature'].apply(get_feature_display_name)
+ display_df = display_df[['Описание', 'marginal_effect']]
+ display_df.columns = ['Признак', 'Влияние']
+ display_df['Влияние'] = display_df['Влияние'].map('{:.1%}'.format)
+ st.dataframe(display_df)
+
+ st.info(f"Итоговая вероятность дефолта (LR): {interpretation['probability']:.1%}")
+
+ # DETAILED ANALYSIS OF THE SECOND MODEL (SHAP for tree-based models)
+ if decision['second_used'] and second_model_name in ['XGBoost', 'LightGBM', 'Random Forest', 'CatBoost']:
+ plot_shap_analysis(second_model, processed_scaled, feature_names, second_model_name)
+
+ # BACK BUTTON
+ if st.button("◀️ Вернуться к выбору способов"):
+ st.session_state.step = 'input'
+ st.rerun()
+
+ st.markdown("---")
+ col1, col2, col3 = st.columns([1, 2, 1])
+ with col2:
+ if st.button("🏠 На главную", use_container_width=True):
+ st.switch_page("main.py")
+
+ st.markdown("---")
+ st.caption("🏦 GiveMeSomeCredit - Интерпретируемый кредитный скоринг | Модели: Logistic Regression + выбор")
+
+
+if __name__ == "__main__":  # render the page when this file is executed as a script
+ main()
diff --git a/app/pages/simulation.py b/app/pages/simulation.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed5961bbd75193b8973e70fc9007745320494061
--- /dev/null
+++ b/app/pages/simulation.py
@@ -0,0 +1,345 @@
+import streamlit as st
+import pandas as pd
+import numpy as np
+import os
+import sys
+import tempfile
+import time
+from datetime import datetime
+from PIL import Image
+import matplotlib.pyplot as plt
+
+# Project-local imports (the repository root is put on sys.path first)
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from app.utils.data_loader import load_artifacts
+from app.simulation.core.traffic_generator import TrafficGenerator
+from app.simulation.core.processor import ApplicationProcessor
+from app.simulation.controllers.pid import PIDController
+from app.simulation.visualization.plots import (
+ plot_queue_dynamics,
+ plot_specialist_load,
+ plot_inflow,
+ plot_parameters_history,
+ plot_detailed_decisions
+)
+# ============================================================================
+# ANIMATION BLOCK: import of the visualization helpers
+# ============================================================================
+from app.simulation.visualization.animation import create_simulation_video
+
+# ============================================================================
+
+
+def minutes_to_time(minutes, start_time="00:00"):
+    """Convert an offset in minutes from ``start_time`` into an ``HH:MM`` string.
+
+    ``start_time`` must look like ``"HH:MM"``; the resulting hour wraps around
+    midnight (modulo 24).
+    """
+    hour_text, minute_text = start_time.split(':')
+    elapsed = int(hour_text) * 60 + int(minute_text) + minutes
+    whole_hours, minute = divmod(elapsed, 60)
+    return f"{whole_hours % 24:02d}:{minute:02d}"
+
+
+def main():
+    """Render the Streamlit page that simulates the application-processing system.
+
+    Sidebar widgets collect the simulation settings. The run button replays the
+    test dataset minute by minute through ``ApplicationProcessor`` (optionally
+    steered by ``PIDController``) and stores the outcome in
+    ``st.session_state``. Because Streamlit re-executes this function on every
+    widget interaction, the results section reads the run's own parameters from
+    the session state instead of the current sidebar values.
+    """
+    st.title("📊 Симуляция работы системы")
+
+    # Locate and load the model artifacts.
+    PROJECT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
+    MODELS_PATH = os.path.join(PROJECT_PATH, 'models/best/train_150/')
+    PREPROCESSOR_PATH = os.path.join(PROJECT_PATH, 'preprocessors/')
+    TEST_DATA_PATH = os.path.join(PROJECT_PATH, 'datasets/cs-test.csv')
+
+    preprocessor, scaler, models = load_artifacts(MODELS_PATH, PREPROCESSOR_PATH)
+
+    # Logistic Regression is always the first stage, so it cannot be the second model.
+    available_models = [name for name in models.keys() if name != 'Logistic Regression']
+
+    # Sidebar: choice of the escalation model.
+    st.sidebar.subheader("🤖 Выбор модели")
+    second_model_name = st.sidebar.selectbox(
+        "Вторая модель для эскалации",
+        available_models,
+        index=0
+    )
+
+    # Sidebar: simulation parameters.
+    st.sidebar.header("⚙️ Параметры")
+    # The specialist count is capped at 400 so the animation can show a 20x20 grid.
+    specialists_count = st.sidebar.slider("Количество специалистов (модели)", 10, 400, 100, 10)
+    business_specialists_count = st.sidebar.slider("Количество экспертов (бизнес-правила)", 1, 100, 30, 1)
+
+    business_time = st.sidebar.slider("Время обработки бизнес правил(мин)", 5, 30, 15, 5)
+    base_time = st.sidebar.slider("Базовое время обработки (мин)", 2, 15, 5)
+
+    target_load = st.sidebar.slider(
+        "Целевая загрузка специалистов", 0.5, 1.0, 0.8, 0.05,
+        help="0.8 = 80% - оставляем запас на пики")
+
+    st.sidebar.subheader("🎯 Порог одобрения")
+    fixed_threshold = st.sidebar.slider(
+        "Порог (фиксированный)",
+        0.3, 0.7, 0.5, 0.05,
+        help="Порог одобрения - стратегический параметр, не меняется PID"
+    )
+
+    st.sidebar.subheader("🎯 Начальные отступы (%)")
+
+    lr_low_pct = st.sidebar.slider("LR нижний отступ (% от порога)", 0, 100, 20, 5,
+                                   help="% от расстояния между 0 и порогом")
+    lr_high_pct = st.sidebar.slider("LR верхний отступ (% от 1-порога)", 0, 100, 20, 5,
+                                    help="% от расстояния между порогом и 1")
+    second_low_pct = st.sidebar.slider("Вторая модель нижний (%)", 0, 100, 20, 5)
+    second_high_pct = st.sidebar.slider("Вторая модель верхний (%)", 0, 100, 20, 5)
+
+    # Convert the percentages into absolute margins around the threshold.
+    init_lr_low = fixed_threshold * lr_low_pct / 100
+    init_lr_high = (1 - fixed_threshold) * lr_high_pct / 100
+    init_second_low = fixed_threshold * second_low_pct / 100
+    init_second_high = (1 - fixed_threshold) * second_high_pct / 100
+
+    # Sidebar: PID controller.
+    st.sidebar.subheader("🎛️ PID регулятор")
+    use_pid = st.sidebar.checkbox("Включить PID", value=True)
+
+    # Sidebar: animation switches.
+    # NOTE(review): these two values are not read anywhere in this function;
+    # the widgets are kept so the sidebar layout stays unchanged.
+    st.sidebar.subheader("🎬 Анимация")
+    create_gif = st.sidebar.checkbox("Создать GIF после симуляции", value=False)
+    gif_fps = st.sidebar.slider("FPS для GIF", 5, 30, 10, 5)
+
+    if use_pid:
+        kp = st.sidebar.slider("P (пропорциональный)", 0.0, 1.0, 0.33)
+        ki = st.sidebar.slider("I (интегральный)", 0.0, 1.0, 0.03)
+        kd = st.sidebar.slider("D (дифференциальный)", 0.0, 1.0, 0.22)
+        w_load = st.sidebar.slider("Вес загрузки", 0.0, 1.0, 0.3)
+
+    # Run button.
+    if st.button("🎬 Запустить симуляцию 24 часа"):
+        with st.spinner("Загрузка данных и симуляция..."):
+            # 1. Load the test dataset (without the target column, if present).
+            test_df = pd.read_csv(TEST_DATA_PATH)
+            if 'SeriousDlqin2yrs' in test_df.columns:
+                test_df = test_df.drop(columns=['SeriousDlqin2yrs'])
+            test_pool = test_df.to_dict('records')
+
+            # 2. Spread the applications over the minutes, starting from "now".
+            current_time = datetime.now()
+            start_hour = current_time.hour
+            start_minute = current_time.minute
+
+            gen = TrafficGenerator(total_applications=len(test_pool))
+            minute_counts = gen.generate_minute_counts(start_hour=start_hour, start_minute=start_minute)
+
+            # Kept for the plots.
+            st.session_state.start_time = f"{start_hour:02d}:{start_minute:02d}"
+            st.session_state.minute_counts = minute_counts
+
+            # 3. Build the processor.
+            processor = ApplicationProcessor(
+                lr_model=models['Logistic Regression'],
+                second_model=models[second_model_name],
+                second_model_name=second_model_name,
+                specialists_count=specialists_count,
+                business_specialists_count=business_specialists_count,
+                base_processing_time=base_time,
+                business_processing_time=business_time
+            )
+
+            # 4. Build the PID controller when it is enabled.
+            if use_pid:
+                pid = PIDController(
+                    init_threshold=fixed_threshold,
+                    kp_load=kp, ki_load=ki, kd_load=kd,
+                    load_weight=w_load,
+                    init_lr_low=init_lr_low,
+                    init_lr_high=init_lr_high,
+                    init_second_low=init_second_low,
+                    init_second_high=init_second_high,
+                    target_load=target_load
+                )
+            else:
+                pid = None
+
+            # 5. Minute-by-minute simulation.
+            idx = 0
+            progress_bar = st.progress(0)
+            n_steps = len(minute_counts)
+            animation_frames = []  # one entry per simulated minute
+
+            for step, n_apps in enumerate(minute_counts):
+                # Take the next applications from the pool (slicing past the end is safe).
+                batch = test_pool[idx:idx + n_apps]
+                idx += n_apps
+
+                # Current margins: adaptive with PID, fixed constants without it.
+                if pid is not None:
+                    margins = pid.get_margins()
+                    lr_margins = [margins['lr_low'], margins['lr_high']]
+                    second_margins = [margins['second_low'], margins['second_high']]
+                else:
+                    lr_margins = [0.35]
+                    second_margins = [0.4]
+
+                result = processor.process_batch(
+                    batch, preprocessor, scaler,
+                    threshold=fixed_threshold,  # the threshold is never changed by the PID
+                    lr_margins=lr_margins,
+                    second_margins=second_margins,
+                    current_time=step
+                )
+
+                # The load is well defined with or without PID, so every frame
+                # reports it; only the controller update depends on PID.
+                load = result['specialists_busy'] / specialists_count
+                if pid is not None:
+                    pid.update(load)
+
+                # Record a frame for every minute to keep the animation smooth.
+                animation_frames.append({
+                    'time': step,
+                    'step': step,  # duplicate of 'time', kept for the video code
+                    'time_str': minutes_to_time(step, st.session_state.start_time),
+                    'inflow': n_apps,
+                    'inflow_history': st.session_state.minute_counts[:step + 1],
+                    'load_history': [v / specialists_count for v in processor.stats['specialist_busy'][:step + 1]],
+                    'queue': result['queue_size'],
+                    'business_queue': result.get('business_queue_size', 0),
+                    'load': load,
+                    'specialist_states': processor.specialists.copy(),
+                    'cumulative': {
+                        'total_processed': processor.stats['total_processed'],
+                        'auto_approved': processor.stats['auto_approved'],
+                        'auto_declined': processor.stats['auto_declined'],
+                        'manual_processed': processor.stats['manual_processed'],
+                        'business_manual_processed': processor.stats.get('business_manual_processed', 0)
+                    }
+                })
+
+                progress_bar.progress((step + 1) / n_steps)
+
+            # 6. Persist the results together with the parameters of THIS run,
+            # so later reruns render them consistently even if the sidebar changes.
+            st.session_state.processor = processor
+            st.session_state.pid_history = pid.get_history() if pid is not None else None
+            st.session_state.simulation_done = True
+            st.session_state.batch_stats = processor.batch_stats
+            st.session_state.animation_frames = animation_frames
+            st.session_state.sim_specialists_count = specialists_count
+            st.session_state.sim_second_model_name = second_model_name
+
+    # Results section.
+    if st.session_state.get('simulation_done', False):
+        st.success("✅ Симуляция завершена!")
+
+        stats = st.session_state.processor.stats
+        run_start = st.session_state.get('start_time', '00:00')
+        # Fall back to the sidebar values for sessions created before these keys existed.
+        run_specialists = st.session_state.get('sim_specialists_count', specialists_count)
+        run_model_name = st.session_state.get('sim_second_model_name', second_model_name)
+
+        # Quick statistics.
+        col1, col2, col3, col4, col5 = st.columns(5)
+        col1.metric("Всего заявок", stats['total_processed'])
+        col2.metric("Одобрено авто", stats['auto_approved'])
+        col3.metric("Отказ авто", stats['auto_declined'])
+        col4.metric("Ручной разбор", stats['manual_processed'])
+        manual_rate = stats['manual_sent'] / stats['total_processed'] * 100 if stats['total_processed'] > 0 else 0
+        col5.metric("Ручной разбор %", f"{manual_rate:.1f}%")
+
+        # Plots: only calls into plots.py.
+        st.subheader("📈 Графики")
+
+        # Queues.
+        st.pyplot(plot_queue_dynamics(
+            queue_history=stats['queue_history'],
+            business_queue_history=stats.get('business_queue_history'),
+            start_time=run_start
+        ))
+        plt.close()
+
+        # Specialist load.
+        st.pyplot(plot_specialist_load(
+            specialist_busy_history=stats['specialist_busy'],
+            specialists_count=run_specialists,
+            start_time=run_start
+        ))
+        plt.close()
+
+        # Inflow.
+        st.pyplot(plot_inflow(
+            minute_counts=st.session_state.minute_counts,
+            start_time=run_start
+        ))
+        plt.close()
+
+        # Detailed decision breakdown.
+        st.pyplot(plot_detailed_decisions(
+            batch_stats=st.session_state.batch_stats,
+            second_model_name=run_model_name,
+            start_time=run_start
+        ))
+        plt.close()
+
+        # PID parameters: there is no history when the run had PID disabled.
+        pid_history = st.session_state.get('pid_history')
+        if pid_history is not None:
+            st.pyplot(plot_parameters_history(
+                pid_history=pid_history,
+                second_model_name=run_model_name,
+                start_time=run_start
+            ))
+            plt.close()
+        else:
+            st.info("PID-регулятор отключён — история параметров не строится.")
+
+        # Video generation from the recorded frames.
+        if st.session_state.get('animation_frames'):
+            st.divider()
+            st.subheader("🎥 Настройки видео-отчета")
+
+            col_v1, col_v2 = st.columns(2)
+            with col_v1:
+                # Frame stride: every v_step-th frame goes into the video.
+                v_step = st.slider("Шаг кадров (1 = каждая минута)", 1, 30, 1,
+                                   help="Чем меньше шаг, тем плавнее видео, но дольше рендеринг")
+            with col_v2:
+                v_fps = st.slider("Скорость видео (FPS)", 10, 60, 24,
+                                  help="Количество кадров в секунду")
+
+            if st.button("🎬 Сгенерировать видео", type="primary", use_container_width=True):
+                with st.spinner("Рендеринг видео..."):
+                    video_path = create_simulation_video(
+                        st.session_state.animation_frames[::v_step],
+                        run_specialists,
+                        run_model_name,
+                        fps=v_fps
+                    )
+                    st.video(video_path)
+                    st.success("✅ Видео готово! Вы можете его скачать или перематывать.")
+
+    # Navigation back to the landing page.
+    st.write("")
+    col1, col2, col3 = st.columns([1, 2, 1])
+    with col2:
+        if st.button("🏠 На главную", use_container_width=True):
+            st.switch_page("main.py")
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/app/simulation/.DS_Store b/app/simulation/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..35cb044494b4a3e60ca56fc179829cb2a84c6a4a
Binary files /dev/null and b/app/simulation/.DS_Store differ
diff --git a/app/simulation/__init__.py b/app/simulation/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/app/simulation/__pycache__/__init__.cpython-311.pyc b/app/simulation/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f05221ac03b1518fa37b3dac5893eaed0f0eb44d
Binary files /dev/null and b/app/simulation/__pycache__/__init__.cpython-311.pyc differ
diff --git a/app/simulation/controllers/__init__.py b/app/simulation/controllers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/app/simulation/controllers/__pycache__/__init__.cpython-311.pyc b/app/simulation/controllers/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..aaf94963a46d407448974f5efa0ac375e4d71142
Binary files /dev/null and b/app/simulation/controllers/__pycache__/__init__.cpython-311.pyc differ
diff --git a/app/simulation/controllers/__pycache__/base.cpython-311.pyc b/app/simulation/controllers/__pycache__/base.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0c0626648814d15d4fc343b866f96416e903e85f
Binary files /dev/null and b/app/simulation/controllers/__pycache__/base.cpython-311.pyc differ
diff --git a/app/simulation/controllers/__pycache__/pid.cpython-311.pyc b/app/simulation/controllers/__pycache__/pid.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3fe209e3134817abb86000fb714484f6f0c076fc
Binary files /dev/null and b/app/simulation/controllers/__pycache__/pid.cpython-311.pyc differ
diff --git a/app/simulation/controllers/base.py b/app/simulation/controllers/base.py
new file mode 100644
index 0000000000000000000000000000000000000000..4170ac18f40a6bd42ece7904231a001b9d2075fd
--- /dev/null
+++ b/app/simulation/controllers/base.py
@@ -0,0 +1,28 @@
+from abc import ABC, abstractmethod
+
+
+class BaseController(ABC):
+    """Common base class for all controllers."""
+
+    def __init__(self, name="Base"):
+        self.history = []
+        self.name = name
+
+    @abstractmethod
+    def update(self, current_state, target_state, dt=1.0):
+        """Compute new control parameters.
+
+        Args:
+            current_state: current state of the system (queue, load).
+            target_state: desired state.
+            dt: time step.
+
+        Returns:
+            Subclasses are expected to return the new thresholds and margins;
+            this abstract body itself returns None.
+        """
+        return None
+
+    def get_margins(self, hour=None):
+        """Return the current margins for LR and the second model.
+
+        The base implementation has nothing to report and returns None.
+        """
+        return None
\ No newline at end of file
diff --git a/app/simulation/controllers/pid.py b/app/simulation/controllers/pid.py
new file mode 100644
index 0000000000000000000000000000000000000000..5430889ce278ea6a555efab21fe6ab9423468cd6
--- /dev/null
+++ b/app/simulation/controllers/pid.py
@@ -0,0 +1,129 @@
+import numpy as np
+import pandas as pd
+from .base import BaseController
+
+
+class PIDController(BaseController):
+    """PID controller that adapts the escalation margins to the specialists' load.
+
+    The controller compares the observed load with ``target_load`` and shifts
+    all four margins (``lr_low``, ``lr_high``, ``second_low``, ``second_high``)
+    by the same amount, clipped to ``self.bounds``. The approval threshold
+    itself is stored but never modified.
+    """
+
+    # Attribute names of the margins that are adjusted together.
+    _MARGIN_NAMES = ('lr_low', 'lr_high', 'second_low', 'second_high')
+
+    def __init__(self, name="PID",
+                 kp_load=0.1, ki_load=0.01, kd_load=0.05,
+                 load_weight=1.0,
+                 # Initial parameter values
+                 init_threshold=0.5,
+                 init_lr_low=0.3, init_lr_high=0.4,
+                 init_second_low=0.35, init_second_high=0.45,
+                 target_load=0.8):
+        """Store the gains, the target load and the initial margins.
+
+        Args:
+            name: controller name passed to ``BaseController``.
+            kp_load, ki_load, kd_load: proportional, integral and derivative gains.
+            load_weight: multiplier applied to the PID output.
+            init_threshold: approval threshold; also defines the margin bounds.
+            init_lr_low, init_lr_high: initial margins for the LR model.
+            init_second_low, init_second_high: initial margins for the second model.
+            target_load: load the controller tries to maintain.
+        """
+        super().__init__(name)
+
+        # PID gains for the load error
+        self.kp_load = kp_load
+        self.ki_load = ki_load
+        self.kd_load = kd_load
+
+        self.load_weight = load_weight
+        self.target_load = target_load
+
+        # PID state
+        self.prev_error_load = 0
+        self.integral_load = 0
+
+        # Initial parameters (kept for reference)
+        self.init_threshold = init_threshold
+        self.init_lr_low = init_lr_low
+        self.init_lr_high = init_lr_high
+        self.init_second_low = init_second_low
+        self.init_second_high = init_second_high
+
+        # Current parameters (margins)
+        self.threshold = init_threshold
+        self.lr_low = init_lr_low
+        self.lr_high = init_lr_high
+        self.second_low = init_second_low
+        self.second_high = init_second_high
+
+        # Margin bounds: at least 0.05, and at most 0.05 short of the distance
+        # from the threshold to 0 (low margins) or to 1 (high margins).
+        # The initial margins are only clipped to these bounds on the first update.
+        self.bounds = {
+            'lr_low': (0.05, self.threshold - 0.05),
+            'lr_high': (0.05, 1 - self.threshold - 0.05),
+            'second_low': (0.05, self.threshold - 0.05),
+            'second_high': (0.05, 1 - self.threshold - 0.05)
+        }
+
+        # Anti-windup limit for the integral term
+        self.integral_limit = 1.0
+
+    def update(self, current_load, target_state=None, dt=1.0):
+        """Run one PID step and return the updated margins.
+
+        The extra parameters mirror ``BaseController.update`` and are optional,
+        so ``update(load)`` behaves exactly as before.
+
+        Args:
+            current_load: current load of the specialists.
+            target_state: target load for this step; ``None`` means
+                ``self.target_load``.
+            dt: time step; scales the integral and derivative terms.
+
+        Returns:
+            dict with the keys ``lr_low``, ``lr_high``, ``second_low`` and
+            ``second_high``.
+
+        Raises:
+            ValueError: if ``dt`` is not positive.
+        """
+        if dt <= 0:
+            raise ValueError("dt must be positive")
+
+        target = self.target_load if target_state is None else target_state
+
+        # Positive error = load below target = room to send more work to review.
+        error_load = target - current_load
+
+        # PID terms
+        P_load = self.kp_load * error_load
+        self.integral_load = np.clip(
+            self.integral_load + error_load * dt,
+            -self.integral_limit, self.integral_limit
+        )
+        I_load = self.ki_load * self.integral_load
+        D_load = self.kd_load * (error_load - self.prev_error_load) / dt
+        self.prev_error_load = error_load
+
+        # Controller output
+        output = self.load_weight * (P_load + I_load + D_load)
+
+        # Adapt the margins
+        self._update_parameters(output)
+
+        # Record the step for visualization
+        self.history.append({
+            'time': len(self.history),
+            'error_load': error_load,
+            'output': output,
+            'threshold': self.threshold,
+            'lr_low': self.lr_low,
+            'lr_high': self.lr_high,
+            'second_low': self.second_low,
+            'second_high': self.second_high,
+            'load': current_load,
+        })
+
+        return self.get_margins()
+
+    def _update_parameters(self, output):
+        """Shift every margin by ``output * 0.1`` and clip it to its bounds."""
+        delta = output * 0.1
+        for margin in self._MARGIN_NAMES:
+            lower, upper = self.bounds[margin]
+            setattr(self, margin, np.clip(getattr(self, margin) + delta, lower, upper))
+
+    def get_margins(self, hour=None):
+        """Return the current margins; ``hour`` is accepted but not used."""
+        return {margin: getattr(self, margin) for margin in self._MARGIN_NAMES}
+
+    def get_history(self):
+        """Return the recorded steps as a DataFrame (one row per ``update`` call)."""
+        return pd.DataFrame(self.history)
\ No newline at end of file
diff --git a/app/simulation/core/__init__.py b/app/simulation/core/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/app/simulation/core/__pycache__/__init__.cpython-311.pyc b/app/simulation/core/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5fcb5c89eb00626bc09ebac2c5c2999814604176
Binary files /dev/null and b/app/simulation/core/__pycache__/__init__.cpython-311.pyc differ
diff --git a/app/simulation/core/__pycache__/processor.cpython-311.pyc b/app/simulation/core/__pycache__/processor.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4ed8b7512ff2b1c11f09edce40825190c332d36c
Binary files /dev/null and b/app/simulation/core/__pycache__/processor.cpython-311.pyc differ
diff --git a/app/simulation/core/__pycache__/traffic_generator.cpython-311.pyc b/app/simulation/core/__pycache__/traffic_generator.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..68c301dd1e821e19f42513bbecccdbba7a3c1bef
Binary files /dev/null and b/app/simulation/core/__pycache__/traffic_generator.cpython-311.pyc differ
diff --git a/app/simulation/core/processor.py b/app/simulation/core/processor.py
new file mode 100644
index 0000000000000000000000000000000000000000..956717465d4b98fa7fd3c2b335757421225c330a
--- /dev/null
+++ b/app/simulation/core/processor.py
@@ -0,0 +1,339 @@
+import numpy as np
+import pandas as pd
+from app.models.escalation import escalation_decision
+from app.models.escalation import check_business_rules
+
+
+def processing_time_function(lr_proba, second_proba, threshold=0.5, base_time=5,
+                             lr_weight=1.0, second_weight=1.5):
+    """Draw a random handling time for an application sent to manual review.
+
+    The two model probabilities are blended with the given weights. The closer
+    the blend is to ``threshold``, the higher the uncertainty and the longer
+    the mean handling time: from ``base_time`` up to ``4 * base_time``. The
+    actual time is sampled from an exponential distribution with that mean
+    (NumPy's global RNG) and is never less than 1.
+    """
+    weight_sum = lr_weight + second_weight
+    blended = (lr_proba * lr_weight + second_proba * second_weight) / weight_sum
+
+    # Distance to the threshold, normalised by the largest possible distance.
+    distance = abs(blended - threshold)
+    farthest = max(threshold, 1 - threshold)
+    uncertainty = 1 - (distance / farthest)
+
+    expected_time = base_time * (1 + 3 * uncertainty)
+    sampled = np.random.exponential(scale=expected_time)
+
+    return max(1, sampled)
+
+
+class ApplicationProcessor:
+    """Minute-by-minute simulator of the credit-application pipeline.
+
+    Each ``process_batch`` call represents one time step: business rules are
+    checked first, the remaining applications go through the LR / second-model
+    escalation, and anything that needs a human is queued for one of two
+    independent pools - specialists (model uncertainty) and business experts
+    (business rules). Running totals and per-step histories live in
+    ``self.stats``; per-batch decision counts live in ``self.batch_stats``.
+    """
+
+    # Columns passed to the models, in this order.
+    _MODEL_FEATURES = (
+        'RevolvingUtilizationOfUnsecuredLines',
+        'age',
+        'NumberOfTime30-59DaysPastDueNotWorse',
+        'DebtRatio',
+        'MonthlyIncome',
+        'NumberOfOpenCreditLinesAndLoans',
+        'NumberOfTimes90DaysLate',
+        'NumberRealEstateLoansOrLines',
+        'NumberOfTime60-89DaysPastDueNotWorse',
+        'NumberOfDependents',
+    )
+    # Columns whose missing values are replaced with 0 before scoring.
+    _ZERO_FILLED = frozenset({'DebtRatio', 'MonthlyIncome', 'NumberOfDependents'})
+
+    def __init__(self, lr_model, second_model, second_model_name,
+                 specialists_count=5,  # main specialists (model uncertainty)
+                 business_specialists_count=2,  # experts (business rules)
+                 base_processing_time=5,
+                 business_processing_time=10,  # experts take longer per case
+                 lr_weight=1.0, second_weight=1.5):
+        """Store the models and settings and create empty pools, queues and stats."""
+        self.lr_model = lr_model
+        self.second_model = second_model
+        self.second_model_name = second_model_name
+        self.specialists_count = specialists_count
+        self.business_specialists_count = business_specialists_count
+        self.base_processing_time = base_processing_time
+        self.business_processing_time = business_processing_time
+        self.lr_weight = lr_weight
+        self.second_weight = second_weight
+
+        # Remaining busy time per worker; 0 means free.
+        self.specialists = [0] * specialists_count
+        self.business_specialists = [0] * business_specialists_count  # separate pool
+        self.manual_queue = []  # queue fed by the models
+        self.business_queue = []  # queue fed by the business rules
+
+        self.stats = {
+            'total_processed': 0,
+            'auto_approved': 0,
+            'auto_declined': 0,
+            'manual_sent': 0,
+            'manual_processed': 0,
+            'business_manual_sent': 0,
+            'business_manual_processed': 0,
+            'queue_history': [],
+            'business_queue_history': [],
+            'wait_times': [],
+            'business_wait_times': [],
+            'specialist_busy': [],
+            'business_specialist_busy': [],
+            'business_rules_manual': 0,
+            'business_rules_auto': 0
+        }
+        self.batch_stats = []
+
+    def process_batch(self, applications_batch, preprocessor, scaler,
+                      threshold, lr_margins, second_margins, current_time):
+        """Process the applications that arrived during the current time step.
+
+        Args:
+            applications_batch: list of application records (dicts); may be empty.
+            preprocessor, scaler: passed through to ``escalation_decision``.
+            threshold: approval threshold.
+            lr_margins, second_margins: margins passed to ``escalation_decision``.
+            current_time: index of the current step, used for wait times.
+
+        Returns:
+            dict describing this step: new/auto/manual counts, queue sizes and
+            the number of busy workers.
+        """
+        minute_results = {
+            'new_apps': len(applications_batch),
+            'auto_decisions': [],
+            'new_manual': 0,
+            'new_business_manual': 0,
+            'processed_manual': 0,
+            'processed_business_manual': 0,
+            'queue_size': 0,
+            'business_queue_size': 0,
+            # NOTE(review): busy counts are a snapshot taken before this step's
+            # timer decrement and before new work is handed out - confirm that
+            # this one-step lag is intended.
+            'specialists_busy': sum(1 for s in self.specialists if s > 0),
+            'business_specialists_busy': sum(1 for s in self.business_specialists if s > 0),
+            'business_rules': 0
+        }
+
+        # 1. One time unit passes for every busy worker.
+        self.specialists = [max(0, s - 1) for s in self.specialists]
+        self.business_specialists = [max(0, s - 1) for s in self.business_specialists]
+
+        # 2. Classify the new applications. An empty batch only skips this
+        # part: free workers must still pick up queued work below.
+        if applications_batch:
+            self._classify_batch(applications_batch, preprocessor, scaler,
+                                 threshold, lr_margins, second_margins,
+                                 current_time, minute_results)
+
+        # 3. Hand queued work to free workers.
+        self._dispatch_business_queue(current_time, minute_results)
+        self._dispatch_manual_queue(current_time, threshold, minute_results)
+
+        # 4. Record the state of this step.
+        minute_results['queue_size'] = len(self.manual_queue)
+        minute_results['business_queue_size'] = len(self.business_queue)
+        self.stats['queue_history'].append(len(self.manual_queue))
+        self.stats['business_queue_history'].append(len(self.business_queue))
+        self.stats['specialist_busy'].append(minute_results['specialists_busy'])
+        self.stats['business_specialist_busy'].append(minute_results['business_specialists_busy'])
+
+        return minute_results
+
+    def _classify_batch(self, applications_batch, preprocessor, scaler,
+                        threshold, lr_margins, second_margins,
+                        current_time, minute_results):
+        """Route a non-empty batch through business rules and models; log batch stats."""
+        df = pd.DataFrame(applications_batch)
+
+        model_indices, business_manual_count, business_auto_count = self._apply_business_rules(
+            applications_batch, df, current_time, minute_results)
+
+        lr_confident_count = 0
+        second_confident_count = 0
+        second_uncertain_count = 0
+        if model_indices:
+            (lr_confident_count,
+             second_confident_count,
+             second_uncertain_count) = self._score_with_models(
+                applications_batch, df, model_indices, preprocessor, scaler,
+                threshold, lr_margins, second_margins, current_time, minute_results)
+
+        self.batch_stats.append({
+            'time': current_time,
+            'business_manual': business_manual_count,
+            'business_auto': business_auto_count,
+            'lr_confident': lr_confident_count,
+            'second_confident': second_confident_count,
+            'second_uncertain': second_uncertain_count,
+            'total_in_batch': len(applications_batch),
+            'new_manual': minute_results['new_manual'],
+            'new_business_manual': minute_results['new_business_manual'],
+            'auto_total': len(minute_results['auto_decisions'])
+        })
+
+    def _apply_business_rules(self, applications_batch, df, current_time, minute_results):
+        """Apply business rules to the batch; return (model indices, manual count, auto count)."""
+        manual_mask, auto_reject_mask, messages, auto_decisions = check_business_rules(df)
+
+        model_indices = []
+        for idx, app in enumerate(applications_batch):
+            if manual_mask[idx]:
+                # Manual review required by the business rules: separate queue.
+                self.business_queue.append({
+                    'app': app,
+                    'arrival_time': current_time,
+                    'reason': 'business_rules',
+                    'message': messages[idx],
+                    'lr_proba': None,
+                    'second_proba': None
+                })
+                minute_results['new_business_manual'] += 1
+                minute_results['business_rules'] += 1
+                self.stats['business_rules_manual'] += 1
+                self.stats['business_manual_sent'] += 1
+
+            elif auto_reject_mask[idx]:
+                # Automatic rejection by the business rules.
+                minute_results['auto_decisions'].append({
+                    'final_decision': auto_decisions[idx],
+                    'model_used': 'Business Rules',
+                    'probability': 1.0,
+                    'needs_review': False,
+                    'message': messages[idx]
+                })
+                self.stats['auto_declined'] += 1
+                self.stats['business_rules_auto'] += 1
+                self.stats['total_processed'] += 1
+
+            else:
+                # The application goes on to the models.
+                model_indices.append(idx)
+
+        return model_indices, manual_mask.sum(), auto_reject_mask.sum()
+
+    def _score_with_models(self, applications_batch, df, model_indices, preprocessor, scaler,
+                           threshold, lr_margins, second_margins, current_time, minute_results):
+        """Score the rule-free applications; return the (LR, second, uncertain) counts."""
+        df_models = df.iloc[model_indices].copy()
+
+        model_df = pd.DataFrame({
+            column: (df_models[column].fillna(0) if column in self._ZERO_FILLED
+                     else df_models[column])
+            for column in self._MODEL_FEATURES
+        })
+
+        # One escalation call for the whole batch.
+        batch_decisions, batch_manual_mask, stats = escalation_decision(
+            model_df,
+            self.lr_model,
+            self.second_model,
+            self.second_model_name,
+            threshold=threshold,
+            lr_margins=lr_margins,
+            second_margins=second_margins,
+            preprocessor=preprocessor,
+            scaler=scaler
+        )
+
+        # Map the decisions back to positions in the original batch.
+        for local_idx, orig_idx in enumerate(model_indices):
+            decision = batch_decisions[local_idx]
+
+            if decision['needs_review']:
+                self.manual_queue.append({
+                    'app': applications_batch[orig_idx],
+                    'arrival_time': current_time,
+                    'reason': 'model_uncertainty',
+                    'decision': decision,
+                    'lr_proba': decision.get('lr_proba'),
+                    'second_proba': decision.get('second_proba')
+                })
+                minute_results['new_manual'] += 1
+                self.stats['manual_sent'] += 1
+            else:
+                minute_results['auto_decisions'].append(decision)
+                if decision['final_decision'] == 0:
+                    self.stats['auto_approved'] += 1
+                else:
+                    self.stats['auto_declined'] += 1
+
+            # NOTE(review): assumed to count every model-scored application,
+            # auto-decided or queued - confirm against the original nesting.
+            self.stats['total_processed'] += 1
+
+        return stats['lr_confident'], stats['second_confident'], stats['second_uncertain']
+
+    def _dispatch_business_queue(self, current_time, minute_results):
+        """Give queued business-rule cases to free experts (first in, first out)."""
+        for i, remaining in enumerate(self.business_specialists):
+            if not self.business_queue:
+                break
+            if remaining > 0:
+                continue
+
+            queued = self.business_queue.pop(0)
+            self.stats['business_wait_times'].append(current_time - queued['arrival_time'])
+
+            # Experts spend a fixed time on each business-rule case.
+            self.business_specialists[i] = self.business_processing_time
+            minute_results['processed_business_manual'] += 1
+            self.stats['business_manual_processed'] += 1
+
+    def _dispatch_manual_queue(self, current_time, threshold, minute_results):
+        """Give queued model-uncertainty cases to free specialists (first in, first out)."""
+        for i, remaining in enumerate(self.specialists):
+            if not self.manual_queue:
+                break
+            if remaining > 0:
+                continue
+
+            queued = self.manual_queue.pop(0)
+            self.stats['wait_times'].append(current_time - queued['arrival_time'])
+
+            if queued['reason'] == 'business_rules':
+                proc_time = self.business_processing_time
+            else:
+                # The queue entry always carries both keys, possibly as None,
+                # so the 0.5 fallback has to be applied explicitly.
+                lr_proba = queued.get('lr_proba')
+                second_proba = queued.get('second_proba')
+                proc_time = processing_time_function(
+                    lr_proba=0.5 if lr_proba is None else lr_proba,
+                    second_proba=0.5 if second_proba is None else second_proba,
+                    threshold=threshold,
+                    base_time=self.base_processing_time,
+                    lr_weight=self.lr_weight,
+                    second_weight=self.second_weight
+                )
+
+            self.specialists[i] = proc_time
+            minute_results['processed_manual'] += 1
+            self.stats['manual_processed'] += 1
+
+    def load_test_dataset(self, filepath):
+        """Read a CSV file and return its rows as dicts, without the target column."""
+        df = pd.read_csv(filepath)
+        if 'SeriousDlqin2yrs' in df.columns:
+            df = df.drop(columns=['SeriousDlqin2yrs'])
+        return df.to_dict('records')
+
+    def get_queue_stats(self):
+        """Return current queue sizes, wait-time aggregates and the recorded histories.
+
+        Average and maximum waits are 0 when nothing has been dispatched yet.
+        """
+        if self.stats['wait_times']:
+            avg_wait = np.mean(self.stats['wait_times'])
+            max_wait = np.max(self.stats['wait_times'])
+        else:
+            avg_wait = max_wait = 0
+
+        if self.stats['business_wait_times']:
+            avg_business_wait = np.mean(self.stats['business_wait_times'])
+            max_business_wait = np.max(self.stats['business_wait_times'])
+        else:
+            avg_business_wait = max_business_wait = 0
+
+        return {
+            'current_queue': len(self.manual_queue),
+            'current_business_queue': len(self.business_queue),
+            'avg_wait_minutes': avg_wait,
+            'max_wait_minutes': max_wait,
+            'avg_business_wait_minutes': avg_business_wait,
+            'max_business_wait_minutes': max_business_wait,
+            'queue_history': self.stats['queue_history'],
+            'business_queue_history': self.stats['business_queue_history'],
+            'specialist_busy': self.stats['specialist_busy'],
+            'business_specialist_busy': self.stats['business_specialist_busy'],
+            'business_rules_split': {
+                'manual': self.stats['business_rules_manual'],
+                'auto': self.stats['business_rules_auto']
+            }
+        }
+
+ # def reset(self):
+ # self.specialists = [0] * self.specialists_count
+ # self.business_specialists = [0] * self.business_specialists_count
+ # self.manual_queue = []
+ # self.business_queue = []
+ # self.stats = {
+ # 'total_processed': 0,
+ # 'auto_approved': 0,
+ # 'auto_declined': 0,
+ # 'manual_sent': 0,
+ # 'manual_processed': 0,
+ # 'business_manual_sent': 0,
+ # 'business_manual_processed': 0,
+ # 'queue_history': [],
+ # 'business_queue_history': [],
+ # 'wait_times': [],
+ # 'business_wait_times': [],
+ # 'specialist_busy': [],
+ # 'business_specialist_busy': [],
+ # 'business_rules_manual': 0,
+ # 'business_rules_auto': 0
+ # }
\ No newline at end of file
diff --git a/app/simulation/core/traffic_generator.py b/app/simulation/core/traffic_generator.py
new file mode 100644
index 0000000000000000000000000000000000000000..0d1dcd1be00ca827463555716bc77677e6777f53
--- /dev/null
+++ b/app/simulation/core/traffic_generator.py
@@ -0,0 +1,234 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from datetime import datetime
+
+
+class TrafficGenerator:
+    """Synthetic generator of a 24-hour loan-application arrival profile.
+
+    The intensity curve is the sum of a constant background, a daytime
+    Gaussian peak, an evening Gaussian peak and a negative Gaussian
+    "afternoon dip".  It can be multiplied by uniform random noise and by
+    short Gaussian impulses, and is then scaled so that the per-minute
+    counts add up to ``total_applications``.
+    """
+
+    def __init__(self, total_applications=101503, random_seed=42):
+        """Store the daily total and seed NumPy's global random generator.
+
+        Args:
+            total_applications: number of applications to spread over 24 hours.
+            random_seed: value passed to ``np.random.seed``.  This reseeds
+                the process-wide NumPy generator, not a private one.
+        """
+        self.total = total_applications
+        np.random.seed(random_seed)
+
+        # Shape of the intensity curve; centres and widths are in hours.
+        self.intensity_params = {
+            'background': 0.1,
+            # Daytime peak, kept fairly narrow.
+            'day_center': 13, 'day_amplitude': 0.9, 'day_width': 2.5,
+            # Evening peak, a little earlier and wider.
+            'evening_center': 19.5, 'evening_amplitude': 1.3, 'evening_width': 2.2,
+            # Dip after lunch.
+            'afternoon_dip_center': 15.5, 'afternoon_dip_strength': 0.3,
+            'afternoon_dip_width': 1.5,
+            'noise_level': 0.1,
+        }
+
+    def _time_to_hours(self, time_tuple):
+        """Convert an ``(hours, minutes)`` pair into fractional hours."""
+        return time_tuple[0] + time_tuple[1] / 60
+
+    @staticmethod
+    def _circular_hour_distance(t_cycle, center):
+        """Return the shortest distance in hours between two times on a 24-hour clock.
+
+        Wrap-around is handled in both directions, so 23:45 and 00:15 are
+        0.5 h apart.  ``t_cycle`` may be a scalar or a NumPy array.
+        """
+        delta = np.abs(t_cycle - center) % 24
+        return np.minimum(delta, 24 - delta)
+
+    def loan_intensity_periodic(self, t, impulses=None):
+        """Return the (unscaled) arrival intensity at time ``t``.
+
+        Args:
+            t: time in hours (may be fractional); it is reduced modulo 24.
+            impulses: optional list of dicts such as
+                ``[{'time': (16, 37), 'strength': 2.0}, ...]`` where
+                ``'time'`` is an ``(hours, minutes)`` pair.  Each impulse
+                multiplies the intensity by
+                ``1 + strength * exp(-d**2 / (2 * 0.25**2))``, ``d`` being
+                the circular distance in hours to the impulse time.
+
+        Returns:
+            The intensity value.  It is clamped to at least 0.05 before the
+            noise and impulse factors are applied.
+        """
+        params = self.intensity_params
+        t_cycle = t % 24
+
+        # Daytime peak.  NOTE: unlike the other components this Gaussian is
+        # evaluated on the plain (non-wrapped) distance to its centre.
+        day = params['day_amplitude'] * np.exp(
+            -(t_cycle - params['day_center']) ** 2 / (2 * params['day_width'] ** 2)
+        )
+
+        # Evening peak.
+        evening_diff = self._circular_hour_distance(t_cycle, params['evening_center'])
+        evening = params['evening_amplitude'] * np.exp(
+            -(evening_diff) ** 2 / (2 * params['evening_width'] ** 2)
+        )
+
+        # Afternoon dip (a negative contribution).
+        dip_diff = self._circular_hour_distance(t_cycle, params['afternoon_dip_center'])
+        dip = -params['afternoon_dip_strength'] * np.exp(
+            -(dip_diff) ** 2 / (2 * params['afternoon_dip_width'] ** 2)
+        )
+
+        intensity = params['background'] + day + evening + dip
+        intensity = np.maximum(intensity, 0.05)  # never below the floor
+
+        # Multiplicative noise: a single uniform draw per call.
+        noise_level = params['noise_level']
+        if noise_level > 0:
+            noise = 1.0 + np.random.uniform(-noise_level, noise_level)
+            intensity *= noise
+
+        # Impulses: smooth Gaussian bumps with a fixed sigma of 0.25 h.
+        if impulses:
+            for imp in impulses:
+                imp_time = self._time_to_hours(imp['time']) % 24
+                # The distance wraps in both directions so that an impulse
+                # just after midnight also affects the minutes just before it.
+                imp_diff = self._circular_hour_distance(t_cycle, imp_time)
+                imp_factor = 1.0 + imp['strength'] * np.exp(-(imp_diff) ** 2 / (2 * 0.25 ** 2))
+                intensity *= imp_factor
+
+        return intensity
+
+    def generate_minute_counts(self, start_hour=None, start_minute=0, impulses=None):
+        """Return the number of applications for every minute of a 24-hour window.
+
+        Args:
+            start_hour: hour at which the window starts.  When ``None`` the
+                current local hour AND minute are used (``start_minute`` is
+                then ignored).
+            start_minute: minute at which the window starts.
+            impulses: optional impulse list, e.g.
+                ``[{'time': (5, 30), 'strength': 2.0}]`` for an impulse at
+                05:30 with strength 2.0.
+
+        Returns:
+            Integer NumPy array with one entry per minute whose sum equals
+            ``self.total``.
+        """
+        if start_hour is None:
+            now = datetime.now()
+            start_hour, start_minute = now.hour, now.minute
+
+        start_time = start_hour + start_minute / 60
+
+        # Offsets (in hours) of every minute in the window.  The intensity is
+        # evaluated minute by minute so each minute gets its own noise draw.
+        minute_offsets = np.arange(0, 24, 1 / 60)
+        intensity_values = np.array([
+            self.loan_intensity_periodic(start_time + offset, impulses)
+            for offset in minute_offsets
+        ])
+
+        scale_factor = self.total / np.sum(intensity_values)
+        minute_counts = np.floor(intensity_values * scale_factor).astype(int)
+
+        # Flooring loses a few applications; hand them out, one each, to the
+        # minutes with the highest intensity so the total matches exactly.
+        remainder = self.total - np.sum(minute_counts)
+        if remainder > 0:
+            top_minutes = np.argsort(intensity_values)[-remainder:]
+            minute_counts[top_minutes] += 1
+
+        return minute_counts
+
+    def generate_hourly_counts(self, start_hour=None, start_minute=0, impulses=None):
+        """Return the application counts aggregated into 24 consecutive 60-minute buckets.
+
+        The buckets start at the window start, so they are aligned to
+        ``start_minute`` rather than to full clock hours.
+        """
+        minute_counts = self.generate_minute_counts(start_hour, start_minute, impulses)
+        return [np.sum(minute_counts[i * 60:(i + 1) * 60]) for i in range(24)]
+
+    def generate_random_impulses(self, n_impulses=1, min_strength=1.5, max_strength=3.0):
+        """Generate ``n_impulses`` impulses with random time and strength.
+
+        Returns:
+            List of dicts ``{'time': (hour, minute), 'strength': value}``
+            with ``strength`` drawn uniformly from
+            ``[min_strength, max_strength)``.
+        """
+        impulses = []
+        for _ in range(n_impulses):
+            # Draw order (hour, minute, strength) is kept stable so seeded
+            # runs stay reproducible.
+            hour = np.random.randint(0, 24)
+            minute = np.random.randint(0, 60)
+            strength = np.random.uniform(min_strength, max_strength)
+            impulses.append({'time': (hour, minute), 'strength': strength})
+        return impulses
+
+    def plot_distribution(self, start_hour=None, start_minute=0, impulses=None):
+        """Draw a bar chart of applications per hour and print summary statistics.
+
+        Args:
+            start_hour: start hour of the window; ``None`` means "now".
+            start_minute: start minute of the window.
+            impulses: optional impulse list (see ``generate_minute_counts``).
+
+        Returns:
+            Tuple ``(hours_sorted, counts_sorted)``: the hour labels in
+            ascending order and the matching counts.
+        """
+        # Resolve "now" exactly once so the generated data, the bar labels
+        # and the title all refer to the same start time.
+        if start_hour is None:
+            now = datetime.now()
+            start_hour, start_minute = now.hour, now.minute
+
+        hourly_counts = self.generate_hourly_counts(start_hour, start_minute, impulses)
+
+        hours = [(start_hour + i) % 24 for i in range(24)]
+        sorted_pairs = sorted(zip(hours, hourly_counts))
+        hours_sorted, counts_sorted = zip(*sorted_pairs)
+
+        plt.figure(figsize=(14, 6))
+
+        # Bar colour depends on the part of the day.
+        colors = []
+        for h in hours_sorted:
+            if 0 <= h <= 5:
+                colors.append('#2c3e50')  # night
+            elif 6 <= h <= 11:
+                colors.append('#3498db')  # morning
+            elif 12 <= h <= 16:
+                colors.append('#f39c12')  # daytime (contains the dip)
+            else:
+                colors.append('#e67e22')  # evening
+
+        plt.bar([str(h) for h in hours_sorted], counts_sorted,
+                alpha=0.8, color=colors, edgecolor='black', linewidth=1)
+
+        # Mean line.
+        mean_val = np.mean(counts_sorted)
+        plt.axhline(y=mean_val, color='red', linestyle='--',
+                    alpha=0.7, linewidth=2, label=f'Среднее: {mean_val:.0f}')
+
+        # Mark each impulse on the bar of the nearest hour.
+        if impulses:
+            for imp in impulses:
+                imp_hours = self._time_to_hours(imp['time']) % 24
+                closest_hour = min(hours_sorted, key=lambda x: abs(x - imp_hours))
+                idx = list(hours_sorted).index(closest_hour)
+                plt.plot(idx, counts_sorted[idx], 'g*', markersize=15,
+                         label=f'Импульс {imp["strength"]:.1f}x' if idx == 0 else '')
+
+        # Shade the afternoon-dip hours.
+        dip_idx = [i for i, h in enumerate(hours_sorted) if 14 <= h <= 16]
+        if dip_idx:
+            plt.axvspan(dip_idx[0] - 0.4, dip_idx[-1] + 0.4, alpha=0.2, color='gray',
+                        label='Послеобеденный спад')
+
+        plt.xlabel('Час', fontsize=12)
+        plt.ylabel('Количество заявок', fontsize=12)
+        plt.title(f'Распределение заявок по часам (старт в {start_hour:02d}:{start_minute:02d})',
+                  fontsize=14, fontweight='bold')
+        plt.grid(True, alpha=0.3, axis='y')
+        plt.legend(loc='upper right')
+        plt.xticks(rotation=45)
+        plt.tight_layout()
+        plt.show()
+
+        # Summary statistics.
+        print("\n📊 Статистика распределения:")
+        print(f" Всего заявок: {sum(counts_sorted)}")
+        print(f" Среднее: {mean_val:.0f} заявок/час")
+        print(f" Максимум: {max(counts_sorted)} заявок")
+        print(f" Минимум: {min(counts_sorted)} заявок")
+
+        return hours_sorted, counts_sorted
+
+
+# Пример использования
+# if __name__ == "__main__":
+# # Создаём генератор
+# gen = TrafficGenerator(total_applications=110000)
+#
+# # 1. Без импульсов
+# print("Без импульсов:")
+# counts = gen.generate_minute_counts(start_hour=17)
+# print(f"Всего минут: {len(counts)}")
+# print(f"Всего заявок: {sum(counts)}")
+#
+# # 2. С импульсом в 5:30 утра
+# impulses = [{'time': (5, 30), 'strength': 2.0}]
+# print("\nС импульсом в 5:30:")
+# counts = gen.generate_minute_counts(start_hour=17, impulses=impulses)
+#
+# # 3. Построить график
+# gen.plot_distribution(start_hour=17, impulses=impulses)
+#
+# # 4. Случайные импульсы
+# random_impulses = gen.generate_random_impulses(n_impulses=2)
+# print("\nСлучайные импульсы:", random_impulses)
+# gen.plot_distribution(start_hour=17, impulses=random_impulses)
\ No newline at end of file
diff --git a/app/simulation/visualization/__init__.py b/app/simulation/visualization/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/app/simulation/visualization/__pycache__/__init__.cpython-311.pyc b/app/simulation/visualization/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ef3a2640a504433372534c26fe12298bc1f56c9e
Binary files /dev/null and b/app/simulation/visualization/__pycache__/__init__.cpython-311.pyc differ
diff --git a/app/simulation/visualization/__pycache__/animation.cpython-311.pyc b/app/simulation/visualization/__pycache__/animation.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0a02b8bc0fd32084f4ce63f1dff6685369a7ae31
Binary files /dev/null and b/app/simulation/visualization/__pycache__/animation.cpython-311.pyc differ
diff --git a/app/simulation/visualization/__pycache__/plots.cpython-311.pyc b/app/simulation/visualization/__pycache__/plots.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..79adb43bda8faca1c746dd259a06491403b4d386
Binary files /dev/null and b/app/simulation/visualization/__pycache__/plots.cpython-311.pyc differ
diff --git a/app/simulation/visualization/animation.py b/app/simulation/visualization/animation.py
new file mode 100644
index 0000000000000000000000000000000000000000..dfa4da9ec4166c1715ea716815f0eb56c6cafe86
--- /dev/null
+++ b/app/simulation/visualization/animation.py
@@ -0,0 +1,246 @@
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+import numpy as np
+
+
+def minutes_to_time(minutes, start_time="00:00"):
+    """Return the clock time reached ``minutes`` after ``start_time`` as "HH:MM".
+
+    ``start_time`` is an "HH:MM" string; the hour wraps around at midnight.
+    """
+    base_hour, base_minute = (int(part) for part in start_time.split(':'))
+    hours_total, minute = divmod(base_hour * 60 + base_minute + minutes, 60)
+    return f"{hours_total % 24:02d}:{minute:02d}"
+
+
+def _specialist_state_to_heat(state):
+    """Map one specialist state value to its position on the heatmap colour axis.
+
+    Non-positive values count as "free" (the same rule the legend counter
+    uses); the remaining bands follow the legend: 1-3, 4-7 and 8+.
+    """
+    if state <= 0:
+        return 0.1
+    if state <= 3:
+        return 0.4
+    if state <= 7:
+        return 0.7
+    return 1.0
+
+
+def create_animation_frame_plotly(frame_data, specialists_count, second_model_name="XGBoost"):
+    """Build the Plotly dashboard figure for one simulation frame.
+
+    Layout (3 rows x 2 columns): inflow and specialist-load curves on top, a
+    20-column heatmap of specialist states in the middle, and a summary
+    table plus a text status card at the bottom.
+
+    Args:
+        frame_data: dict describing the frame.  Keys read with ``[]``:
+            ``'specialist_states'``, ``'cumulative'``, ``'queue'``,
+            ``'time_str'`` and ``'time'``; keys read with ``.get``:
+            ``'inflow_history'``, ``'load_history'``, ``'business_queue'``
+            and ``'avg_wait'``.
+        specialists_count: number of specialists; determines how many
+            heatmap rows are drawn.
+        second_model_name: accepted for interface compatibility; this
+            function does not use it.
+
+    Returns:
+        The assembled Plotly figure.
+    """
+    # Fixed X axis for the two time-series charts: one tick every 180 minutes.
+    time_ticks = list(range(0, 1441, 180))
+    time_labels = [minutes_to_time(t, "00:00") for t in time_ticks]
+
+    fig = make_subplots(
+        rows=3, cols=2,
+        subplot_titles=('📈 Динамика входящего потока', '⚙️ Загрузка специалистов (%)',
+                        '👥 МОНИТОРИНГ РАБОТЫ СПЕЦИАЛИСТОВ', '',
+                        '📊 Сводная статистика обработки', '🎯 Оперативные показатели'),
+        specs=[
+            [{'type': 'scatter'}, {'type': 'scatter'}],
+            [{'type': 'heatmap', 'colspan': 2}, None],
+            [{'type': 'table'}, {'type': 'scatter'}]
+        ],
+        row_heights=[0.25, 0.40, 0.35],
+        vertical_spacing=0.1,
+    )
+
+    # --- Row 1: time-series charts ---
+    inflow_h = frame_data.get('inflow_history', [])
+    load_h = frame_data.get('load_history', [])
+
+    fig.add_trace(go.Scatter(y=inflow_h, fill='tozeroy', line=dict(color='#4361ee', width=2)),
+                  row=1, col=1)
+    # Load values are multiplied by 100 to be shown as percentages.
+    fig.add_trace(go.Scatter(y=[load * 100 for load in load_h], fill='tozeroy',
+                             line=dict(color='#4cc9f0', width=2)),
+                  row=1, col=2)
+
+    for col in [1, 2]:
+        fig.update_xaxes(range=[0, 1440], tickvals=time_ticks, ticktext=time_labels, row=1, col=col)
+        fig.update_yaxes(rangemode="tozero", row=1, col=col)
+
+    # --- Row 2: heatmap, always 20 cells wide ---
+    states = np.array(frame_data['specialist_states'])
+    cols = 20
+    rows = int(np.ceil(specialists_count / cols))
+
+    # Cells without a specialist stay NaN so they are not coloured.
+    z_matrix = np.full((rows, cols), np.nan)
+    # Only as many states as fit into the grid are drawn; a longer state
+    # list would otherwise index past the last row.
+    for i, val in enumerate(states[:rows * cols]):
+        z_matrix[divmod(i, cols)] = _specialist_state_to_heat(val)
+
+    colorscale = [
+        [0.0, '#66ccff'],  # free (0)
+        [0.4, '#4ade80'],  # 1-3 min
+        [0.7, '#facc15'],  # 4-7 min
+        [1.0, '#f87171']   # 8+ min
+    ]
+
+    fig.add_trace(go.Heatmap(
+        z=z_matrix, colorscale=colorscale, showscale=False,
+        xgap=2, ygap=2, zmin=0, zmax=1, hoverinfo='none'
+    ), row=2, col=1)
+
+    # Legend text placed above the heatmap.
+    free = sum(1 for t in states if t <= 0)
+    legend = (f"Свободно: {free} | ■ Свободен "
+              f"■ 1-3м ■ 4-7м "
+              f"■ 8м+")
+    fig.add_annotation(text=legend, xref="paper", yref="paper", x=0.5, y=0.70, showarrow=False,
+                       font=dict(size=14))
+
+    # --- Row 3, left: summary table ---
+    cum = frame_data['cumulative']
+    fig.add_trace(go.Table(
+        header=dict(values=['Параметр', 'Значение'], fill_color='#1e293b',
+                    font=dict(color='white', size=15), height=35),
+        cells=dict(values=[
+            ['✅ Авто-одобрено', '❌ Авто-отказы', '👤 На рассмотрении (Manual)', 'ИТОГО ОБРАБОТАНО'],
+            [cum['auto_approved'], cum['auto_declined'],
+             cum['manual_processed'] + cum['business_manual_processed'], f"{cum['total_processed']}"]
+        ], align='left', font=dict(size=14), height=35, fill_color='#f8f9fa')
+    ), row=3, col=1)
+
+    # --- Row 3, right: status card ---
+    q_models = frame_data['queue']  # queue waiting for specialists
+    q_business = frame_data.get('business_queue', 0)  # business-rules queue
+    avg_w = frame_data.get('avg_wait', 0)
+
+    # Plotly renders "<br>" inside trace text as a line break.
+    # NOTE(review): any additional inline markup (bold/span styling) that may
+    # have surrounded these lines is not recoverable from this file - confirm
+    # against the intended design.
+    status_card = (
+        f"МОНИТОРИНГ<br>"
+        f"👤 ОЧЕРЕДЬ (СПЕЦ): {q_models}<br>"
+        f"⚙️ Бизнес-правила: {q_business}<br>"
+        f"🕒 Время: {frame_data['time_str']}<br>"
+        f"⏳ Ожидание: {avg_w:.1f} мин"
+    )
+
+    fig.add_trace(go.Scatter(x=[0], y=[0], mode='text', text=[status_card], textfont=dict(size=16)),
+                  row=3, col=2)
+
+    # Hide the axes of the heatmap and of the status card.
+    for row, col in ((2, 1), (3, 2)):
+        fig.update_xaxes(visible=False, row=row, col=col)
+        fig.update_yaxes(visible=False, row=row, col=col)
+
+    # Fixed Y ranges keep the charts from rescaling between frames.
+    fig.update_yaxes(range=[0, 60], row=1, col=1)  # 60 is a hard-coded upper bound for the inflow
+    fig.update_yaxes(range=[0, 105], row=1, col=2)  # load is a percentage
+
+    fig.update_layout(
+        height=950,
+        margin=dict(t=80, b=40, l=50, r=50),
+        template="plotly_white",
+        showlegend=False,
+        # No transition animation between frames.
+        transition_duration=0,
+        hovermode=False
+    )
+
+    # datarevision changes with the frame time so Plotly sees the data as updated.
+    fig.layout.datarevision = frame_data['time']
+    return fig
+
+
+from matplotlib.animation import FFMpegWriter
+
+import matplotlib.pyplot as plt
+import matplotlib.animation as animation
+import tempfile
+import numpy as np
+
+import matplotlib.pyplot as plt
+import matplotlib.animation as animation
+import tempfile
+import numpy as np
+import os
+
+
+# Внести изменения в функцию create_simulation_video в animation.py
+def create_simulation_video(frames, specialists_count, second_model_name, fps=24):
+    """Render the simulation frames into an MP4 file and return its path.
+
+    Every frame is drawn on a 2x2 Matplotlib dashboard (inflow curve,
+    specialist load, specialist heatmap, queue/statistics panel) and encoded
+    with ``FFMpegWriter`` (libx264, yuv420p).
+
+    Args:
+        frames: sequence of frame dicts.  Keys read per frame:
+            ``'inflow_history'``, ``'load_history'``, ``'specialist_states'``,
+            ``'queue'``, ``'cumulative'``, ``'time_str'`` and, optionally,
+            ``'business_queue'``.
+        specialists_count: number of specialists (heatmap size and title).
+        second_model_name: model name printed in the statistics panel.
+        fps: frames per second of the resulting video.
+
+    Returns:
+        Path of the created temporary ``.mp4`` file, or ``None`` when
+        ``frames`` is empty.  The caller owns the file and must delete it.
+
+    Raises:
+        Whatever ``Animation.save`` raises (for example when ffmpeg is not
+        available); the temporary file is removed before re-raising.
+    """
+    if not frames:
+        return None
+
+    # NOTE: this changes the global Matplotlib style for the whole process.
+    plt.style.use('seaborn-v0_8-whitegrid')
+    fig, axes = plt.subplots(2, 2, figsize=(16, 10), facecolor='#f8f9fa')
+    plt.subplots_adjust(hspace=0.4, wspace=0.25)
+    # Detach the figure from pyplot; the figure object itself is still used
+    # by the animation below.
+    plt.close(fig)
+
+    cols = 20
+    rows = int(np.ceil(specialists_count / cols))
+
+    def update(i):
+        """Redraw all four panels for frame ``i``."""
+        data = frames[i]
+        for ax in axes.flatten():
+            ax.clear()
+            ax.set_facecolor('white')
+
+        # 1. Inflow dynamics.
+        y_inflow = data['inflow_history']
+        inflow_ax = axes[0, 0]
+        inflow_ax.fill_between(range(len(y_inflow)), y_inflow, color='#4361ee', alpha=0.3)
+        inflow_ax.plot(range(len(y_inflow)), y_inflow, color='#4361ee', linewidth=2)
+        inflow_ax.set_xlim(0, 1440)  # fixed time axis
+        inflow_ax.set_title("ДИНАМИКА ПОТОКА (заявок/мин)", fontsize=12, fontweight='bold')
+        inflow_ax.set_xlabel("Минуты симуляции")
+
+        # 2. System load, shown in percent.
+        y_load = [v * 100 for v in data['load_history']]
+        # An empty history (nothing recorded yet) is shown as 0 %.
+        current_load = y_load[-1] if y_load else 0.0
+        load_ax = axes[0, 1]
+        load_ax.fill_between(range(len(y_load)), y_load, color='#4cc9f0', alpha=0.3)
+        load_ax.plot(range(len(y_load)), y_load, color='#4cc9f0', linewidth=2)
+        load_ax.axhline(y=80, color='#f72585', linestyle='--', alpha=0.6)
+        load_ax.set_xlim(0, 1440)
+        load_ax.set_ylim(0, 110)
+        load_ax.set_title(f"ЗАГРУЖЕННОСТЬ СПЕЦИАЛИСТОВ %: {current_load:.1f}%",
+                          fontsize=12, fontweight='bold')
+
+        # 3. Specialist heatmap: states laid out row by row, 20 per row;
+        #    unused cells stay 0 and surplus states are ignored.
+        states = np.array(data['specialist_states'])
+        visible = states[:rows * cols]
+        z = np.zeros(rows * cols)
+        z[:len(visible)] = visible
+        heat_ax = axes[1, 0]
+        heat_ax.imshow(z.reshape(rows, cols), cmap='RdYlGn_r', aspect='auto', vmin=0, vmax=10)
+        heat_ax.set_title(f"МОНИТОРИНГ: {specialists_count} СПЕЦИАЛИСТОВ",
+                          fontsize=12, fontweight='bold')
+        heat_ax.axis('off')
+
+        # Text legend under the heatmap.
+        legend_text = "Цвета: Зеленый (Свободен) → Желтый (3-5 мин) → Красный (8+ мин)"
+        heat_ax.text(0.5, -0.1, legend_text, ha='center', transform=heat_ax.transAxes, fontsize=10)
+
+        # 4. Queue sizes and cumulative statistics.
+        ax_stat = axes[1, 1]
+        ax_stat.axis('off')
+
+        model_queue = data['queue']
+        business_queue = data.get('business_queue', 0)
+        # A queue longer than 50 is highlighted in red.
+        q_mod_color = '#991b1b' if model_queue > 50 else '#166534'
+        q_biz_color = '#991b1b' if business_queue > 50 else '#1e293b'
+
+        ax_stat.text(0.25, 0.9, "ОЧЕРЕДЬ\n(МОДЕЛИ)", fontsize=10, ha='center', fontweight='bold')
+        ax_stat.text(0.25, 0.78, f"{model_queue}", fontsize=26, ha='center', fontweight='bold',
+                     color=q_mod_color)
+
+        ax_stat.text(0.75, 0.9, "ОЧЕРЕДЬ\n(БИЗНЕС ПРАВИЛА)", fontsize=10, ha='center',
+                     fontweight='bold')
+        ax_stat.text(0.75, 0.78, f"{business_queue}", fontsize=26, ha='center', fontweight='bold',
+                     color=q_biz_color)
+
+        cum = data['cumulative']
+        stats_text = (
+            f"Итоговые показатели к {data['time_str']}\n"
+            f"--------------------------------------\n"
+            f"ОБРАБОТАНО ВСЕГО: {cum['total_processed']}\n"
+            f"Авто-одобрено: {cum['auto_approved']}\n"
+            f"Авто-отказы: {cum['auto_declined']}\n"
+            f"Ручной разбор (модель): {cum['manual_processed']}\n"
+            f"Ручной разбор (бизнес правила): {cum['business_manual_processed']}\n"
+            f"--------------------------------------\n"
+            f"Используемая модель: {second_model_name}"
+        )
+
+        ax_stat.text(0.5, 0.3, stats_text, fontsize=10, fontfamily='monospace',
+                     ha='center', va='center', transform=ax_stat.transAxes,
+                     bbox=dict(facecolor='#f8f9fa', alpha=1, boxstyle='round,pad=1',
+                               edgecolor='#dee2e6'))
+
+        return axes.flatten()
+
+    ani = animation.FuncAnimation(fig, update, frames=len(frames), interval=1000 / fps)
+
+    # Reserve a file name only: the handle is closed right away so that it is
+    # not leaked and the encoder can open the path itself.
+    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmp_file:
+        video_path = tmp_file.name
+
+    writer = animation.FFMpegWriter(fps=fps, bitrate=2000,
+                                    extra_args=['-vcodec', 'libx264', '-pix_fmt', 'yuv420p'])
+    try:
+        ani.save(video_path, writer=writer)
+    except Exception:
+        # Do not leave an empty or partial video behind when encoding fails.
+        if os.path.exists(video_path):
+            os.remove(video_path)
+        raise
+    return video_path
\ No newline at end of file
diff --git a/app/simulation/visualization/plots.py b/app/simulation/visualization/plots.py
new file mode 100644
index 0000000000000000000000000000000000000000..aa1c2d0d14d72b0be645ba8c4cb76e49ce4d8101
--- /dev/null
+++ b/app/simulation/visualization/plots.py
@@ -0,0 +1,374 @@
+import matplotlib.pyplot as plt
+import matplotlib.animation as animation
+import numpy as np
+
+
+
+def minutes_to_time(minutes, start_time="00:00"):
+    """Convert minutes elapsed since ``start_time`` into an "HH:MM" clock string.
+
+    ``start_time`` is given as "HH:MM"; hours wrap around after 23.
+    """
+    hh, mm = start_time.split(':')
+    elapsed = int(hh) * 60 + int(mm) + minutes
+    return "{:02d}:{:02d}".format((elapsed // 60) % 24, elapsed % 60)
+
+
+def plot_queue_dynamics(queue_history, business_queue_history=None, start_time="00:00"):
+    """Draw the two queue-size histories side by side with an HH:MM time axis.
+
+    Args:
+        queue_history: sizes of the model queue, one value per minute.
+        business_queue_history: sizes of the business-rules queue; when it
+            is missing or empty the right panel only shows a "no data" text.
+        start_time: "HH:MM" clock time of the first sample, used for the
+            hourly tick labels.
+
+    Returns:
+        The ``matplotlib.pyplot`` module with the new figure current.
+    """
+    fig, (models_ax, business_ax) = plt.subplots(1, 2, figsize=(14, 6))
+
+    # One tick per hour, labelled with the wall-clock time.
+    n_minutes = len(queue_history)
+    minute_axis = range(n_minutes)
+    tick_positions = range(0, n_minutes, 60)
+    tick_labels = [minutes_to_time(pos, start_time) for pos in tick_positions]
+
+    def _decorate(ax):
+        """Apply the tick labels, Y label and grid shared by both panels."""
+        ax.set_xticks(tick_positions)
+        ax.set_xticklabels(tick_labels, rotation=45)
+        ax.set_ylabel('Размер очереди')
+        ax.grid(True, alpha=0.3)
+
+    # Left panel: model queue.
+    models_ax.plot(minute_axis, queue_history, 'b-', linewidth=1.5)
+    _decorate(models_ax)
+    models_ax.set_xlabel('Время')
+    models_ax.set_title('Очередь моделей')
+
+    # Right panel: business-rules queue, or a placeholder when there is no data.
+    if not (business_queue_history and len(business_queue_history) > 0):
+        business_ax.text(0.5, 0.5, 'Нет данных', ha='center', va='center',
+                         transform=business_ax.transAxes)
+    else:
+        business_ax.plot(minute_axis, business_queue_history, 'orange', linewidth=1.5)
+        _decorate(business_ax)
+    business_ax.set_title('Очередь бизнес-правил')
+    business_ax.set_xlabel('Время')
+
+    plt.tight_layout()
+    return plt
+
+
+def plot_specialist_load(specialist_busy_history, specialists_count, start_time="00:00"):
+    """Plot specialist utilisation in percent over time with an HH:MM axis.
+
+    Args:
+        specialist_busy_history: number of busy specialists per minute.
+        specialists_count: total number of specialists (the 100 % level).
+        start_time: "HH:MM" clock time of the first sample.
+
+    Returns:
+        The ``matplotlib.pyplot`` module with the new figure current.
+    """
+    load_percent = [busy / specialists_count * 100 for busy in specialist_busy_history]
+    n_minutes = len(load_percent)
+
+    fig, ax = plt.subplots(figsize=(10, 4))
+    ax.plot(range(n_minutes), load_percent, 'g-', linewidth=1.5)
+
+    # Reference levels: full capacity and the 80 % target.
+    for level, colour, caption in ((100, 'r', 'Максимум'), (80, 'b', 'Цель 80%')):
+        ax.axhline(y=level, color=colour, linestyle='--', alpha=0.5, label=caption)
+
+    # One tick per hour, labelled with the wall-clock time.
+    tick_positions = range(0, n_minutes, 60)
+    ax.set_xticks(tick_positions)
+    ax.set_xticklabels([minutes_to_time(pos, start_time) for pos in tick_positions], rotation=45)
+
+    ax.set_title('Загрузка специалистов')
+    ax.set_xlabel('Время')
+    ax.set_ylabel('Загрузка (%)')
+    ax.set_ylim(0, 110)
+    ax.grid(True, alpha=0.3)
+    ax.legend()
+
+    plt.tight_layout()
+    return plt
+
+
+def plot_inflow(minute_counts, start_time="00:00"):
+    """Plot the incoming application flow as a filled curve.
+
+    Draws the per-minute counts, a 30-minute moving average (only when more
+    than 30 samples are available) and a horizontal line at the overall mean.
+
+    Args:
+        minute_counts: application counts, one value per minute.
+        start_time: "HH:MM" clock time of the first sample, used for the
+            hourly tick labels.
+
+    Returns:
+        The ``matplotlib.pyplot`` module with the new figure current.
+    """
+    fig, ax = plt.subplots(figsize=(14, 5))
+
+    total_minutes = len(minute_counts)
+    minutes = range(total_minutes)
+
+    # Filled area with the raw curve drawn on top of it.
+    ax.fill_between(minutes, minute_counts, alpha=0.3, color='blue', label='Общий поток')
+    ax.plot(minutes, minute_counts, 'b-', linewidth=1.5, alpha=0.8)
+
+    # Moving average; 'valid' mode yields total_minutes - window + 1 points,
+    # so the curve starts at x = window - 1.
+    window = 30
+    if total_minutes > window:
+        smoothed = np.convolve(minute_counts, np.ones(window) / window, mode='valid')
+        ax.plot(range(window - 1, total_minutes), smoothed,
+                'r-', linewidth=2.5, label='Среднее за 30 мин')
+
+    # Horizontal line at the overall mean.
+    mean_value = np.mean(minute_counts)
+    ax.axhline(y=mean_value, color='gray', linestyle='--', alpha=0.7,
+               label=f'Среднее: {mean_value:.1f}')
+
+    # One tick per hour, labelled with the wall-clock time.
+    hours = range(0, total_minutes, 60)
+    hour_labels = [minutes_to_time(m, start_time) for m in hours]
+
+    ax.set_xticks(hours)
+    ax.set_xticklabels(hour_labels, rotation=45)
+    ax.set_xlabel('Время')
+    ax.set_ylabel('Количество заявок')
+    ax.set_title('Входящий поток заявок')
+    # The legend is built only after every labelled artist exists; created
+    # earlier it would silently omit the mean line.
+    ax.legend()
+    ax.grid(True, alpha=0.3)
+
+    plt.tight_layout()
+    return plt
+
+
+# NOTE(review): an identical minutes_to_time is already defined earlier in
+# this module; this later definition simply rebinds the name, so one of the
+# two copies can be removed.
+def minutes_to_time(minutes, start_time="00:00"):
+    """Return "HH:MM" for the moment ``minutes`` after the "HH:MM" ``start_time``.
+
+    The hour is taken modulo 24, so the result wraps around at midnight.
+    """
+    parts = [int(piece) for piece in start_time.split(':')]
+    start_hour, start_min = parts
+    since_midnight = start_hour * 60 + start_min + minutes
+    hour, minute = divmod(since_midnight, 60)
+    hour %= 24
+    return f"{hour:02d}:{minute:02d}"
+
+
+def plot_detailed_decisions(batch_stats, second_model_name="XGBoost", start_time="00:00"):
+    """Plot one chart per decision type on a 3x2 grid with an HH:MM time axis.
+
+    Args:
+        batch_stats: list of per-batch dicts.  Keys read here: ``'time'``
+            (used as the X value), ``'business_manual'``,
+            ``'business_auto'``, ``'lr_confident'``, ``'second_confident'``
+            and ``'second_uncertain'``.
+        second_model_name: name of the second model shown in a panel title.
+        start_time: "HH:MM" clock time corresponding to time 0.
+
+    Returns:
+        The ``matplotlib.pyplot`` module with the new figure current, or
+        ``None`` when ``batch_stats`` is empty.
+    """
+    if not batch_stats:
+        return None
+
+    fig, axes = plt.subplots(3, 2, figsize=(14, 10))
+
+    times = [stat['time'] for stat in batch_stats]
+    total_minutes = max(times) if times else 0
+
+    # One tick per hour, extended one hour past the last sample.
+    hours = range(0, total_minutes + 60, 60)
+    hour_labels = [minutes_to_time(m, start_time) for m in hours]
+
+    # Panels 1-5 share one layout: a line with a translucent fill beneath it.
+    # Each entry: (axes, stats key, line format/colour, fill colour, title).
+    single_series_panels = (
+        (axes[0, 0], 'business_manual', 'r-', 'red', 'Ручной разбор: бизнес-правила'),
+        (axes[0, 1], 'business_auto', 'darkred', 'darkred', 'Авто отказ: бизнес-правила'),
+        (axes[1, 0], 'lr_confident', 'blue', 'blue', 'Уверенные решения: Logistic Regression'),
+        (axes[1, 1], 'second_confident', 'green', 'green',
+         f'Уверенные решения: {second_model_name}'),
+        (axes[2, 0], 'second_uncertain', 'orange', 'orange', 'Ручной разбор: модели неуверенны'),
+    )
+    for ax, key, line_fmt, fill_colour, title in single_series_panels:
+        series = [stat[key] for stat in batch_stats]
+        ax.plot(times, series, line_fmt, linewidth=1.5)
+        ax.fill_between(times, 0, series, alpha=0.2, color=fill_colour)
+        ax.set_title(title, fontweight='bold')
+
+    # Panel 6: both sources of manual review on the same axes.
+    comparison_ax = axes[2, 1]
+    comparison_ax.plot(times, [stat['business_manual'] for stat in batch_stats],
+                       'r-', linewidth=1.5, label='Бизнес-правила', alpha=0.7)
+    comparison_ax.plot(times, [stat['second_uncertain'] for stat in batch_stats],
+                       'orange', linewidth=1.5, label='Модели неуверенны', alpha=0.7)
+    comparison_ax.set_title('Сравнение источников ручного разбора', fontweight='bold')
+    comparison_ax.legend()
+
+    # Axis decoration common to all six panels.
+    for ax in axes.flat:
+        ax.set_xticks(hours)
+        ax.set_xticklabels(hour_labels, rotation=45)
+        ax.set_xlabel('Время')
+        ax.set_ylabel('Заявок')
+        ax.grid(True, alpha=0.3)
+
+    plt.suptitle('Детальный анализ решений', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    return plt
+
+def plot_parameters_history(pid_history, second_model_name="XGBoost", start_time="00:00"):
+    """Plot how the regulator parameters changed over time (three stacked charts).
+
+    Args:
+        pid_history: table-like object with an ``.empty`` attribute and the
+            columns ``'lr_low'``, ``'lr_high'``, ``'second_low'``,
+            ``'second_high'``, ``'error_load'`` and ``'output'``
+            (presumably a pandas DataFrame - confirm against the caller).
+        second_model_name: name of the second model used in labels and titles.
+        start_time: "HH:MM" clock time of the first row.
+
+    Returns:
+        The ``matplotlib.pyplot`` module with the new figure current, or
+        ``None`` when ``pid_history`` is ``None`` or empty.
+    """
+    if pid_history is None or pid_history.empty:
+        return None
+
+    fig, axes = plt.subplots(3, 1, figsize=(12, 12))
+
+    n_points = len(pid_history)
+    x_values = range(n_points)
+
+    # One tick every 60 rows, labelled with the wall-clock time.
+    tick_positions = range(0, n_points, 60)
+    tick_labels = [minutes_to_time(pos, start_time) for pos in tick_positions]
+
+    # Panels 1 and 2: low/high margins of the two models.
+    # Each entry: (axes, low column, high column, low label, high label, title).
+    margin_panels = (
+        (axes[0], 'lr_low', 'lr_high', 'LR Low', 'LR High', 'Отступы Logistic Regression'),
+        (axes[1], 'second_low', 'second_high',
+         f'{second_model_name} Low', f'{second_model_name} High', f'Отступы {second_model_name}'),
+    )
+    for ax, low_col, high_col, low_label, high_label, title in margin_panels:
+        ax.plot(x_values, pid_history[low_col], 'g-', linewidth=2, label=low_label)
+        ax.plot(x_values, pid_history[high_col], 'r-', linewidth=2, label=high_label)
+        ax.set_ylabel('Отступ')
+        ax.set_title(title)
+
+    # Panel 3: load error and regulator output around a zero baseline.
+    regulator_ax = axes[2]
+    regulator_ax.plot(x_values, pid_history['error_load'], 'b-', label='Error load',
+                      alpha=0.7, linewidth=1.5)
+    regulator_ax.plot(x_values, pid_history['output'], 'r-', label='Output',
+                      linewidth=2, alpha=0.7)
+    regulator_ax.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
+    regulator_ax.set_xlabel('Время')
+    regulator_ax.set_ylabel('Значение')
+    regulator_ax.set_title('Ошибка загрузки и выход регулятора')
+
+    # Decoration common to all three panels.
+    for ax in axes:
+        ax.legend()
+        ax.grid(True, alpha=0.3)
+        ax.set_xticks(tick_positions)
+        ax.set_xticklabels(tick_labels, rotation=45)
+
+    plt.tight_layout()
+    return plt
+
+
+# def plot_summary(processor):
+# """Сводный дашборд"""
+# fig, axes = plt.subplots(2, 3, figsize=(15, 10))
+#
+# stats = processor.stats
+#
+# # 1. Динамика очередей
+# axes[0, 0].plot(stats['queue_history'], 'b-', linewidth=1.5, label='Очередь моделей')
+# if 'business_queue_history' in stats:
+# axes[0, 0].plot(stats['business_queue_history'], 'orange', linewidth=1.5, label='Очередь бизнес-правил')
+# axes[0, 0].set_title('Динамика очередей')
+# axes[0, 0].set_xlabel('Минута')
+# axes[0, 0].set_ylabel('Заявок')
+# axes[0, 0].legend()
+# axes[0, 0].grid(True, alpha=0.3)
+#
+# # 2. Загрузка специалистов (модели)
+# load = [b / processor.specialists_count * 100 for b in stats['specialist_busy']]
+# axes[0, 1].plot(load, 'g-', linewidth=1.5, label='Основные специалисты')
+# axes[0, 1].axhline(y=100, color='r', linestyle='--', alpha=0.5, label='Максимум')
+# if hasattr(processor, 'target_load'):
+# axes[0, 1].axhline(y=processor.target_load * 100, color='b', linestyle='--',
+# alpha=0.5, label=f'Цель {processor.target_load * 100:.0f}%')
+# axes[0, 1].set_title('Загрузка специалистов (модели)')
+# axes[0, 1].set_xlabel('Минута')
+# axes[0, 1].set_ylabel('%')
+# axes[0, 1].legend()
+# axes[0, 1].grid(True, alpha=0.3)
+#
+# # 3. Загрузка экспертов
+# if 'business_specialist_busy' in stats and stats['business_specialist_busy']:
+# business_load = [b / processor.business_specialists_count * 100 for b in stats['business_specialist_busy']]
+# axes[1, 0].plot(business_load, 'orange', linewidth=1.5, label='Эксперты')
+# axes[1, 0].axhline(y=100, color='r', linestyle='--', alpha=0.5, label='Максимум')
+# axes[1, 0].set_title('Загрузка экспертов (бизнес-правила)')
+# axes[1, 0].set_xlabel('Минута')
+# axes[1, 0].set_ylabel('%')
+# axes[1, 0].legend()
+# axes[1, 0].grid(True, alpha=0.3)
+# else:
+# axes[1, 0].text(0.5, 0.5, 'Нет данных по экспертам', ha='center', va='center')
+# axes[1, 0].set_title('Загрузка экспертов')
+#
+# # 4. Распределение решений
+# sizes = [
+# stats['auto_approved'],
+# stats['auto_declined'],
+# stats['manual_processed'],
+# stats.get('business_manual_processed', 0)
+# ]
+# labels = ['Одобрено авто', 'Отказ авто', 'Ручной (модели)', 'Ручной (бизнес)']
+# colors = ['#2ecc71', '#e74c3c', '#3498db', '#f39c12']
+#
+# if sum(sizes) > 0:
+# wedges, texts, autotexts = axes[1, 1].pie(sizes, labels=labels, autopct='%1.1f%%',
+# colors=colors, startangle=90)
+# for autotext in autotexts:
+# autotext.set_color('white')
+# autotext.set_fontweight('bold')
+# axes[1, 1].set_title('Итоговые решения')
+#
+# # 5. Ключевые метрики (освободилось место)
+# total = stats['total_processed']
+# if total > 0:
+# avg_wait = np.mean(stats['wait_times']) if stats['wait_times'] else 0
+# avg_business_wait = np.mean(stats.get('business_wait_times', [0])) if stats.get('business_wait_times') else 0
+#
+# metrics_text = f"""
+# Всего заявок: {total:,}
+# Одобрено авто: {stats['auto_approved']:,} ({stats['auto_approved'] / total * 100:.1f}%)
+# Отказ авто: {stats['auto_declined']:,} ({stats['auto_declined'] / total * 100:.1f}%)
+#
+# Ручной разбор (модели): {stats['manual_processed']:,} ({stats['manual_processed'] / total * 100:.1f}%)
+# Ручной разбор (бизнес): {stats.get('business_manual_processed', 0):,}
+#
+# Среднее время ожидания (модели): {avg_wait:.1f} мин
+# Среднее время ожидания (бизнес): {avg_business_wait:.1f} мин
+#
+# Средняя загрузка специалистов: {np.mean(load):.1f}%
+# """
+# else:
+# metrics_text = "Нет данных"
+#
+# axes[0, 2].text(0.1, 0.5, metrics_text, transform=axes[0, 2].transAxes,
+# fontsize=10, verticalalignment='center', fontfamily='monospace')
+# axes[0, 2].axis('off')
+# axes[0, 2].set_title('Ключевые метрики')
+#
+# # 6. Пустой график или можно что-то еще
+# axes[1, 2].axis('off')
+#
+# plt.suptitle('Сводная статистика симуляции', fontsize=14, fontweight='bold')
+# plt.tight_layout()
+# return plt
+
+
diff --git a/app/simulation/visualization/simulation_20:11.gif b/app/simulation/visualization/simulation_20:11.gif
new file mode 100644
index 0000000000000000000000000000000000000000..6eae3b657c63a5ac82da1fd37a615ad7ed3aff5e
Binary files /dev/null and b/app/simulation/visualization/simulation_20:11.gif differ
diff --git a/app/simulation/visualization/simulation_20:19.gif b/app/simulation/visualization/simulation_20:19.gif
new file mode 100644
index 0000000000000000000000000000000000000000..6eae3b657c63a5ac82da1fd37a615ad7ed3aff5e
Binary files /dev/null and b/app/simulation/visualization/simulation_20:19.gif differ
diff --git a/app/simulation/visualization/simulation_20:25.gif b/app/simulation/visualization/simulation_20:25.gif
new file mode 100644
index 0000000000000000000000000000000000000000..6eae3b657c63a5ac82da1fd37a615ad7ed3aff5e
Binary files /dev/null and b/app/simulation/visualization/simulation_20:25.gif differ
diff --git a/app/simulation/visualization/simulation_20:30.gif b/app/simulation/visualization/simulation_20:30.gif
new file mode 100644
index 0000000000000000000000000000000000000000..6eae3b657c63a5ac82da1fd37a615ad7ed3aff5e
Binary files /dev/null and b/app/simulation/visualization/simulation_20:30.gif differ
diff --git a/app/utils/__pycache__/credit_preprocessor.cpython-311.pyc b/app/utils/__pycache__/credit_preprocessor.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..98062ecc31834418451384b84b7e7ba49454902a
Binary files /dev/null and b/app/utils/__pycache__/credit_preprocessor.cpython-311.pyc differ
diff --git a/app/utils/__pycache__/data_loader.cpython-311.pyc b/app/utils/__pycache__/data_loader.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e20b6cf939ce5eefa45bc7ad2234e23981bcd388
Binary files /dev/null and b/app/utils/__pycache__/data_loader.cpython-311.pyc differ
diff --git a/app/utils/credit_preprocessor.py b/app/utils/credit_preprocessor.py
new file mode 100644
index 0000000000000000000000000000000000000000..a5e46f9cf042e503f28f620e513f698f346a77a4
--- /dev/null
+++ b/app/utils/credit_preprocessor.py
@@ -0,0 +1,329 @@
+import pandas as pd
+import numpy as np
+import joblib
+from sklearn.base import BaseEstimator, TransformerMixin
+
+
+class CreditDataPreprocessor(BaseEstimator, TransformerMixin):
+ # Full preprocessing of the credit data: fit() is a no-op, transform() fills,
+ # clips, flags and bins the input columns, and clean_train() filters rows
+ # using the *_drop_threshold settings stored by __init__.
+ def __init__(self,
+ NumberOfDependents_fill_value=0,
+ NumberOfDependents_up_threshold=10,
+ MonthlyIncome_fill_value=0,
+ RevolvingUtilizationOfUnsecuredLines_drop_threshold=2,
+ age_low_drop_threshold=18,
+ age_up_drop_threshold=80,
+ DebtRatio_up_threshold=5,
+ PastDueRiskScore_weights=[1.0, 1.2, 1.3],  # NOTE(review): mutable list default, shared by every instance that keeps it
+ NumberRealEstateLoansOrLines_drop_threshold=20,
+ drop_special_codes=False):
+ self.NumberOfDependents_fill_value = NumberOfDependents_fill_value
+ self.NumberOfDependents_up_threshold = NumberOfDependents_up_threshold
+
+ self.MonthlyIncome_fill_value = MonthlyIncome_fill_value
+ # The *_drop_threshold values (utilization, age, real-estate) are read only by clean_train().
+ self.RevolvingUtilizationOfUnsecuredLines_drop_threshold = RevolvingUtilizationOfUnsecuredLines_drop_threshold
+
+ self.age_low_drop_threshold = age_low_drop_threshold
+ self.age_up_drop_threshold = age_up_drop_threshold
+
+ self.DebtRatio_up_threshold = DebtRatio_up_threshold
+ # Weights for the 30-59, 60-89 and 90+ days past-due counters, in that order.
+ self.PastDueRiskScore_weights = PastDueRiskScore_weights
+
+ self.NumberRealEstateLoansOrLines_drop_threshold = NumberRealEstateLoansOrLines_drop_threshold
+
+ self.drop_special_codes = drop_special_codes
+ # fit() learns nothing from X and returns the instance unchanged.
+ def fit(self, X, y=None):
+ return self
+ # transform() returns a new frame with the engineered features; X itself is not written to.
+ def transform(self, X):
+ X_copy = X.copy()
+ # NumberOfDependents: fill missing values, then clip to [0, NumberOfDependents_up_threshold].
+ X_copy['NumberOfDependents'] = X_copy['NumberOfDependents'].fillna(value=self.NumberOfDependents_fill_value)
+ X_copy['NumberOfDependents'] = X_copy['NumberOfDependents'].clip(0, self.NumberOfDependents_up_threshold).copy()
+ # MonthlyIncome: record which rows were missing, then fill the missing values.
+ X_copy['MonthlyIncomeIsMissing'] = 0
+ X_copy.loc[X_copy['MonthlyIncome'].isna(), 'MonthlyIncomeIsMissing'] = 1
+ X_copy['MonthlyIncome'] = X['MonthlyIncome'].fillna(value=self.MonthlyIncome_fill_value)  # NOTE(review): reads X, not X_copy (column is still unmodified here)
+ # Revolving utilization: flag values above 1, then clip to [0, 1].
+ X_copy['RevolvingUtilizationOverOne'] = 0.0
+ X_copy.loc[X_copy['RevolvingUtilizationOfUnsecuredLines'] > 1, 'RevolvingUtilizationOverOne'] = 1.0
+ X_copy['RevolvingUtilizationOfUnsecuredLines'] = X_copy['RevolvingUtilizationOfUnsecuredLines'].clip(0,
+ 1).copy()
+ # DebtPayments: DebtRatio itself where MonthlyIncome is 0, otherwise DebtRatio * MonthlyIncome.
+ X_copy['DebtPayments'] = 0.0
+ X_copy.loc[X_copy['MonthlyIncome'] == 0, 'DebtPayments'] = X_copy.loc[X_copy['MonthlyIncome'] == 0, 'DebtRatio']
+ X_copy.loc[X_copy['MonthlyIncome'] != 0, 'DebtPayments'] = X_copy.loc[
+ X_copy['MonthlyIncome'] != 0, 'DebtRatio'] * \
+ X_copy.loc[
+ X_copy['MonthlyIncome'] != 0, 'MonthlyIncome']
+ X_copy['DebtRatio'] = X_copy['DebtRatio'].clip(0, self.DebtRatio_up_threshold).copy()  # clipped only after DebtPayments used the raw ratio
+ # DebtPayments: flag values above 10000, then clip to [0, 10000].
+ X_copy['DebtPayments_over_10k'] = 0.0
+ X_copy.loc[X_copy['DebtPayments'] > 10000, 'DebtPayments_over_10k'] = 1.0
+ X_copy['DebtPayments'] = X_copy['DebtPayments'].clip(0, 10000).copy()
+ # MonthlyIncome: flag values of 20000 or more, then clip to [0, 20000].
+ X_copy['MonthlyIncome_over_20k'] = 0.0
+ X_copy.loc[X_copy['MonthlyIncome'] >= 20000, 'MonthlyIncome_over_20k'] = 1.0
+ X_copy['MonthlyIncome'] = X_copy['MonthlyIncome'].clip(0, 20000)
+ # Code96 / Code98: indicators for the values 96 and 98 in the 30-59 days column.
+ X_copy['Code96'] = 0.0
+ X_copy['Code98'] = 0.0
+ X_copy.loc[X_copy['NumberOfTime30-59DaysPastDueNotWorse'] == 96, 'Code96'] = 1.0
+ X_copy.loc[X_copy['NumberOfTime30-59DaysPastDueNotWorse'] == 98, 'Code98'] = 1.0
+ # PastDueRiskScore: weighted sum of the three past-due counters, overwritten with 96 / 98 on coded rows.
+ X_copy['PastDueRiskScore'] = (
+ self.PastDueRiskScore_weights[0] * X_copy['NumberOfTime30-59DaysPastDueNotWorse'] +
+ self.PastDueRiskScore_weights[1] * X_copy['NumberOfTime60-89DaysPastDueNotWorse'] +
+ self.PastDueRiskScore_weights[2] * X_copy['NumberOfTimes90DaysLate'])
+ X_copy.loc[X_copy['NumberOfTime30-59DaysPastDueNotWorse'] == 96, 'PastDueRiskScore'] = 96
+ X_copy.loc[X_copy['NumberOfTime30-59DaysPastDueNotWorse'] == 98, 'PastDueRiskScore'] = 98
+ X_copy = X_copy.drop(columns=['NumberOfTime30-59DaysPastDueNotWorse', 'NumberOfTime60-89DaysPastDueNotWorse',
+ 'NumberOfTimes90DaysLate'])
+ # Open credit lines: flag values above 30, then clip to [0, 30].
+ X_copy['NumberOfOpenCreditLinesAndLoans_over_30'] = 0.0
+ X_copy.loc[X_copy['NumberOfOpenCreditLinesAndLoans'] > 30, 'NumberOfOpenCreditLinesAndLoans_over_30'] = 1.0
+ X_copy['NumberOfOpenCreditLinesAndLoans'] = X_copy['NumberOfOpenCreditLinesAndLoans'].clip(0, 30).copy()
+ # Real-estate loans: flag values above 5, then clip to [0, 5].
+ X_copy['NumberRealEstateLoansOrLines_over_5'] = 0.0
+ X_copy.loc[X_copy['NumberRealEstateLoansOrLines'] > 5, 'NumberRealEstateLoansOrLines_over_5'] = 1.0
+ X_copy['NumberRealEstateLoansOrLines'] = X_copy['NumberRealEstateLoansOrLines'].clip(0, 5).copy()
+ # NOTE(review): pd.cut is right-closed by default, so the bins below are (0,1], (1,2], (2,6], (6,15], (15,31].
+ X_copy['ConsumerCredit_Group'] = pd.cut(X_copy['NumberOfOpenCreditLinesAndLoans'],
+ bins=[0, 1, 2, 6, 15, 31],  # NOTE(review): a value of 0 falls in no bin (NaN); labels look shifted by one - confirm intent
+ labels=[
+ '0_loans',
+ '1_loans',
+ '2-5_loans',
+ '6-14_loans',
+ '16-30_loans'
+ ])
+ consumer_dummy = pd.get_dummies(X_copy['ConsumerCredit_Group'], prefix='Consumer', drop_first=False).astype(
+ 'float')
+ # Real-estate groups: (-1,0] -> 0_loans, (0,3] -> 1-3_loans, (3,100] -> 4+_loans.
+ X_copy['RealEstateLoans_Group'] = pd.cut(X_copy['NumberRealEstateLoansOrLines'],
+ bins=[-1, 0, 3, 100],
+ labels=[
+ '0_loans',
+ '1-3_loans',
+ '4+_loans',
+ ])
+ estate_dummy = pd.get_dummies(X_copy['RealEstateLoans_Group'], prefix='RealEstateLoans',
+ drop_first=False).astype('float')
+ # Append the dummy columns and drop the intermediate categorical columns.
+ X_copy = pd.concat([X_copy, consumer_dummy, estate_dummy], axis=1).copy()
+ X_copy = X_copy.drop(columns=['ConsumerCredit_Group',
+ 'RealEstateLoans_Group']).copy()
+ # Drop one dummy column from each group.
+ X_copy = X_copy.drop(columns=['Consumer_6-14_loans',
+ 'RealEstateLoans_0_loans']).copy()
+ # Drop the raw count columns and the helper flags listed below from the output.
+ X_copy = X_copy.drop(columns=['NumberOfOpenCreditLinesAndLoans',
+ 'NumberRealEstateLoansOrLines',
+ 'MonthlyIncomeIsMissing',
+ 'MonthlyIncome_over_20k',
+ 'Consumer_0_loans',
+ 'NumberOfOpenCreditLinesAndLoans_over_30']).copy()
+ # Optionally drop the special-code indicators as well.
+ if self.drop_special_codes:
+ X_copy = X_copy.drop(columns=['Code96', 'Code98'])
+
+ return X_copy
+
+ # fit_transform() is fit() followed by transform() on the same X.
+ def fit_transform(self, X, y=None):
+ return self.fit(X, y).transform(X)
+ # clean_train() keeps rows within the utilization, age and real-estate thresholds; returns X or (X, y).
+ def clean_train(self, X, y=None):
+ mask = (
+ (X[
+ 'RevolvingUtilizationOfUnsecuredLines'] <= self.RevolvingUtilizationOfUnsecuredLines_drop_threshold) &
+ (X['age'] >= self.age_low_drop_threshold) &
+ (X['age'] <= self.age_up_drop_threshold) &
+ (X['NumberRealEstateLoansOrLines'] <= self.NumberRealEstateLoansOrLines_drop_threshold)
+ )
+ # The same boolean mask filters X and, when given, y.
+ X_clean = X[mask].copy()
+
+ if y is not None:
+ y_clean = y[mask].copy()
+ return X_clean, y_clean
+
+ return X_clean
+
+from sklearn.base import BaseEstimator, TransformerMixin
+from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler, MaxAbsScaler
+
+class CreditScaler(BaseEstimator, TransformerMixin):
+ """
+ Scales only the non-boolean columns.
+ The scaling method is selectable via scaler_type.
+ """
+
+ def __init__(self, scaler_type='standard'):
+ """
+ The scaler_type parameter selects the kind of scaler.
+
+ Available types:
+ - 'standard': StandardScaler (mean=0, variance=1)
+ - 'robust': RobustScaler (robust to outliers)
+ - 'minmax': MinMaxScaler (rescales to [0, 1])
+ - 'maxabs': MaxAbsScaler (rescales to [-1, 1])
+ """
+ # Columns that fit() excludes from scaling.
+ self.boolean_columns = [
+ 'RevolvingUtilizationOverOne',
+ 'DebtPayments_over_10k',
+ 'Code96',
+ 'Code98',
+ 'NumberRealEstateLoansOrLines_over_5',
+ 'Consumer_1_loans',
+ 'Consumer_2-5_loans',
+ 'Consumer_16-30_loans',
+ 'RealEstateLoans_1-3_loans',
+ 'RealEstateLoans_4+_loans'
+ ]
+
+ self.scaler_type = scaler_type
+ self._create_scaler()
+
+ # These attributes are filled in during fit
+ self.columns_to_scale_ = None  # NOTE(review): trailing-underscore attributes set before fit may confuse sklearn fitted-checks - confirm
+ self.n_features_in_ = None
+ self.feature_names_in_ = None
+
+ def _create_scaler(self):
+ """Create self.scaler for scaler_type; raises ValueError for an unknown type"""
+ if self.scaler_type == 'standard':
+ self.scaler = StandardScaler()
+ elif self.scaler_type == 'robust':
+ self.scaler = RobustScaler()
+ elif self.scaler_type == 'minmax':
+ self.scaler = MinMaxScaler()
+ elif self.scaler_type == 'maxabs':
+ self.scaler = MaxAbsScaler()
+ else:
+ raise ValueError(
+ f"Unknown scaler_type: {self.scaler_type}. "
+ f"Available: standard, robust, minmax, maxabs"
+ )
+
+ def fit(self, X, y=None):
+ """
+ Determines the columns to scale (all except the boolean ones)
+ and fits the scaler on them.
+ """
+ # X must expose .columns; the column names and their count are recorded as seen here.
+ self.feature_names_in_ = X.columns.tolist()
+ self.n_features_in_ = len(self.feature_names_in_)
+
+ self.columns_to_scale_ = [
+ col for col in self.feature_names_in_
+ if col not in self.boolean_columns
+ ]
+
+ self.scaler.fit(X[self.columns_to_scale_])
+ return self
+
+ def transform(self, X, y=None):
+ """
+ Returns a copy of X with only the non-boolean columns scaled.
+ """
+ X_copy = X.copy()
+ # columns_to_scale_ is still None before fit(), so fit() has to run first.
+ X_copy[self.columns_to_scale_] = self.scaler.transform(X_copy[self.columns_to_scale_])
+
+ return X_copy
+
+ def fit_transform(self, X, y=None):
+ return self.fit(X, y).transform(X, y)
+
+ def get_feature_names_out(self, input_features=None):
+ """For sklearn compatibility"""
+ if input_features is not None:
+ return input_features
+ return self.feature_names_in_ if self.feature_names_in_ is not None else []
+
+ def set_params(self, **params):
+ """For GridSearchCV compatibility"""
+ if 'scaler_type' in params and params['scaler_type'] != self.scaler_type:
+ self.scaler_type = params['scaler_type']
+ self._create_scaler()  # rebuild self.scaler so it matches the new type
+ return super().set_params(**params)
+
+
+def check_business_rules(age, monthly_income, monthly_debt, debt_ratio,
+ late_90, late_60_89, late_30_59, credit_lines,
+ real_estate, utilization, dependents):  # NOTE(review): debt_ratio, credit_lines and dependents are not read below
+ # Rule screening; returns a dict with 'needs_manual' and 'decision', plus 'message' whenever a rule fires.
+ # CRITICAL RULES - immediate rejection
+ if age < 18:
+ return {
+ 'needs_manual': False,
+ 'message': 'Возраст менее 18 лет - кредит не выдаётся',
+ 'decision': 1 # rejection
+ }
+
+ # SPECIAL BANK CODES - straight to manual review
+ if (late_90 == 98) or (late_60_89 == 98) or (late_30_59 == 98):
+ return {
+ 'needs_manual': True,
+ 'message': 'Код 98: Списание долга как безнадежного',
+ 'decision': None
+ }
+
+ if (late_90 == 96) or (late_60_89 == 96) or (late_30_59 == 96):
+ return {
+ 'needs_manual': True,
+ 'message': 'Код 96: Изъятие залога или реализация имущества',
+ 'decision': None
+ }
+
+ # CRITICAL RULES - straight to manual review
+ if age > 80:
+ return {
+ 'needs_manual': True,
+ 'message': 'Возраст > 80 лет - требуется ручной разбор (индивидуальные условия)',
+ 'decision': None
+ }
+
+ if monthly_income > 1000000:
+ return {
+ 'needs_manual': True,
+ 'message': 'Доход свыше 1,000,000 $ - требуется ручной разбор',
+ 'decision': None
+ }
+
+ if monthly_debt > 1000000:
+ return {
+ 'needs_manual': True,
+ 'message': 'Платежи свыше 1,000,000 $ - требуется ручной разбор',
+ 'decision': None
+ }
+
+ if utilization > 2:
+ return {
+ 'needs_manual': True,
+ 'message': 'Использование кредитных средств превышает 200%',
+ 'decision': None
+ }
+
+ if real_estate > 20:
+ return {
+ 'needs_manual': True,
+ 'message': 'Количество кредитов под залог недвижимости слишком велико - ручной разбор',
+ 'decision': None
+ }
+
+ # 4. ALL CHECKS PASSED - cleared for automatic scoring by the model
+ return {  # NOTE(review): no 'message' key on this path, unlike every other return
+ 'needs_manual': False,
+ 'decision': None,
+ }
+
+
+
diff --git a/app/utils/data_loader.py b/app/utils/data_loader.py
new file mode 100644
index 0000000000000000000000000000000000000000..883c9d88948dab75c1a3e8d876215704ca1e07dd
--- /dev/null
+++ b/app/utils/data_loader.py
@@ -0,0 +1,26 @@
+import streamlit as st
+import joblib
+import os
+
+
+@st.cache_resource
+def load_artifacts(models_path, preprocessor_path):
+ """Load the preprocessor, the scaler and whichever model files exist; returns (preprocessor, scaler, models)"""
+ preprocessor = joblib.load(os.path.join(preprocessor_path, 'preprocessor_150.pkl'))  # NOTE(review): joblib.load unpickles - load trusted artifacts only
+ scaler = joblib.load(os.path.join(preprocessor_path, 'scaler_150.pkl'))
+ # Display name -> file name expected under models_path.
+ models = {}
+ model_files = {
+ 'Logistic Regression': 'logreg_150_model.pkl',
+ 'XGBoost': 'xgb_150_model.pkl',
+ 'LightGBM': 'lgbm_150_model.pkl',
+ 'CatBoost': 'catboost_150_model.pkl',
+ 'Random Forest': 'rfc_150_model.pkl'
+ }
+ # Files that do not exist are skipped without an error, so the returned dict may be partial or empty.
+ for name, filename in model_files.items():
+ path = os.path.join(models_path, filename)
+ if os.path.exists(path):
+ models[name] = joblib.load(path)
+
+ return preprocessor, scaler, models
\ No newline at end of file
diff --git a/catboost_info/catboost_training.json b/catboost_info/catboost_training.json
new file mode 100644
index 0000000000000000000000000000000000000000..6bd1a2f4e376a9f062a9f741f6428dda83dd5486
--- /dev/null
+++ b/catboost_info/catboost_training.json
@@ -0,0 +1,104 @@
+{
+"meta":{"test_sets":[],"test_metrics":[],"learn_metrics":[{"best_value":"Min","name":"Logloss"}],"launch_mode":"Train","parameters":"","iteration_count":100,"learn_sets":["learn"],"name":"experiment"},
+"iterations":[
+{"learn":[0.6582255385],"iteration":0,"passed_time":0.1064302509,"remaining_time":10.53659484},
+{"learn":[0.628929721],"iteration":1,"passed_time":0.1525804859,"remaining_time":7.476443808},
+{"learn":[0.6055823656],"iteration":2,"passed_time":0.1890582018,"remaining_time":6.112881857},
+{"learn":[0.585745295],"iteration":3,"passed_time":0.2252067115,"remaining_time":5.404961076},
+{"learn":[0.5691497866],"iteration":4,"passed_time":0.2406276923,"remaining_time":4.571926155},
+{"learn":[0.5553995801],"iteration":5,"passed_time":0.2564603788,"remaining_time":4.017879268},
+{"learn":[0.5431466822],"iteration":6,"passed_time":0.2927427626,"remaining_time":3.889296704},
+{"learn":[0.5321745596],"iteration":7,"passed_time":0.326945827,"remaining_time":3.75987701},
+{"learn":[0.5230197248],"iteration":8,"passed_time":0.3622708005,"remaining_time":3.662960316},
+{"learn":[0.5150673326],"iteration":9,"passed_time":0.3964141569,"remaining_time":3.567727412},
+{"learn":[0.5085723427],"iteration":10,"passed_time":0.4321486694,"remaining_time":3.496475598},
+{"learn":[0.5029521178],"iteration":11,"passed_time":0.4666833149,"remaining_time":3.422344309},
+{"learn":[0.4982952699],"iteration":12,"passed_time":0.4887040018,"remaining_time":3.270557551},
+{"learn":[0.4940193081],"iteration":13,"passed_time":0.5244568892,"remaining_time":3.221663748},
+{"learn":[0.4903079864],"iteration":14,"passed_time":0.5588645355,"remaining_time":3.166899035},
+{"learn":[0.4877126951],"iteration":15,"passed_time":0.5737366867,"remaining_time":3.012117605},
+{"learn":[0.4849442288],"iteration":16,"passed_time":0.594951629,"remaining_time":2.904763836},
+{"learn":[0.4823937275],"iteration":17,"passed_time":0.6301627283,"remaining_time":2.870741318},
+{"learn":[0.4803765605],"iteration":18,"passed_time":0.6663060298,"remaining_time":2.840567811},
+{"learn":[0.4781523185],"iteration":19,"passed_time":0.687587555,"remaining_time":2.75035022},
+{"learn":[0.4767059201],"iteration":20,"passed_time":0.7219467433,"remaining_time":2.715894891},
+{"learn":[0.475163465],"iteration":21,"passed_time":0.7562504736,"remaining_time":2.681251679},
+{"learn":[0.4741219727],"iteration":22,"passed_time":0.777308792,"remaining_time":2.602294651},
+{"learn":[0.473008657],"iteration":23,"passed_time":0.8123926421,"remaining_time":2.5725767},
+{"learn":[0.4722618181],"iteration":24,"passed_time":0.8479381142,"remaining_time":2.543814343},
+{"learn":[0.4714753648],"iteration":25,"passed_time":0.8643292969,"remaining_time":2.460014153},
+{"learn":[0.470390758],"iteration":26,"passed_time":0.904391114,"remaining_time":2.445205605},
+{"learn":[0.4696201438],"iteration":27,"passed_time":0.9384147212,"remaining_time":2.413066426},
+{"learn":[0.46859249],"iteration":28,"passed_time":0.9729809498,"remaining_time":2.382125774},
+{"learn":[0.4677104182],"iteration":29,"passed_time":1.007459887,"remaining_time":2.350739737},
+{"learn":[0.4673133885],"iteration":30,"passed_time":1.019293767,"remaining_time":2.268750643},
+{"learn":[0.4667841252],"iteration":31,"passed_time":1.031243396,"remaining_time":2.191392216},
+{"learn":[0.4664536674],"iteration":32,"passed_time":1.066954367,"remaining_time":2.166240684},
+{"learn":[0.4662622609],"iteration":33,"passed_time":1.076521387,"remaining_time":2.089717986},
+{"learn":[0.466086662],"iteration":34,"passed_time":1.088162018,"remaining_time":2.020872318},
+{"learn":[0.4657380808],"iteration":35,"passed_time":1.122871037,"remaining_time":1.996215177},
+{"learn":[0.4651284039],"iteration":36,"passed_time":1.157755597,"remaining_time":1.971313584},
+{"learn":[0.4646531445],"iteration":37,"passed_time":1.195417263,"remaining_time":1.95041764},
+{"learn":[0.4641257326],"iteration":38,"passed_time":1.230054783,"remaining_time":1.92393184},
+{"learn":[0.4637898175],"iteration":39,"passed_time":1.246455715,"remaining_time":1.869683573},
+{"learn":[0.4633285186],"iteration":40,"passed_time":1.280777487,"remaining_time":1.843070043},
+{"learn":[0.4628428368],"iteration":41,"passed_time":1.315556006,"remaining_time":1.816720199},
+{"learn":[0.4626124403],"iteration":42,"passed_time":1.349455031,"remaining_time":1.788812483},
+{"learn":[0.4623858706],"iteration":43,"passed_time":1.362016739,"remaining_time":1.73347585},
+{"learn":[0.4621286714],"iteration":44,"passed_time":1.398965243,"remaining_time":1.709846409},
+{"learn":[0.4617974001],"iteration":45,"passed_time":1.433543555,"remaining_time":1.682855478},
+{"learn":[0.4614775166],"iteration":46,"passed_time":1.466964625,"remaining_time":1.654236705},
+{"learn":[0.46142579],"iteration":47,"passed_time":1.479586041,"remaining_time":1.602884878},
+{"learn":[0.4614011205],"iteration":48,"passed_time":1.486797243,"remaining_time":1.547482845},
+{"learn":[0.4611845342],"iteration":49,"passed_time":1.521749302,"remaining_time":1.521749302},
+{"learn":[0.4609852804],"iteration":50,"passed_time":1.557818146,"remaining_time":1.496727238},
+{"learn":[0.4604321277],"iteration":51,"passed_time":1.596752012,"remaining_time":1.473924934},
+{"learn":[0.4601645791],"iteration":52,"passed_time":1.646375224,"remaining_time":1.459993123},
+{"learn":[0.459804458],"iteration":53,"passed_time":1.683746309,"remaining_time":1.434302411},
+{"learn":[0.4592589475],"iteration":54,"passed_time":1.71827358,"remaining_time":1.405860201},
+{"learn":[0.4589643366],"iteration":55,"passed_time":1.752749892,"remaining_time":1.377160629},
+{"learn":[0.4585201818],"iteration":56,"passed_time":1.787086539,"remaining_time":1.348153003},
+{"learn":[0.4582657803],"iteration":57,"passed_time":1.822507553,"remaining_time":1.319746849},
+{"learn":[0.4580557799],"iteration":58,"passed_time":1.844325617,"remaining_time":1.281650005},
+{"learn":[0.457864554],"iteration":59,"passed_time":1.877865186,"remaining_time":1.251910124},
+{"learn":[0.4576167412],"iteration":60,"passed_time":1.915169063,"remaining_time":1.224452351},
+{"learn":[0.4575874936],"iteration":61,"passed_time":1.924591958,"remaining_time":1.17958862},
+{"learn":[0.457362279],"iteration":62,"passed_time":1.960633385,"remaining_time":1.151483099},
+{"learn":[0.4572946663],"iteration":63,"passed_time":1.972484848,"remaining_time":1.109522727},
+{"learn":[0.4569830294],"iteration":64,"passed_time":2.007666031,"remaining_time":1.08105094},
+{"learn":[0.456610445],"iteration":65,"passed_time":2.042187927,"remaining_time":1.052036205},
+{"learn":[0.4560918865],"iteration":66,"passed_time":2.076725989,"remaining_time":1.022865039},
+{"learn":[0.4558479503],"iteration":67,"passed_time":2.110631805,"remaining_time":0.9932384965},
+{"learn":[0.455740418],"iteration":68,"passed_time":2.124730753,"remaining_time":0.9545891789},
+{"learn":[0.455501269],"iteration":69,"passed_time":2.159214566,"remaining_time":0.9253776709},
+{"learn":[0.4554787935],"iteration":70,"passed_time":2.168732127,"remaining_time":0.8858201647},
+{"learn":[0.4552744806],"iteration":71,"passed_time":2.205001553,"remaining_time":0.8575006039},
+{"learn":[0.455234248],"iteration":72,"passed_time":2.220812656,"remaining_time":0.8213964619},
+{"learn":[0.455137986],"iteration":73,"passed_time":2.255120136,"remaining_time":0.7923395074},
+{"learn":[0.4549484305],"iteration":74,"passed_time":2.28992153,"remaining_time":0.7633071767},
+{"learn":[0.4548062199],"iteration":75,"passed_time":2.324904798,"remaining_time":0.7341804624},
+{"learn":[0.4546474797],"iteration":76,"passed_time":2.360039856,"remaining_time":0.7049469699},
+{"learn":[0.4545581835],"iteration":77,"passed_time":2.372090859,"remaining_time":0.6690512679},
+{"learn":[0.4544265313],"iteration":78,"passed_time":2.40573901,"remaining_time":0.6395002433},
+{"learn":[0.4544030978],"iteration":79,"passed_time":2.413539667,"remaining_time":0.6033849167},
+{"learn":[0.4543650724],"iteration":80,"passed_time":2.422935896,"remaining_time":0.568342988},
+{"learn":[0.4542698101],"iteration":81,"passed_time":2.455982135,"remaining_time":0.5391180296},
+{"learn":[0.4540294101],"iteration":82,"passed_time":2.490015159,"remaining_time":0.5100031048},
+{"learn":[0.4539463005],"iteration":83,"passed_time":2.501245001,"remaining_time":0.4764276192},
+{"learn":[0.4537784829],"iteration":84,"passed_time":2.53640835,"remaining_time":0.4476014736},
+{"learn":[0.4536943889],"iteration":85,"passed_time":2.572798317,"remaining_time":0.4188276329},
+{"learn":[0.4536386999],"iteration":86,"passed_time":2.608432288,"remaining_time":0.3897657442},
+{"learn":[0.4533342039],"iteration":87,"passed_time":2.644785463,"remaining_time":0.3606525632},
+{"learn":[0.4531946585],"iteration":88,"passed_time":2.679509774,"remaining_time":0.3311753653},
+{"learn":[0.4529846134],"iteration":89,"passed_time":2.713719672,"remaining_time":0.301524408},
+{"learn":[0.4529583581],"iteration":90,"passed_time":2.728664447,"remaining_time":0.2698679124},
+{"learn":[0.4528171854],"iteration":91,"passed_time":2.765771076,"remaining_time":0.2405018327},
+{"learn":[0.4526575987],"iteration":92,"passed_time":2.800900134,"remaining_time":0.2108204402},
+{"learn":[0.4526170824],"iteration":93,"passed_time":2.822734239,"remaining_time":0.1801745259},
+{"learn":[0.4525149982],"iteration":94,"passed_time":2.857135885,"remaining_time":0.1503755729},
+{"learn":[0.4524663385],"iteration":95,"passed_time":2.892015112,"remaining_time":0.1205006297},
+{"learn":[0.4524315166],"iteration":96,"passed_time":2.908353878,"remaining_time":0.08994908901},
+{"learn":[0.4523241677],"iteration":97,"passed_time":2.943535644,"remaining_time":0.060072156},
+{"learn":[0.4523010903],"iteration":98,"passed_time":2.964563379,"remaining_time":0.02994508464},
+{"learn":[0.4522791181],"iteration":99,"passed_time":2.980352983,"remaining_time":0}
+]}
\ No newline at end of file
diff --git a/catboost_info/learn/events.out.tfevents b/catboost_info/learn/events.out.tfevents
new file mode 100644
index 0000000000000000000000000000000000000000..2907646852ceb318c42b7d2a1e09d369e9e785b5
--- /dev/null
+++ b/catboost_info/learn/events.out.tfevents
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1fe5092567732e395a3420a1205c4190f0e63d50edc678509bd4104fc34a503
+size 5398
diff --git a/catboost_info/learn_error.tsv b/catboost_info/learn_error.tsv
new file mode 100644
index 0000000000000000000000000000000000000000..c9c56469a0cd3602ee24372cd70c82a482e8320f
--- /dev/null
+++ b/catboost_info/learn_error.tsv
@@ -0,0 +1,101 @@
+iter Logloss
+0 0.6582255385
+1 0.628929721
+2 0.6055823656
+3 0.585745295
+4 0.5691497866
+5 0.5553995801
+6 0.5431466822
+7 0.5321745596
+8 0.5230197248
+9 0.5150673326
+10 0.5085723427
+11 0.5029521178
+12 0.4982952699
+13 0.4940193081
+14 0.4903079864
+15 0.4877126951
+16 0.4849442288
+17 0.4823937275
+18 0.4803765605
+19 0.4781523185
+20 0.4767059201
+21 0.475163465
+22 0.4741219727
+23 0.473008657
+24 0.4722618181
+25 0.4714753648
+26 0.470390758
+27 0.4696201438
+28 0.46859249
+29 0.4677104182
+30 0.4673133885
+31 0.4667841252
+32 0.4664536674
+33 0.4662622609
+34 0.466086662
+35 0.4657380808
+36 0.4651284039
+37 0.4646531445
+38 0.4641257326
+39 0.4637898175
+40 0.4633285186
+41 0.4628428368
+42 0.4626124403
+43 0.4623858706
+44 0.4621286714
+45 0.4617974001
+46 0.4614775166
+47 0.46142579
+48 0.4614011205
+49 0.4611845342
+50 0.4609852804
+51 0.4604321277
+52 0.4601645791
+53 0.459804458
+54 0.4592589475
+55 0.4589643366
+56 0.4585201818
+57 0.4582657803
+58 0.4580557799
+59 0.457864554
+60 0.4576167412
+61 0.4575874936
+62 0.457362279
+63 0.4572946663
+64 0.4569830294
+65 0.456610445
+66 0.4560918865
+67 0.4558479503
+68 0.455740418
+69 0.455501269
+70 0.4554787935
+71 0.4552744806
+72 0.455234248
+73 0.455137986
+74 0.4549484305
+75 0.4548062199
+76 0.4546474797
+77 0.4545581835
+78 0.4544265313
+79 0.4544030978
+80 0.4543650724
+81 0.4542698101
+82 0.4540294101
+83 0.4539463005
+84 0.4537784829
+85 0.4536943889
+86 0.4536386999
+87 0.4533342039
+88 0.4531946585
+89 0.4529846134
+90 0.4529583581
+91 0.4528171854
+92 0.4526575987
+93 0.4526170824
+94 0.4525149982
+95 0.4524663385
+96 0.4524315166
+97 0.4523241677
+98 0.4523010903
+99 0.4522791181
diff --git a/catboost_info/time_left.tsv b/catboost_info/time_left.tsv
new file mode 100644
index 0000000000000000000000000000000000000000..fce0ef1723caa4ea5ff65bd02f9c6c661e5a4463
--- /dev/null
+++ b/catboost_info/time_left.tsv
@@ -0,0 +1,101 @@
+iter Passed Remaining
+0 106 10536
+1 152 7476
+2 189 6112
+3 225 5404
+4 240 4571
+5 256 4017
+6 292 3889
+7 326 3759
+8 362 3662
+9 396 3567
+10 432 3496
+11 466 3422
+12 488 3270
+13 524 3221
+14 558 3166
+15 573 3012
+16 594 2904
+17 630 2870
+18 666 2840
+19 687 2750
+20 721 2715
+21 756 2681
+22 777 2602
+23 812 2572
+24 847 2543
+25 864 2460
+26 904 2445
+27 938 2413
+28 972 2382
+29 1007 2350
+30 1019 2268
+31 1031 2191
+32 1066 2166
+33 1076 2089
+34 1088 2020
+35 1122 1996
+36 1157 1971
+37 1195 1950
+38 1230 1923
+39 1246 1869
+40 1280 1843
+41 1315 1816
+42 1349 1788
+43 1362 1733
+44 1398 1709
+45 1433 1682
+46 1466 1654
+47 1479 1602
+48 1486 1547
+49 1521 1521
+50 1557 1496
+51 1596 1473
+52 1646 1459
+53 1683 1434
+54 1718 1405
+55 1752 1377
+56 1787 1348
+57 1822 1319
+58 1844 1281
+59 1877 1251
+60 1915 1224
+61 1924 1179
+62 1960 1151
+63 1972 1109
+64 2007 1081
+65 2042 1052
+66 2076 1022
+67 2110 993
+68 2124 954
+69 2159 925
+70 2168 885
+71 2205 857
+72 2220 821
+73 2255 792
+74 2289 763
+75 2324 734
+76 2360 704
+77 2372 669
+78 2405 639
+79 2413 603
+80 2422 568
+81 2455 539
+82 2490 510
+83 2501 476
+84 2536 447
+85 2572 418
+86 2608 389
+87 2644 360
+88 2679 331
+89 2713 301
+90 2728 269
+91 2765 240
+92 2800 210
+93 2822 180
+94 2857 150
+95 2892 120
+96 2908 89
+97 2943 60
+98 2964 29
+99 2980 0
diff --git a/catboost_info/tmp/cat_feature_index.17f2f383-11eeddeb-4a42fe68-9d5bbcbb.tmp b/catboost_info/tmp/cat_feature_index.17f2f383-11eeddeb-4a42fe68-9d5bbcbb.tmp
new file mode 100644
index 0000000000000000000000000000000000000000..593f4708db84ac8fd0f5cc47c634f38c013fe9e4
Binary files /dev/null and b/catboost_info/tmp/cat_feature_index.17f2f383-11eeddeb-4a42fe68-9d5bbcbb.tmp differ
diff --git a/datasets/.DS_Store b/datasets/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..3fce4d53a361987c1e1e990bb56cc9bfaa6877b1
Binary files /dev/null and b/datasets/.DS_Store differ
diff --git a/datasets/cs-test.csv b/datasets/cs-test.csv
new file mode 100644
index 0000000000000000000000000000000000000000..ec6e048dc0c6a0ebea3c4049222a0bb9e6126102
--- /dev/null
+++ b/datasets/cs-test.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bab363a2a807218d32a51f5fc9668b8be7977795065edd386abc8546abaa5b78
+size 4983329
diff --git a/datasets/cs-training.csv b/datasets/cs-training.csv
new file mode 100644
index 0000000000000000000000000000000000000000..df0c937285dba20c9e4673c5639e8a07c1e93254
--- /dev/null
+++ b/datasets/cs-training.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1bd46da486a5708c58c7b01a034fae2a13b327f6f7b62ea7ba4fe3b5824b24ac
+size 7564965
diff --git a/datasets/predict_data.csv b/datasets/predict_data.csv
new file mode 100644
index 0000000000000000000000000000000000000000..b4a45a878fcfa0607470fe9b8aaeec1f4aed5d94
--- /dev/null
+++ b/datasets/predict_data.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:986bef87a56fba2b0d1b30d24d6dfcfe3cc05a7a719dc86b84044b2af8a23b26
+size 18759521
diff --git a/models/.DS_Store b/models/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..7f2f4ec3103522b0e4fa2302d5fec383cbce845b
Binary files /dev/null and b/models/.DS_Store differ
diff --git a/models/best/.DS_Store b/models/best/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..d985665ce64db2dd49a6e09f684dc086f0cee9cb
Binary files /dev/null and b/models/best/.DS_Store differ
diff --git a/models/best/train_120/.DS_Store b/models/best/train_120/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6
Binary files /dev/null and b/models/best/train_120/.DS_Store differ
diff --git a/models/best/train_120/catboost_model.pkl b/models/best/train_120/catboost_model.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..86c64b3e8a31fd196c6e4a222f17dd0935a2cd96
--- /dev/null
+++ b/models/best/train_120/catboost_model.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d634b97cd239b33b6b776bb6224c744ad4aeb36b17ef9a213078a11b15b41378
+size 302054
diff --git a/models/best/train_120/dtc_model.pkl b/models/best/train_120/dtc_model.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..676605cd6da389b431f67e66963c82ee7a32e5b6
--- /dev/null
+++ b/models/best/train_120/dtc_model.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ffd7b1e5c945ec288d566d5c242216bacda5fba4f38386e330da5954025d62dd
+size 7049
diff --git a/models/best/train_120/lgbm_model.pkl b/models/best/train_120/lgbm_model.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..de22bbdf58e99286a8443140143ecada493e64ea
--- /dev/null
+++ b/models/best/train_120/lgbm_model.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:95356d4825d459969ac9d9a914d86bc41a27bbd369efca2c586db96974edcc41
+size 354372
diff --git a/models/best/train_120/logreg_model.pkl b/models/best/train_120/logreg_model.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..d583f84f55936f918add88648cf6bbcb26a4f1f1
--- /dev/null
+++ b/models/best/train_120/logreg_model.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6cd416fa6713c982afa2abc7581c650c144e560f511fcd8adcf3aba8766b02e3
+size 1755
diff --git a/models/best/train_120/logreg_model_old.pkl b/models/best/train_120/logreg_model_old.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..5a7ea7b4cabd9fd26c83d52479bd63bc46f2e480
--- /dev/null
+++ b/models/best/train_120/logreg_model_old.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07483fea4ce8d2a2eaf76b88874e3cb6931c6a994b3b6107b46ec5aa9de2c3e3
+size 1695
diff --git a/models/best/train_120/rfc_model.pkl b/models/best/train_120/rfc_model.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..262a910e050ce128133ea1827eba29041908b697
--- /dev/null
+++ b/models/best/train_120/rfc_model.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81169937442a65f05faf04ab2cb8d0aea8d80aa23b5e8ec26490c93f57bfcfef
+size 2924569
diff --git a/models/best/train_120/xgb_model.pkl b/models/best/train_120/xgb_model.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..f187716dccbb6949b890a001b32e9266d243a45e
--- /dev/null
+++ b/models/best/train_120/xgb_model.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf5146efea09c65bfd14b968335d57ed5d415423d9d54df2e9a8cf96a4424c1e
+size 173239
diff --git a/models/best/train_150/.DS_Store b/models/best/train_150/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6
Binary files /dev/null and b/models/best/train_150/.DS_Store differ
diff --git a/models/best/train_150/catboost_150_model.pkl b/models/best/train_150/catboost_150_model.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..997feb6b44eda533331b230b95b4920f52fdc49b
--- /dev/null
+++ b/models/best/train_150/catboost_150_model.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84ccf7f069ff7a6899f449d24a2901cbac7e376ae559747d107168ee98c0aab0
+size 329566
diff --git a/models/best/train_150/dtc_150_model.pkl b/models/best/train_150/dtc_150_model.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..0ac32edab54ff273c0895f748f4d564f87495c08
--- /dev/null
+++ b/models/best/train_150/dtc_150_model.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84e84f230fe512f3ba48c459f23d7c5d3ace7fc52c1d054fcaa6a503de1f870d
+size 7065
diff --git a/models/best/train_150/lgbm_150_model.pkl b/models/best/train_150/lgbm_150_model.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..8b09ff9a41d3c83a381d4498c485dbd56d86e494
--- /dev/null
+++ b/models/best/train_150/lgbm_150_model.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:187132edf48b058a95a421f0670646d46d995c6dfaa749114482ab89ba5514a0
+size 355044
diff --git a/models/best/train_150/logreg_150_model.pkl b/models/best/train_150/logreg_150_model.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..7335583e2f580f710af7e6ca1bf9a169a2fb139c
--- /dev/null
+++ b/models/best/train_150/logreg_150_model.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b4ea65d7a9afd3b4e337cb51114e173ca19e967490951df2a2ffb0cab84a269
+size 1707
diff --git a/models/best/train_150/rfc_150_model.pkl b/models/best/train_150/rfc_150_model.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..175078043e00c967cd6bb8be9f6c9a3f8f0aa282
--- /dev/null
+++ b/models/best/train_150/rfc_150_model.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58429f92c05d0cd87461ee397f937a298d2da54fab8d3b53063e608689ae34f7
+size 2995129
diff --git a/models/best/train_150/xgb_150_model.pkl b/models/best/train_150/xgb_150_model.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..13693817ee734f7b507bfb405e6e3019a2a6c774
--- /dev/null
+++ b/models/best/train_150/xgb_150_model.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28a9f915d33b162454b3846105c812aa725a7fb4afd93795a62e3bdb183c0daf
+size 173459
diff --git a/ssh b/ssh
new file mode 100644
index 0000000000000000000000000000000000000000..cf352294f86d24bbb9cb298baa39b97ebf1479b6
--- /dev/null
+++ b/ssh
@@ -0,0 +1,8 @@
+-----BEGIN OPENSSH PRIVATE KEY-----
+b3BlbnNzaC1rZXktdjEAAAAACmFlczI1Ni1jdHIAAAAGYmNyeXB0AAAAGAAAABCKliKJ7r
+fL5JFyzIIGpE2XAAAAGAAAAAEAAAAzAAAAC3NzaC1lZDI1NTE5AAAAIBPBcCLBErmYfAIq
+3r1vGRJGbbtI3tuOqo3jpDnoCYkWAAAAkCIMM2hOAXl5ooEsSA7+f/xIvjAziTvEgEyG4U
+bmuKh2hFSgmGvCK7P9IRQ9zfAQ40mJKwCKeZMoh2+Dj0V+8gFJc/dY/C17ioTsfOQeIpW5
+tWGYIIeGPycWEquyC1+yHv5XpgqPBIout9nZrsBYQl2ZhAvQ1KdOBmCWbDN2g6NNF04oC2
+aLP3QoQnCiNx4+Sw==
+-----END OPENSSH PRIVATE KEY-----
diff --git a/ssh.pub b/ssh.pub
new file mode 100644
index 0000000000000000000000000000000000000000..3c2c9f573218f84731f1b53001c7131d9d9334b4
--- /dev/null
+++ b/ssh.pub
@@ -0,0 +1 @@
+ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBPBcCLBErmYfAIq3r1vGRJGbbtI3tuOqo3jpDnoCYkW svik05