diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..88bdef86a8c42930379763c2c05afc1d7267557f Binary files /dev/null and b/.DS_Store differ diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..e79f24b4ad453e7c4cb335dd8bc1e06a1f4a4e68 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +*.csv filter=lfs diff=lfs merge=lfs -text diff --git a/app/.DS_Store b/app/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..4a65af87771625b57ec3f9f8fd7b627b996ba558 Binary files /dev/null and b/app/.DS_Store differ diff --git a/app/.streamlit/config.toml b/app/.streamlit/config.toml new file mode 100644 index 0000000000000000000000000000000000000000..74812cd03e02b4f059f0fdb02781b9d0a16cb546 --- /dev/null +++ b/app/.streamlit/config.toml @@ -0,0 +1,2 @@ +[client] +showSidebarNavigation = false \ No newline at end of file diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/__pycache__/__init__.cpython-311.pyc b/app/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7028699e2e7028011cd037f3ab691bffb3de06d4 Binary files /dev/null and b/app/__pycache__/__init__.cpython-311.pyc differ diff --git a/app/main.py b/app/main.py new file mode 100644 index 0000000000000000000000000000000000000000..a33732bb7043f95c64c7bcd5802d022b4c3cb7d2 --- /dev/null +++ b/app/main.py @@ -0,0 +1,33 @@ +import streamlit as st +import os +import sys + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +st.set_page_config( + page_title="GiveMeSomeCredit", + page_icon="🏦", + layout="wide", + 
import numpy as np


def check_business_rules(df):
    """Apply the hard business rules to a batch of applications.

    Missing values in the inspected columns are treated as 0 (so a missing
    age is handled like age 0 and triggers the under-18 rejection).

    Args:
        df: DataFrame with the GiveMeSomeCredit columns ``age``,
            ``MonthlyIncome``, ``DebtRatio``, the three past-due counters,
            ``NumberRealEstateLoansOrLines`` and
            ``RevolvingUtilizationOfUnsecuredLines``.

    Returns:
        Tuple ``(manual_mask, auto_reject_mask, messages, auto_decisions)``:
        - manual_mask: bool array, True = send to manual review;
        - auto_reject_mask: bool array, True = reject immediately;
        - messages: array with the text of the rule that fired ('' if none);
        - auto_decisions: int array, 1 for auto-rejected rows, 0 otherwise.
        The two masks are mutually exclusive: a rejection takes precedence
        over any manual-review rule matched by the same row.
    """
    n = len(df)

    def column(name):
        return df[name].fillna(0).to_numpy()

    age = column('age')
    monthly_income = column('MonthlyIncome')
    debt_ratio = column('DebtRatio')
    # With a known income DebtRatio is a share of it; without income the
    # value itself is used as the monthly payment.
    monthly_debt = np.where(monthly_income > 0,
                            debt_ratio * monthly_income,
                            debt_ratio)

    late_90 = column('NumberOfTimes90DaysLate')
    late_60_89 = column('NumberOfTime60-89DaysPastDueNotWorse')
    late_30_59 = column('NumberOfTime30-59DaysPastDueNotWorse')

    real_estate = column('NumberRealEstateLoansOrLines')
    utilization = column('RevolvingUtilizationOfUnsecuredLines')

    manual_mask = np.zeros(n, dtype=bool)
    auto_reject_mask = np.zeros(n, dtype=bool)
    # Object dtype keeps full-length Python strings regardless of which
    # message is written first.
    messages = np.full(n, '', dtype=object)
    auto_decisions = np.zeros(n, dtype=int)

    # Manual-review rules, in evaluation order: when several rules match the
    # same row the message of the last one is kept.
    manual_rules = [
        # Special bank codes stored in the past-due counters.
        ((late_90 == 98) | (late_60_89 == 98) | (late_30_59 == 98),
         'Код 98: Списание долга как безнадежного'),
        ((late_90 == 96) | (late_60_89 == 96) | (late_30_59 == 96),
         'Код 96: Изъятие залога или реализация имущества'),
        # Extreme values that need an individual decision.
        (age > 80,
         'Возраст > 80 лет - требуется ручной разбор (индивидуальные условия)'),
        (monthly_income > 1000000,
         'Доход свыше 1,000,000 $ - требуется ручной разбор'),
        (monthly_debt > 1000000,
         'Платежи свыше 1,000,000 $ - требуется ручной разбор'),
        (utilization > 2,
         'Использование кредитных средств превышает 200%'),
        (real_estate > 20,
         'Количество кредитов под залог недвижимости слишком велико - ручной разбор'),
    ]
    for hit, text in manual_rules:
        manual_mask |= hit
        messages[hit] = text

    # Critical rule, applied last so that a rejection wins over every
    # manual-review rule (both the flag and the message).
    underage = age < 18
    auto_reject_mask |= underage
    auto_decisions[underage] = 1
    messages[underage] = 'Возраст менее 18 лет - кредит не выдаётся'
    manual_mask &= ~auto_reject_mask

    return manual_mask, auto_reject_mask, messages, auto_decisions


def _margin_pair(margins):
    """Return ``(low, high)`` margins; a single value is used for both sides."""
    if len(margins) == 1:
        return margins[0], margins[0]
    return margins[0], margins[1]


def _take_rows(data, positions):
    """Select rows by position from a DataFrame or from an array-like."""
    if hasattr(data, 'iloc'):
        return data.iloc[positions]
    return data[positions]


def _business_decision(message, rejected):
    """Build the decision record for a row resolved by the business rules."""
    icon = "❌" if rejected else "⚠️"
    return {
        'final_decision': 1 if rejected else None,
        'model_used': 'Business Rules',
        'needs_review': not rejected,
        'probability': 1.0 if rejected else None,
        'message': message,
        'lr_proba': None,
        'second_proba': None,
        # Model-related keys are present (and empty) so that consumers can
        # read every decision record the same way.
        'lr_margin': None,
        'second_margin': None,
        'lr_confident': False,
        'second_confident': False,
        'second_used': False,
        'decision_path': [f"{icon} Бизнес-правила: {message}"],
    }


def escalation_decision(applications_df, lr_model, second_model, second_model_name,
                        threshold=0.5, lr_margins=(0.35,), second_margins=(0.4,),
                        preprocessor=None, scaler=None):
    """Run the escalation pipeline on a batch of applications.

    1. Business rules: some rows are rejected at once, some go straight to
       manual review.
    2. The remaining rows are scored by the logistic regression.
    3. Rows where the LR probability is closer to ``threshold`` than the
       required margin are scored by the second model; if that model is not
       confident either, the row goes to manual review.

    Args:
        applications_df: raw applications, one row per application.
        lr_model: fitted model exposing ``predict_proba``.
        second_model: fitted escalation model exposing ``predict_proba``.
        second_model_name: display name of the second model.
        threshold: probability at or above which the decision is 1 (decline).
        lr_margins: one value (symmetric) or ``[low, high]`` minimal distance
            from ``threshold`` for the LR to count as confident.
        second_margins: same for the second model.
        preprocessor: object with ``transform``; must be provided whenever at
            least one row reaches the models.
        scaler: object with ``transform`` applied after ``preprocessor``; may
            return a DataFrame or an array.

    Returns:
        ``(decisions, manual_mask, stats)``: a list with one decision dict per
        row, a bool array marking rows that need manual review, and a dict of
        counters.
    """
    n = len(applications_df)
    decisions = [None] * n
    manual_mask = np.zeros(n, dtype=bool)

    stats = {
        'business_manual': 0,   # manual review caused by business rules
        'business_auto': 0,     # automatic rejection by business rules
        'lr_confident': 0,      # resolved confidently by the LR
        'second_confident': 0,  # resolved confidently by the second model
        'second_uncertain': 0,  # second model unsure -> manual review
        'total': n
    }

    # 1. Business rules. Each row is counted once; rejection has priority.
    bus_manual_mask, bus_reject_mask, bus_messages, _ = check_business_rules(applications_df)
    for i in range(n):
        if bus_reject_mask[i]:
            stats['business_auto'] += 1
            decisions[i] = _business_decision(bus_messages[i], rejected=True)
        elif bus_manual_mask[i]:
            stats['business_manual'] += 1
            manual_mask[i] = True
            decisions[i] = _business_decision(bus_messages[i], rejected=False)

    # 2. Rows that were not resolved by the business rules go to the models.
    model_indices = [i for i, decision in enumerate(decisions) if decision is None]
    if not model_indices:
        return decisions, manual_mask, stats

    # 3. Preprocess once and score the whole batch with the LR.
    processed = preprocessor.transform(applications_df.iloc[model_indices])
    processed_scaled = scaler.transform(processed)
    lr_probas = lr_model.predict_proba(processed_scaled)[:, 1]

    lr_low, lr_high = _margin_pair(lr_margins)
    lr_below = lr_probas < threshold
    lr_margin_values = np.where(lr_below, threshold - lr_probas, lr_probas - threshold)
    lr_confident = lr_margin_values >= np.where(lr_below, lr_low, lr_high)

    for j, idx in enumerate(model_indices):
        if not lr_confident[j]:
            continue
        stats['lr_confident'] += 1
        decisions[idx] = {
            'final_decision': int(lr_probas[j] >= threshold),
            'probability': lr_probas[j],
            'model_used': 'Logistic Regression',
            'needs_review': False,
            'lr_proba': lr_probas[j],
            'second_proba': None,
            'lr_margin': lr_margin_values[j],
            'lr_confident': True,
            'second_used': False,
            'decision_path': [
                f"1️⃣ Logistic Regression: {lr_probas[j]:.1%} (отступ: {lr_margin_values[j]:.1%})",
                f" ✅ LR уверена - финальное решение"
            ]
        }

    # 4. Rows the LR is unsure about are escalated to the second model.
    uncertain_positions = [j for j in range(len(model_indices)) if not lr_confident[j]]
    if not uncertain_positions:
        return decisions, manual_mask, stats

    second_probas = second_model.predict_proba(
        _take_rows(processed_scaled, uncertain_positions)
    )[:, 1]
    second_low, second_high = _margin_pair(second_margins)

    for k, j in enumerate(uncertain_positions):
        idx = model_indices[j]
        proba = second_probas[k]
        if proba < threshold:
            second_margin = threshold - proba
            second_confident = second_margin >= second_low
        else:
            second_margin = proba - threshold
            second_confident = second_margin >= second_high

        path = [
            f"1️⃣ Logistic Regression: {lr_probas[j]:.1%} (отступ: {lr_margin_values[j]:.1%})",
            f" ⚠️ LR не уверена → вызываем {second_model_name}",
            f"2️⃣ {second_model_name}: {proba:.1%} (отступ: {second_margin:.1%})"
        ]

        if second_confident:
            stats['second_confident'] += 1
            path.append(f" ✅ {second_model_name} уверен - финальное решение")
            decisions[idx] = {
                'final_decision': int(proba >= threshold),
                'probability': proba,
                'model_used': second_model_name,
                'needs_review': False,
                'lr_proba': lr_probas[j],
                'second_proba': proba,
                'lr_margin': lr_margin_values[j],
                'second_margin': second_margin,
                'lr_confident': False,
                'second_confident': True,
                'second_used': True,
                'decision_path': path
            }
        else:
            stats['second_uncertain'] += 1
            path.append(f" ⚠️ {second_model_name} не уверен → ручной разбор")
            manual_mask[idx] = True
            decisions[idx] = {
                'final_decision': None,
                'probability': proba,
                'model_used': 'Manual Review',
                'needs_review': True,
                'lr_proba': lr_probas[j],
                'second_proba': proba,
                'lr_margin': lr_margin_values[j],
                'second_margin': second_margin,
                'lr_confident': False,
                'second_confident': False,
                'second_used': True,
                'message': 'Модели не уверены в решении',
                'decision_path': path
            }

    return decisions, manual_mask, stats
import numpy as np
import pandas as pd

# NOTE(review): the entries of this mapping are elided in the reviewed source;
# the placeholder is kept exactly as found.
FEATURE_DESCRIPTIONS = { ... }


def get_feature_display_name(feature_name):
    """Return a human-readable label for a feature.

    A name present in ``FEATURE_DESCRIPTIONS`` uses its stored description;
    any other name is derived from the identifier: underscores become spaces,
    words are title-cased, ``Over`` becomes ``>`` and ``Loans`` is translated.
    """
    if feature_name in FEATURE_DESCRIPTIONS:
        return FEATURE_DESCRIPTIONS[feature_name]
    name = feature_name.replace('_', ' ').title()
    for raw, shown in (('Over', '>'), ('Loans', 'Кредитов')):
        name = name.replace(raw, shown)
    return name


def interpret_lr(features, lr_model, feature_names):
    """Explain a logistic-regression prediction for the first row of ``features``.

    Args:
        features: DataFrame or 2-D ndarray of model inputs; only row 0 is
            explained.
        lr_model: fitted model exposing ``coef_``, ``intercept_`` and
            ``predict_proba``.
        feature_names: column names, in the model's feature order.

    Returns:
        dict with:
        - ``logit_contributions``: DataFrame (feature, coefficient, value,
          logit_contribution, abs_logit) sorted by absolute contribution;
        - ``marginal_effects``: DataFrame (feature, marginal_effect,
          abs_marginal) where the effect is the change in predicted
          probability compared with the same row with that feature set to 0;
        - ``probability``, ``logit``, ``intercept``: the prediction rebuilt
          from the coefficients.
    """
    if isinstance(features, np.ndarray):
        features = pd.DataFrame(features, columns=feature_names)

    coefficients = lr_model.coef_[0]
    intercept = lr_model.intercept_[0]
    first_row = features.iloc[0].to_numpy()

    importance_df = pd.DataFrame({
        'feature': feature_names,
        'coefficient': coefficients,
        'value': first_row
    })
    importance_df['logit_contribution'] = importance_df['coefficient'] * importance_df['value']
    importance_df['abs_logit'] = importance_df['logit_contribution'].abs()
    importance_df = importance_df.sort_values('abs_logit', ascending=False)

    base_proba = lr_model.predict_proba(features)[0, 1]

    # One copy of the row per feature with that feature zeroed (the diagonal),
    # scored in a single predict_proba call. The batch is wrapped in a
    # DataFrame with the original columns so the model sees the same kind of
    # input as for the base prediction.
    zeroed = np.tile(first_row, (len(feature_names), 1))
    np.fill_diagonal(zeroed, 0)
    zero_probas = lr_model.predict_proba(
        pd.DataFrame(zeroed, columns=features.columns)
    )[:, 1]

    effects = base_proba - zero_probas
    marginal_df = pd.DataFrame({
        'feature': list(feature_names),
        'marginal_effect': effects,
        'abs_marginal': np.abs(effects)
    }).sort_values('abs_marginal', ascending=False)

    logit = intercept + importance_df['logit_contribution'].sum()
    proba = 1 / (1 + np.exp(-logit))

    return {
        'logit_contributions': importance_df,
        'marginal_effects': marginal_df,
        'probability': proba,
        'logit': logit,
        'intercept': intercept
    }


def _signed_barh(data, value_col, title, xlabel, label_gap, label_format):
    """Draw the ten first rows of ``data`` as a signed horizontal bar chart.

    Positive bars are red, negative green, zero pale yellow. Bars longer than
    ``label_gap`` get their value written inside the bar using
    ``label_format``. Returns ``(fig, ax)``; the layout is left to the caller.
    """
    # Plotting libraries are imported lazily so that the numeric helpers of
    # this module stay usable without them.
    import matplotlib.pyplot as plt
    import seaborn as sns

    df = data.head(10).sort_values(value_col, ascending=True)
    values = df[value_col].to_numpy()
    positions = np.arange(len(df))

    fig, ax = plt.subplots(figsize=(10, 6), facecolor='#f8f9fa')
    ax.set_facecolor('#f8f9fa')

    colors = ['#d7191c' if v > 0 else '#1a9641' if v < 0 else '#ffffbf' for v in values]
    bars = ax.barh(positions, values, color=colors, edgecolor='white', linewidth=1.5, alpha=0.9)

    for bar, val in zip(bars, values):
        if abs(val) > label_gap:
            x_pos = val - label_gap if val > 0 else val + label_gap
            ax.text(x_pos, bar.get_y() + bar.get_height() / 2, format(val, label_format),
                    ha='right' if val > 0 else 'left', va='center', fontsize=9)

    ax.axvline(x=0, color='#495057', linestyle='-', linewidth=1, alpha=0.3)
    ax.grid(axis='x', alpha=0.15, linestyle='--', color='#adb5bd')
    ax.set_axisbelow(True)
    ax.set_xlabel(xlabel, fontsize=11)
    ax.set_ylabel('')
    ax.set_title(title, fontsize=12, fontweight='bold', pad=15)
    # Fix the tick positions before relabelling them.
    ax.set_yticks(positions)
    ax.set_yticklabels([get_feature_display_name(name) for name in df['feature']], fontsize=10)
    sns.despine(top=True, right=True, left=False, bottom=False)
    return fig, ax


def plot_feature_importance_sns(importance_df, value_col='logit_contribution', title="Вклад признаков в логит"):
    """Plot per-feature logit contributions (first ten rows) and return the figure."""
    fig, _ = _signed_barh(importance_df, value_col, title,
                          xlabel='Вклад в логит', label_gap=0.02, label_format='.3f')
    fig.tight_layout()
    return fig


def plot_marginal_effects_sns(marginal_df, title="Влияние на вероятность дефолта"):
    """Plot per-feature probability changes (first ten rows) and return the figure."""
    from matplotlib.ticker import FuncFormatter

    fig, ax = _signed_barh(marginal_df, 'marginal_effect', title,
                           xlabel='Изменение вероятности', label_gap=0.01, label_format='.1%')
    ax.xaxis.set_major_formatter(FuncFormatter(lambda x, p: f'{x:.0%}'))
    fig.tight_layout()
    return fig


def plot_shap_analysis(second_model, processed_scaled, feature_names, second_model_name):
    """Render a SHAP explanation of the first row for a tree-based model.

    Writes directly to the current Streamlit page: a waterfall plot, a short
    reading guide and tables of the SHAP values. A missing ``shap`` package
    is reported together with an installation hint; any other failure is
    reported with its own error message.
    """
    import matplotlib.pyplot as plt
    import streamlit as st

    st.markdown("---")
    st.subheader(f"⚡ Детальный анализ: {second_model_name} (SHAP)")

    try:
        import shap
    except ImportError as e:
        st.error(f"❌ Ошибка SHAP: {e}")
        st.info("Установите shap: `pip install shap`")
        return

    with st.spinner("🔄 Рассчитываем SHAP значения..."):
        try:
            explainer = shap.TreeExplainer(second_model)
            shap_values = explainer.shap_values(processed_scaled)
            base_value = explainer.expected_value

            # Binary classifiers may return one set of values per class
            # (as a list or as a trailing array axis); keep the positive class.
            if isinstance(shap_values, list):
                shap_values = shap_values[1]
            else:
                shap_values = np.asarray(shap_values)
                if shap_values.ndim == 3:
                    shap_values = shap_values[:, :, 1]
            if np.ndim(base_value) > 0:
                base_value = np.ravel(base_value)
                base_value = base_value[1] if len(base_value) > 1 else base_value[0]

            row_shap = shap_values[0]
            if hasattr(processed_scaled, 'iloc'):
                row_data = processed_scaled.iloc[0].values
            else:
                row_data = np.asarray(processed_scaled)[0]

            # 1. Waterfall plot
            fig, ax = plt.subplots(figsize=(12, 7))
            shap.waterfall_plot(
                shap.Explanation(
                    values=row_shap,
                    base_values=base_value,
                    data=row_data,
                    feature_names=feature_names
                ),
                show=False,
            )
            plt.tight_layout()
            st.pyplot(fig)
            # The figure is not needed after rendering; release it.
            plt.close(fig)

            # 2. How to read the plot
            with st.expander("📋 Как читать SHAP график?"):
                st.markdown("""
                - **f(x)** = итоговое предсказание модели
                - **base value** = среднее предсказание по всем клиентам
                - 🔴 Красное → признаки, повышающие риск
                - 🔵 Синее → признаки, снижающие риск
                """)

            # 3. Table of SHAP values
            shap_df = pd.DataFrame({
                'feature': feature_names,
                'shap_value': row_shap,
                'abs_shap': np.abs(row_shap)
            }).sort_values('abs_shap', ascending=False)

            shap_df['description'] = shap_df['feature'].apply(get_feature_display_name)

            st.markdown("### 📋 Факторы, влияющие на решение:")

            col1, col2 = st.columns(2)

            with col1:
                pos = shap_df[shap_df['shap_value'] > 0].head(5)
                if len(pos) > 0:
                    st.markdown("**🔴 Повышают риск:**")
                    for _, row in pos.iterrows():
                        st.markdown(f"- {row['description']}: +{row['shap_value']:.3f}")

            with col2:
                neg = shap_df[shap_df['shap_value'] < 0].head(5)
                if len(neg) > 0:
                    st.markdown("**🟢 Снижают риск:**")
                    for _, row in neg.iterrows():
                        st.markdown(f"- {row['description']}: {row['shap_value']:.3f}")

            with st.expander("📋 Все SHAP значения"):
                display_df = shap_df[['feature', 'description', 'shap_value']].copy()
                display_df.columns = ['Признак', 'Описание', 'SHAP']
                display_df['SHAP'] = display_df['SHAP'].round(3)
                st.dataframe(display_df.sort_values('SHAP', ascending=False), width='stretch')

        except Exception as e:
            # UI boundary: report the failure on the page instead of
            # breaking the rest of it.
            st.error(f"❌ Ошибка SHAP: {e}")
import streamlit as st
import pandas as pd
import os
from app.utils.data_loader import load_artifacts
from app.models.escalation import escalation_decision
from app.models.interpretation import (
    interpret_lr, plot_feature_importance_sns,
    plot_marginal_effects_sns, plot_shap_analysis,
    get_feature_display_name
)
# NOTE(review): not used on this page; a function with this name is defined in
# app.models.escalation - confirm that this module path is intended.
from app.utils.credit_preprocessor import check_business_rules

# Paths
PROJECT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
MODELS_PATH = os.path.join(PROJECT_PATH, 'models/best/train_150/')
PREPROCESSOR_PATH = os.path.join(PROJECT_PATH, 'preprocessors/')

# Radio options shared by the questionnaire and the exact-values step.
_INCOME_SLIDER = "Слайдер (до 20,000$)"
_DEBT_SLIDER = "Слайдер (до 10,000$)"
_EXACT_VALUE = "Точное значение"
_UTIL_NORMAL = "Норма (0-100%)"
_UTIL_OVERDRAFT = "Овердрафт (100-200%)"
_UTIL_EXTREME = "Экстремальный (>200%)"

_LR_NAME = 'Logistic Regression'
# Second models for which the SHAP explanation is shown.
_TREE_MODELS = ('XGBoost', 'LightGBM', 'Random Forest', 'CatBoost')


def _fresh_stats():
    """Return zeroed session counters."""
    return {
        'total': 0,
        'manual': 0,
        'lr_confident': 0,
        'second_used': 0,
        'second_confident': 0,
        'approved': 0,
        'declined': 0
    }


def _render_input_step():
    """Step 1: questionnaire; stores the answers and moves to the values step."""
    st.header("📋 Анкета заемщика")

    with st.form("credit_form"):
        st.subheader("👤 Личная информация")
        col1, col2 = st.columns(2)
        with col1:
            age = st.number_input("Возраст", 0, 150, 35)
        with col2:
            dependents = st.number_input("Иждивенцы", 0, 20, 0)

        st.subheader("💰 Ежемесячный доход")
        income_method = st.radio("Способ указания дохода", [_INCOME_SLIDER, _EXACT_VALUE],
                                 horizontal=True)

        st.subheader("💳 Ежемесячные платежи")
        debt_method = st.radio("Способ указания платежей", [_DEBT_SLIDER, _EXACT_VALUE],
                               horizontal=True)

        st.subheader("📊 Кредитная история")
        credit_lines = st.number_input("Открытых кредитов и карт", 0, 100, 5)
        real_estate = st.number_input("Кредитов под залог недвижимости", 0, 100, 1)

        st.subheader("📈 Использование лимитов")
        util_method = st.radio("Уровень использования",
                               [_UTIL_NORMAL, _UTIL_OVERDRAFT, _UTIL_EXTREME], horizontal=True)

        st.subheader("⏱️ Просрочки за последние 2 года")
        col1, col2, col3 = st.columns(3)
        with col1:
            late_30_59 = st.number_input("30-59 дней", 0, 100, 0)
        with col2:
            late_60_89 = st.number_input("60-89 дней", 0, 100, 0)
        with col3:
            late_90 = st.number_input("90+ дней", 0, 100, 0)

        submitted = st.form_submit_button("➡️ Далее: указать точные значения")

    if submitted:
        st.session_state.update({
            'age': age, 'dependents': dependents, 'income_method': income_method,
            'debt_method': debt_method, 'credit_lines': credit_lines,
            'real_estate': real_estate, 'util_method': util_method,
            'late_30_59': late_30_59, 'late_60_89': late_60_89, 'late_90': late_90
        })
        st.session_state.step = 'values'
        st.rerun()


def _render_sidebar(models):
    """Render the settings sidebar, store the thresholds in the session state
    and return the name of the selected escalation model."""
    with st.sidebar:
        st.markdown("---")
        st.subheader("⚙️ Настройки")

        with st.expander("🎯 Пороги уверенности", expanded=False):
            threshold = st.slider("Порог одобрения", 0.3, 0.7, 0.5, 0.05)
            lr_margin = st.slider("Отступ LR", 0.2, 0.5, 0.35, 0.05)
            second_margin = st.slider("Отступ второй модели", 0.2, 0.5, 0.4, 0.05)

        with st.expander("🤖 Выбор модели", expanded=False):
            available_models = [name for name in models if name != _LR_NAME]
            second_model_name = st.selectbox("Модель для эскалации", available_models)

        with st.expander("📊 Статистика", expanded=False):
            stats = st.session_state.stats
            if stats['total'] > 0:
                st.metric("Всего заявок", stats['total'])
                st.metric("Ручной разбор", f"{stats['manual'] / stats['total']:.1%}")
                st.metric("LR уверена", f"{stats['lr_confident'] / stats['total']:.1%}")
                if stats['second_used'] > 0:
                    st.metric("Вторая модель уверена",
                              f"{stats['second_confident'] / stats['second_used']:.1%}")

                if st.button("🔄 Сброс"):
                    st.session_state.stats = _fresh_stats()
                    st.rerun()
            else:
                st.info("Нет данных")

        with st.expander("ℹ️ О проекте", expanded=False):
            st.markdown(f"""
            **Модели:**
            - Logistic Regression
            - {', '.join(available_models)}

            **AUC:** 0.8578 (LR), ~0.87 (остальные)
            """)

    st.session_state.threshold = threshold
    st.session_state.lr_margin = lr_margin
    st.session_state.second_margin = second_margin
    st.session_state.second_model_name = second_model_name
    return second_model_name


def _record_decision(decision):
    """Update the session counters with one decision."""
    stats = st.session_state.stats
    stats['total'] += 1
    if decision['needs_review']:
        stats['manual'] += 1
    elif decision['final_decision'] == 0:
        stats['approved'] += 1
    else:
        stats['declined'] += 1

    if decision.get('lr_confident', False):
        stats['lr_confident'] += 1

    if decision.get('second_used', False):
        stats['second_used'] += 1
        if decision.get('second_confident', False):
            stats['second_confident'] += 1


def _render_model_columns(decision, second_model_name):
    """Show the LR and second-model probabilities side by side."""
    col1, col2 = st.columns(2)
    with col1:
        st.markdown("**🏦 Logistic Regression**")
        st.metric("Вероятность", f"{decision['lr_proba']:.1%}")
        st.write(f"Отступ: {decision['lr_margin']:.1%}")
        if decision['lr_confident']:
            st.success("✅ Уверена")
        else:
            st.warning("⚠️ Не уверена")

    with col2:
        st.markdown(f"**⚡ {second_model_name}**")
        if decision['second_used']:
            st.metric("Вероятность", f"{decision['second_proba']:.1%}")
            st.write(f"Отступ: {decision['second_margin']:.1%}")
            if decision['second_confident']:
                st.success("✅ Уверен")
            else:
                st.warning("⚠️ Не уверен")
        else:
            st.info("⏳ Не вызывался")


def _render_verdict(decision, model_based):
    """Show the final outcome: manual review, approval or rejection."""
    st.markdown("---")
    if decision['needs_review']:
        st.warning("👨‍💼 **РУЧНОЙ РАЗБОР**")
        if model_based:
            st.info("Модели не уверены - требуется проверка специалистом")
        else:
            # Sent to review by a business rule: show the rule that fired.
            st.info(decision.get('message') or "Модели не уверены - требуется проверка специалистом")
        return

    col1, col2 = st.columns(2)
    with col1:
        if decision['final_decision'] == 0:
            st.success("✅ **КРЕДИТ ОДОБРЕН**")
        else:
            st.error("❌ **КРЕДИТ НЕ ОДОБРЕН**")
    with col2:
        st.metric("Модель", decision['model_used'])


def _render_lr_details(lr_model, processed_scaled, feature_names):
    """Show logit contributions and marginal effects of the LR prediction."""
    st.markdown("---")
    st.subheader("🔍 Детальный анализ: Logistic Regression")

    interpretation = interpret_lr(processed_scaled, lr_model, feature_names)

    tab1, tab2 = st.tabs(["📊 Вклад в логит", "📈 Влияние на вероятность"])

    with tab1:
        st.markdown("🔴 Положительный вклад = ↑ риск, 🟢 Отрицательный = ↓ риск")
        fig1 = plot_feature_importance_sns(interpretation['logit_contributions'])
        st.pyplot(fig1)

        with st.expander("📋 Все вклады"):
            display_df = interpretation['logit_contributions'][
                ['feature', 'value', 'coefficient', 'logit_contribution']].copy()
            display_df['Описание'] = display_df['feature'].apply(get_feature_display_name)
            display_df = display_df[['Описание', 'value', 'coefficient', 'logit_contribution']]
            display_df.columns = ['Признак', 'Значение', 'Коэф', 'Вклад']
            display_df = display_df.round(3)
            st.dataframe(display_df)

    with tab2:
        st.markdown("🔴 Положительное = фактор ↑ риск, 🟢 Отрицательное = ↓ риск")
        fig2 = plot_marginal_effects_sns(interpretation['marginal_effects'])
        st.pyplot(fig2)

        with st.expander("📋 Все эффекты"):
            display_df = interpretation['marginal_effects'][['feature', 'marginal_effect']].copy()
            display_df['Описание'] = display_df['feature'].apply(get_feature_display_name)
            display_df = display_df[['Описание', 'marginal_effect']]
            display_df.columns = ['Признак', 'Влияние']
            display_df['Влияние'] = display_df['Влияние'].map('{:.1%}'.format)
            st.dataframe(display_df)

    st.info(f"Итоговая вероятность дефолта (LR): {interpretation['probability']:.1%}")


def _process_application(preprocessor, scaler, models, second_model_name,
                         monthly_income, monthly_debt, utilization):
    """Score the submitted application and render the whole result section."""
    # Without income the payment itself is stored in DebtRatio.
    debt_ratio = monthly_debt / monthly_income if monthly_income > 0 else monthly_debt

    input_data = pd.DataFrame([{
        'RevolvingUtilizationOfUnsecuredLines': utilization,
        'age': st.session_state.age,
        'NumberOfTime30-59DaysPastDueNotWorse': st.session_state.late_30_59,
        'DebtRatio': debt_ratio,
        'MonthlyIncome': monthly_income,
        'NumberOfOpenCreditLinesAndLoans': st.session_state.credit_lines,
        'NumberOfTimes90DaysLate': st.session_state.late_90,
        'NumberRealEstateLoansOrLines': st.session_state.real_estate,
        'NumberOfTime60-89DaysPastDueNotWorse': st.session_state.late_60_89,
        'NumberOfDependents': st.session_state.dependents
    }])

    st.markdown("---")

    lr_model = models[_LR_NAME]
    second_model = models[second_model_name]

    with st.spinner("🔄 Анализ заявки..."):
        # Single escalation call (business rules included).
        decisions, _, _ = escalation_decision(
            input_data,
            lr_model,
            second_model,
            second_model_name,
            threshold=st.session_state.threshold,
            lr_margins=[st.session_state.lr_margin],
            second_margins=[st.session_state.second_margin],
            preprocessor=preprocessor,
            scaler=scaler
        )
        decision = decisions[0]

    # A decision taken by the business rules carries no model output, so the
    # model-specific widgets are shown only when the LR actually scored it.
    model_based = decision.get('lr_proba') is not None

    _record_decision(decision)

    st.subheader("🔄 Цепочка принятия решения")
    for step in decision['decision_path']:
        st.write(step)

    if model_based:
        _render_model_columns(decision, second_model_name)

    _render_verdict(decision, model_based)

    if not model_based:
        return

    # The LR interpretation needs the processed features.
    processed = preprocessor.transform(input_data)
    processed_scaled = scaler.transform(processed)
    feature_names = processed_scaled.columns.tolist()

    _render_lr_details(lr_model, processed_scaled, feature_names)

    # SHAP explanation of the second model (tree-based models only).
    if decision.get('second_used', False) and second_model_name in _TREE_MODELS:
        plot_shap_analysis(second_model, processed_scaled, feature_names, second_model_name)


def _render_values_step(preprocessor, scaler, models):
    """Step 2: exact values, settings sidebar and, after submit, the decision."""
    st.header("💰 Укажите точные значения")

    with st.form("values_form"):
        col1, col2 = st.columns(2)
        with col1:
            st.subheader("Доход")
            if st.session_state.income_method == _INCOME_SLIDER:
                monthly_income = st.slider("Ежемесячный доход ($)", 0, 20000, 5000)
            else:
                monthly_income = st.number_input("Ежемесячный доход ($)", 0, 1000000, 5000)

        with col2:
            st.subheader("Платежи")
            if st.session_state.debt_method == _DEBT_SLIDER:
                monthly_debt = st.slider("Ежемесячные платежи ($)", 0, 10000, 1500)
            else:
                monthly_debt = st.number_input("Ежемесячные платежи ($)", 0, 1000000, 1500)

        st.subheader("📈 Использование лимитов")
        if st.session_state.util_method == _UTIL_NORMAL:
            utilization = st.slider("Процент использования", 0, 100, 20) / 100
        elif st.session_state.util_method == _UTIL_OVERDRAFT:
            utilization = st.slider("Процент использования", 100, 200, 120) / 100
        else:
            st.warning("Экстремальное использование (>200%) - автоматический ручной разбор")
            utilization = st.number_input("Процент использования", 200, 1000, 200) / 100

        submitted = st.form_submit_button("✅ Получить решение")

    second_model_name = _render_sidebar(models)

    if submitted:
        _process_application(preprocessor, scaler, models, second_model_name,
                             monthly_income, monthly_debt, utilization)

    if st.button("◀️ Вернуться к выбору способов"):
        st.session_state.step = 'input'
        st.rerun()


def main():
    """Render the two-step credit application page.

    Step ``input`` collects the questionnaire, step ``values`` collects the
    exact amounts, runs the escalation pipeline and shows the decision with
    its explanation. The current step and the counters live in
    ``st.session_state``.
    """
    st.title("🏦 Кредитный скоринг - Анкета")

    preprocessor, scaler, models = load_artifacts(MODELS_PATH, PREPROCESSOR_PATH)

    if 'stats' not in st.session_state:
        st.session_state.stats = _fresh_stats()

    if 'step' not in st.session_state:
        st.session_state.step = 'input'

    if st.session_state.step == 'input':
        _render_input_step()
    elif st.session_state.step == 'values':
        _render_values_step(preprocessor, scaler, models)

    st.markdown("---")
    col1, col2, col3 = st.columns([1, 2, 1])
    with col2:
        if st.button("🏠 На главную", use_container_width=True):
            st.switch_page("main.py")

    st.markdown("---")
    st.caption("🏦 GiveMeSomeCredit - Интерпретируемый кредитный скоринг | Модели: Logistic Regression + выбор")


if __name__ == "__main__":
    main()
def minutes_to_time(minutes, start_time="00:00"):
    """Convert minutes elapsed since ``start_time`` into an ``HH:MM`` string.

    Args:
        minutes: offset from the start in minutes; may be negative or
            fractional (fractions are floored to whole minutes).
        start_time: start of the timeline as ``"HH:MM"``.

    Returns:
        Wall-clock time as a zero-padded ``"HH:MM"`` string, wrapped around
        midnight.
    """
    start_hour, start_min = map(int, start_time.split(':'))
    # Floor to a plain int so that float / NumPy values can be formatted
    # with ``:02d``; integer inputs are unchanged.
    total_minutes = int((start_hour * 60 + start_min + minutes) // 1)
    hour, minute = divmod(total_minutes % (24 * 60), 60)
    return f"{hour:02d}:{minute:02d}"
============================================================================ + # БЛОК АНИМАЦИИ: Ограничение количества специалистов до 400 для таблицы 20x20 + # ============================================================================ + specialists_count = st.sidebar.slider("Количество специалистов (модели)", 10, 400, 100, 10) + # ============================================================================ + business_specialists_count = st.sidebar.slider("Количество экспертов (бизнес-правила)", 1, 100, 30, 1) + + business_time = st.sidebar.slider("Время обработки бизнес правил(мин)", 5, 30, 15, 5) + base_time = st.sidebar.slider("Базовое время обработки (мин)", 2, 15, 5) + + target_load = st.sidebar.slider( + "Целевая загрузка специалистов", 0.5, 1.0, 0.8, 0.05, + help="0.8 = 80% - оставляем запас на пики") + + st.sidebar.subheader("🎯 Порог одобрения") + fixed_threshold = st.sidebar.slider( + "Порог (фиксированный)", + 0.3, 0.7, 0.5, 0.05, + help="Порог одобрения - стратегический параметр, не меняется PID" + ) + + st.sidebar.subheader("🎯 Начальные отступы (%)") + + lr_low_pct = st.sidebar.slider("LR нижний отступ (% от порога)", 0, 100, 20, 5, + help="% от расстояния между 0 и порогом") + lr_high_pct = st.sidebar.slider("LR верхний отступ (% от 1-порога)", 0, 100, 20, 5, + help="% от расстояния между порогом и 1") + second_low_pct = st.sidebar.slider("Вторая модель нижний (%)", 0, 100, 20, 5) + second_high_pct = st.sidebar.slider("Вторая модель верхний (%)", 0, 100, 20, 5) + + # Преобразуем проценты в абсолютные значения + init_lr_low = fixed_threshold * lr_low_pct / 100 + init_lr_high = (1 - fixed_threshold) * lr_high_pct / 100 + init_second_low = fixed_threshold * second_low_pct / 100 + init_second_high = (1 - fixed_threshold) * second_high_pct / 100 + + # Параметры PID + st.sidebar.subheader("🎛️ PID регулятор") + use_pid = st.sidebar.checkbox("Включить PID", value=True) + + # ============================================================================ + # 
БЛОК АНИМАЦИИ: Переключатель для создания GIF + # ============================================================================ + st.sidebar.subheader("🎬 Анимация") + create_gif = st.sidebar.checkbox("Создать GIF после симуляции", value=False) + gif_fps = st.sidebar.slider("FPS для GIF", 5, 30, 10, 5) + # ============================================================================ + + if use_pid: + kp = st.sidebar.slider("P (пропорциональный)", 0.0, 1.0, 0.33) + ki = st.sidebar.slider("I (интегральный)", 0.0, 1.0, 0.03) + kd = st.sidebar.slider("D (дифференциальный)", 0.0, 1.0, 0.22) + w_load = st.sidebar.slider("Вес загрузки", 0.0, 1.0, 0.3) + + # Кнопка запуска + if st.button("🎬 Запустить симуляцию 24 часа"): + with st.spinner(f"Загрузка данных и симуляция..."): + # 1. Загружаем тестовый датасет + test_df = pd.read_csv(TEST_DATA_PATH) + if 'SeriousDlqin2yrs' in test_df.columns: + test_df = test_df.drop(columns=['SeriousDlqin2yrs']) + test_pool = test_df.to_dict('records') + + # 2. Генерируем распределение заявок по минутам + current_time = datetime.now() + start_hour = current_time.hour + start_minute = current_time.minute + + gen = TrafficGenerator(total_applications=len(test_pool)) + minute_counts = gen.generate_minute_counts(start_hour=start_hour, start_minute=start_minute) + + # Сохраняем для графиков + st.session_state.start_time = f"{start_hour:02d}:{start_minute:02d}" + st.session_state.minute_counts = minute_counts + + # 3. Создаём процессор + processor = ApplicationProcessor( + lr_model=models['Logistic Regression'], + second_model=models[second_model_name], + second_model_name=second_model_name, + specialists_count=specialists_count, + business_specialists_count=business_specialists_count, + base_processing_time=base_time, + business_processing_time=business_time + ) + + # 4. 
Создаём PID если нужно + if use_pid: + pid = PIDController( + init_threshold=fixed_threshold, + kp_load=kp, ki_load=ki, kd_load=kd, + load_weight=w_load, + init_lr_low=init_lr_low, + init_lr_high=init_lr_high, + init_second_low=init_second_low, + init_second_high=init_second_high, + target_load=target_load + ) + else: + pid = None + + # 5. Симуляция по минутам + pool_copy = test_pool.copy() + idx = 0 + progress_bar = st.progress(0) + n_steps = len(minute_counts) + + # ============================================================================ + # БЛОК АНИМАЦИИ: Сбор данных для кадров + # ============================================================================ + animation_frames = [] # список для хранения кадров анимации + # ============================================================================ + + for step, n_apps in enumerate(minute_counts): + # Берём заявки из пула + batch = pool_copy[idx:idx + n_apps] + idx += n_apps + + # Получаем текущие параметры + if pid: + margins = pid.get_margins() + lr_margins = [margins['lr_low'], margins['lr_high']] + second_margins = [margins['second_low'], margins['second_high']] + threshold = fixed_threshold + else: + lr_margins = [0.35] + second_margins = [0.4] + threshold = fixed_threshold + + # Обрабатываем батч + result = processor.process_batch( + batch, preprocessor, scaler, + threshold=threshold, + lr_margins=lr_margins, + second_margins=second_margins, + current_time=step + ) + + # Обновляем PID + if pid: + load = result['specialists_busy'] / specialists_count + pid.update(load) + + # ============================================================================ + # БЛОК АНИМАЦИИ: Сохраняем кадр каждые 10 минут (чтобы не было 1440 кадров) + # ============================================================================ + # --- Внутри цикла симуляции в simulation.py --- + # Записываем КАЖДУЮ минуту для плавности + if step % 1 == 0 or step == n_steps - 1: + specialist_states = processor.specialists.copy() + + 
frame_data = { + 'time': step, + 'step': step, # Добавь это поле для совместимости с кодом видео + 'time_str': minutes_to_time(step, st.session_state.start_time), + 'inflow': n_apps, + 'inflow_history': st.session_state.minute_counts[:step + 1], + 'load_history': [v / specialists_count for v in processor.stats['specialist_busy'][:step + 1]], + 'queue': result['queue_size'], + 'business_queue': result.get('business_queue_size', 0), + 'load': load if pid else 0, + 'specialist_states': specialist_states, + 'cumulative': { + 'total_processed': processor.stats['total_processed'], + 'auto_approved': processor.stats['auto_approved'], + 'auto_declined': processor.stats['auto_declined'], + 'manual_processed': processor.stats['manual_processed'], + 'business_manual_processed': processor.stats.get('business_manual_processed', 0) + } + } + animation_frames.append(frame_data) + # ============================================================================ + + # Обновляем прогресс + progress_bar.progress((step + 1) / n_steps) + + # 6. 
Сохраняем результаты + st.session_state.processor = processor + st.session_state.pid_history = pid.get_history() if pid else None + st.session_state.simulation_done = True + st.session_state.batch_stats = processor.batch_stats + # ============================================================================ + # БЛОК АНИМАЦИИ: Сохраняем кадры в session_state + # ============================================================================ + st.session_state.animation_frames = animation_frames + # ============================================================================ + + # Отображение результатов + if st.session_state.get('simulation_done', False): + st.success("✅ Симуляция завершена!") + + stats = st.session_state.processor.stats + + # Быстрая статистика + col1, col2, col3, col4, col5 = st.columns(5) + col1.metric("Всего заявок", stats['total_processed']) + col2.metric("Одобрено авто", stats['auto_approved']) + col3.metric("Отказ авто", stats['auto_declined']) + col4.metric("Ручной разбор", stats['manual_processed']) + manual_rate = stats['manual_sent'] / stats['total_processed'] * 100 if stats['total_processed'] > 0 else 0 + col5.metric("Ручной разбор %", f"{manual_rate:.1f}%") + + # Графики - ТОЛЬКО ВЫЗОВЫ ФУНКЦИЙ ИЗ plots.py + st.subheader("📈 Графики") + + # Очереди + st.pyplot(plot_queue_dynamics( + queue_history=stats['queue_history'], + business_queue_history=stats.get('business_queue_history'), + start_time=st.session_state.get('start_time', '00:00') + )) + plt.close() + + # Загрузка специалистов + st.pyplot(plot_specialist_load( + specialist_busy_history=stats['specialist_busy'], + specialists_count=specialists_count, + start_time=st.session_state.get('start_time', '00:00') + )) + plt.close() + st.pyplot(plot_inflow( + minute_counts=st.session_state.minute_counts, + start_time=st.session_state.get('start_time', '00:00') + )) + plt.close() + # Детальный анализ решений + st.pyplot(plot_detailed_decisions( + batch_stats=st.session_state.batch_stats, + 
second_model_name=second_model_name, + start_time=st.session_state.get('start_time', '00:00') + )) + plt.close() + # Параметры PID + st.pyplot(plot_parameters_history( + pid_history=st.session_state.pid_history, + second_model_name=second_model_name, + start_time=st.session_state.get('start_time', '00:00') + )) + plt.close() + # ============================================================================ + # НОВЫЙ БЛОК: Генерация видео (Стратегия для HuggingFace) + # ============================================================================ + if st.session_state.get('animation_frames'): + st.divider() + st.subheader("🎥 Настройки видео-отчета") + + col_v1, col_v2 = st.columns(2) + with col_v1: + # Слайдер для шага кадров (среза) + v_step = st.slider("Шаг кадров (1 = каждая минута)", 1, 30, 1, + help="Чем меньше шаг, тем плавнее видео, но дольше рендеринг") + with col_v2: + # Слайдер для FPS + v_fps = st.slider("Скорость видео (FPS)", 10, 60, 24, + help="Количество кадров в секунду") + + if st.button("🎬 Сгенерировать видео", type="primary", use_container_width=True): + with st.spinner("Рендеринг видео..."): + from app.simulation.visualization.animation import create_simulation_video + + # Используем выбранные в слайдерах параметры + video_path = create_simulation_video( + st.session_state.animation_frames[::v_step], + specialists_count, + second_model_name, + fps=v_fps # Передаем FPS в функцию + ) + st.video(video_path) + st.success("✅ Видео готово! 
Вы можете его скачать или перематывать.") + + # --- ВОТ ЭТОТ БЛОК У ТЕБЯ УЖЕ ЕСТЬ В КОНЦЕ ФАЙЛА --- + st.write("") + col1, col2, col3 = st.columns([1, 2, 1]) + with col2: + if st.button("🏠 На главную", use_container_width=True): + st.switch_page("main.py") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/app/simulation/.DS_Store b/app/simulation/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..35cb044494b4a3e60ca56fc179829cb2a84c6a4a Binary files /dev/null and b/app/simulation/.DS_Store differ diff --git a/app/simulation/__init__.py b/app/simulation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/simulation/__pycache__/__init__.cpython-311.pyc b/app/simulation/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f05221ac03b1518fa37b3dac5893eaed0f0eb44d Binary files /dev/null and b/app/simulation/__pycache__/__init__.cpython-311.pyc differ diff --git a/app/simulation/controllers/__init__.py b/app/simulation/controllers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/simulation/controllers/__pycache__/__init__.cpython-311.pyc b/app/simulation/controllers/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aaf94963a46d407448974f5efa0ac375e4d71142 Binary files /dev/null and b/app/simulation/controllers/__pycache__/__init__.cpython-311.pyc differ diff --git a/app/simulation/controllers/__pycache__/base.cpython-311.pyc b/app/simulation/controllers/__pycache__/base.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0c0626648814d15d4fc343b866f96416e903e85f Binary files /dev/null and b/app/simulation/controllers/__pycache__/base.cpython-311.pyc differ diff --git 
from abc import ABC, abstractmethod

import numpy as np
import pandas as pd


class BaseController(ABC):
    """Base class for all controllers."""

    def __init__(self, name="Base"):
        self.name = name
        self.history = []

    @abstractmethod
    def update(self, current_state, target_state, dt=1.0):
        """Compute new control parameters.

        Args:
            current_state: Current state of the system (queue, load).
            target_state: Desired state.
            dt: Time step.

        Returns:
            The new thresholds and margins.
        """
        pass

    def get_margins(self, hour=None):
        """Return the current margins for LR and the second model.

        The base implementation returns ``None``; subclasses override it.
        """
        pass


class PIDController(BaseController):
    """PID controller that adapts the review margins from the specialists' load."""

    # Scale applied to the controller output before it is added to every margin.
    MARGIN_GAIN = 0.1

    def __init__(self, name="PID",
                 kp_load=0.1, ki_load=0.01, kd_load=0.05,
                 load_weight=1.0,
                 # Initial parameter values
                 init_threshold=0.5,
                 init_lr_low=0.3, init_lr_high=0.4,
                 init_second_low=0.35, init_second_high=0.45,
                 target_load=0.8):
        super().__init__(name)

        # PID gains for the load error
        self.kp_load = kp_load
        self.ki_load = ki_load
        self.kd_load = kd_load

        self.load_weight = load_weight
        self.target_load = target_load

        # PID state
        self.prev_error_load = 0
        self.integral_load = 0

        # Initial parameters, kept for reference
        self.init_threshold = init_threshold
        self.init_lr_low = init_lr_low
        self.init_lr_high = init_lr_high
        self.init_second_low = init_second_low
        self.init_second_high = init_second_high

        # Current parameters (margins). The threshold is stored but never adapted here.
        self.threshold = init_threshold
        self.lr_low = init_lr_low
        self.lr_high = init_lr_high
        self.second_low = init_second_low
        self.second_high = init_second_high

        # Allowed range of every margin: low margins are limited by the distance
        # threshold -> 0, high margins by the distance threshold -> 1.
        self.bounds = {
            'lr_low': (0.05, self.threshold - 0.05),
            'lr_high': (0.05, 1 - self.threshold - 0.05),
            'second_low': (0.05, self.threshold - 0.05),
            'second_high': (0.05, 1 - self.threshold - 0.05)
        }

        # Anti-windup clamp for the integral term
        self.integral_limit = 1.0

    def update(self, current_load, target_state=None, dt=1.0):
        """Advance the controller by one step and return the adapted margins.

        The signature is compatible with ``BaseController.update``; calling
        ``update(current_load)`` behaves exactly as before.

        Args:
            current_load: Current specialists' load (0-1).
            target_state: Optional target load for this step; ``None`` uses
                ``self.target_load``.
            dt: Time step; scales the integral and derivative terms.

        Returns:
            Dict with ``lr_low``, ``lr_high``, ``second_low``, ``second_high``.

        Raises:
            ValueError: If ``dt`` is not positive.
        """
        if dt <= 0:
            raise ValueError("dt must be positive")

        target = self.target_load if target_state is None else target_state

        # Positive error = specialists are under-loaded, so the margins widen
        # and more applications go to manual review.
        error_load = target - current_load

        P_load = self.kp_load * error_load
        self.integral_load += error_load * dt
        self.integral_load = np.clip(self.integral_load, -self.integral_limit, self.integral_limit)
        I_load = self.ki_load * self.integral_load
        D_load = self.kd_load * (error_load - self.prev_error_load) / dt
        self.prev_error_load = error_load

        output = self.load_weight * (P_load + I_load + D_load)

        self._update_parameters(output)

        # History is recorded after the margins were adapted.
        self.history.append({
            'time': len(self.history),
            'error_load': error_load,
            'output': output,
            'threshold': self.threshold,
            'lr_low': self.lr_low,
            'lr_high': self.lr_high,
            'second_low': self.second_low,
            'second_high': self.second_high,
            'load': current_load,
        })

        return self.get_margins()

    def _update_parameters(self, output):
        """Shift every margin by the scaled controller output and clip it to its bounds."""
        delta = output * self.MARGIN_GAIN
        for margin_name, (lower, upper) in self.bounds.items():
            shifted = getattr(self, margin_name) + delta
            setattr(self, margin_name, np.clip(shifted, lower, upper))

    def get_margins(self, hour=None):
        """Return the current margins; ``hour`` is accepted for interface compatibility and unused."""
        return {
            'lr_low': self.lr_low,
            'lr_high': self.lr_high,
            'second_low': self.second_low,
            'second_high': self.second_high
        }

    def get_history(self):
        """Return the recorded history as a DataFrame, one row per ``update`` call."""
        return pd.DataFrame(self.history)
import numpy as np
import pandas as pd


def processing_time_function(lr_proba, second_proba, threshold=0.5, base_time=5,
                             lr_weight=1.0, second_weight=1.5):
    """Draw a manual-review processing time (in minutes) for one application.

    The closer the weighted model probability is to the threshold, the more
    uncertain the case and the longer the expected handling time (from
    ``base_time`` up to ``4 * base_time``). The time itself is sampled from
    an exponential distribution with that mean.

    Args:
        lr_proba: Probability from the logistic regression, or ``None`` if unavailable.
        second_proba: Probability from the second model, or ``None`` if unavailable.
        threshold: Approval threshold.
        base_time: Base processing time in minutes.
        lr_weight: Weight of ``lr_proba`` in the blended probability.
        second_weight: Weight of ``second_proba`` in the blended probability.

    Returns:
        Processing time in minutes, never less than 1.
    """
    # Queue entries may carry None for a model that was not consulted; blend only
    # the probabilities that exist instead of failing on None * weight.
    scored = [(proba, weight)
              for proba, weight in ((lr_proba, lr_weight), (second_proba, second_weight))
              if proba is not None]
    total_weight = sum(weight for _, weight in scored)
    if scored and total_weight > 0:
        proba = sum(p * w for p, w in scored) / total_weight
    else:
        # Nothing to blend: treat the case as maximally uncertain.
        proba = threshold

    margin = abs(proba - threshold)
    max_margin = max(threshold, 1 - threshold)
    # Clamped so an out-of-range probability cannot produce a negative scale.
    uncertainty = min(1.0, max(0.0, 1 - (margin / max_margin)))

    mean_time = base_time * (1 + 3 * uncertainty)
    processing_time = np.random.exponential(scale=mean_time)

    return max(1, processing_time)


class ApplicationProcessor:
    """Minute-by-minute simulator of application routing and manual review.

    New applications pass business rules first (manual review by experts,
    automatic rejection, or hand-over to the models). Model-routed
    applications are either decided automatically or queued for specialists.
    Two independent pools (specialists / business experts) drain two queues.
    """

    # Columns passed to the models, in the order the original frame was built.
    _MODEL_FEATURES = (
        'RevolvingUtilizationOfUnsecuredLines',
        'age',
        'NumberOfTime30-59DaysPastDueNotWorse',
        'DebtRatio',
        'MonthlyIncome',
        'NumberOfOpenCreditLinesAndLoans',
        'NumberOfTimes90DaysLate',
        'NumberRealEstateLoansOrLines',
        'NumberOfTime60-89DaysPastDueNotWorse',
        'NumberOfDependents',
    )
    # Columns whose missing values are replaced by 0 before scoring.
    _ZERO_FILLED_FEATURES = ('DebtRatio', 'MonthlyIncome', 'NumberOfDependents')

    def __init__(self, lr_model, second_model, second_model_name,
                 specialists_count=5,  # main specialists (model escalations)
                 business_specialists_count=2,  # experts (business rules)
                 base_processing_time=5,
                 business_processing_time=10,  # experts take longer
                 lr_weight=1.0, second_weight=1.5):
        self.lr_model = lr_model
        self.second_model = second_model
        self.second_model_name = second_model_name
        self.specialists_count = specialists_count
        self.business_specialists_count = business_specialists_count
        self.base_processing_time = base_processing_time
        self.business_processing_time = business_processing_time
        self.lr_weight = lr_weight
        self.second_weight = second_weight

        # Remaining busy minutes per worker; 0 means free.
        self.specialists = [0] * specialists_count
        self.business_specialists = [0] * business_specialists_count  # separate pool
        self.manual_queue = []  # queue fed by the models
        self.business_queue = []  # queue fed by the business rules

        self.stats = {
            'total_processed': 0,
            'auto_approved': 0,
            'auto_declined': 0,
            'manual_sent': 0,
            'manual_processed': 0,
            'business_manual_sent': 0,
            'business_manual_processed': 0,
            'queue_history': [],
            'business_queue_history': [],
            'wait_times': [],
            'business_wait_times': [],
            'specialist_busy': [],
            'business_specialist_busy': [],
            'business_rules_manual': 0,
            'business_rules_auto': 0
        }
        self.batch_stats = []

    def process_batch(self, applications_batch, preprocessor, scaler,
                      threshold, lr_margins, second_margins, current_time):
        """Process the applications that arrived during the current minute.

        Unlike the previous version, queued work is handed to free workers
        even when no new applications arrive in this minute.

        Args:
            applications_batch: List of application dicts (may be empty).
            preprocessor: Passed through to ``escalation_decision``.
            scaler: Passed through to ``escalation_decision``.
            threshold: Approval threshold.
            lr_margins: Margins for the logistic regression.
            second_margins: Margins for the second model.
            current_time: Current simulation minute.

        Returns:
            Dict with this minute's counters; the busy counts are taken at
            the start of the minute, before timers tick and work is assigned.
        """
        minute_results = {
            'new_apps': len(applications_batch),
            'auto_decisions': [],
            'new_manual': 0,
            'new_business_manual': 0,
            'processed_manual': 0,
            'processed_business_manual': 0,
            'queue_size': 0,
            'business_queue_size': 0,
            'specialists_busy': sum(1 for s in self.specialists if s > 0),
            'business_specialists_busy': sum(1 for s in self.business_specialists if s > 0),
            'business_rules': 0
        }

        # 1. One minute passes for every worker.
        self.specialists = [max(0, s - 1) for s in self.specialists]
        self.business_specialists = [max(0, s - 1) for s in self.business_specialists]

        # 2-5. Route new arrivals; an empty minute adds no batch_stats row.
        if applications_batch:
            self._route_new_applications(
                applications_batch, preprocessor, scaler,
                threshold, lr_margins, second_margins, current_time, minute_results
            )

        # 6-7. Hand queued applications to free workers. This must also run on
        # empty minutes, otherwise the queues never drain while traffic is idle.
        self._dispatch_business_queue(current_time, minute_results)
        self._dispatch_manual_queue(current_time, threshold, minute_results)

        minute_results['queue_size'] = len(self.manual_queue)
        minute_results['business_queue_size'] = len(self.business_queue)
        self.stats['queue_history'].append(len(self.manual_queue))
        self.stats['business_queue_history'].append(len(self.business_queue))
        self.stats['specialist_busy'].append(minute_results['specialists_busy'])
        self.stats['business_specialist_busy'].append(minute_results['business_specialists_busy'])

        return minute_results

    def _route_new_applications(self, applications_batch, preprocessor, scaler,
                                threshold, lr_margins, second_margins,
                                current_time, minute_results):
        """Apply business rules and models to a non-empty batch and fill queues/stats."""
        # Imported here so the module can be loaded without the project package.
        from app.models.escalation import check_business_rules, escalation_decision

        df = pd.DataFrame(applications_batch)

        # Business rules are evaluated for the whole batch at once.
        manual_mask, auto_reject_mask, messages, auto_decisions = check_business_rules(df)
        business_manual_count = manual_mask.sum()
        business_auto_count = auto_reject_mask.sum()

        model_indices = []
        for idx, application in enumerate(applications_batch):
            if manual_mask[idx]:
                # Manual review by business experts: separate queue.
                self.business_queue.append({
                    'app': application,
                    'arrival_time': current_time,
                    'reason': 'business_rules',
                    'message': messages[idx],
                    'lr_proba': None,
                    'second_proba': None
                })
                minute_results['new_business_manual'] += 1
                minute_results['business_rules'] += 1
                self.stats['business_rules_manual'] += 1
                self.stats['business_manual_sent'] += 1
            elif auto_reject_mask[idx]:
                # Automatic rejection by business rules.
                minute_results['auto_decisions'].append({
                    'final_decision': auto_decisions[idx],
                    'model_used': 'Business Rules',
                    'probability': 1.0,
                    'needs_review': False,
                    'message': messages[idx]
                })
                self.stats['auto_declined'] += 1
                self.stats['business_rules_auto'] += 1
                self.stats['total_processed'] += 1
            else:
                model_indices.append(idx)

        lr_confident_count = 0
        second_confident_count = 0
        second_uncertain_count = 0

        if model_indices:
            # Only applications that passed the business rules reach the models.
            model_df = df.iloc[model_indices][list(self._MODEL_FEATURES)].copy()
            zero_filled = list(self._ZERO_FILLED_FEATURES)
            model_df[zero_filled] = model_df[zero_filled].fillna(0)

            batch_decisions, batch_manual_mask, model_stats = escalation_decision(
                model_df,
                self.lr_model,
                self.second_model,
                self.second_model_name,
                threshold=threshold,
                lr_margins=lr_margins,
                second_margins=second_margins,
                preprocessor=preprocessor,
                scaler=scaler
            )

            lr_confident_count = model_stats['lr_confident']
            second_confident_count = model_stats['second_confident']
            second_uncertain_count = model_stats['second_uncertain']

            # Map batch-local results back to the original positions.
            for local_idx, orig_idx in enumerate(model_indices):
                decision = batch_decisions[local_idx]

                if decision['needs_review']:
                    self.manual_queue.append({
                        'app': applications_batch[orig_idx],
                        'arrival_time': current_time,
                        'reason': 'model_uncertainty',
                        'decision': decision,
                        'lr_proba': decision.get('lr_proba'),
                        'second_proba': decision.get('second_proba')
                    })
                    minute_results['new_manual'] += 1
                    self.stats['manual_sent'] += 1
                else:
                    minute_results['auto_decisions'].append(decision)
                    if decision['final_decision'] == 0:
                        self.stats['auto_approved'] += 1
                    else:
                        self.stats['auto_declined'] += 1

                self.stats['total_processed'] += 1

        self.batch_stats.append({
            'time': current_time,
            'business_manual': business_manual_count,
            'business_auto': business_auto_count,
            'lr_confident': lr_confident_count,
            'second_confident': second_confident_count,
            'second_uncertain': second_uncertain_count,
            'total_in_batch': len(applications_batch),
            'new_manual': minute_results['new_manual'],
            'new_business_manual': minute_results['new_business_manual'],
            'auto_total': len(minute_results['auto_decisions'])
        })

    def _dispatch_business_queue(self, current_time, minute_results):
        """Give queued business-rule cases to free experts, oldest first."""
        for i in range(self.business_specialists_count):
            if not self.business_queue:
                break
            if self.business_specialists[i] > 0:
                continue
            next_app = self.business_queue.pop(0)
            self.stats['business_wait_times'].append(current_time - next_app['arrival_time'])
            # Experts spend a fixed time on every case.
            self.business_specialists[i] = self.business_processing_time
            minute_results['processed_business_manual'] += 1
            self.stats['business_manual_processed'] += 1

    def _dispatch_manual_queue(self, current_time, threshold, minute_results):
        """Give queued model escalations to free specialists, oldest first."""
        for i in range(self.specialists_count):
            if not self.manual_queue:
                break
            if self.specialists[i] > 0:
                continue
            next_app = self.manual_queue.pop(0)
            self.stats['wait_times'].append(current_time - next_app['arrival_time'])

            if next_app['reason'] == 'business_rules':
                proc_time = self.business_processing_time
            else:
                # Stored probabilities may be None; processing_time_function copes with that.
                proc_time = processing_time_function(
                    lr_proba=next_app.get('lr_proba', 0.5),
                    second_proba=next_app.get('second_proba', 0.5),
                    threshold=threshold,
                    base_time=self.base_processing_time,
                    lr_weight=self.lr_weight,
                    second_weight=self.second_weight
                )

            self.specialists[i] = proc_time
            minute_results['processed_manual'] += 1
            self.stats['manual_processed'] += 1

    def load_test_dataset(self, filepath):
        """Read a CSV and return its rows as dicts, without the target column if present."""
        df = pd.read_csv(filepath)
        if 'SeriousDlqin2yrs' in df.columns:
            df = df.drop(columns=['SeriousDlqin2yrs'])
        return df.to_dict('records')

    def get_queue_stats(self):
        """Return current queue sizes, wait-time aggregates and recorded histories."""
        if self.stats['wait_times']:
            avg_wait = np.mean(self.stats['wait_times'])
            max_wait = np.max(self.stats['wait_times'])
        else:
            avg_wait = max_wait = 0

        if self.stats['business_wait_times']:
            avg_business_wait = np.mean(self.stats['business_wait_times'])
            max_business_wait = np.max(self.stats['business_wait_times'])
        else:
            avg_business_wait = max_business_wait = 0

        return {
            'current_queue': len(self.manual_queue),
            'current_business_queue': len(self.business_queue),
            'avg_wait_minutes': avg_wait,
            'max_wait_minutes': max_wait,
            'avg_business_wait_minutes': avg_business_wait,
            'max_business_wait_minutes': max_business_wait,
            'queue_history': self.stats['queue_history'],
            'business_queue_history': self.stats['business_queue_history'],
            'specialist_busy': self.stats['specialist_busy'],
            'business_specialist_busy': self.stats['business_specialist_busy'],
            'business_rules_split': {
                'manual': self.stats['business_rules_manual'],
                'auto': self.stats['business_rules_auto']
            }
        }
пораньше и пошире + 'afternoon_dip_center': 15.5, 'afternoon_dip_strength': 0.3, 'afternoon_dip_width': 1.5, + # провал после обеда + 'noise_level': 0.1 + } + + def _time_to_hours(self, time_tuple): + """Переводит (часы, минуты) в часы с дробной частью""" + return time_tuple[0] + time_tuple[1] / 60 + + def loan_intensity_periodic(self, t, impulses=None): + """ + Функция интенсивности с провалом после обеда + + t: время в часах (может быть дробным) + impulses: список словарей вида + [{'time': (16, 37), 'strength': 2.0}, ...] # время как (часы, минуты) + """ + t_cycle = t % 24 + + bg = self.intensity_params['background'] + + # Утренне-дневной пик (13:00) + day = self.intensity_params['day_amplitude'] * np.exp( + -(t_cycle - self.intensity_params['day_center']) ** 2 / + (2 * self.intensity_params['day_width'] ** 2) + ) + + # Вечерний пик (19:30) + evening_diff = np.minimum( + np.abs(t_cycle - self.intensity_params['evening_center']), + np.abs(t_cycle - self.intensity_params['evening_center'] + 24) + ) + evening = self.intensity_params['evening_amplitude'] * np.exp( + -(evening_diff) ** 2 / (2 * self.intensity_params['evening_width'] ** 2) + ) + + # Провал после обеда (15:30) + dip_diff = np.minimum( + np.abs(t_cycle - self.intensity_params['afternoon_dip_center']), + np.abs(t_cycle - self.intensity_params['afternoon_dip_center'] + 24) + ) + dip = -self.intensity_params['afternoon_dip_strength'] * np.exp( + -(dip_diff) ** 2 / (2 * self.intensity_params['afternoon_dip_width'] ** 2) + ) + + intensity = bg + day + evening + dip + intensity = np.maximum(intensity, 0.05) # не ниже минимума + + # Шум + if self.intensity_params['noise_level'] > 0: + noise = 1.0 + np.random.uniform( + -self.intensity_params['noise_level'], + self.intensity_params['noise_level'] + ) + intensity *= noise + + # Импульсы + if impulses: + for imp in impulses: + imp_time = self._time_to_hours(imp['time']) % 24 + # Используем гауссиану для плавного импульса (ширина ~30 минут) + imp_diff = np.minimum( 
+ np.abs(t_cycle - imp_time), + np.abs(t_cycle - imp_time + 24) + ) + imp_factor = 1.0 + imp['strength'] * np.exp(-(imp_diff) ** 2 / (2 * 0.25 ** 2)) + intensity *= imp_factor + + return intensity + + def generate_minute_counts(self, start_hour=None, start_minute=0, impulses=None): + """ + Возвращает массив количества заявок на каждую минуту (1440 значений) + + start_hour: час старта (по умолчанию текущий) + start_minute: минута старта + impulses: список импульсов, например: + [{'time': (5, 30), 'strength': 2.0}, ...] # импульс в 5:30 силой 2.0 + """ + if start_hour is None: + now = datetime.now() + start_hour = now.hour + start_minute = now.minute + + start_time = start_hour + start_minute / 60 + + # Массив минут (от start_time до start_time + 24) + minutes = np.arange(0, 24, 1 / 60) + intensity_values = np.array([ + self.loan_intensity_periodic(start_time + m, impulses) + for m in minutes + ]) + + total_intensity = np.sum(intensity_values) + scale_factor = self.total / total_intensity + + minute_counts = np.floor(intensity_values * scale_factor).astype(int) + + # Распределяем остаток (чтоб точно сошлось общее число) + total_assigned = np.sum(minute_counts) + if total_assigned < self.total: + remainder = self.total - total_assigned + top_minutes = np.argsort(intensity_values)[-remainder:] + minute_counts[top_minutes] += 1 + + return minute_counts + + def generate_hourly_counts(self, start_hour=None, start_minute=0, impulses=None): + """ + Возвращает массив количества заявок по часам (24 значения) + """ + minute_counts = self.generate_minute_counts(start_hour, start_minute, impulses) + hourly_counts = [np.sum(minute_counts[i * 60:(i + 1) * 60]) for i in range(24)] + return hourly_counts + + def generate_random_impulses(self, n_impulses=1, min_strength=1.5, max_strength=3.0): + """ + Генерирует случайные импульсы + """ + impulses = [] + for _ in range(n_impulses): + hour = np.random.randint(0, 24) + minute = np.random.randint(0, 60) + strength = 
np.random.uniform(min_strength, max_strength) + impulses.append({'time': (hour, minute), 'strength': strength}) + return impulses + + def plot_distribution(self, start_hour=None, start_minute=0, impulses=None): + """Строит график распределения заявок по часам""" + hourly_counts = self.generate_hourly_counts(start_hour, start_minute, impulses) + + if start_hour is None: + start_hour = datetime.now().hour + + hours = [(start_hour + i) % 24 for i in range(24)] + sorted_pairs = sorted(zip(hours, hourly_counts)) + hours_sorted, counts_sorted = zip(*sorted_pairs) + + plt.figure(figsize=(14, 6)) + + # Цвета в зависимости от времени суток + colors = [] + for h in hours_sorted: + if 0 <= h <= 5: + colors.append('#2c3e50') # ночь + elif 6 <= h <= 11: + colors.append('#3498db') # утро + elif 12 <= h <= 16: + colors.append('#f39c12') # день (с провалом) + else: + colors.append('#e67e22') # вечер + + bars = plt.bar([str(h) for h in hours_sorted], counts_sorted, + alpha=0.8, color=colors, edgecolor='black', linewidth=1) + + # Средняя линия + mean_val = np.mean(counts_sorted) + plt.axhline(y=mean_val, color='red', linestyle='--', + alpha=0.7, linewidth=2, label=f'Среднее: {mean_val:.0f}') + + # Отметим импульсы на графике + if impulses: + for imp in impulses: + imp_hours = self._time_to_hours(imp['time']) % 24 + # Найдём ближайший час + closest_hour = min(hours_sorted, key=lambda x: abs(x - imp_hours)) + idx = list(hours_sorted).index(closest_hour) + plt.plot(idx, counts_sorted[idx], 'g*', markersize=15, + label=f'Импульс {imp["strength"]:.1f}x' if idx == 0 else '') + + # Отметим провал после обеда + dip_idx = [i for i, h in enumerate(hours_sorted) if 14 <= h <= 16] + if dip_idx: + plt.axvspan(dip_idx[0] - 0.4, dip_idx[-1] + 0.4, alpha=0.2, color='gray', + label='Послеобеденный спад') + + plt.xlabel('Час', fontsize=12) + plt.ylabel('Количество заявок', fontsize=12) + plt.title(f'Распределение заявок по часам (старт в {start_hour:02d}:{start_minute:02d})', + fontsize=14, 
fontweight='bold') + plt.grid(True, alpha=0.3, axis='y') + plt.legend(loc='upper right') + plt.xticks(rotation=45) + plt.tight_layout() + plt.show() + + # Статистика + print("\n📊 Статистика распределения:") + print(f" Всего заявок: {sum(counts_sorted)}") + print(f" Среднее: {mean_val:.0f} заявок/час") + print(f" Максимум: {max(counts_sorted)} заявок") + print(f" Минимум: {min(counts_sorted)} заявок") + + return hours_sorted, counts_sorted + + +# Пример использования +# if __name__ == "__main__": +# # Создаём генератор +# gen = TrafficGenerator(total_applications=110000) +# +# # 1. Без импульсов +# print("Без импульсов:") +# counts = gen.generate_minute_counts(start_hour=17) +# print(f"Всего минут: {len(counts)}") +# print(f"Всего заявок: {sum(counts)}") +# +# # 2. С импульсом в 5:30 утра +# impulses = [{'time': (5, 30), 'strength': 2.0}] +# print("\nС импульсом в 5:30:") +# counts = gen.generate_minute_counts(start_hour=17, impulses=impulses) +# +# # 3. Построить график +# gen.plot_distribution(start_hour=17, impulses=impulses) +# +# # 4. 
Случайные импульсы +# random_impulses = gen.generate_random_impulses(n_impulses=2) +# print("\nСлучайные импульсы:", random_impulses) +# gen.plot_distribution(start_hour=17, impulses=random_impulses) \ No newline at end of file diff --git a/app/simulation/visualization/__init__.py b/app/simulation/visualization/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/simulation/visualization/__pycache__/__init__.cpython-311.pyc b/app/simulation/visualization/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ef3a2640a504433372534c26fe12298bc1f56c9e Binary files /dev/null and b/app/simulation/visualization/__pycache__/__init__.cpython-311.pyc differ diff --git a/app/simulation/visualization/__pycache__/animation.cpython-311.pyc b/app/simulation/visualization/__pycache__/animation.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0a02b8bc0fd32084f4ce63f1dff6685369a7ae31 Binary files /dev/null and b/app/simulation/visualization/__pycache__/animation.cpython-311.pyc differ diff --git a/app/simulation/visualization/__pycache__/plots.cpython-311.pyc b/app/simulation/visualization/__pycache__/plots.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..79adb43bda8faca1c746dd259a06491403b4d386 Binary files /dev/null and b/app/simulation/visualization/__pycache__/plots.cpython-311.pyc differ diff --git a/app/simulation/visualization/animation.py b/app/simulation/visualization/animation.py new file mode 100644 index 0000000000000000000000000000000000000000..dfa4da9ec4166c1715ea716815f0eb56c6cafe86 --- /dev/null +++ b/app/simulation/visualization/animation.py @@ -0,0 +1,246 @@ +import plotly.graph_objects as go +from plotly.subplots import make_subplots +import numpy as np + + +def minutes_to_time(minutes, start_time="00:00"): + start_hour, start_min = map(int, 
start_time.split(':')) + total_minutes = start_hour * 60 + start_min + minutes + hour = (total_minutes // 60) % 24 + minute = total_minutes % 60 + return f"{hour:02d}:{minute:02d}" + + +def create_animation_frame_plotly(frame_data, specialists_count, second_model_name="XGBoost"): + # Фиксированная ось X для графиков + time_ticks = list(range(0, 1441, 180)) + time_labels = [minutes_to_time(t, "00:00") for t in time_ticks] + + fig = make_subplots( + rows=3, cols=2, + subplot_titles=('📈 Динамика входящего потока', '⚙️ Загрузка специалистов (%)', + '👥 МОНИТОРИНГ РАБОТЫ СПЕЦИАЛИСТОВ', '', + '📊 Сводная статистика обработки', '🎯 Оперативные показатели'), + specs=[ + [{'type': 'scatter'}, {'type': 'scatter'}], + [{'type': 'heatmap', 'colspan': 2}, None], + [{'type': 'table'}, {'type': 'scatter'}] + ], + row_heights=[0.25, 0.40, 0.35], + vertical_spacing=0.1, + ) + + # --- РЯД 1: ГРАФИКИ --- + inflow_h = frame_data.get('inflow_history', []) + load_h = frame_data.get('load_history', []) + + fig.add_trace(go.Scatter(y=inflow_h, fill='tozeroy', line=dict(color='#4361ee', width=2)), row=1, col=1) + fig.add_trace(go.Scatter(y=[l * 100 for l in load_h], fill='tozeroy', line=dict(color='#4cc9f0', width=2)), row=1, + col=2) + + for col in [1, 2]: + fig.update_xaxes(range=[0, 1440], tickvals=time_ticks, ticktext=time_labels, row=1, col=col) + fig.update_yaxes(rangemode="tozero", row=1, col=col) + + # --- РЯД 2: HEATMAP (Строго 20 ячеек в ширину) --- + states = np.array(frame_data['specialist_states']) + cols = 20 + rows = int(np.ceil(specialists_count / cols)) + + # Создаем матрицу, заполненную None (или NaN), чтобы пустые места не красились + z_matrix = np.full((rows, cols), np.nan) + for i, val in enumerate(states): + r, c = divmod(i, cols) + # Мапим значения: 0 -> 0.1 (голубой), 1-3 -> 0.4 (зеленый) и т.д. 
+ if val == 0: + z_matrix[r, c] = 0.1 + elif val <= 3: + z_matrix[r, c] = 0.4 + elif val <= 7: + z_matrix[r, c] = 0.7 + else: + z_matrix[r, c] = 1.0 + + # Настраиваем цвета: NaN будет прозрачным/фоновым + colorscale = [ + [0.0, '#66ccff'], # Свободен (0) + [0.4, '#4ade80'], # 1-3 мин + [0.7, '#facc15'], # 4-7 мин + [1.0, '#f87171'] # 8+ мин + ] + + fig.add_trace(go.Heatmap( + z=z_matrix, colorscale=colorscale, showscale=False, + xgap=2, ygap=2, zmin=0, zmax=1, hoverinfo='none' + ), row=2, col=1) + + # Легенда над хитмапом + free = sum(1 for t in states if t <= 0) + legend = (f"Свободно: {free} | Свободен " + f" 1-3м 4-7м " + f" 8м+") + fig.add_annotation(text=legend, xref="paper", yref="paper", x=0.5, y=0.70, showarrow=False, font=dict(size=14)) + + # --- РЯД 3: ТАБЛИЦА (Формальная) --- + cum = frame_data['cumulative'] + fig.add_trace(go.Table( + header=dict(values=['Параметр', 'Значение'], fill_color='#1e293b', font=dict(color='white', size=15), + height=35), + cells=dict(values=[ + ['✅ Авто-одобрено', '❌ Авто-отказы', '👤 На рассмотрении (Manual)', 'ИТОГО ОБРАБОТАНО'], + [cum['auto_approved'], cum['auto_declined'], + cum['manual_processed'] + cum['business_manual_processed'], f"{cum['total_processed']}"] + ], align='left', font=dict(size=14), height=35, fill_color='#f8f9fa') + ), row=3, col=1) + + # --- ОПЕРАТИВНЫЕ ПОКАЗАТЕЛИ (Крупный заголовок) --- + q_models = frame_data['queue'] # Очередь к спецам + q_business = frame_data.get('business_queue', 0) # Бизнес-очередь + + # Расчет ожидания только для очереди моделей (как на левом графике) + avg_w = frame_data.get('avg_wait', 0) + + status_card = ( + f"МОНИТОРИНГ

" + f"" + f"👤 ОЧЕРЕДЬ (СПЕЦ): {q_models}

" + f"" + f"⚙️ Бизнес-правила: {q_business}

" + f"🕒 Время: {frame_data['time_str']}
" + f"⏳ Ожидание: {avg_w:.1f} мин" + ) + + fig.add_trace(go.Scatter(x=[0], y=[0], mode='text', text=[status_card], textfont=dict(size=16)), row=3, col=2) + + # Очистка осей + fig.update_xaxes(visible=False, row=2, col=1); + fig.update_yaxes(visible=False, row=2, col=1) + fig.update_xaxes(visible=False, row=3, col=2); + fig.update_yaxes(visible=False, row=3, col=2) + + # Фиксируем оси, чтобы график не "дышал" (это главная причина мерцания) + fig.update_yaxes(range=[0, 60], row=1, col=1) # Замени 60 на твой макс. поток + fig.update_yaxes(range=[0, 105], row=1, col=2) # Загрузка всегда до 100% + + fig.update_layout( + height=950, + margin=dict(t=80, b=40, l=50, r=50), + template="plotly_white", + showlegend=False, + # ОТКЛЮЧАЕМ анимации переходов, которые создают эффект мигания + transition_duration=0, + hovermode=False + ) + + # Это заставит Plotly обновлять только данные, не перерисовывая всё полотно + fig.layout.datarevision = frame_data['time'] + return fig + + +from matplotlib.animation import FFMpegWriter + +import matplotlib.pyplot as plt +import matplotlib.animation as animation +import tempfile +import numpy as np + +import matplotlib.pyplot as plt +import matplotlib.animation as animation +import tempfile +import numpy as np +import os + + +# Внести изменения в функцию create_simulation_video в animation.py +def create_simulation_video(frames, specialists_count, second_model_name, fps=24): + if not frames: + return None + + # Настройка стиля + plt.style.use('seaborn-v0_8-whitegrid') + fig, axes = plt.subplots(2, 2, figsize=(16, 10), facecolor='#f8f9fa') + plt.subplots_adjust(hspace=0.4, wspace=0.25) + plt.close() + + def update(i): + data = frames[i] + for ax in axes.flatten(): + ax.clear() + ax.set_facecolor('white') + + # 1. 
ДИНАМИКА ПОТОКА (Локализация) + y_inflow = data['inflow_history'] + axes[0, 0].fill_between(range(len(y_inflow)), y_inflow, color='#4361ee', alpha=0.3) + axes[0, 0].plot(range(len(y_inflow)), y_inflow, color='#4361ee', linewidth=2) + axes[0, 0].set_xlim(0, 1440) # Фиксация оси времени + axes[0, 0].set_title("ДИНАМИКА ПОТОКА (заявок/мин)", fontsize=12, fontweight='bold') + axes[0, 0].set_xlabel("Минуты симуляции") + + # 2. ЗАГРУЗКА СИСТЕМЫ + y_load = [v * 100 for v in data['load_history']] + axes[0, 1].fill_between(range(len(y_load)), y_load, color='#4cc9f0', alpha=0.3) + axes[0, 1].plot(range(len(y_load)), y_load, color='#4cc9f0', linewidth=2) + axes[0, 1].axhline(y=80, color='#f72585', linestyle='--', alpha=0.6) + axes[0, 1].set_xlim(0, 1440) + axes[0, 1].set_ylim(0, 110) + axes[0, 1].set_title(f"ЗАГРУЖЕННОСТЬ СПЕЦИАЛИСТОВ %: {y_load[-1]:.1f}%", fontsize=12, fontweight='bold') + + # 3. HEATMAP И ЛЕГЕНДА (Возвращаем информативность) + states = np.array(data['specialist_states']) + cols = 20 + rows = int(np.ceil(specialists_count / cols)) + z = np.zeros((rows, cols)) + for idx, val in enumerate(states[:rows * cols]): + z[idx // cols, idx % cols] = val + + im = axes[1, 0].imshow(z, cmap='RdYlGn_r', aspect='auto', vmin=0, vmax=10) + axes[1, 0].set_title(f"МОНИТОРИНГ: {specialists_count} СПЕЦИАЛИСТОВ", fontsize=12, fontweight='bold') + axes[1, 0].axis('off') + + # Добавляем текстовую легенду под хитмапом + legend_text = "Цвета: Зеленый (Свободен) → Желтый (3-5 мин) → Красный (8+ мин)" + axes[1, 0].text(0.5, -0.1, legend_text, ha='center', transform=axes[1, 0].transAxes, fontsize=10) + + # --- 4. 
РАЗДЕЛЕННЫЕ ОЧЕРЕДИ И СТАТИСТИКА --- + ax_stat = axes[1, 1] + ax_stat.clear() + ax_stat.axis('off') + + # Цвета для очередей (краснеют, если очередь > 50) + q_mod_color = '#991b1b' if data['queue'] > 50 else '#166534' + q_biz_color = '#991b1b' if data.get('business_queue', 0) > 50 else '#1e293b' + + # Две надписи очередей сверху + ax_stat.text(0.25, 0.9, "ОЧЕРЕДЬ\n(МОДЕЛИ)", fontsize=10, ha='center', fontweight='bold') + ax_stat.text(0.25, 0.78, f"{data['queue']}", fontsize=26, ha='center', fontweight='bold', color=q_mod_color) + + ax_stat.text(0.75, 0.9, "ОЧЕРЕДЬ\n(БИЗНЕС ПРАВИЛА)", fontsize=10, ha='center', fontweight='bold') + ax_stat.text(0.75, 0.78, f"{data.get('business_queue', 0)}", fontsize=26, ha='center', fontweight='bold', + color=q_biz_color) + + # Сводная таблица ниже + cum = data['cumulative'] + stats_text = ( + f"Итоговые показатели к {data['time_str']}\n" + f"--------------------------------------\n" + f"ОБРАБОТАНО ВСЕГО: {cum['total_processed']}\n" + f"Авто-одобрено: {cum['auto_approved']}\n" + f"Авто-отказы: {cum['auto_declined']}\n" + f"Ручной разбор (модель): {cum['manual_processed']}\n" + f"Ручной разбор (бизнес правила): {cum['business_manual_processed']}\n" + f"--------------------------------------\n" + f"Используемая модель: {second_model_name}" + ) + + ax_stat.text(0.5, 0.3, stats_text, fontsize=10, fontfamily='monospace', + ha='center', va='center', transform=ax_stat.transAxes, + bbox=dict(facecolor='#f8f9fa', alpha=1, boxstyle='round,pad=1', edgecolor='#dee2e6')) + + return axes.flatten() + + ani = animation.FuncAnimation(fig, update, frames=len(frames), interval=1000 / fps) + tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') + + writer = animation.FFMpegWriter(fps=fps, bitrate=2000, extra_args=['-vcodec', 'libx264', '-pix_fmt', 'yuv420p']) + ani.save(tmp_file.name, writer=writer) + return tmp_file.name \ No newline at end of file diff --git a/app/simulation/visualization/plots.py b/app/simulation/visualization/plots.py 
new file mode 100644 index 0000000000000000000000000000000000000000..aa1c2d0d14d72b0be645ba8c4cb76e49ce4d8101 --- /dev/null +++ b/app/simulation/visualization/plots.py @@ -0,0 +1,374 @@ +import matplotlib.pyplot as plt +import matplotlib.animation as animation +import numpy as np + + + +def minutes_to_time(minutes, start_time="00:00"): + """Преобразует минуты от старта в строку времени ЧЧ:ММ""" + start_hour, start_min = map(int, start_time.split(':')) + total_minutes = start_hour * 60 + start_min + minutes + hour = (total_minutes // 60) % 24 + minute = total_minutes % 60 + return f"{hour:02d}:{minute:02d}" + + +def plot_queue_dynamics(queue_history, business_queue_history=None, start_time="00:00"): + """ + Два отдельных графика для очередей с временной шкалой ЧЧ:ММ + """ + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6)) + + # Создаем метки времени для каждого часа + total_minutes = len(queue_history) + hours = range(0, total_minutes, 60) # каждый час + hour_labels = [minutes_to_time(m, start_time) for m in hours] + + # График 1: Очередь моделей + ax1.plot(range(total_minutes), queue_history, 'b-', linewidth=1.5) + ax1.set_xticks(hours) + ax1.set_xticklabels(hour_labels, rotation=45) + ax1.set_xlabel('Время') + ax1.set_ylabel('Размер очереди') + ax1.set_title('Очередь моделей') + ax1.grid(True, alpha=0.3) + + # График 2: Очередь бизнес-правил + if business_queue_history and len(business_queue_history) > 0: + ax2.plot(range(total_minutes), business_queue_history, 'orange', linewidth=1.5) + ax2.set_xticks(hours) + ax2.set_xticklabels(hour_labels, rotation=45) + ax2.set_xlabel('Время') + ax2.set_ylabel('Размер очереди') + ax2.set_title('Очередь бизнес-правил') + ax2.grid(True, alpha=0.3) + else: + ax2.text(0.5, 0.5, 'Нет данных', ha='center', va='center', transform=ax2.transAxes) + ax2.set_title('Очередь бизнес-правил') + ax2.set_xlabel('Время') + + plt.tight_layout() + return plt + + +def plot_specialist_load(specialist_busy_history, specialists_count, 
start_time="00:00"): + """График загрузки специалистов с временной шкалой ЧЧ:ММ""" + load_percent = [busy / specialists_count * 100 for busy in specialist_busy_history] + + fig, ax = plt.subplots(figsize=(10, 4)) + + total_minutes = len(load_percent) + hours = range(0, total_minutes, 60) # каждый час + hour_labels = [minutes_to_time(m, start_time) for m in hours] + + ax.plot(range(total_minutes), load_percent, 'g-', linewidth=1.5) + ax.axhline(y=100, color='r', linestyle='--', alpha=0.5, label='Максимум') + ax.axhline(y=80, color='b', linestyle='--', alpha=0.5, label='Цель 80%') + + ax.set_xticks(hours) + ax.set_xticklabels(hour_labels, rotation=45) + ax.set_xlabel('Время') + ax.set_ylabel('Загрузка (%)') + ax.set_title('Загрузка специалистов') + ax.legend() + ax.grid(True, alpha=0.3) + ax.set_ylim(0, 110) + + plt.tight_layout() + return plt + + +def plot_inflow(minute_counts, start_time="00:00"): + """ + График входящего потока заявок с заливкой под кривой + """ + fig, ax = plt.subplots(figsize=(14, 5)) + + total_minutes = len(minute_counts) + minutes = range(total_minutes) + + # Заливка под кривой (area plot) + ax.fill_between(minutes, minute_counts, alpha=0.3, color='blue', label='Общий поток') + + # Основной график (линия поверх заливки) + ax.plot(minutes, minute_counts, 'b-', linewidth=1.5, alpha=0.8) + + # Скользящее среднее + window = 30 + if total_minutes > window: + smoothed = np.convolve(minute_counts, np.ones(window) / window, mode='valid') + ax.plot(range(window - 1, total_minutes), smoothed, + 'r-', linewidth=2.5, label=f'Среднее за 30 мин') + + # Можно добавить заливку и для среднего (опционально) + # ax.fill_between(range(window - 1, total_minutes), smoothed, alpha=0.2, color='red') + + # Метки времени + hours = range(0, total_minutes, 60) + hour_labels = [minutes_to_time(m, start_time) for m in hours] + + ax.set_xticks(hours) + ax.set_xticklabels(hour_labels, rotation=45) + ax.set_xlabel('Время') + ax.set_ylabel('Количество заявок') + 
ax.set_title('Входящий поток заявок') + ax.legend() + ax.grid(True, alpha=0.3) + + # Добавим горизонтальную линию среднего + mean_value = np.mean(minute_counts) + ax.axhline(y=mean_value, color='gray', linestyle='--', alpha=0.7, + label=f'Среднее: {mean_value:.1f}') + + plt.tight_layout() + return plt + + +def minutes_to_time(minutes, start_time="00:00"): + """Преобразует минуты от старта в строку времени ЧЧ:ММ""" + start_hour, start_min = map(int, start_time.split(':')) + total_minutes = start_hour * 60 + start_min + minutes + hour = (total_minutes // 60) % 24 + minute = total_minutes % 60 + return f"{hour:02d}:{minute:02d}" + + +def plot_detailed_decisions(batch_stats, second_model_name="XGBoost", start_time="00:00"): + """ + Набор графиков для каждого типа решений отдельно с временной шкалой ЧЧ:ММ + """ + if not batch_stats: + return None + + fig, axes = plt.subplots(3, 2, figsize=(14, 10)) + + times = [stat['time'] for stat in batch_stats] # минуты + total_minutes = max(times) if times else 0 + + # Метки времени каждый час + hours = range(0, total_minutes + 60, 60) + hour_labels = [minutes_to_time(m, start_time) for m in hours] + + # 1. Бизнес-правила (ручной разбор) + axes[0, 0].plot(times, [stat['business_manual'] for stat in batch_stats], + 'r-', linewidth=1.5) + axes[0, 0].fill_between(times, 0, [stat['business_manual'] for stat in batch_stats], + alpha=0.2, color='red') + axes[0, 0].set_title('Ручной разбор: бизнес-правила', fontweight='bold') + axes[0, 0].set_xticks(hours) + axes[0, 0].set_xticklabels(hour_labels, rotation=45) + axes[0, 0].set_xlabel('Время') + axes[0, 0].set_ylabel('Заявок') + axes[0, 0].grid(True, alpha=0.3) + + # 2. 
Бизнес-правила (авто отказ) + axes[0, 1].plot(times, [stat['business_auto'] for stat in batch_stats], + 'darkred', linewidth=1.5) + axes[0, 1].fill_between(times, 0, [stat['business_auto'] for stat in batch_stats], + alpha=0.2, color='darkred') + axes[0, 1].set_title('Авто отказ: бизнес-правила', fontweight='bold') + axes[0, 1].set_xticks(hours) + axes[0, 1].set_xticklabels(hour_labels, rotation=45) + axes[0, 1].set_xlabel('Время') + axes[0, 1].set_ylabel('Заявок') + axes[0, 1].grid(True, alpha=0.3) + + # 3. LR уверенные решения + axes[1, 0].plot(times, [stat['lr_confident'] for stat in batch_stats], + 'blue', linewidth=1.5) + axes[1, 0].fill_between(times, 0, [stat['lr_confident'] for stat in batch_stats], + alpha=0.2, color='blue') + axes[1, 0].set_title('Уверенные решения: Logistic Regression', fontweight='bold') + axes[1, 0].set_xticks(hours) + axes[1, 0].set_xticklabels(hour_labels, rotation=45) + axes[1, 0].set_xlabel('Время') + axes[1, 0].set_ylabel('Заявок') + axes[1, 0].grid(True, alpha=0.3) + + # 4. Вторая модель уверенные решения + axes[1, 1].plot(times, [stat['second_confident'] for stat in batch_stats], + 'green', linewidth=1.5) + axes[1, 1].fill_between(times, 0, [stat['second_confident'] for stat in batch_stats], + alpha=0.2, color='green') + axes[1, 1].set_title(f'Уверенные решения: {second_model_name}', fontweight='bold') + axes[1, 1].set_xticks(hours) + axes[1, 1].set_xticklabels(hour_labels, rotation=45) + axes[1, 1].set_xlabel('Время') + axes[1, 1].set_ylabel('Заявок') + axes[1, 1].grid(True, alpha=0.3) + + # 5. 
Ручной разбор от моделей + axes[2, 0].plot(times, [stat['second_uncertain'] for stat in batch_stats], + 'orange', linewidth=1.5) + axes[2, 0].fill_between(times, 0, [stat['second_uncertain'] for stat in batch_stats], + alpha=0.2, color='orange') + axes[2, 0].set_title('Ручной разбор: модели неуверенны', fontweight='bold') + axes[2, 0].set_xticks(hours) + axes[2, 0].set_xticklabels(hour_labels, rotation=45) + axes[2, 0].set_xlabel('Время') + axes[2, 0].set_ylabel('Заявок') + axes[2, 0].grid(True, alpha=0.3) + + # 6. Сравнительный график + axes[2, 1].plot(times, [stat['business_manual'] for stat in batch_stats], + 'r-', linewidth=1.5, label='Бизнес-правила', alpha=0.7) + axes[2, 1].plot(times, [stat['second_uncertain'] for stat in batch_stats], + 'orange', linewidth=1.5, label='Модели неуверенны', alpha=0.7) + axes[2, 1].set_title('Сравнение источников ручного разбора', fontweight='bold') + axes[2, 1].set_xticks(hours) + axes[2, 1].set_xticklabels(hour_labels, rotation=45) + axes[2, 1].set_xlabel('Время') + axes[2, 1].set_ylabel('Заявок') + axes[2, 1].legend() + axes[2, 1].grid(True, alpha=0.3) + + plt.suptitle('Детальный анализ решений', fontsize=14, fontweight='bold') + plt.tight_layout() + return plt + +def plot_parameters_history(pid_history, second_model_name="XGBoost", start_time="00:00"): + """График изменения параметров регулятора""" + if pid_history is None or pid_history.empty: + return None + + fig, axes = plt.subplots(3, 1, figsize=(12, 12)) + + total_minutes = len(pid_history) + times = range(total_minutes) + + # Метки времени + hours = range(0, total_minutes, 60) + hour_labels = [minutes_to_time(m, start_time) for m in hours] + + # 1. 
Отступы LR + axes[0].plot(times, pid_history['lr_low'], 'g-', linewidth=2, label='LR Low') + axes[0].plot(times, pid_history['lr_high'], 'r-', linewidth=2, label='LR High') + axes[0].set_ylabel('Отступ') + axes[0].set_title('Отступы Logistic Regression') + axes[0].legend() + axes[0].grid(True, alpha=0.3) + axes[0].set_xticks(hours) + axes[0].set_xticklabels(hour_labels, rotation=45) + + # 2. Отступы второй модели (с именем из параметра) + axes[1].plot(times, pid_history['second_low'], 'g-', linewidth=2, label=f'{second_model_name} Low') + axes[1].plot(times, pid_history['second_high'], 'r-', linewidth=2, label=f'{second_model_name} High') + axes[1].set_ylabel('Отступ') + axes[1].set_title(f'Отступы {second_model_name}') + axes[1].legend() + axes[1].grid(True, alpha=0.3) + axes[1].set_xticks(hours) + axes[1].set_xticklabels(hour_labels, rotation=45) + + # 3. Ошибка загрузки и выход регулятора + axes[2].plot(times, pid_history['error_load'], 'b-', label='Error load', alpha=0.7, linewidth=1.5) + axes[2].plot(times, pid_history['output'], 'r-', label='Output', linewidth=2, alpha=0.7) + axes[2].axhline(y=0, color='black', linestyle='-', linewidth=0.5) + axes[2].set_xlabel('Время') + axes[2].set_ylabel('Значение') + axes[2].set_title('Ошибка загрузки и выход регулятора') + axes[2].legend() + axes[2].grid(True, alpha=0.3) + axes[2].set_xticks(hours) + axes[2].set_xticklabels(hour_labels, rotation=45) + + plt.tight_layout() + return plt + + +# def plot_summary(processor): +# """Сводный дашборд""" +# fig, axes = plt.subplots(2, 3, figsize=(15, 10)) +# +# stats = processor.stats +# +# # 1. 
Динамика очередей +# axes[0, 0].plot(stats['queue_history'], 'b-', linewidth=1.5, label='Очередь моделей') +# if 'business_queue_history' in stats: +# axes[0, 0].plot(stats['business_queue_history'], 'orange', linewidth=1.5, label='Очередь бизнес-правил') +# axes[0, 0].set_title('Динамика очередей') +# axes[0, 0].set_xlabel('Минута') +# axes[0, 0].set_ylabel('Заявок') +# axes[0, 0].legend() +# axes[0, 0].grid(True, alpha=0.3) +# +# # 2. Загрузка специалистов (модели) +# load = [b / processor.specialists_count * 100 for b in stats['specialist_busy']] +# axes[0, 1].plot(load, 'g-', linewidth=1.5, label='Основные специалисты') +# axes[0, 1].axhline(y=100, color='r', linestyle='--', alpha=0.5, label='Максимум') +# if hasattr(processor, 'target_load'): +# axes[0, 1].axhline(y=processor.target_load * 100, color='b', linestyle='--', +# alpha=0.5, label=f'Цель {processor.target_load * 100:.0f}%') +# axes[0, 1].set_title('Загрузка специалистов (модели)') +# axes[0, 1].set_xlabel('Минута') +# axes[0, 1].set_ylabel('%') +# axes[0, 1].legend() +# axes[0, 1].grid(True, alpha=0.3) +# +# # 3. Загрузка экспертов +# if 'business_specialist_busy' in stats and stats['business_specialist_busy']: +# business_load = [b / processor.business_specialists_count * 100 for b in stats['business_specialist_busy']] +# axes[1, 0].plot(business_load, 'orange', linewidth=1.5, label='Эксперты') +# axes[1, 0].axhline(y=100, color='r', linestyle='--', alpha=0.5, label='Максимум') +# axes[1, 0].set_title('Загрузка экспертов (бизнес-правила)') +# axes[1, 0].set_xlabel('Минута') +# axes[1, 0].set_ylabel('%') +# axes[1, 0].legend() +# axes[1, 0].grid(True, alpha=0.3) +# else: +# axes[1, 0].text(0.5, 0.5, 'Нет данных по экспертам', ha='center', va='center') +# axes[1, 0].set_title('Загрузка экспертов') +# +# # 4. 
Распределение решений +# sizes = [ +# stats['auto_approved'], +# stats['auto_declined'], +# stats['manual_processed'], +# stats.get('business_manual_processed', 0) +# ] +# labels = ['Одобрено авто', 'Отказ авто', 'Ручной (модели)', 'Ручной (бизнес)'] +# colors = ['#2ecc71', '#e74c3c', '#3498db', '#f39c12'] +# +# if sum(sizes) > 0: +# wedges, texts, autotexts = axes[1, 1].pie(sizes, labels=labels, autopct='%1.1f%%', +# colors=colors, startangle=90) +# for autotext in autotexts: +# autotext.set_color('white') +# autotext.set_fontweight('bold') +# axes[1, 1].set_title('Итоговые решения') +# +# # 5. Ключевые метрики (освободилось место) +# total = stats['total_processed'] +# if total > 0: +# avg_wait = np.mean(stats['wait_times']) if stats['wait_times'] else 0 +# avg_business_wait = np.mean(stats.get('business_wait_times', [0])) if stats.get('business_wait_times') else 0 +# +# metrics_text = f""" +# Всего заявок: {total:,} +# Одобрено авто: {stats['auto_approved']:,} ({stats['auto_approved'] / total * 100:.1f}%) +# Отказ авто: {stats['auto_declined']:,} ({stats['auto_declined'] / total * 100:.1f}%) +# +# Ручной разбор (модели): {stats['manual_processed']:,} ({stats['manual_processed'] / total * 100:.1f}%) +# Ручной разбор (бизнес): {stats.get('business_manual_processed', 0):,} +# +# Среднее время ожидания (модели): {avg_wait:.1f} мин +# Среднее время ожидания (бизнес): {avg_business_wait:.1f} мин +# +# Средняя загрузка специалистов: {np.mean(load):.1f}% +# """ +# else: +# metrics_text = "Нет данных" +# +# axes[0, 2].text(0.1, 0.5, metrics_text, transform=axes[0, 2].transAxes, +# fontsize=10, verticalalignment='center', fontfamily='monospace') +# axes[0, 2].axis('off') +# axes[0, 2].set_title('Ключевые метрики') +# +# # 6. 
Пустой график или можно что-то еще +# axes[1, 2].axis('off') +# +# plt.suptitle('Сводная статистика симуляции', fontsize=14, fontweight='bold') +# plt.tight_layout() +# return plt + + diff --git a/app/simulation/visualization/simulation_20:11.gif b/app/simulation/visualization/simulation_20:11.gif new file mode 100644 index 0000000000000000000000000000000000000000..6eae3b657c63a5ac82da1fd37a615ad7ed3aff5e Binary files /dev/null and b/app/simulation/visualization/simulation_20:11.gif differ diff --git a/app/simulation/visualization/simulation_20:19.gif b/app/simulation/visualization/simulation_20:19.gif new file mode 100644 index 0000000000000000000000000000000000000000..6eae3b657c63a5ac82da1fd37a615ad7ed3aff5e Binary files /dev/null and b/app/simulation/visualization/simulation_20:19.gif differ diff --git a/app/simulation/visualization/simulation_20:25.gif b/app/simulation/visualization/simulation_20:25.gif new file mode 100644 index 0000000000000000000000000000000000000000..6eae3b657c63a5ac82da1fd37a615ad7ed3aff5e Binary files /dev/null and b/app/simulation/visualization/simulation_20:25.gif differ diff --git a/app/simulation/visualization/simulation_20:30.gif b/app/simulation/visualization/simulation_20:30.gif new file mode 100644 index 0000000000000000000000000000000000000000..6eae3b657c63a5ac82da1fd37a615ad7ed3aff5e Binary files /dev/null and b/app/simulation/visualization/simulation_20:30.gif differ diff --git a/app/utils/__pycache__/credit_preprocessor.cpython-311.pyc b/app/utils/__pycache__/credit_preprocessor.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..98062ecc31834418451384b84b7e7ba49454902a Binary files /dev/null and b/app/utils/__pycache__/credit_preprocessor.cpython-311.pyc differ diff --git a/app/utils/__pycache__/data_loader.cpython-311.pyc b/app/utils/__pycache__/data_loader.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e20b6cf939ce5eefa45bc7ad2234e23981bcd388 Binary 
# Diff residue kept from the original dump (tail of a "Binary files" notice):
#   files /dev/null and b/app/utils/__pycache__/data_loader.cpython-311.pyc differ
#
# ===== app/utils/credit_preprocessor.py (new file, mode 100644) =====
import joblib
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import MaxAbsScaler, MinMaxScaler, RobustScaler, StandardScaler


class CreditDataPreprocessor(BaseEstimator, TransformerMixin):
    """Full feature-engineering step for the credit-scoring data.

    ``transform`` fills missing values, clips numeric columns, adds overflow
    indicator flags, builds an aggregated past-due score and one-hot encodes
    loan-count groups. ``clean_train`` filters rows by the ``*_drop_threshold``
    parameters; those thresholds are not used by ``transform``.

    Parameters
    ----------
    NumberOfDependents_fill_value
        Replacement for missing ``NumberOfDependents``.
    NumberOfDependents_up_threshold
        Upper clipping bound for ``NumberOfDependents``.
    MonthlyIncome_fill_value
        Replacement for missing ``MonthlyIncome``.
    RevolvingUtilizationOfUnsecuredLines_drop_threshold
        ``clean_train`` keeps rows with utilization <= this value.
    age_low_drop_threshold, age_up_drop_threshold
        ``clean_train`` keeps rows with age inside this closed interval.
    DebtRatio_up_threshold
        Upper clipping bound for ``DebtRatio``.
    PastDueRiskScore_weights
        Three weights applied to the 30-59, 60-89 and 90+ days-late counters
        (in that order) when building ``PastDueRiskScore``.
    NumberRealEstateLoansOrLines_drop_threshold
        ``clean_train`` keeps rows with at most this many real-estate loans.
    drop_special_codes
        When true, the ``Code96`` / ``Code98`` flag columns are dropped.
    """

    def __init__(self,
                 NumberOfDependents_fill_value=0,
                 NumberOfDependents_up_threshold=10,
                 MonthlyIncome_fill_value=0,
                 RevolvingUtilizationOfUnsecuredLines_drop_threshold=2,
                 age_low_drop_threshold=18,
                 age_up_drop_threshold=80,
                 DebtRatio_up_threshold=5,
                 # Tuple instead of a list: a mutable default would be shared
                 # between every instance created without this argument.
                 PastDueRiskScore_weights=(1.0, 1.2, 1.3),
                 NumberRealEstateLoansOrLines_drop_threshold=20,
                 drop_special_codes=False):
        self.NumberOfDependents_fill_value = NumberOfDependents_fill_value
        self.NumberOfDependents_up_threshold = NumberOfDependents_up_threshold

        self.MonthlyIncome_fill_value = MonthlyIncome_fill_value

        self.RevolvingUtilizationOfUnsecuredLines_drop_threshold = RevolvingUtilizationOfUnsecuredLines_drop_threshold

        self.age_low_drop_threshold = age_low_drop_threshold
        self.age_up_drop_threshold = age_up_drop_threshold

        self.DebtRatio_up_threshold = DebtRatio_up_threshold

        self.PastDueRiskScore_weights = PastDueRiskScore_weights

        self.NumberRealEstateLoansOrLines_drop_threshold = NumberRealEstateLoansOrLines_drop_threshold

        self.drop_special_codes = drop_special_codes

    def fit(self, X, y=None):
        """Do nothing and return ``self``; the transformer learns no state."""
        return self

    def transform(self, X):
        """Return an engineered copy of ``X``; the input frame is not modified.

        The order in which columns are created is kept exactly as in the
        original implementation, because it determines the output column order.
        """
        X_copy = X.copy()

        # --- NumberOfDependents: fill gaps, then clip to [0, threshold].
        X_copy['NumberOfDependents'] = X_copy['NumberOfDependents'].fillna(
            value=self.NumberOfDependents_fill_value)
        X_copy['NumberOfDependents'] = X_copy['NumberOfDependents'].clip(
            0, self.NumberOfDependents_up_threshold)

        # --- MonthlyIncome: remember which rows were missing, then fill.
        X_copy['MonthlyIncomeIsMissing'] = 0
        X_copy.loc[X_copy['MonthlyIncome'].isna(), 'MonthlyIncomeIsMissing'] = 1
        X_copy['MonthlyIncome'] = X_copy['MonthlyIncome'].fillna(value=self.MonthlyIncome_fill_value)

        # --- Revolving utilization: flag values above 1, then clip to [0, 1].
        X_copy['RevolvingUtilizationOverOne'] = 0.0
        X_copy.loc[X_copy['RevolvingUtilizationOfUnsecuredLines'] > 1, 'RevolvingUtilizationOverOne'] = 1.0
        X_copy['RevolvingUtilizationOfUnsecuredLines'] = X_copy['RevolvingUtilizationOfUnsecuredLines'].clip(0, 1)

        # --- DebtPayments: DebtRatio * MonthlyIncome, except that rows with
        # zero income take DebtRatio as-is.
        # NOTE(review): presumably DebtRatio already holds an absolute amount
        # when income is unknown/zero in this dataset -- confirm.
        zero_income = X_copy['MonthlyIncome'] == 0
        X_copy['DebtPayments'] = 0.0
        X_copy.loc[zero_income, 'DebtPayments'] = X_copy.loc[zero_income, 'DebtRatio']
        X_copy.loc[~zero_income, 'DebtPayments'] = (
            X_copy.loc[~zero_income, 'DebtRatio'] * X_copy.loc[~zero_income, 'MonthlyIncome'])
        X_copy['DebtRatio'] = X_copy['DebtRatio'].clip(0, self.DebtRatio_up_threshold)

        # --- DebtPayments: flag values above 10000, then clip.
        X_copy['DebtPayments_over_10k'] = 0.0
        X_copy.loc[X_copy['DebtPayments'] > 10000, 'DebtPayments_over_10k'] = 1.0
        X_copy['DebtPayments'] = X_copy['DebtPayments'].clip(0, 10000)

        # --- MonthlyIncome: flag values >= 20000, then clip.
        X_copy['MonthlyIncome_over_20k'] = 0.0
        X_copy.loc[X_copy['MonthlyIncome'] >= 20000, 'MonthlyIncome_over_20k'] = 1.0
        X_copy['MonthlyIncome'] = X_copy['MonthlyIncome'].clip(0, 20000)

        # --- Special codes 96 / 98 are detected on the 30-59 days column only.
        past_due_30_59 = X_copy['NumberOfTime30-59DaysPastDueNotWorse']
        X_copy['Code96'] = 0.0
        X_copy['Code98'] = 0.0
        X_copy.loc[past_due_30_59 == 96, 'Code96'] = 1.0
        X_copy.loc[past_due_30_59 == 98, 'Code98'] = 1.0

        # --- Weighted past-due score; rows carrying a special code get the
        # raw code value instead of the weighted sum.
        X_copy['PastDueRiskScore'] = (
            self.PastDueRiskScore_weights[0] * X_copy['NumberOfTime30-59DaysPastDueNotWorse'] +
            self.PastDueRiskScore_weights[1] * X_copy['NumberOfTime60-89DaysPastDueNotWorse'] +
            self.PastDueRiskScore_weights[2] * X_copy['NumberOfTimes90DaysLate'])
        X_copy.loc[X_copy['NumberOfTime30-59DaysPastDueNotWorse'] == 96, 'PastDueRiskScore'] = 96
        X_copy.loc[X_copy['NumberOfTime30-59DaysPastDueNotWorse'] == 98, 'PastDueRiskScore'] = 98
        X_copy = X_copy.drop(columns=['NumberOfTime30-59DaysPastDueNotWorse',
                                      'NumberOfTime60-89DaysPastDueNotWorse',
                                      'NumberOfTimes90DaysLate'])

        # --- Open credit lines: flag values above 30, then clip.
        X_copy['NumberOfOpenCreditLinesAndLoans_over_30'] = 0.0
        X_copy.loc[X_copy['NumberOfOpenCreditLinesAndLoans'] > 30, 'NumberOfOpenCreditLinesAndLoans_over_30'] = 1.0
        X_copy['NumberOfOpenCreditLinesAndLoans'] = X_copy['NumberOfOpenCreditLinesAndLoans'].clip(0, 30)

        # --- Real-estate loans: flag values above 5, then clip.
        X_copy['NumberRealEstateLoansOrLines_over_5'] = 0.0
        X_copy.loc[X_copy['NumberRealEstateLoansOrLines'] > 5, 'NumberRealEstateLoansOrLines_over_5'] = 1.0
        X_copy['NumberRealEstateLoansOrLines'] = X_copy['NumberRealEstateLoansOrLines'].clip(0, 5)

        # --- Loan-count groups, one-hot encoded.
        # NOTE(review): pd.cut uses right-closed bins by default, so these
        # edges produce (0,1], (1,2], (2,6], (6,15], (15,31]. A value of 0
        # falls in no bin (all Consumer_* dummies become 0) and every label is
        # shifted by one relative to the actual interval. Left unchanged on
        # purpose: the persisted models were trained on exactly these features.
        X_copy['ConsumerCredit_Group'] = pd.cut(X_copy['NumberOfOpenCreditLinesAndLoans'],
                                                bins=[0, 1, 2, 6, 15, 31],
                                                labels=[
                                                    '0_loans',
                                                    '1_loans',
                                                    '2-5_loans',
                                                    '6-14_loans',
                                                    '16-30_loans'
                                                ])
        consumer_dummy = pd.get_dummies(
            X_copy['ConsumerCredit_Group'], prefix='Consumer', drop_first=False).astype('float')

        X_copy['RealEstateLoans_Group'] = pd.cut(X_copy['NumberRealEstateLoansOrLines'],
                                                 bins=[-1, 0, 3, 100],
                                                 labels=[
                                                     '0_loans',
                                                     '1-3_loans',
                                                     '4+_loans',
                                                 ])
        estate_dummy = pd.get_dummies(
            X_copy['RealEstateLoans_Group'], prefix='RealEstateLoans', drop_first=False).astype('float')

        X_copy = pd.concat([X_copy, consumer_dummy, estate_dummy], axis=1)

        X_copy = X_copy.drop(columns=[
            # helper categorical columns
            'ConsumerCredit_Group',
            'RealEstateLoans_Group',
            # reference categories of the one-hot encodings
            'Consumer_6-14_loans',
            'RealEstateLoans_0_loans',
            # features excluded from the final feature set
            'NumberOfOpenCreditLinesAndLoans',
            'NumberRealEstateLoansOrLines',
            'MonthlyIncomeIsMissing',
            'MonthlyIncome_over_20k',
            'Consumer_0_loans',
            'NumberOfOpenCreditLinesAndLoans_over_30',
        ])

        if self.drop_special_codes:
            X_copy = X_copy.drop(columns=['Code96', 'Code98'])

        return X_copy

    def fit_transform(self, X, y=None):
        """Fit (a no-op) and transform ``X`` in one call."""
        return self.fit(X, y).transform(X)

    def clean_train(self, X, y=None):
        """Filter training rows by the ``*_drop_threshold`` parameters.

        Returns ``X_clean`` alone when ``y`` is None, otherwise the tuple
        ``(X_clean, y_clean)`` filtered with the same boolean mask.
        """
        mask = (
            (X['RevolvingUtilizationOfUnsecuredLines'] <= self.RevolvingUtilizationOfUnsecuredLines_drop_threshold) &
            (X['age'] >= self.age_low_drop_threshold) &
            (X['age'] <= self.age_up_drop_threshold) &
            (X['NumberRealEstateLoansOrLines'] <= self.NumberRealEstateLoansOrLines_drop_threshold)
        )

        X_clean = X[mask].copy()

        if y is not None:
            y_clean = y[mask].copy()
            return X_clean, y_clean

        return X_clean


class CreditScaler(BaseEstimator, TransformerMixin):
    """Scale only the non-boolean columns of a DataFrame.

    The wrapped scaler is selected by ``scaler_type``:

    - 'standard': StandardScaler (mean 0, variance 1)
    - 'robust': RobustScaler (robust to outliers)
    - 'minmax': MinMaxScaler (maps to [0, 1])
    - 'maxabs': MaxAbsScaler (maps to [-1, 1])
    """

    def __init__(self, scaler_type='standard'):
        """Store ``scaler_type`` and build the matching scaler.

        Raises ``ValueError`` for an unknown ``scaler_type``.
        """
        # Indicator / one-hot columns that are passed through unscaled.
        self.boolean_columns = [
            'RevolvingUtilizationOverOne',
            'DebtPayments_over_10k',
            'Code96',
            'Code98',
            'NumberRealEstateLoansOrLines_over_5',
            'Consumer_1_loans',
            'Consumer_2-5_loans',
            'Consumer_16-30_loans',
            'RealEstateLoans_1-3_loans',
            'RealEstateLoans_4+_loans'
        ]

        self.scaler_type = scaler_type
        self._create_scaler()

        # Populated by fit().
        self.columns_to_scale_ = None
        self.n_features_in_ = None
        self.feature_names_in_ = None

    def _create_scaler(self):
        """Instantiate ``self.scaler`` according to ``self.scaler_type``."""
        scaler_classes = {
            'standard': StandardScaler,
            'robust': RobustScaler,
            'minmax': MinMaxScaler,
            'maxabs': MaxAbsScaler,
        }
        try:
            scaler_cls = scaler_classes[self.scaler_type]
        except (KeyError, TypeError):
            # TypeError covers unhashable values, which are just as unknown.
            raise ValueError(
                f"Unknown scaler_type: {self.scaler_type}. "
                f"Available: standard, robust, minmax, maxabs"
            ) from None
        self.scaler = scaler_cls()

    def fit(self, X, y=None):
        """Pick the columns to scale (all but the boolean ones) and fit the scaler."""
        self.feature_names_in_ = X.columns.tolist()
        self.n_features_in_ = len(self.feature_names_in_)

        self.columns_to_scale_ = [
            col for col in self.feature_names_in_
            if col not in self.boolean_columns
        ]

        self.scaler.fit(X[self.columns_to_scale_])
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with only the non-boolean columns scaled."""
        X_copy = X.copy()

        X_copy[self.columns_to_scale_] = self.scaler.transform(X_copy[self.columns_to_scale_])

        return X_copy

    def fit_transform(self, X, y=None):
        """Fit on ``X`` and return its scaled copy."""
        return self.fit(X, y).transform(X, y)

    def get_feature_names_out(self, input_features=None):
        """Return ``input_features`` if given, else the names seen in ``fit``.

        Returns an empty list when the scaler has not been fitted yet.
        """
        if input_features is not None:
            return input_features
        return self.feature_names_in_ if self.feature_names_in_ is not None else []

    def set_params(self, **params):
        """Set parameters, rebuilding the scaler when ``scaler_type`` changes."""
        if 'scaler_type' in params and params['scaler_type'] != self.scaler_type:
            self.scaler_type = params['scaler_type']
            self._create_scaler()
        return super().set_params(**params)


def check_business_rules(age, monthly_income, monthly_debt, debt_ratio,
                         late_90, late_60_89, late_30_59, credit_lines,
                         real_estate, utilization, dependents):
    """Apply hard business rules to a single application before model scoring.

    Rules are evaluated in order and the first match wins. ``debt_ratio``,
    ``credit_lines`` and ``dependents`` are accepted but not inspected by any
    rule.

    Returns a dict with the keys:

    - ``needs_manual``: True when the application must go to manual review.
    - ``message``: explanation for the applicant / operator; an empty string
      when no rule fired.
    - ``decision``: 1 for an automatic rejection, otherwise None.
    """
    def manual_review(message):
        # Every manual-review rule returns the same shape.
        return {
            'needs_manual': True,
            'message': message,
            'decision': None
        }

    late_counters = (late_90, late_60_89, late_30_59)

    # CRITICAL RULE - immediate rejection
    if age < 18:
        return {
            'needs_manual': False,
            'message': 'Возраст менее 18 лет - кредит не выдаётся',
            'decision': 1  # rejection
        }

    # SPECIAL BANK CODES - straight to manual review (98 is checked before 96)
    if any(value == 98 for value in late_counters):
        return manual_review('Код 98: Списание долга как безнадежного')

    if any(value == 96 for value in late_counters):
        return manual_review('Код 96: Изъятие залога или реализация имущества')

    # CRITICAL RULES - straight to manual review
    if age > 80:
        return manual_review('Возраст > 80 лет - требуется ручной разбор (индивидуальные условия)')

    if monthly_income > 1000000:
        return manual_review('Доход свыше 1,000,000 $ - требуется ручной разбор')

    if monthly_debt > 1000000:
        return manual_review('Платежи свыше 1,000,000 $ - требуется ручной разбор')

    if utilization > 2:
        return manual_review('Использование кредитных средств превышает 200%')

    if real_estate > 20:
        return manual_review('Количество кредитов под залог недвижимости слишком велико - ручной разбор')

    # ALL CHECKS PASSED - the application may be scored by the model.
    # 'message' is included so that every return path has the same keys.
    return {
        'needs_manual': False,
        'message': '',
        'decision': None,
    }


# ===== app/utils/data_loader.py (new file, mode 100644) =====
import streamlit as st
import joblib
import os


@st.cache_resource
def load_artifacts(models_path, preprocessor_path):
    """Load the fitted preprocessor, scaler and every available model.

    ``preprocessor_150.pkl`` and ``scaler_150.pkl`` are read from
    ``preprocessor_path``. Model files are looked up in ``models_path`` and a
    missing model file is skipped silently, so the returned ``models`` dict
    may contain fewer entries than the table below.

    Returns the tuple ``(preprocessor, scaler, models)``.
    """
    # NOTE(security): joblib.load unpickles arbitrary objects; only point
    # these paths at trusted artifacts.
    preprocessor = joblib.load(os.path.join(preprocessor_path, 'preprocessor_150.pkl'))
    scaler = joblib.load(os.path.join(preprocessor_path, 'scaler_150.pkl'))

    model_files = {
        'Logistic Regression': 'logreg_150_model.pkl',
        'XGBoost': 'xgb_150_model.pkl',
        'LightGBM': 'lgbm_150_model.pkl',
        'CatBoost': 'catboost_150_model.pkl',
        'Random Forest': 'rfc_150_model.pkl'
    }

    models = {}
    for name, filename in model_files.items():
        path = os.path.join(models_path, filename)
        if os.path.exists(path):
            models[name] = joblib.load(path)

    return preprocessor, scaler, models
diff --git a/catboost_info/catboost_training.json b/catboost_info/catboost_training.json new file mode 100644 index 0000000000000000000000000000000000000000..6bd1a2f4e376a9f062a9f741f6428dda83dd5486 --- /dev/null +++ b/catboost_info/catboost_training.json @@ -0,0 +1,104 @@ +{ +"meta":{"test_sets":[],"test_metrics":[],"learn_metrics":[{"best_value":"Min","name":"Logloss"}],"launch_mode":"Train","parameters":"","iteration_count":100,"learn_sets":["learn"],"name":"experiment"}, +"iterations":[ +{"learn":[0.6582255385],"iteration":0,"passed_time":0.1064302509,"remaining_time":10.53659484}, +{"learn":[0.628929721],"iteration":1,"passed_time":0.1525804859,"remaining_time":7.476443808}, +{"learn":[0.6055823656],"iteration":2,"passed_time":0.1890582018,"remaining_time":6.112881857}, +{"learn":[0.585745295],"iteration":3,"passed_time":0.2252067115,"remaining_time":5.404961076}, +{"learn":[0.5691497866],"iteration":4,"passed_time":0.2406276923,"remaining_time":4.571926155}, +{"learn":[0.5553995801],"iteration":5,"passed_time":0.2564603788,"remaining_time":4.017879268}, +{"learn":[0.5431466822],"iteration":6,"passed_time":0.2927427626,"remaining_time":3.889296704}, +{"learn":[0.5321745596],"iteration":7,"passed_time":0.326945827,"remaining_time":3.75987701}, +{"learn":[0.5230197248],"iteration":8,"passed_time":0.3622708005,"remaining_time":3.662960316}, +{"learn":[0.5150673326],"iteration":9,"passed_time":0.3964141569,"remaining_time":3.567727412}, +{"learn":[0.5085723427],"iteration":10,"passed_time":0.4321486694,"remaining_time":3.496475598}, +{"learn":[0.5029521178],"iteration":11,"passed_time":0.4666833149,"remaining_time":3.422344309}, +{"learn":[0.4982952699],"iteration":12,"passed_time":0.4887040018,"remaining_time":3.270557551}, +{"learn":[0.4940193081],"iteration":13,"passed_time":0.5244568892,"remaining_time":3.221663748}, +{"learn":[0.4903079864],"iteration":14,"passed_time":0.5588645355,"remaining_time":3.166899035}, 
+{"learn":[0.4877126951],"iteration":15,"passed_time":0.5737366867,"remaining_time":3.012117605}, +{"learn":[0.4849442288],"iteration":16,"passed_time":0.594951629,"remaining_time":2.904763836}, +{"learn":[0.4823937275],"iteration":17,"passed_time":0.6301627283,"remaining_time":2.870741318}, +{"learn":[0.4803765605],"iteration":18,"passed_time":0.6663060298,"remaining_time":2.840567811}, +{"learn":[0.4781523185],"iteration":19,"passed_time":0.687587555,"remaining_time":2.75035022}, +{"learn":[0.4767059201],"iteration":20,"passed_time":0.7219467433,"remaining_time":2.715894891}, +{"learn":[0.475163465],"iteration":21,"passed_time":0.7562504736,"remaining_time":2.681251679}, +{"learn":[0.4741219727],"iteration":22,"passed_time":0.777308792,"remaining_time":2.602294651}, +{"learn":[0.473008657],"iteration":23,"passed_time":0.8123926421,"remaining_time":2.5725767}, +{"learn":[0.4722618181],"iteration":24,"passed_time":0.8479381142,"remaining_time":2.543814343}, +{"learn":[0.4714753648],"iteration":25,"passed_time":0.8643292969,"remaining_time":2.460014153}, +{"learn":[0.470390758],"iteration":26,"passed_time":0.904391114,"remaining_time":2.445205605}, +{"learn":[0.4696201438],"iteration":27,"passed_time":0.9384147212,"remaining_time":2.413066426}, +{"learn":[0.46859249],"iteration":28,"passed_time":0.9729809498,"remaining_time":2.382125774}, +{"learn":[0.4677104182],"iteration":29,"passed_time":1.007459887,"remaining_time":2.350739737}, +{"learn":[0.4673133885],"iteration":30,"passed_time":1.019293767,"remaining_time":2.268750643}, +{"learn":[0.4667841252],"iteration":31,"passed_time":1.031243396,"remaining_time":2.191392216}, +{"learn":[0.4664536674],"iteration":32,"passed_time":1.066954367,"remaining_time":2.166240684}, +{"learn":[0.4662622609],"iteration":33,"passed_time":1.076521387,"remaining_time":2.089717986}, +{"learn":[0.466086662],"iteration":34,"passed_time":1.088162018,"remaining_time":2.020872318}, 
+{"learn":[0.4657380808],"iteration":35,"passed_time":1.122871037,"remaining_time":1.996215177}, +{"learn":[0.4651284039],"iteration":36,"passed_time":1.157755597,"remaining_time":1.971313584}, +{"learn":[0.4646531445],"iteration":37,"passed_time":1.195417263,"remaining_time":1.95041764}, +{"learn":[0.4641257326],"iteration":38,"passed_time":1.230054783,"remaining_time":1.92393184}, +{"learn":[0.4637898175],"iteration":39,"passed_time":1.246455715,"remaining_time":1.869683573}, +{"learn":[0.4633285186],"iteration":40,"passed_time":1.280777487,"remaining_time":1.843070043}, +{"learn":[0.4628428368],"iteration":41,"passed_time":1.315556006,"remaining_time":1.816720199}, +{"learn":[0.4626124403],"iteration":42,"passed_time":1.349455031,"remaining_time":1.788812483}, +{"learn":[0.4623858706],"iteration":43,"passed_time":1.362016739,"remaining_time":1.73347585}, +{"learn":[0.4621286714],"iteration":44,"passed_time":1.398965243,"remaining_time":1.709846409}, +{"learn":[0.4617974001],"iteration":45,"passed_time":1.433543555,"remaining_time":1.682855478}, +{"learn":[0.4614775166],"iteration":46,"passed_time":1.466964625,"remaining_time":1.654236705}, +{"learn":[0.46142579],"iteration":47,"passed_time":1.479586041,"remaining_time":1.602884878}, +{"learn":[0.4614011205],"iteration":48,"passed_time":1.486797243,"remaining_time":1.547482845}, +{"learn":[0.4611845342],"iteration":49,"passed_time":1.521749302,"remaining_time":1.521749302}, +{"learn":[0.4609852804],"iteration":50,"passed_time":1.557818146,"remaining_time":1.496727238}, +{"learn":[0.4604321277],"iteration":51,"passed_time":1.596752012,"remaining_time":1.473924934}, +{"learn":[0.4601645791],"iteration":52,"passed_time":1.646375224,"remaining_time":1.459993123}, +{"learn":[0.459804458],"iteration":53,"passed_time":1.683746309,"remaining_time":1.434302411}, +{"learn":[0.4592589475],"iteration":54,"passed_time":1.71827358,"remaining_time":1.405860201}, 
+{"learn":[0.4589643366],"iteration":55,"passed_time":1.752749892,"remaining_time":1.377160629}, +{"learn":[0.4585201818],"iteration":56,"passed_time":1.787086539,"remaining_time":1.348153003}, +{"learn":[0.4582657803],"iteration":57,"passed_time":1.822507553,"remaining_time":1.319746849}, +{"learn":[0.4580557799],"iteration":58,"passed_time":1.844325617,"remaining_time":1.281650005}, +{"learn":[0.457864554],"iteration":59,"passed_time":1.877865186,"remaining_time":1.251910124}, +{"learn":[0.4576167412],"iteration":60,"passed_time":1.915169063,"remaining_time":1.224452351}, +{"learn":[0.4575874936],"iteration":61,"passed_time":1.924591958,"remaining_time":1.17958862}, +{"learn":[0.457362279],"iteration":62,"passed_time":1.960633385,"remaining_time":1.151483099}, +{"learn":[0.4572946663],"iteration":63,"passed_time":1.972484848,"remaining_time":1.109522727}, +{"learn":[0.4569830294],"iteration":64,"passed_time":2.007666031,"remaining_time":1.08105094}, +{"learn":[0.456610445],"iteration":65,"passed_time":2.042187927,"remaining_time":1.052036205}, +{"learn":[0.4560918865],"iteration":66,"passed_time":2.076725989,"remaining_time":1.022865039}, +{"learn":[0.4558479503],"iteration":67,"passed_time":2.110631805,"remaining_time":0.9932384965}, +{"learn":[0.455740418],"iteration":68,"passed_time":2.124730753,"remaining_time":0.9545891789}, +{"learn":[0.455501269],"iteration":69,"passed_time":2.159214566,"remaining_time":0.9253776709}, +{"learn":[0.4554787935],"iteration":70,"passed_time":2.168732127,"remaining_time":0.8858201647}, +{"learn":[0.4552744806],"iteration":71,"passed_time":2.205001553,"remaining_time":0.8575006039}, +{"learn":[0.455234248],"iteration":72,"passed_time":2.220812656,"remaining_time":0.8213964619}, +{"learn":[0.455137986],"iteration":73,"passed_time":2.255120136,"remaining_time":0.7923395074}, +{"learn":[0.4549484305],"iteration":74,"passed_time":2.28992153,"remaining_time":0.7633071767}, 
+{"learn":[0.4548062199],"iteration":75,"passed_time":2.324904798,"remaining_time":0.7341804624}, +{"learn":[0.4546474797],"iteration":76,"passed_time":2.360039856,"remaining_time":0.7049469699}, +{"learn":[0.4545581835],"iteration":77,"passed_time":2.372090859,"remaining_time":0.6690512679}, +{"learn":[0.4544265313],"iteration":78,"passed_time":2.40573901,"remaining_time":0.6395002433}, +{"learn":[0.4544030978],"iteration":79,"passed_time":2.413539667,"remaining_time":0.6033849167}, +{"learn":[0.4543650724],"iteration":80,"passed_time":2.422935896,"remaining_time":0.568342988}, +{"learn":[0.4542698101],"iteration":81,"passed_time":2.455982135,"remaining_time":0.5391180296}, +{"learn":[0.4540294101],"iteration":82,"passed_time":2.490015159,"remaining_time":0.5100031048}, +{"learn":[0.4539463005],"iteration":83,"passed_time":2.501245001,"remaining_time":0.4764276192}, +{"learn":[0.4537784829],"iteration":84,"passed_time":2.53640835,"remaining_time":0.4476014736}, +{"learn":[0.4536943889],"iteration":85,"passed_time":2.572798317,"remaining_time":0.4188276329}, +{"learn":[0.4536386999],"iteration":86,"passed_time":2.608432288,"remaining_time":0.3897657442}, +{"learn":[0.4533342039],"iteration":87,"passed_time":2.644785463,"remaining_time":0.3606525632}, +{"learn":[0.4531946585],"iteration":88,"passed_time":2.679509774,"remaining_time":0.3311753653}, +{"learn":[0.4529846134],"iteration":89,"passed_time":2.713719672,"remaining_time":0.301524408}, +{"learn":[0.4529583581],"iteration":90,"passed_time":2.728664447,"remaining_time":0.2698679124}, +{"learn":[0.4528171854],"iteration":91,"passed_time":2.765771076,"remaining_time":0.2405018327}, +{"learn":[0.4526575987],"iteration":92,"passed_time":2.800900134,"remaining_time":0.2108204402}, +{"learn":[0.4526170824],"iteration":93,"passed_time":2.822734239,"remaining_time":0.1801745259}, +{"learn":[0.4525149982],"iteration":94,"passed_time":2.857135885,"remaining_time":0.1503755729}, 
+{"learn":[0.4524663385],"iteration":95,"passed_time":2.892015112,"remaining_time":0.1205006297}, +{"learn":[0.4524315166],"iteration":96,"passed_time":2.908353878,"remaining_time":0.08994908901}, +{"learn":[0.4523241677],"iteration":97,"passed_time":2.943535644,"remaining_time":0.060072156}, +{"learn":[0.4523010903],"iteration":98,"passed_time":2.964563379,"remaining_time":0.02994508464}, +{"learn":[0.4522791181],"iteration":99,"passed_time":2.980352983,"remaining_time":0} +]} \ No newline at end of file diff --git a/catboost_info/learn/events.out.tfevents b/catboost_info/learn/events.out.tfevents new file mode 100644 index 0000000000000000000000000000000000000000..2907646852ceb318c42b7d2a1e09d369e9e785b5 --- /dev/null +++ b/catboost_info/learn/events.out.tfevents @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1fe5092567732e395a3420a1205c4190f0e63d50edc678509bd4104fc34a503 +size 5398 diff --git a/catboost_info/learn_error.tsv b/catboost_info/learn_error.tsv new file mode 100644 index 0000000000000000000000000000000000000000..c9c56469a0cd3602ee24372cd70c82a482e8320f --- /dev/null +++ b/catboost_info/learn_error.tsv @@ -0,0 +1,101 @@ +iter Logloss +0 0.6582255385 +1 0.628929721 +2 0.6055823656 +3 0.585745295 +4 0.5691497866 +5 0.5553995801 +6 0.5431466822 +7 0.5321745596 +8 0.5230197248 +9 0.5150673326 +10 0.5085723427 +11 0.5029521178 +12 0.4982952699 +13 0.4940193081 +14 0.4903079864 +15 0.4877126951 +16 0.4849442288 +17 0.4823937275 +18 0.4803765605 +19 0.4781523185 +20 0.4767059201 +21 0.475163465 +22 0.4741219727 +23 0.473008657 +24 0.4722618181 +25 0.4714753648 +26 0.470390758 +27 0.4696201438 +28 0.46859249 +29 0.4677104182 +30 0.4673133885 +31 0.4667841252 +32 0.4664536674 +33 0.4662622609 +34 0.466086662 +35 0.4657380808 +36 0.4651284039 +37 0.4646531445 +38 0.4641257326 +39 0.4637898175 +40 0.4633285186 +41 0.4628428368 +42 0.4626124403 +43 0.4623858706 +44 0.4621286714 +45 0.4617974001 +46 0.4614775166 +47 0.46142579 +48 
0.4614011205 +49 0.4611845342 +50 0.4609852804 +51 0.4604321277 +52 0.4601645791 +53 0.459804458 +54 0.4592589475 +55 0.4589643366 +56 0.4585201818 +57 0.4582657803 +58 0.4580557799 +59 0.457864554 +60 0.4576167412 +61 0.4575874936 +62 0.457362279 +63 0.4572946663 +64 0.4569830294 +65 0.456610445 +66 0.4560918865 +67 0.4558479503 +68 0.455740418 +69 0.455501269 +70 0.4554787935 +71 0.4552744806 +72 0.455234248 +73 0.455137986 +74 0.4549484305 +75 0.4548062199 +76 0.4546474797 +77 0.4545581835 +78 0.4544265313 +79 0.4544030978 +80 0.4543650724 +81 0.4542698101 +82 0.4540294101 +83 0.4539463005 +84 0.4537784829 +85 0.4536943889 +86 0.4536386999 +87 0.4533342039 +88 0.4531946585 +89 0.4529846134 +90 0.4529583581 +91 0.4528171854 +92 0.4526575987 +93 0.4526170824 +94 0.4525149982 +95 0.4524663385 +96 0.4524315166 +97 0.4523241677 +98 0.4523010903 +99 0.4522791181 diff --git a/catboost_info/time_left.tsv b/catboost_info/time_left.tsv new file mode 100644 index 0000000000000000000000000000000000000000..fce0ef1723caa4ea5ff65bd02f9c6c661e5a4463 --- /dev/null +++ b/catboost_info/time_left.tsv @@ -0,0 +1,101 @@ +iter Passed Remaining +0 106 10536 +1 152 7476 +2 189 6112 +3 225 5404 +4 240 4571 +5 256 4017 +6 292 3889 +7 326 3759 +8 362 3662 +9 396 3567 +10 432 3496 +11 466 3422 +12 488 3270 +13 524 3221 +14 558 3166 +15 573 3012 +16 594 2904 +17 630 2870 +18 666 2840 +19 687 2750 +20 721 2715 +21 756 2681 +22 777 2602 +23 812 2572 +24 847 2543 +25 864 2460 +26 904 2445 +27 938 2413 +28 972 2382 +29 1007 2350 +30 1019 2268 +31 1031 2191 +32 1066 2166 +33 1076 2089 +34 1088 2020 +35 1122 1996 +36 1157 1971 +37 1195 1950 +38 1230 1923 +39 1246 1869 +40 1280 1843 +41 1315 1816 +42 1349 1788 +43 1362 1733 +44 1398 1709 +45 1433 1682 +46 1466 1654 +47 1479 1602 +48 1486 1547 +49 1521 1521 +50 1557 1496 +51 1596 1473 +52 1646 1459 +53 1683 1434 +54 1718 1405 +55 1752 1377 +56 1787 1348 +57 1822 1319 +58 1844 1281 +59 1877 1251 +60 1915 1224 +61 1924 1179 +62 1960 1151 +63 1972 1109 
+64 2007 1081 +65 2042 1052 +66 2076 1022 +67 2110 993 +68 2124 954 +69 2159 925 +70 2168 885 +71 2205 857 +72 2220 821 +73 2255 792 +74 2289 763 +75 2324 734 +76 2360 704 +77 2372 669 +78 2405 639 +79 2413 603 +80 2422 568 +81 2455 539 +82 2490 510 +83 2501 476 +84 2536 447 +85 2572 418 +86 2608 389 +87 2644 360 +88 2679 331 +89 2713 301 +90 2728 269 +91 2765 240 +92 2800 210 +93 2822 180 +94 2857 150 +95 2892 120 +96 2908 89 +97 2943 60 +98 2964 29 +99 2980 0 diff --git a/catboost_info/tmp/cat_feature_index.17f2f383-11eeddeb-4a42fe68-9d5bbcbb.tmp b/catboost_info/tmp/cat_feature_index.17f2f383-11eeddeb-4a42fe68-9d5bbcbb.tmp new file mode 100644 index 0000000000000000000000000000000000000000..593f4708db84ac8fd0f5cc47c634f38c013fe9e4 Binary files /dev/null and b/catboost_info/tmp/cat_feature_index.17f2f383-11eeddeb-4a42fe68-9d5bbcbb.tmp differ diff --git a/datasets/.DS_Store b/datasets/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..3fce4d53a361987c1e1e990bb56cc9bfaa6877b1 Binary files /dev/null and b/datasets/.DS_Store differ diff --git a/datasets/cs-test.csv b/datasets/cs-test.csv new file mode 100644 index 0000000000000000000000000000000000000000..ec6e048dc0c6a0ebea3c4049222a0bb9e6126102 --- /dev/null +++ b/datasets/cs-test.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bab363a2a807218d32a51f5fc9668b8be7977795065edd386abc8546abaa5b78 +size 4983329 diff --git a/datasets/cs-training.csv b/datasets/cs-training.csv new file mode 100644 index 0000000000000000000000000000000000000000..df0c937285dba20c9e4673c5639e8a07c1e93254 --- /dev/null +++ b/datasets/cs-training.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bd46da486a5708c58c7b01a034fae2a13b327f6f7b62ea7ba4fe3b5824b24ac +size 7564965 diff --git a/datasets/predict_data.csv b/datasets/predict_data.csv new file mode 100644 index 0000000000000000000000000000000000000000..b4a45a878fcfa0607470fe9b8aaeec1f4aed5d94 --- /dev/null +++ 
b/datasets/predict_data.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:986bef87a56fba2b0d1b30d24d6dfcfe3cc05a7a719dc86b84044b2af8a23b26 +size 18759521 diff --git a/models/.DS_Store b/models/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..7f2f4ec3103522b0e4fa2302d5fec383cbce845b Binary files /dev/null and b/models/.DS_Store differ diff --git a/models/best/.DS_Store b/models/best/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..d985665ce64db2dd49a6e09f684dc086f0cee9cb Binary files /dev/null and b/models/best/.DS_Store differ diff --git a/models/best/train_120/.DS_Store b/models/best/train_120/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 Binary files /dev/null and b/models/best/train_120/.DS_Store differ diff --git a/models/best/train_120/catboost_model.pkl b/models/best/train_120/catboost_model.pkl new file mode 100644 index 0000000000000000000000000000000000000000..86c64b3e8a31fd196c6e4a222f17dd0935a2cd96 --- /dev/null +++ b/models/best/train_120/catboost_model.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d634b97cd239b33b6b776bb6224c744ad4aeb36b17ef9a213078a11b15b41378 +size 302054 diff --git a/models/best/train_120/dtc_model.pkl b/models/best/train_120/dtc_model.pkl new file mode 100644 index 0000000000000000000000000000000000000000..676605cd6da389b431f67e66963c82ee7a32e5b6 --- /dev/null +++ b/models/best/train_120/dtc_model.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffd7b1e5c945ec288d566d5c242216bacda5fba4f38386e330da5954025d62dd +size 7049 diff --git a/models/best/train_120/lgbm_model.pkl b/models/best/train_120/lgbm_model.pkl new file mode 100644 index 0000000000000000000000000000000000000000..de22bbdf58e99286a8443140143ecada493e64ea --- /dev/null +++ b/models/best/train_120/lgbm_model.pkl @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:95356d4825d459969ac9d9a914d86bc41a27bbd369efca2c586db96974edcc41 +size 354372 diff --git a/models/best/train_120/logreg_model.pkl b/models/best/train_120/logreg_model.pkl new file mode 100644 index 0000000000000000000000000000000000000000..d583f84f55936f918add88648cf6bbcb26a4f1f1 --- /dev/null +++ b/models/best/train_120/logreg_model.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cd416fa6713c982afa2abc7581c650c144e560f511fcd8adcf3aba8766b02e3 +size 1755 diff --git a/models/best/train_120/logreg_model_old.pkl b/models/best/train_120/logreg_model_old.pkl new file mode 100644 index 0000000000000000000000000000000000000000..5a7ea7b4cabd9fd26c83d52479bd63bc46f2e480 --- /dev/null +++ b/models/best/train_120/logreg_model_old.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07483fea4ce8d2a2eaf76b88874e3cb6931c6a994b3b6107b46ec5aa9de2c3e3 +size 1695 diff --git a/models/best/train_120/rfc_model.pkl b/models/best/train_120/rfc_model.pkl new file mode 100644 index 0000000000000000000000000000000000000000..262a910e050ce128133ea1827eba29041908b697 --- /dev/null +++ b/models/best/train_120/rfc_model.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81169937442a65f05faf04ab2cb8d0aea8d80aa23b5e8ec26490c93f57bfcfef +size 2924569 diff --git a/models/best/train_120/xgb_model.pkl b/models/best/train_120/xgb_model.pkl new file mode 100644 index 0000000000000000000000000000000000000000..f187716dccbb6949b890a001b32e9266d243a45e --- /dev/null +++ b/models/best/train_120/xgb_model.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf5146efea09c65bfd14b968335d57ed5d415423d9d54df2e9a8cf96a4424c1e +size 173239 diff --git a/models/best/train_150/.DS_Store b/models/best/train_150/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 Binary files /dev/null and 
b/models/best/train_150/.DS_Store differ diff --git a/models/best/train_150/catboost_150_model.pkl b/models/best/train_150/catboost_150_model.pkl new file mode 100644 index 0000000000000000000000000000000000000000..997feb6b44eda533331b230b95b4920f52fdc49b --- /dev/null +++ b/models/best/train_150/catboost_150_model.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84ccf7f069ff7a6899f449d24a2901cbac7e376ae559747d107168ee98c0aab0 +size 329566 diff --git a/models/best/train_150/dtc_150_model.pkl b/models/best/train_150/dtc_150_model.pkl new file mode 100644 index 0000000000000000000000000000000000000000..0ac32edab54ff273c0895f748f4d564f87495c08 --- /dev/null +++ b/models/best/train_150/dtc_150_model.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84e84f230fe512f3ba48c459f23d7c5d3ace7fc52c1d054fcaa6a503de1f870d +size 7065 diff --git a/models/best/train_150/lgbm_150_model.pkl b/models/best/train_150/lgbm_150_model.pkl new file mode 100644 index 0000000000000000000000000000000000000000..8b09ff9a41d3c83a381d4498c485dbd56d86e494 --- /dev/null +++ b/models/best/train_150/lgbm_150_model.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:187132edf48b058a95a421f0670646d46d995c6dfaa749114482ab89ba5514a0 +size 355044 diff --git a/models/best/train_150/logreg_150_model.pkl b/models/best/train_150/logreg_150_model.pkl new file mode 100644 index 0000000000000000000000000000000000000000..7335583e2f580f710af7e6ca1bf9a169a2fb139c --- /dev/null +++ b/models/best/train_150/logreg_150_model.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b4ea65d7a9afd3b4e337cb51114e173ca19e967490951df2a2ffb0cab84a269 +size 1707 diff --git a/models/best/train_150/rfc_150_model.pkl b/models/best/train_150/rfc_150_model.pkl new file mode 100644 index 0000000000000000000000000000000000000000..175078043e00c967cd6bb8be9f6c9a3f8f0aa282 --- /dev/null +++ b/models/best/train_150/rfc_150_model.pkl @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58429f92c05d0cd87461ee397f937a298d2da54fab8d3b53063e608689ae34f7 +size 2995129 diff --git a/models/best/train_150/xgb_150_model.pkl b/models/best/train_150/xgb_150_model.pkl new file mode 100644 index 0000000000000000000000000000000000000000..13693817ee734f7b507bfb405e6e3019a2a6c774 --- /dev/null +++ b/models/best/train_150/xgb_150_model.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28a9f915d33b162454b3846105c812aa725a7fb4afd93795a62e3bdb183c0daf +size 173459 diff --git a/ssh b/ssh new file mode 100644 index 0000000000000000000000000000000000000000..cf352294f86d24bbb9cb298baa39b97ebf1479b6 --- /dev/null +++ b/ssh @@ -0,0 +1,8 @@ +-----BEGIN OPENSSH PRIVATE KEY----- +b3BlbnNzaC1rZXktdjEAAAAACmFlczI1Ni1jdHIAAAAGYmNyeXB0AAAAGAAAABCKliKJ7r +fL5JFyzIIGpE2XAAAAGAAAAAEAAAAzAAAAC3NzaC1lZDI1NTE5AAAAIBPBcCLBErmYfAIq +3r1vGRJGbbtI3tuOqo3jpDnoCYkWAAAAkCIMM2hOAXl5ooEsSA7+f/xIvjAziTvEgEyG4U +bmuKh2hFSgmGvCK7P9IRQ9zfAQ40mJKwCKeZMoh2+Dj0V+8gFJc/dY/C17ioTsfOQeIpW5 +tWGYIIeGPycWEquyC1+yHv5XpgqPBIout9nZrsBYQl2ZhAvQ1KdOBmCWbDN2g6NNF04oC2 +aLP3QoQnCiNx4+Sw== +-----END OPENSSH PRIVATE KEY----- diff --git a/ssh.pub b/ssh.pub new file mode 100644 index 0000000000000000000000000000000000000000..3c2c9f573218f84731f1b53001c7131d9d9334b4 --- /dev/null +++ b/ssh.pub @@ -0,0 +1 @@ +ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBPBcCLBErmYfAIq3r1vGRJGbbtI3tuOqo3jpDnoCYkW svik05