This view is limited to 50 files because it contains too many changes. See the raw diff here.
Files changed (50) hide show
  1. .DS_Store +0 -0
  2. .gitattributes +1 -0
  3. app/.DS_Store +0 -0
  4. app/.streamlit/config.toml +2 -0
  5. app/__init__.py +0 -0
  6. app/__pycache__/__init__.cpython-311.pyc +0 -0
  7. app/main.py +33 -0
  8. app/models/__pycache__/escalation.cpython-311.pyc +0 -0
  9. app/models/__pycache__/interpretation.cpython-311.pyc +0 -0
  10. app/models/escalation.py +267 -0
  11. app/models/interpretation.py +194 -0
  12. app/pages/__pycache__/application.cpython-311.pyc +0 -0
  13. app/pages/__pycache__/simulation.cpython-311.pyc +0 -0
  14. app/pages/application.py +329 -0
  15. app/pages/simulation.py +345 -0
  16. app/simulation/.DS_Store +0 -0
  17. app/simulation/__init__.py +0 -0
  18. app/simulation/__pycache__/__init__.cpython-311.pyc +0 -0
  19. app/simulation/controllers/__init__.py +0 -0
  20. app/simulation/controllers/__pycache__/__init__.cpython-311.pyc +0 -0
  21. app/simulation/controllers/__pycache__/base.cpython-311.pyc +0 -0
  22. app/simulation/controllers/__pycache__/pid.cpython-311.pyc +0 -0
  23. app/simulation/controllers/base.py +28 -0
  24. app/simulation/controllers/pid.py +129 -0
  25. app/simulation/core/__init__.py +0 -0
  26. app/simulation/core/__pycache__/__init__.cpython-311.pyc +0 -0
  27. app/simulation/core/__pycache__/processor.cpython-311.pyc +0 -0
  28. app/simulation/core/__pycache__/traffic_generator.cpython-311.pyc +0 -0
  29. app/simulation/core/processor.py +339 -0
  30. app/simulation/core/traffic_generator.py +234 -0
  31. app/simulation/visualization/__init__.py +0 -0
  32. app/simulation/visualization/__pycache__/__init__.cpython-311.pyc +0 -0
  33. app/simulation/visualization/__pycache__/animation.cpython-311.pyc +0 -0
  34. app/simulation/visualization/__pycache__/plots.cpython-311.pyc +0 -0
  35. app/simulation/visualization/animation.py +246 -0
  36. app/simulation/visualization/plots.py +374 -0
  37. app/simulation/visualization/simulation_20:11.gif +0 -0
  38. app/simulation/visualization/simulation_20:19.gif +0 -0
  39. app/simulation/visualization/simulation_20:25.gif +0 -0
  40. app/simulation/visualization/simulation_20:30.gif +0 -0
  41. app/utils/__pycache__/credit_preprocessor.cpython-311.pyc +0 -0
  42. app/utils/__pycache__/data_loader.cpython-311.pyc +0 -0
  43. app/utils/credit_preprocessor.py +329 -0
  44. app/utils/data_loader.py +26 -0
  45. catboost_info/catboost_training.json +104 -0
  46. catboost_info/learn/events.out.tfevents +3 -0
  47. catboost_info/learn_error.tsv +101 -0
  48. catboost_info/time_left.tsv +101 -0
  49. catboost_info/tmp/cat_feature_index.17f2f383-11eeddeb-4a42fe68-9d5bbcbb.tmp +0 -0
  50. datasets/.DS_Store +0 -0
.DS_Store ADDED
Binary file (12.3 kB). View file
 
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.csv filter=lfs diff=lfs merge=lfs -text
app/.DS_Store ADDED
Binary file (10.2 kB). View file
 
app/.streamlit/config.toml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [client]
2
+ showSidebarNavigation = false
app/__init__.py ADDED
File without changes
app/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (169 Bytes). View file
 
app/main.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import sys
4
+
5
# Put the parent of the directory containing this script on sys.path
# (presumably so that `app.*` absolute imports resolve when the script is
# launched directly - confirm against the deployment layout).
sys.path.append(os.path.dirname(os.path.dirname(__file__)))

st.set_page_config(
    page_title="GiveMeSomeCredit",
    page_icon="🏦",
    layout="wide",
    initial_sidebar_state="collapsed",  # the sidebar starts collapsed
)

st.title("🏦 GiveMeSomeCredit - Кредитный скоринг")
st.markdown("---")

# One entry per navigation tile, left to right:
# (section heading, button label, page script to switch to).
NAV_TILES = (
    ("📝 Анкета", "Перейти к анкете", "pages/application.py"),
    ("📊 Симуляция", "Перейти к симуляции", "pages/simulation.py"),
)

for tile_column, (heading, button_label, target_page) in zip(st.columns(2), NAV_TILES):
    with tile_column:
        st.subheader(heading)
        if st.button(button_label):
            # Hand control over to the selected page script.
            st.switch_page(target_page)

st.markdown("---")

# Launch with: streamlit run app/main.py
33
+
app/models/__pycache__/escalation.cpython-311.pyc ADDED
Binary file (10.9 kB). View file
 
app/models/__pycache__/interpretation.cpython-311.pyc ADDED
Binary file (14.9 kB). View file
 
app/models/escalation.py ADDED
@@ -0,0 +1,267 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+
4
def check_business_rules(df):
    """Apply the hard business rules to a batch of applications.

    Missing values in every inspected column are treated as 0.

    Rules are evaluated in a fixed order.  The only auto-reject rule
    (age below 18) takes precedence: a rejected row is never additionally
    flagged for manual review and keeps its rejection message.  Among the
    manual-review rules, the message of the last matching rule wins.

    Args:
        df: DataFrame with the columns ``age``, ``MonthlyIncome``,
            ``DebtRatio``, ``NumberOfTimes90DaysLate``,
            ``NumberOfTime60-89DaysPastDueNotWorse``,
            ``NumberOfTime30-59DaysPastDueNotWorse``,
            ``NumberRealEstateLoansOrLines`` and
            ``RevolvingUtilizationOfUnsecuredLines``.

    Returns:
        A tuple ``(manual_mask, auto_reject_mask, messages, auto_decisions)``:

        - ``manual_mask``: bool array, True = send to manual review.
        - ``auto_reject_mask``: bool array, True = reject immediately.
        - ``messages``: string array with the reason for each flagged row
          (empty string for rows no rule matched).
        - ``auto_decisions``: int array, 1 for auto-rejected rows, else 0.
    """
    n = len(df)
    manual_mask = np.zeros(n, dtype=bool)
    auto_reject_mask = np.zeros(n, dtype=bool)
    # A string-typed array (rather than a Python list) keeps the dtype
    # well-defined even for an empty batch.
    messages = np.full(n, '', dtype=str)
    auto_decisions = np.zeros(n, dtype=int)

    def column(name):
        return df[name].fillna(0).values

    age = column('age')
    monthly_income = column('MonthlyIncome')
    debt_ratio = column('DebtRatio')
    # With no income the ratio cannot be scaled, so the raw DebtRatio value
    # is used as the payment amount.
    monthly_debt = np.where(monthly_income > 0,
                            debt_ratio * monthly_income,
                            debt_ratio)

    late_90 = column('NumberOfTimes90DaysLate')
    late_60_89 = column('NumberOfTime60-89DaysPastDueNotWorse')
    late_30_59 = column('NumberOfTime30-59DaysPastDueNotWorse')

    real_estate = column('NumberRealEstateLoansOrLines')
    utilization = column('RevolvingUtilizationOfUnsecuredLines')

    # 1. Critical rule: immediate rejection.
    # NOTE(review): because missing ages are filled with 0, a row without an
    # age is rejected as "under 18" - confirm this is intended.
    underage = np.asarray(age < 18, dtype=bool)
    auto_reject_mask[underage] = True
    auto_decisions[underage] = 1
    messages = np.where(underage, 'Возраст менее 18 лет - кредит не выдаётся', messages)

    # 2./3. Rules that route an application to manual review, in evaluation
    # order: special codes 98 / 96 in the past-due counters first, then the
    # extreme-value checks.
    manual_rules = [
        ((late_90 == 98) | (late_60_89 == 98) | (late_30_59 == 98),
         'Код 98: Списание долга как безнадежного'),
        ((late_90 == 96) | (late_60_89 == 96) | (late_30_59 == 96),
         'Код 96: Изъятие залога или реализация имущества'),
        (age > 80,
         'Возраст > 80 лет - требуется ручной разбор (индивидуальные условия)'),
        (monthly_income > 1000000,
         'Доход свыше 1,000,000 $ - требуется ручной разбор'),
        (monthly_debt > 1000000,
         'Платежи свыше 1,000,000 $ - требуется ручной разбор'),
        (utilization > 2,
         'Использование кредитных средств превышает 200%'),
        (real_estate > 20,
         'Количество кредитов под залог недвижимости слишком велико - ручной разбор'),
    ]

    for condition, message in manual_rules:
        # Auto-rejected rows are final: do not re-route them to manual
        # review and do not overwrite their rejection message.
        hit = np.asarray(condition, dtype=bool) & ~auto_reject_mask
        manual_mask[hit] = True
        messages = np.where(hit, message, messages)

    return manual_mask, auto_reject_mask, messages, auto_decisions
78
+
79
+
80
def escalation_decision(applications_df, lr_model, second_model, second_model_name,
                        threshold=0.5, lr_margins=(0.35,), second_margins=(0.4,),
                        preprocessor=None, scaler=None):
    """Decide a batch of applications with a three-stage escalation chain.

    1. Business rules: some applications are rejected immediately, some are
       sent straight to manual review.  If a row is flagged for both,
       the rejection takes precedence and the row is counted once.
    2. The remaining applications are scored by the logistic regression.
    3. Where the logistic regression is not confident, the second model is
       asked; where that one is not confident either, the application goes
       to manual review.

    A model is "confident" when its probability lies at least a margin away
    from ``threshold``.

    Args:
        applications_df: Raw applications, one row each.
        lr_model: First-stage model exposing ``predict_proba``.
        second_model: Second-stage model exposing ``predict_proba``.
        second_model_name: Display name of the second model (used in the
            decision records and the textual decision path).
        threshold: Probability at or above which the decision is 1.
        lr_margins: ``[margin]`` or ``[low, high]`` - required distance from
            the threshold below / at-or-above it for the first model.
        second_margins: Same as ``lr_margins`` for the second model.
        preprocessor: Object with ``transform``; applied to the raw rows.
        scaler: Object with ``transform``; applied to the preprocessed rows.

    Returns:
        ``(decisions, manual_mask, stats)``: a list with one decision dict
        per application, a bool array marking manual-review rows, and a dict
        of per-stage counters.

    Raises:
        ValueError: If applications have to be scored by the models but
            ``preprocessor`` or ``scaler`` was not supplied.
    """
    n = len(applications_df)
    decisions = [None] * n
    manual_mask = np.zeros(n, dtype=bool)

    # Per-stage counters.
    stats = {
        'business_manual': 0,   # manual review due to business rules
        'business_auto': 0,     # automatic rejection due to business rules
        'lr_confident': 0,      # decided confidently by the LR
        'second_confident': 0,  # decided confidently by the second model
        'second_uncertain': 0,  # second model unsure -> manual review
        'total': n
    }

    # 1. Business rules.
    bus_manual_mask, bus_reject_mask, bus_messages, _ = check_business_rules(applications_df)

    for i in range(n):
        if bus_reject_mask[i]:
            stats['business_auto'] += 1
            decisions[i] = {
                'final_decision': 1,
                'model_used': 'Business Rules',
                'needs_review': False,
                'probability': 1.0,
                'message': bus_messages[i],
                'lr_proba': None,
                'second_proba': None,
                'decision_path': [f"❌ Бизнес-правила: {bus_messages[i]}"]
            }
        elif bus_manual_mask[i]:
            # `elif`: a hard rejection is final and must not be overwritten
            # (or counted a second time) by a manual-review flag.
            stats['business_manual'] += 1
            manual_mask[i] = True
            decisions[i] = {
                'final_decision': None,
                'model_used': 'Business Rules',
                'needs_review': True,
                'probability': None,
                'message': bus_messages[i],
                'lr_proba': None,
                'second_proba': None,
                'decision_path': [f"⚠️ Бизнес-правила: {bus_messages[i]}"]
            }

    # 2. Applications not resolved by the business rules go to the models.
    model_indices = [i for i, decision in enumerate(decisions) if decision is None]
    if not model_indices:
        return decisions, manual_mask, stats

    if preprocessor is None or scaler is None:
        raise ValueError(
            "escalation_decision requires both `preprocessor` and `scaler` "
            "to score applications with the models"
        )

    # 3. Model stage: preprocess once, score the whole batch with the LR.
    processed = preprocessor.transform(applications_df.iloc[model_indices])
    processed_scaled = scaler.transform(processed)

    lr_probas = lr_model.predict_proba(processed_scaled)[:, 1]
    lr_margin_values, lr_confident = _confidence_margins(lr_probas, threshold, lr_margins)

    for j, idx in enumerate(model_indices):
        if not lr_confident[j]:
            continue
        stats['lr_confident'] += 1
        decisions[idx] = {
            'final_decision': int(lr_probas[j] >= threshold),
            'probability': lr_probas[j],
            'model_used': 'Logistic Regression',
            'needs_review': False,
            'lr_proba': lr_probas[j],
            'second_proba': None,
            'lr_margin': lr_margin_values[j],
            'lr_confident': True,
            'second_used': False,
            'decision_path': [
                f"1️⃣ Logistic Regression: {lr_probas[j]:.1%} (отступ: {lr_margin_values[j]:.1%})",
                " ✅ LR уверена - финальное решение"
            ]
        }

    # Rows the LR was not confident about are escalated to the second model.
    uncertain_positions = [j for j, confident in enumerate(lr_confident) if not confident]
    if not uncertain_positions:
        return decisions, manual_mask, stats

    second_probas = second_model.predict_proba(
        _take_rows(processed_scaled, uncertain_positions)
    )[:, 1]
    second_margin_values, second_confident_flags = _confidence_margins(
        second_probas, threshold, second_margins
    )

    for k, position in enumerate(uncertain_positions):
        idx = model_indices[position]
        proba = second_probas[k]
        second_margin = second_margin_values[k]

        path = [
            f"1️⃣ Logistic Regression: {lr_probas[position]:.1%} (отступ: {lr_margin_values[position]:.1%})",
            f" ⚠️ LR не уверена → вызываем {second_model_name}",
            f"2️⃣ {second_model_name}: {proba:.1%} (отступ: {second_margin:.1%})"
        ]

        if second_confident_flags[k]:
            stats['second_confident'] += 1
            path.append(f" ✅ {second_model_name} уверен - финальное решение")
            decisions[idx] = {
                'final_decision': int(proba >= threshold),
                'probability': proba,
                'model_used': second_model_name,
                'needs_review': False,
                'lr_proba': lr_probas[position],
                'second_proba': proba,
                'lr_margin': lr_margin_values[position],
                'second_margin': second_margin,
                'lr_confident': False,
                'second_confident': True,
                'second_used': True,
                'decision_path': path
            }
        else:
            stats['second_uncertain'] += 1
            path.append(f" ⚠️ {second_model_name} не уверен → ручной разбор")
            manual_mask[idx] = True
            decisions[idx] = {
                'final_decision': None,
                'probability': proba,
                'model_used': 'Manual Review',
                'needs_review': True,
                'lr_proba': lr_probas[position],
                'second_proba': proba,
                'lr_margin': lr_margin_values[position],
                'second_margin': second_margin,
                'lr_confident': False,
                'second_confident': False,
                'second_used': True,
                'message': 'Модели не уверены в решении',
                'decision_path': path
            }

    return decisions, manual_mask, stats


def _margin_bounds(margins):
    """Return ``(low, high)`` from a one- or two-element margin sequence."""
    if len(margins) == 1:
        return margins[0], margins[0]
    return margins[0], margins[1]


def _confidence_margins(probas, threshold, margins):
    """Return each probability's distance to ``threshold`` and whether that
    distance reaches the required margin (``low`` below the threshold,
    ``high`` at or above it)."""
    low, high = _margin_bounds(margins)
    probas = np.asarray(probas)
    distance = np.abs(probas - threshold)
    # Compare in the probabilities' own dtype so that boundary cases behave
    # the same as a scalar-by-scalar comparison would.
    required = np.where(probas < threshold, low, high).astype(distance.dtype, copy=False)
    return distance, distance >= required


def _take_rows(data, positions):
    """Select rows by position from a DataFrame or from an array-like."""
    if hasattr(data, 'iloc'):
        return data.iloc[positions]
    return data[positions]
app/models/interpretation.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+
6
+ FEATURE_DESCRIPTIONS = { ... }
7
+
8
def get_feature_display_name(feature_name):
    """Return a display label for ``feature_name``.

    A name present in ``FEATURE_DESCRIPTIONS`` is mapped through it.  Any
    other name is prettified: underscores become spaces, the result is
    title-cased, and the words ``Over`` / ``Loans`` are substituted.
    """
    if feature_name not in FEATURE_DESCRIPTIONS:
        pretty = feature_name.replace('_', ' ').title()
        # Substitutions are applied in this order on the title-cased text.
        for needle, replacement in (('Over', '>'), ('Loans', 'Кредитов')):
            pretty = pretty.replace(needle, replacement)
        return pretty
    return FEATURE_DESCRIPTIONS[feature_name]
15
+
16
+
17
def interpret_lr(features, lr_model, feature_names):
    """Explain a logistic-regression prediction for the first row of ``features``.

    Two views are produced:

    - logit contributions: ``coefficient * value`` per feature;
    - occlusion effects: how much the predicted probability changes when a
      single feature of the first row is replaced by 0.

    The model is always called with the same container type the caller
    supplied (ndarray or DataFrame), so the baseline call and the occlusion
    call never disagree about feature names.

    Args:
        features: ndarray or DataFrame; only the first row is explained.
        lr_model: Fitted model exposing ``coef_``, ``intercept_`` and
            ``predict_proba``.
        feature_names: Names of the feature columns, in column order.

    Returns:
        Dict with ``logit_contributions`` and ``marginal_effects``
        (DataFrames sorted by absolute magnitude, descending),
        ``probability`` (sigmoid of the reconstructed logit), ``logit`` and
        ``intercept``.
    """
    given_as_array = isinstance(features, np.ndarray)
    if given_as_array:
        features = pd.DataFrame(features, columns=feature_names)

    coefficients = lr_model.coef_[0]
    intercept = lr_model.intercept_[0]

    importance_df = pd.DataFrame({
        'feature': feature_names,
        'coefficient': coefficients,
        'value': features.iloc[0].values
    })
    importance_df['logit_contribution'] = importance_df['coefficient'] * importance_df['value']
    importance_df['abs_logit'] = importance_df['logit_contribution'].abs()
    importance_df = importance_df.sort_values('abs_logit', ascending=False)

    features_array = features.values

    def as_model_input(matrix):
        # Mirror the caller's container type for every predict_proba call.
        if given_as_array:
            return matrix
        return pd.DataFrame(matrix, columns=features.columns)

    base_input = features_array if given_as_array else features
    base_proba = lr_model.predict_proba(base_input)[0, 1]

    # Build one copy of the first row per feature and zero that feature on
    # the diagonal, so all occlusions are scored in a single call.
    n_features = len(feature_names)
    occluded = np.repeat(features_array[:1], n_features, axis=0)
    diagonal = np.arange(n_features)
    occluded[diagonal, diagonal] = 0
    occluded_probas = lr_model.predict_proba(as_model_input(occluded))[:, 1]

    marginal_df = pd.DataFrame({
        'feature': list(feature_names),
        'marginal_effect': base_proba - occluded_probas,
    })
    marginal_df['abs_marginal'] = marginal_df['marginal_effect'].abs()
    marginal_df = marginal_df.sort_values('abs_marginal', ascending=False)

    logit = intercept + importance_df['logit_contribution'].sum()
    proba = 1 / (1 + np.exp(-logit))

    return {
        'logit_contributions': importance_df,
        'marginal_effects': marginal_df,
        'probability': proba,
        'logit': logit,
        'intercept': intercept
    }
60
+
61
def plot_feature_importance_sns(importance_df, value_col='logit_contribution', title="Вклад признаков в логит"):
    """Draw a horizontal bar chart of the first ten rows of ``importance_df``.

    Bars are ordered by ``value_col`` (ascending, so the largest value ends
    up on top), coloured red for positive and green for negative values, and
    annotated with the value when its magnitude exceeds 0.02.

    Args:
        importance_df: DataFrame with a ``feature`` column and ``value_col``.
        value_col: Column holding the bar lengths.
        title: Chart title.

    Returns:
        The matplotlib figure; the caller is responsible for rendering and
        closing it.
    """
    df = importance_df.head(10).copy()
    df = df.sort_values(value_col, ascending=True)

    fig, ax = plt.subplots(figsize=(10, 6), facecolor='#f8f9fa')
    ax.set_facecolor('#f8f9fa')

    # Bars are placed at explicit numeric positions instead of on a
    # categorical axis: tick labels can then be attached to fixed ticks, and
    # two features with the same name cannot collapse into one bar.
    positions = list(range(len(df)))
    colors = ['#d7191c' if x > 0 else '#1a9641' if x < 0 else '#ffffbf' for x in df[value_col]]
    bars = ax.barh(positions, df[value_col], color=colors, edgecolor='white', linewidth=1.5, alpha=0.9)

    for bar, val in zip(bars, df[value_col]):
        if abs(val) > 0.02:
            # Put the annotation just inside the end of the bar.
            x_pos = val - 0.02 if val > 0 else val + 0.02
            ha = 'right' if val > 0 else 'left'
            ax.text(x_pos, bar.get_y() + bar.get_height() / 2, f'{val:.3f}', ha=ha, va='center', fontsize=9)

    ax.axvline(x=0, color='#495057', linestyle='-', linewidth=1, alpha=0.3)
    ax.grid(axis='x', alpha=0.15, linestyle='--', color='#adb5bd')
    ax.set_axisbelow(True)
    ax.set_xlabel('Вклад в логит', fontsize=11)
    ax.set_ylabel('')
    ax.set_title(title, fontsize=12, fontweight='bold', pad=15)
    # Fix the tick positions first; set_yticklabels is only well-defined on
    # fixed ticks.
    ax.set_yticks(positions)
    ax.set_yticklabels([get_feature_display_name(x) for x in df['feature']], fontsize=10)
    sns.despine(top=True, right=True, left=False, bottom=False)
    plt.tight_layout()
    return fig
88
+
89
def plot_marginal_effects_sns(marginal_df, title="Влияние на вероятность дефолта"):
    """Draw a horizontal bar chart of the first ten rows of ``marginal_df``.

    Bars are ordered by ``marginal_effect`` (ascending), coloured red for
    positive and green for negative effects, and annotated as a percentage
    when the magnitude exceeds 0.01.  The x axis is formatted in percent.

    Args:
        marginal_df: DataFrame with ``feature`` and ``marginal_effect``
            columns.
        title: Chart title.

    Returns:
        The matplotlib figure; the caller is responsible for rendering and
        closing it.
    """
    df = marginal_df.head(10).copy()
    df = df.sort_values('marginal_effect', ascending=True)

    fig, ax = plt.subplots(figsize=(10, 6), facecolor='#f8f9fa')
    ax.set_facecolor('#f8f9fa')

    # Explicit numeric bar positions: tick labels can be attached to fixed
    # ticks, and duplicate feature names cannot merge into a single bar.
    positions = list(range(len(df)))
    colors = ['#d7191c' if x > 0 else '#1a9641' if x < 0 else '#ffffbf' for x in df['marginal_effect']]
    bars = ax.barh(positions, df['marginal_effect'], color=colors, edgecolor='white', linewidth=1.5, alpha=0.9)

    for bar, val in zip(bars, df['marginal_effect']):
        if abs(val) > 0.01:
            # Put the annotation just inside the end of the bar.
            x_pos = val - 0.01 if val > 0 else val + 0.01
            ha = 'right' if val > 0 else 'left'
            ax.text(x_pos, bar.get_y() + bar.get_height() / 2, f'{val:.1%}', ha=ha, va='center', fontsize=9)

    ax.axvline(x=0, color='#495057', linestyle='-', linewidth=1, alpha=0.3)
    ax.grid(axis='x', alpha=0.15, linestyle='--', color='#adb5bd')
    ax.set_axisbelow(True)
    ax.set_xlabel('Изменение вероятности', fontsize=11)
    ax.set_ylabel('')
    ax.set_title(title, fontsize=12, fontweight='bold', pad=15)
    ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{x:.0%}'))
    # Fix the tick positions first; set_yticklabels is only well-defined on
    # fixed ticks.
    ax.set_yticks(positions)
    ax.set_yticklabels([get_feature_display_name(x) for x in df['feature']], fontsize=10)
    sns.despine(top=True, right=True, left=False, bottom=False)
    plt.tight_layout()
    return fig
116
+
117
+
118
def plot_shap_analysis(second_model, processed_scaled, feature_names, second_model_name):
    """Render a SHAP explanation of the first row of ``processed_scaled`` in Streamlit.

    Shows a waterfall plot, a short reading guide, the top five
    risk-increasing and risk-decreasing features, and a table of all SHAP
    values.  Errors are reported in the UI instead of being raised; the
    installation hint is shown only when ``shap`` cannot be imported.

    Args:
        second_model: Tree-based model accepted by ``shap.TreeExplainer``.
        processed_scaled: Model input (DataFrame or array); only the first
            row is explained.
        feature_names: Names of the feature columns, in column order.
        second_model_name: Display name used in the section header.
    """
    import streamlit as st
    st.markdown("---")
    st.subheader(f"⚡ Детальный анализ: {second_model_name} (SHAP)")

    with st.spinner("🔄 Рассчитываем SHAP значения..."):
        try:
            import shap
        except ImportError as e:
            st.error(f"❌ Ошибка SHAP: {e}")
            st.info("Установите shap: `pip install shap`")
            return

        try:
            explainer = shap.TreeExplainer(second_model)
            shap_values = explainer.shap_values(processed_scaled)

            # Multi-output explainers report one set of values per class,
            # either as a list of arrays or as a trailing array axis.  Pick
            # the second class (index 1) in both layouts.
            class_idx = None
            if isinstance(shap_values, list):
                class_idx = 1
                shap_values = shap_values[class_idx]
            else:
                shap_values = np.asarray(shap_values)
                if shap_values.ndim == 3:
                    class_idx = min(1, shap_values.shape[2] - 1)
                    shap_values = shap_values[:, :, class_idx]

            # The waterfall plot needs a scalar base value, so reduce a
            # per-class expected_value to the same class as the SHAP values.
            expected = np.ravel(explainer.expected_value)
            if class_idx is not None and expected.size > 1:
                base_value = float(expected[class_idx])
            else:
                base_value = float(expected[0])

            row_shap = shap_values[0]
            if hasattr(processed_scaled, 'iloc'):
                row_data = processed_scaled.iloc[0].values
            else:
                row_data = np.asarray(processed_scaled)[0]

            # 1. Waterfall plot
            fig, ax = plt.subplots(figsize=(12, 7))
            try:
                shap.waterfall_plot(
                    shap.Explanation(
                        values=row_shap,
                        base_values=base_value,
                        data=row_data,
                        feature_names=feature_names
                    ),
                    show=False,
                )
                plt.tight_layout()
                st.pyplot(fig)
            finally:
                # Release the figure so reruns do not accumulate open figures.
                plt.close(fig)

            # 2. How to read the plot
            with st.expander("📋 Как читать SHAP график?"):
                st.markdown("""
                - **f(x)** = итоговое предсказание модели
                - **base value** = среднее предсказание по всем клиентам
                - 🔴 Красное → признаки, повышающие риск
                - 🔵 Синее → признаки, снижающие риск
                """)

            # 3. Table of SHAP values, largest magnitude first
            shap_df = pd.DataFrame({
                'feature': feature_names,
                'shap_value': row_shap,
                'abs_shap': np.abs(row_shap)
            }).sort_values('abs_shap', ascending=False)

            shap_df['description'] = shap_df['feature'].apply(get_feature_display_name)

            st.markdown("### 📋 Факторы, влияющие на решение:")

            col1, col2 = st.columns(2)

            with col1:
                pos = shap_df[shap_df['shap_value'] > 0].head(5)
                if len(pos) > 0:
                    st.markdown("**🔴 Повышают риск:**")
                    for _, row in pos.iterrows():
                        st.markdown(f"- {row['description']}: +{row['shap_value']:.3f}")

            with col2:
                neg = shap_df[shap_df['shap_value'] < 0].head(5)
                if len(neg) > 0:
                    st.markdown("**🟢 Снижают риск:**")
                    for _, row in neg.iterrows():
                        st.markdown(f"- {row['description']}: {row['shap_value']:.3f}")

            with st.expander("📋 Все SHAP значения"):
                display_df = shap_df[['feature', 'description', 'shap_value']].copy()
                display_df.columns = ['Признак', 'Описание', 'SHAP']
                display_df['SHAP'] = display_df['SHAP'].round(3)
                st.dataframe(display_df.sort_values('SHAP', ascending=False), width='stretch')

        except Exception as e:
            # UI boundary: report the failure to the user rather than
            # crashing the page.
            st.error(f"❌ Ошибка SHAP: {e}")
app/pages/__pycache__/application.cpython-311.pyc ADDED
Binary file (22.7 kB). View file
 
app/pages/__pycache__/simulation.cpython-311.pyc ADDED
Binary file (8.93 kB). View file
 
app/pages/application.py ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import os
4
+ from app.utils.data_loader import load_artifacts
5
+ from app.models.escalation import escalation_decision
6
+ from app.models.interpretation import (
7
+ interpret_lr, plot_feature_importance_sns,
8
+ plot_marginal_effects_sns, plot_shap_analysis,
9
+ get_feature_display_name
10
+ )
11
+ from app.utils.credit_preprocessor import check_business_rules
12
+
13
+ # Пути
14
+ PROJECT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
15
+ MODELS_PATH = os.path.join(PROJECT_PATH, 'models/best/train_150/')
16
+ PREPROCESSOR_PATH = os.path.join(PROJECT_PATH, 'preprocessors/')
17
+
18
+
19
+ def main():
20
+ st.title("🏦 Кредитный скоринг - Анкета")
21
+
22
+ # Загрузка артефактов
23
+ preprocessor, scaler, models = load_artifacts(MODELS_PATH, PREPROCESSOR_PATH)
24
+
25
+ # Инициализация статистики
26
+ if 'stats' not in st.session_state:
27
+ st.session_state.stats = {
28
+ 'total': 0,
29
+ 'manual': 0,
30
+ 'lr_confident': 0,
31
+ 'second_used': 0,
32
+ 'second_confident': 0,
33
+ 'approved': 0,
34
+ 'declined': 0
35
+ }
36
+
37
+ if 'step' not in st.session_state:
38
+ st.session_state.step = 'input'
39
+
40
+ # ВВОД ДАННЫХ
41
+
42
+ if st.session_state.step == 'input':
43
+ st.header("📋 Анкета заемщика")
44
+
45
+ with st.form("credit_form"):
46
+ st.subheader("👤 Личная информация")
47
+ col1, col2 = st.columns(2)
48
+ with col1:
49
+ age = st.number_input("Возраст", 0, 150, 35)
50
+ with col2:
51
+ dependents = st.number_input("Иждивенцы", 0, 20, 0)
52
+
53
+ st.subheader("💰 Ежемесячный доход")
54
+ income_method = st.radio("Способ указания дохода", ["Слайдер (до 20,000$)", "Точное значение"],
55
+ horizontal=True)
56
+
57
+ st.subheader("💳 Ежемесячные платежи")
58
+ debt_method = st.radio("Способ указания платежей", ["Слайдер (до 10,000$)", "Точное значение"],
59
+ horizontal=True)
60
+
61
+ st.subheader("📊 Кредитная история")
62
+ credit_lines = st.number_input("Открытых кредитов и карт", 0, 100, 5)
63
+ real_estate = st.number_input("Кредитов под залог недвижимости", 0, 100, 1)
64
+
65
+ st.subheader("📈 Использование лимитов")
66
+ util_method = st.radio("Уровень использования",
67
+ ["Норма (0-100%)", "Овердрафт (100-200%)", "Экстремальный (>200%)"], horizontal=True)
68
+
69
+ st.subheader("⏱️ Просрочки за последние 2 года")
70
+ col1, col2, col3 = st.columns(3)
71
+ with col1:
72
+ late_30_59 = st.number_input("30-59 дней", 0, 100, 0)
73
+ with col2:
74
+ late_60_89 = st.number_input("60-89 дней", 0, 100, 0)
75
+ with col3:
76
+ late_90 = st.number_input("90+ дней", 0, 100, 0)
77
+
78
+ submitted = st.form_submit_button("➡️ Далее: указать точные значения")
79
+
80
+ if submitted:
81
+ st.session_state.update({
82
+ 'age': age, 'dependents': dependents, 'income_method': income_method,
83
+ 'debt_method': debt_method, 'credit_lines': credit_lines,
84
+ 'real_estate': real_estate, 'util_method': util_method,
85
+ 'late_30_59': late_30_59, 'late_60_89': late_60_89, 'late_90': late_90
86
+ })
87
+ st.session_state.step = 'values'
88
+ st.rerun()
89
+
90
+
91
+ # ВВОД ТОЧНЫХ ЗНАЧЕНИЙ
92
+
93
+ elif st.session_state.step == 'values':
94
+ st.header("💰 Укажите точные значения")
95
+
96
+ with st.form("values_form"):
97
+ col1, col2 = st.columns(2)
98
+ with col1:
99
+ st.subheader("Доход")
100
+ if st.session_state.income_method == "Слайдер (до 20,000$)":
101
+ monthly_income = st.slider("Ежемесячный доход ($)", 0, 20000, 5000)
102
+ else:
103
+ monthly_income = st.number_input("Ежемесячный доход ($)", 0, 1000000, 5000)
104
+
105
+ with col2:
106
+ st.subheader("Платежи")
107
+ if st.session_state.debt_method == "Слайдер (до 10,000$)":
108
+ monthly_debt = st.slider("Ежемесячные платежи ($)", 0, 10000, 1500)
109
+ else:
110
+ monthly_debt = st.number_input("Ежемесячные платежи ($)", 0, 1000000, 1500)
111
+
112
+ st.subheader("📈 Использование лимитов")
113
+ if st.session_state.util_method == "Норма (0-100%)":
114
+ util_value = st.slider("Процент использования", 0, 100, 20)
115
+ utilization = util_value / 100
116
+ elif st.session_state.util_method == "Овердрафт (100-200%)":
117
+ util_value = st.slider("Процент использования", 100, 200, 120)
118
+ utilization = util_value / 100
119
+ else:
120
+ st.warning("Экстремальное использование (>200%) - автоматический ручной разбор")
121
+ utilization = st.number_input("Процент использования", 200, 1000, 200) / 100
122
+
123
+ submitted = st.form_submit_button("✅ Получить решение")
124
+
125
+ # САЙДБАР
126
+ with st.sidebar:
127
+ st.markdown("---")
128
+ st.subheader("⚙️ Настройки")
129
+
130
+ with st.expander("🎯 Пороги уверенности", expanded=False):
131
+ threshold = st.slider("Порог одобрения", 0.3, 0.7, 0.5, 0.05)
132
+ lr_margin = st.slider("Отступ LR", 0.2, 0.5, 0.35, 0.05)
133
+ second_margin = st.slider("Отступ второй модели", 0.2, 0.5, 0.4, 0.05)
134
+
135
+ with st.expander("🤖 Выбор модели", expanded=False):
136
+ available_models = [name for name in models.keys() if name != 'Logistic Regression']
137
+ second_model_name = st.selectbox("Модель для эскалации", available_models)
138
+
139
+ with st.expander("📊 Статистика", expanded=False):
140
+ stats = st.session_state.stats
141
+ if stats['total'] > 0:
142
+ st.metric("Всего заявок", stats['total'])
143
+ st.metric("Ручной разбор", f"{stats['manual'] / stats['total']:.1%}")
144
+ st.metric("LR уверена", f"{stats['lr_confident'] / stats['total']:.1%}")
145
+ if stats['second_used'] > 0:
146
+ st.metric("Вторая модель уверена",
147
+ f"{stats['second_confident'] / stats['second_used']:.1%}")
148
+
149
+ if st.button("🔄 Сброс"):
150
+ st.session_state.stats = {'total': 0, 'manual': 0, 'lr_confident': 0,
151
+ 'second_used': 0, 'second_confident': 0,
152
+ 'approved': 0, 'declined': 0}
153
+ st.rerun()
154
+ else:
155
+ st.info("Нет данных")
156
+
157
+ with st.expander("ℹ️ О проекте", expanded=False):
158
+ st.markdown(f"""
159
+ **Модели:**
160
+ - Logistic Regression
161
+ - {', '.join(available_models)}
162
+
163
+ **AUC:** 0.8578 (LR), ~0.87 (остальные)
164
+ """)
165
+
166
+ st.session_state.threshold = threshold
167
+ st.session_state.lr_margin = lr_margin
168
+ st.session_state.second_margin = second_margin
169
+ st.session_state.second_model_name = second_model_name
170
+
171
+ if submitted:
172
+ debt_ratio = monthly_debt / monthly_income if monthly_income > 0 else monthly_debt
173
+
174
+ # Подготовка данных (ОДИН РАЗ)
175
+ input_data = pd.DataFrame([{
176
+ 'RevolvingUtilizationOfUnsecuredLines': utilization,
177
+ 'age': st.session_state.age,
178
+ 'NumberOfTime30-59DaysPastDueNotWorse': st.session_state.late_30_59,
179
+ 'DebtRatio': debt_ratio,
180
+ 'MonthlyIncome': monthly_income,
181
+ 'NumberOfOpenCreditLinesAndLoans': st.session_state.credit_lines,
182
+ 'NumberOfTimes90DaysLate': st.session_state.late_90,
183
+ 'NumberRealEstateLoansOrLines': st.session_state.real_estate,
184
+ 'NumberOfTime60-89DaysPastDueNotWorse': st.session_state.late_60_89,
185
+ 'NumberOfDependents': st.session_state.dependents
186
+ }])
187
+
188
+ st.markdown("---")
189
+
190
+ with st.spinner("🔄 Анализ заявки..."):
191
+ lr_model = models['Logistic Regression']
192
+ second_model = models[second_model_name]
193
+
194
+ # Единый вызов эскалации (включает бизнес-правила)
195
+ decisions, manual_mask, task = escalation_decision(
196
+ input_data,
197
+ lr_model,
198
+ second_model,
199
+ second_model_name,
200
+ threshold=st.session_state.threshold,
201
+ lr_margins=[st.session_state.lr_margin],
202
+ second_margins=[st.session_state.second_margin],
203
+ preprocessor=preprocessor,
204
+ scaler=scaler
205
+ )
206
+ decision = decisions[0]
207
+
208
+ # Для интерпретации LR нужны обработанные данные
209
+ processed = preprocessor.transform(input_data)
210
+ processed_scaled = scaler.transform(processed)
211
+
212
+ # Обновление статистики
213
+ st.session_state.stats['total'] += 1
214
+ if decision['needs_review']:
215
+ st.session_state.stats['manual'] += 1
216
+ else:
217
+ if decision['final_decision'] == 0:
218
+ st.session_state.stats['approved'] += 1
219
+ else:
220
+ st.session_state.stats['declined'] += 1
221
+
222
+ if decision.get('lr_confident', False):
223
+ st.session_state.stats['lr_confident'] += 1
224
+
225
+ if decision.get('second_used', False):
226
+ st.session_state.stats['second_used'] += 1
227
+ if decision.get('second_confident', False):
228
+ st.session_state.stats['second_confident'] += 1
229
+
230
+ # ОТОБРАЖЕНИЕ РЕЗУЛЬТАТОВ
231
+ st.subheader("🔄 Цепочка принятия решения")
232
+ for step in decision['decision_path']:
233
+ st.write(step)
234
+
235
+ col1, col2 = st.columns(2)
236
+ with col1:
237
+ st.markdown("**🏦 Logistic Regression**")
238
+ st.metric("Вероятность", f"{decision['lr_proba']:.1%}")
239
+ st.write(f"Отступ: {decision['lr_margin']:.1%}")
240
+ if decision['lr_confident']:
241
+ st.success("✅ Уверена")
242
+ else:
243
+ st.warning("⚠️ Не уверена")
244
+
245
+ with col2:
246
+ st.markdown(f"**⚡ {second_model_name}**")
247
+ if decision['second_used']:
248
+ st.metric("Вероятность", f"{decision['second_proba']:.1%}")
249
+ st.write(f"Отступ: {decision['second_margin']:.1%}")
250
+ if decision['second_confident']:
251
+ st.success("✅ Уверен")
252
+ else:
253
+ st.warning("⚠️ Не уверен")
254
+ else:
255
+ st.info("⏳ Не вызывался")
256
+
257
+ st.markdown("---")
258
+ if decision['needs_review']:
259
+ st.warning("👨‍💼 **РУЧНОЙ РАЗБОР**")
260
+ st.info("Модели не уверены - требуется проверка специалистом")
261
+ else:
262
+ col1, col2 = st.columns(2)
263
+ with col1:
264
+ if decision['final_decision'] == 0:
265
+ st.success("✅ **КРЕДИТ ОДОБРЕН**")
266
+ else:
267
+ st.error("❌ **КРЕДИТ НЕ ОДОБРЕН**")
268
+ with col2:
269
+ st.metric("Модель", decision['model_used'])
270
+
271
+ # ДЕТАЛЬНЫЙ АНАЛИЗ LR
272
+ st.markdown("---")
273
+ st.subheader("🔍 Детальный анализ: Logistic Regression")
274
+
275
+ feature_names = processed_scaled.columns.tolist()
276
+ interpretation = interpret_lr(processed_scaled, lr_model, feature_names)
277
+
278
+ tab1, tab2 = st.tabs(["📊 Вклад в логит", "📈 Влияние на вероятность"])
279
+
280
+ with tab1:
281
+ st.markdown("🔴 Положительный вклад = ↑ риск, 🟢 Отрицательный = ↓ риск")
282
+ fig1 = plot_feature_importance_sns(interpretation['logit_contributions'])
283
+ st.pyplot(fig1)
284
+
285
+ with st.expander("📋 Все вклады"):
286
+ display_df = interpretation['logit_contributions'][
287
+ ['feature', 'value', 'coefficient', 'logit_contribution']].copy()
288
+ display_df['Описание'] = display_df['feature'].apply(get_feature_display_name)
289
+ display_df = display_df[['Описание', 'value', 'coefficient', 'logit_contribution']]
290
+ display_df.columns = ['Признак', 'Значение', 'Коэф', 'Вклад']
291
+ display_df = display_df.round(3)
292
+ st.dataframe(display_df)
293
+
294
+ with tab2:
295
+ st.markdown("🔴 Положительное = фактор ↑ риск, 🟢 Отрицательное = ↓ риск")
296
+ fig2 = plot_marginal_effects_sns(interpretation['marginal_effects'])
297
+ st.pyplot(fig2)
298
+
299
+ with st.expander("📋 Все эффекты"):
300
+ display_df = interpretation['marginal_effects'][['feature', 'marginal_effect']].copy()
301
+ display_df['Описание'] = display_df['feature'].apply(get_feature_display_name)
302
+ display_df = display_df[['Описание', 'marginal_effect']]
303
+ display_df.columns = ['Признак', 'Влияние']
304
+ display_df['Влияние'] = display_df['Влияние'].map('{:.1%}'.format)
305
+ st.dataframe(display_df)
306
+
307
+ st.info(f"Итоговая вероятность дефолта (LR): {interpretation['probability']:.1%}")
308
+
309
+ # ДЕТАЛЬНЫЙ АНАЛИЗ ВТОРОЙ МОДЕЛИ (SHAP для tree-based)
310
+ if decision['second_used'] and second_model_name in ['XGBoost', 'LightGBM', 'Random Forest', 'CatBoost']:
311
+ plot_shap_analysis(second_model, processed_scaled, feature_names, second_model_name)
312
+
313
+ # КНОПКА НАЗАД
314
+ if st.button("◀️ Вернуться к выбору способов"):
315
+ st.session_state.step = 'input'
316
+ st.rerun()
317
+
318
+ st.markdown("---")
319
+ col1, col2, col3 = st.columns([1, 2, 1])
320
+ with col2:
321
+ if st.button("🏠 На главную", use_container_width=True):
322
+ st.switch_page("main.py")
323
+
324
+ st.markdown("---")
325
+ st.caption("🏦 GiveMeSomeCredit - Интерпретируемый кредитный скоринг | Модели: Logistic Regression + выбор")
326
+
327
+
328
+ if __name__ == "__main__":
329
+ main()
app/pages/simulation.py ADDED
@@ -0,0 +1,345 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import os
5
+ import sys
6
+ import tempfile
7
+ import time
8
+ from datetime import datetime
9
+ from PIL import Image
10
+ import matplotlib.pyplot as plt
11
+
12
+ # Остальные импорты...
13
+
14
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
15
+ from app.utils.data_loader import load_artifacts
16
+ from app.simulation.core.traffic_generator import TrafficGenerator
17
+ from app.simulation.core.processor import ApplicationProcessor
18
+ from app.simulation.controllers.pid import PIDController
19
+ from app.simulation.visualization.plots import (
20
+ plot_queue_dynamics,
21
+ plot_specialist_load,
22
+ plot_inflow,
23
+ plot_parameters_history,
24
+ plot_detailed_decisions
25
+ )
26
+ # ============================================================================
27
+ # БЛОК АНИМАЦИИ: Импорт функций для визуализации
28
+ # ============================================================================
29
+ from app.simulation.visualization.animation import create_simulation_video
30
+
31
+ # ============================================================================
32
+
33
+
34
def minutes_to_time(minutes, start_time="00:00"):
    """Convert a minute offset from a start time into an ``HH:MM`` string.

    Args:
        minutes: Number of minutes elapsed since ``start_time``. Non-integer
            values (e.g. floats) are truncated to whole minutes.
        start_time: Clock time of minute zero, formatted as ``"HH:MM"``.

    Returns:
        The resulting clock time as ``"HH:MM"``, wrapped around 24 hours.
    """
    start_hour, start_min = map(int, start_time.split(':'))
    # int() keeps the ":02d" format below valid for float offsets as well.
    total_minutes = start_hour * 60 + start_min + int(minutes)
    # Wrap to a single day first, then split into hours and minutes.
    hour, minute = divmod(total_minutes % (24 * 60), 60)
    return f"{hour:02d}:{minute:02d}"
41
+
42
+
43
def main():
    """Render the Streamlit simulation page.

    Reads the simulation parameters from the sidebar, runs a minute-by-minute
    simulation over the test dataset when the run button is pressed, stores
    the outcome in ``st.session_state`` and then renders summary metrics,
    plots and the optional video report from that stored outcome.
    """
    st.title("📊 Симуляция работы системы")

    # Artifact locations, resolved three directory levels above this file.
    # abspath makes the result independent of the current working directory.
    PROJECT_PATH = os.path.dirname(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    )
    MODELS_PATH = os.path.join(PROJECT_PATH, 'models/best/train_150/')
    PREPROCESSOR_PATH = os.path.join(PROJECT_PATH, 'preprocessors/')
    TEST_DATA_PATH = os.path.join(PROJECT_PATH, 'datasets/cs-test.csv')

    preprocessor, scaler, models = load_artifacts(MODELS_PATH, PREPROCESSOR_PATH)

    # Logistic Regression is always the first stage, so it is not offered
    # as the second (escalation) model.
    available_models = [name for name in models if name != 'Logistic Regression']

    # Second-model selection
    st.sidebar.subheader("🤖 Выбор модели")
    second_model_name = st.sidebar.selectbox(
        "Вторая модель для эскалации",
        available_models,
        index=0
    )

    # Simulation parameters
    st.sidebar.header("⚙️ Параметры")
    # Animation: the specialist count is capped at 400 (20x20 grid).
    specialists_count = st.sidebar.slider("Количество специалистов (модели)", 10, 400, 100, 10)
    business_specialists_count = st.sidebar.slider("Количество экспертов (бизнес-правила)", 1, 100, 30, 1)

    business_time = st.sidebar.slider("Время обработки бизнес правил(мин)", 5, 30, 15, 5)
    base_time = st.sidebar.slider("Базовое время обработки (мин)", 2, 15, 5)

    target_load = st.sidebar.slider(
        "Целевая загрузка специалистов", 0.5, 1.0, 0.8, 0.05,
        help="0.8 = 80% - оставляем запас на пики")

    st.sidebar.subheader("🎯 Порог одобрения")
    fixed_threshold = st.sidebar.slider(
        "Порог (фиксированный)",
        0.3, 0.7, 0.5, 0.05,
        help="Порог одобрения - стратегический параметр, не меняется PID"
    )

    st.sidebar.subheader("🎯 Начальные отступы (%)")

    lr_low_pct = st.sidebar.slider("LR нижний отступ (% от порога)", 0, 100, 20, 5,
                                   help="% от расстояния между 0 и порогом")
    lr_high_pct = st.sidebar.slider("LR верхний отступ (% от 1-порога)", 0, 100, 20, 5,
                                    help="% от расстояния между порогом и 1")
    second_low_pct = st.sidebar.slider("Вторая модель нижний (%)", 0, 100, 20, 5)
    second_high_pct = st.sidebar.slider("Вторая модель верхний (%)", 0, 100, 20, 5)

    # Convert the percentages into absolute margins around the threshold.
    init_lr_low = fixed_threshold * lr_low_pct / 100
    init_lr_high = (1 - fixed_threshold) * lr_high_pct / 100
    init_second_low = fixed_threshold * second_low_pct / 100
    init_second_high = (1 - fixed_threshold) * second_high_pct / 100

    # PID parameters
    st.sidebar.subheader("🎛️ PID регулятор")
    use_pid = st.sidebar.checkbox("Включить PID", value=True)

    # Animation widgets. Their values are not used anywhere in this function;
    # the video report below has its own step/FPS sliders.
    st.sidebar.subheader("🎬 Анимация")
    st.sidebar.checkbox("Создать GIF после симуляции", value=False)
    st.sidebar.slider("FPS для GIF", 5, 30, 10, 5)

    if use_pid:
        kp = st.sidebar.slider("P (пропорциональный)", 0.0, 1.0, 0.33)
        ki = st.sidebar.slider("I (интегральный)", 0.0, 1.0, 0.03)
        kd = st.sidebar.slider("D (дифференциальный)", 0.0, 1.0, 0.22)
        w_load = st.sidebar.slider("Вес загрузки", 0.0, 1.0, 0.3)

    # Run button
    if st.button("🎬 Запустить симуляцию 24 часа"):
        with st.spinner("Загрузка данных и симуляция..."):
            # 1. Load the test dataset; the target column is dropped if present.
            test_df = pd.read_csv(TEST_DATA_PATH)
            if 'SeriousDlqin2yrs' in test_df.columns:
                test_df = test_df.drop(columns=['SeriousDlqin2yrs'])
            test_pool = test_df.to_dict('records')

            # 2. Generate the per-minute application counts, starting from
            #    the current wall-clock time.
            current_time = datetime.now()
            start_hour = current_time.hour
            start_minute = current_time.minute
            start_time = f"{start_hour:02d}:{start_minute:02d}"

            gen = TrafficGenerator(total_applications=len(test_pool))
            minute_counts = gen.generate_minute_counts(start_hour=start_hour, start_minute=start_minute)

            # Kept for the plots rendered on later reruns.
            st.session_state.start_time = start_time
            st.session_state.minute_counts = minute_counts

            # 3. Create the processor.
            processor = ApplicationProcessor(
                lr_model=models['Logistic Regression'],
                second_model=models[second_model_name],
                second_model_name=second_model_name,
                specialists_count=specialists_count,
                business_specialists_count=business_specialists_count,
                base_processing_time=base_time,
                business_processing_time=business_time
            )

            # 4. Create the PID controller if enabled.
            if use_pid:
                pid = PIDController(
                    init_threshold=fixed_threshold,
                    kp_load=kp, ki_load=ki, kd_load=kd,
                    load_weight=w_load,
                    init_lr_low=init_lr_low,
                    init_lr_high=init_lr_high,
                    init_second_low=init_second_low,
                    init_second_high=init_second_high,
                    target_load=target_load
                )
            else:
                pid = None

            # 5. Minute-by-minute simulation.
            idx = 0
            progress_bar = st.progress(0)
            n_steps = len(minute_counts)

            # One animation frame is recorded per simulated minute; the video
            # step slider below thins them out at render time.
            animation_frames = []

            for step, n_apps in enumerate(minute_counts):
                # Take the next n_apps applications from the pool.
                batch = test_pool[idx:idx + n_apps]
                idx += n_apps

                # Current margins: PID-controlled, or fixed when PID is off.
                if pid is not None:
                    margins = pid.get_margins()
                    lr_margins = [margins['lr_low'], margins['lr_high']]
                    second_margins = [margins['second_low'], margins['second_high']]
                else:
                    # NOTE(review): the sidebar "initial margins" are not used
                    # in this branch; fixed values are applied instead.
                    lr_margins = [0.35]
                    second_margins = [0.4]

                # Process the batch. The threshold is never changed by PID.
                result = processor.process_batch(
                    batch, preprocessor, scaler,
                    threshold=fixed_threshold,
                    lr_margins=lr_margins,
                    second_margins=second_margins,
                    current_time=step
                )

                # Load is computed unconditionally so that animation frames
                # carry the real value even when PID is disabled.
                load = result['specialists_busy'] / specialists_count
                if pid is not None:
                    pid.update(load)

                frame_data = {
                    'time': step,
                    'step': step,  # duplicate of 'time', kept for the video code
                    'time_str': minutes_to_time(step, start_time),
                    'inflow': n_apps,
                    'inflow_history': minute_counts[:step + 1],
                    'load_history': [v / specialists_count for v in processor.stats['specialist_busy'][:step + 1]],
                    'queue': result['queue_size'],
                    'business_queue': result.get('business_queue_size', 0),
                    'load': load,
                    'specialist_states': processor.specialists.copy(),
                    'cumulative': {
                        'total_processed': processor.stats['total_processed'],
                        'auto_approved': processor.stats['auto_approved'],
                        'auto_declined': processor.stats['auto_declined'],
                        'manual_processed': processor.stats['manual_processed'],
                        'business_manual_processed': processor.stats.get('business_manual_processed', 0)
                    }
                }
                animation_frames.append(frame_data)

                # Update progress.
                progress_bar.progress((step + 1) / n_steps)

            # 6. Persist the results across Streamlit reruns.
            st.session_state.processor = processor
            st.session_state.pid_history = pid.get_history() if pid is not None else None
            st.session_state.simulation_done = True
            st.session_state.batch_stats = processor.batch_stats
            st.session_state.animation_frames = animation_frames
            # Remember the parameters this run used: the sidebar may change on
            # a later rerun while the stored results stay the same.
            st.session_state.sim_specialists_count = specialists_count
            st.session_state.sim_second_model_name = second_model_name

    # Results
    if st.session_state.get('simulation_done', False):
        st.success("✅ Симуляция завершена!")

        stats = st.session_state.processor.stats
        start_time = st.session_state.get('start_time', '00:00')
        # Use the values of the stored run, falling back to the sidebar.
        sim_specialists_count = st.session_state.get('sim_specialists_count', specialists_count)
        sim_second_model_name = st.session_state.get('sim_second_model_name', second_model_name)

        # Quick statistics
        col1, col2, col3, col4, col5 = st.columns(5)
        col1.metric("Всего заявок", stats['total_processed'])
        col2.metric("Одобрено авто", stats['auto_approved'])
        col3.metric("Отказ авто", stats['auto_declined'])
        col4.metric("Ручной разбор", stats['manual_processed'])
        manual_rate = stats['manual_sent'] / stats['total_processed'] * 100 if stats['total_processed'] > 0 else 0
        col5.metric("Ручной разбор %", f"{manual_rate:.1f}%")

        # Plots: only calls into plots.py; each figure is closed after use.
        st.subheader("📈 Графики")

        # Queues
        st.pyplot(plot_queue_dynamics(
            queue_history=stats['queue_history'],
            business_queue_history=stats.get('business_queue_history'),
            start_time=start_time
        ))
        plt.close()

        # Specialist load
        st.pyplot(plot_specialist_load(
            specialist_busy_history=stats['specialist_busy'],
            specialists_count=sim_specialists_count,
            start_time=start_time
        ))
        plt.close()

        # Incoming traffic
        st.pyplot(plot_inflow(
            minute_counts=st.session_state.minute_counts,
            start_time=start_time
        ))
        plt.close()

        # Detailed decision analysis
        st.pyplot(plot_detailed_decisions(
            batch_stats=st.session_state.batch_stats,
            second_model_name=sim_second_model_name,
            start_time=start_time
        ))
        plt.close()

        # PID parameters. pid_history is None when the run had PID disabled,
        # in which case there is no parameter history to plot.
        pid_history = st.session_state.get('pid_history')
        if pid_history is not None:
            st.pyplot(plot_parameters_history(
                pid_history=pid_history,
                second_model_name=sim_second_model_name,
                start_time=start_time
            ))
            plt.close()
        else:
            st.info("PID-регулятор отключён — история параметров недоступна")

        # Video report
        if st.session_state.get('animation_frames'):
            st.divider()
            st.subheader("🎥 Настройки видео-отчета")

            col_v1, col_v2 = st.columns(2)
            with col_v1:
                # Frame step: every v_step-th recorded frame is rendered.
                v_step = st.slider("Шаг кадров (1 = каждая минута)", 1, 30, 1,
                                   help="Чем меньше шаг, тем плавнее видео, но дольше рендеринг")
            with col_v2:
                v_fps = st.slider("Скорость видео (FPS)", 10, 60, 24,
                                  help="Количество кадров в секунду")

            if st.button("🎬 Сгенерировать видео", type="primary", use_container_width=True):
                with st.spinner("Рендеринг видео..."):
                    video_path = create_simulation_video(
                        st.session_state.animation_frames[::v_step],
                        sim_specialists_count,
                        sim_second_model_name,
                        fps=v_fps
                    )
                    st.video(video_path)
                    st.success("✅ Видео готово! Вы можете его скачать или перематывать.")

    # Navigation back to the main page
    st.write("")
    col1, col2, col3 = st.columns([1, 2, 1])
    with col2:
        if st.button("🏠 На главную", use_container_width=True):
            st.switch_page("main.py")
342
+
343
+
344
+ if __name__ == "__main__":
345
+ main()
app/simulation/.DS_Store ADDED
Binary file (8.2 kB). View file
 
app/simulation/__init__.py ADDED
File without changes
app/simulation/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (180 Bytes). View file
 
app/simulation/controllers/__init__.py ADDED
File without changes
app/simulation/controllers/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (192 Bytes). View file
 
app/simulation/controllers/__pycache__/base.cpython-311.pyc ADDED
Binary file (1.72 kB). View file
 
app/simulation/controllers/__pycache__/pid.cpython-311.pyc ADDED
Binary file (5.3 kB). View file
 
app/simulation/controllers/base.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+
3
+
4
class BaseController(ABC):
    """Common interface shared by all controllers."""

    def __init__(self, name="Base"):
        # The history starts empty for every new controller instance.
        self.name, self.history = name, []

    @abstractmethod
    def update(self, current_state, target_state, dt=1.0):
        """Compute new control parameters.

        Args:
            current_state: Current state of the system (queue, load).
            target_state: Desired state.
            dt: Time step.

        Returns:
            The new thresholds and margins.
        """

    def get_margins(self, hour=None):
        """Return the current margins for LR and the second model.

        The base implementation is a stub and returns ``None``.
        """
        return None
app/simulation/controllers/pid.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ from .base import BaseController
4
+
5
+
6
class PIDController(BaseController):
    """PID controller that adapts the margins to the specialists' load."""

    # Fraction of the controller output applied to the margins per update.
    MARGIN_STEP_SCALE = 0.1
    # Margin attributes managed by the controller, in reporting order.
    _MARGIN_NAMES = ('lr_low', 'lr_high', 'second_low', 'second_high')

    def __init__(self, name="PID",
                 kp_load=0.1, ki_load=0.01, kd_load=0.05,
                 load_weight=1.0,
                 # Initial parameter values
                 init_threshold=0.5,
                 init_lr_low=0.3, init_lr_high=0.4,
                 init_second_low=0.35, init_second_high=0.45,
                 target_load=0.8):
        """Create the controller.

        Args:
            name: Controller name passed to the base class.
            kp_load, ki_load, kd_load: PID gains applied to the load error.
            load_weight: Multiplier applied to the combined PID output.
            init_threshold: Decision threshold; used to derive margin bounds.
            init_lr_low, init_lr_high: Initial lower/upper LR margins.
            init_second_low, init_second_high: Initial lower/upper margins
                of the second model.
            target_load: Load the controller steers towards.
        """
        super().__init__(name)

        # PID gains for the load error
        self.kp_load = kp_load
        self.ki_load = ki_load
        self.kd_load = kd_load

        self.load_weight = load_weight
        self.target_load = target_load

        # PID state
        self.prev_error_load = 0
        self.integral_load = 0

        # Initial parameters
        self.init_threshold = init_threshold
        self.init_lr_low = init_lr_low
        self.init_lr_high = init_lr_high
        self.init_second_low = init_second_low
        self.init_second_high = init_second_high

        # Current parameters (margins)
        self.threshold = init_threshold
        self.lr_low = init_lr_low
        self.lr_high = init_lr_high
        self.second_low = init_second_low
        self.second_high = init_second_high

        # Margin bounds. The upper bound is never allowed below the lower
        # one, so clipping stays well defined for extreme thresholds.
        lower = 0.05
        below_threshold = (lower, max(lower, self.threshold - 0.05))
        above_threshold = (lower, max(lower, 1 - self.threshold - 0.05))
        self.bounds = {
            'lr_low': below_threshold,
            'lr_high': above_threshold,
            'second_low': below_threshold,
            'second_high': above_threshold
        }

        # Anti-windup limit for the integral term
        self.integral_limit = 1.0

    def update(self, current_load, target_state=None, dt=1.0):
        """Run one PID step and adapt the margins.

        The signature is compatible with ``BaseController.update``; with the
        default ``target_state`` and ``dt`` the result is the same as a plain
        ``update(current_load)`` call.

        Args:
            current_load: Current specialists' load (0-1).
            target_state: Optional target load for this step; defaults to
                ``self.target_load``.
            dt: Time step; scales the integral and derivative terms.

        Returns:
            The current margins, as returned by ``get_margins()``.

        Raises:
            ValueError: If ``dt`` is not positive.
        """
        if dt <= 0:
            raise ValueError("dt must be positive")

        target = self.target_load if target_state is None else target_state

        # Load error
        error_load = target - current_load

        # PID terms for the load
        P_load = self.kp_load * error_load
        self.integral_load = np.clip(
            self.integral_load + error_load * dt,
            -self.integral_limit,
            self.integral_limit
        )
        I_load = self.ki_load * self.integral_load
        D_load = self.kd_load * (error_load - self.prev_error_load) / dt
        self.prev_error_load = error_load

        # Controller output
        output = self.load_weight * (P_load + I_load + D_load)

        # Adapt the margins
        self._update_parameters(output)

        # Record history
        self.history.append({
            'time': len(self.history),
            'error_load': error_load,
            'output': output,
            'threshold': self.threshold,
            'lr_low': self.lr_low,
            'lr_high': self.lr_high,
            'second_low': self.second_low,
            'second_high': self.second_high,
            'load': current_load,
        })

        return self.get_margins()

    def _update_parameters(self, output):
        """Shift every margin by the scaled output, clipped to its bounds."""
        delta = output * self.MARGIN_STEP_SCALE
        for margin_name in self._MARGIN_NAMES:
            lower, upper = self.bounds[margin_name]
            shifted = getattr(self, margin_name) + delta
            setattr(self, margin_name, np.clip(shifted, lower, upper))

    def get_margins(self, hour=None):
        """Return the current margins as a dict; ``hour`` is not used."""
        return {margin_name: getattr(self, margin_name)
                for margin_name in self._MARGIN_NAMES}

    def get_history(self):
        """Return the recorded history as a DataFrame for visualization."""
        return pd.DataFrame(self.history)
app/simulation/core/__init__.py ADDED
File without changes
app/simulation/core/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (185 Bytes). View file
 
app/simulation/core/__pycache__/processor.cpython-311.pyc ADDED
Binary file (13.8 kB). View file
 
app/simulation/core/__pycache__/traffic_generator.cpython-311.pyc ADDED
Binary file (13.4 kB). View file
 
app/simulation/core/processor.py ADDED
@@ -0,0 +1,339 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ from app.models.escalation import escalation_decision
4
+ from app.models.escalation import check_business_rules
5
+
6
+
7
def processing_time_function(lr_proba, second_proba, threshold=0.5, base_time=5,
                             lr_weight=1.0, second_weight=1.5):
    """Draw a random processing time for an application in manual review.

    The closer the combined probability is to the threshold, the higher the
    uncertainty and the longer the mean of the exponential distribution the
    time is drawn from (between ``base_time`` and ``4 * base_time``).

    Args:
        lr_proba: Probability from the LR model, or ``None`` if unavailable.
        second_proba: Probability from the second model, or ``None`` if
            unavailable.
        threshold: Decision threshold the probabilities are compared with.
        base_time: Mean processing time at zero uncertainty.
        lr_weight: Weight of ``lr_proba`` in the combined probability.
        second_weight: Weight of ``second_proba`` in the combined probability.

    Returns:
        The processing time, never less than 1.
    """
    # Queue entries may carry None for a probability; fall back to whatever
    # is available instead of failing on None arithmetic.
    if lr_proba is None and second_proba is None:
        # No estimate at all: treat the case as maximally uncertain.
        proba = threshold
    elif second_proba is None:
        proba = lr_proba
    elif lr_proba is None:
        proba = second_proba
    else:
        total_weight = lr_weight + second_weight
        proba = (lr_proba * lr_weight + second_proba * second_weight) / total_weight

    # Uncertainty is 1 at the threshold and 0 at the farthest possible point.
    margin = abs(proba - threshold)
    max_margin = max(threshold, 1 - threshold)
    uncertainty = 1 - (margin / max_margin)

    mean_time = base_time * (1 + 3 * uncertainty)
    processing_time = np.random.exponential(scale=mean_time)

    return max(1, processing_time)
23
+
24
+
25
+ class ApplicationProcessor:
26
+ def __init__(self, lr_model, second_model, second_model_name,
27
+ specialists_count=5, # основные специалисты (модели)
28
+ business_specialists_count=2, # эксперты (бизнес-правила)
29
+ base_processing_time=5,
30
+ business_processing_time=10, # эксперты дольше копаются
31
+ lr_weight=1.0, second_weight=1.5):
32
+ self.lr_model = lr_model
33
+ self.second_model = second_model
34
+ self.second_model_name = second_model_name
35
+ self.specialists_count = specialists_count
36
+ self.business_specialists_count = business_specialists_count
37
+ self.base_processing_time = base_processing_time
38
+ self.business_processing_time = business_processing_time
39
+ self.lr_weight = lr_weight
40
+ self.second_weight = second_weight
41
+
42
+ self.specialists = [0] * specialists_count
43
+ self.business_specialists = [0] * business_specialists_count # отдельный пул
44
+ self.manual_queue = [] # очередь от моделей
45
+ self.business_queue = [] # очередь от бизнес-правил
46
+
47
+ self.stats = {
48
+ 'total_processed': 0,
49
+ 'auto_approved': 0,
50
+ 'auto_declined': 0,
51
+ 'manual_sent': 0,
52
+ 'manual_processed': 0,
53
+ 'business_manual_sent': 0,
54
+ 'business_manual_processed': 0,
55
+ 'queue_history': [],
56
+ 'business_queue_history': [],
57
+ 'wait_times': [],
58
+ 'business_wait_times': [],
59
+ 'specialist_busy': [],
60
+ 'business_specialist_busy': [],
61
+ 'business_rules_manual': 0,
62
+ 'business_rules_auto': 0
63
+ }
64
+ self.batch_stats = []
65
+
66
+ def process_batch(self, applications_batch, preprocessor, scaler,
67
+ threshold, lr_margins, second_margins, current_time):
68
+ """
69
+ Обрабатывает батч заявок за текущую минуту (батчевая версия)
70
+ """
71
+ minute_results = {
72
+ 'new_apps': len(applications_batch),
73
+ 'auto_decisions': [],
74
+ 'new_manual': 0,
75
+ 'new_business_manual': 0,
76
+ 'processed_manual': 0,
77
+ 'processed_business_manual': 0,
78
+ 'queue_size': 0,
79
+ 'business_queue_size': 0,
80
+ 'specialists_busy': sum(1 for s in self.specialists if s > 0),
81
+ 'business_specialists_busy': sum(1 for s in self.business_specialists if s > 0),
82
+ 'business_rules': 0
83
+ }
84
+
85
+ # 1. Уменьшаем время работы специалистов
86
+ self.specialists = [max(0, s - 1) for s in self.specialists]
87
+ self.business_specialists = [max(0, s - 1) for s in self.business_specialists]
88
+
89
+ if not applications_batch:
90
+ minute_results['queue_size'] = len(self.manual_queue)
91
+ minute_results['business_queue_size'] = len(self.business_queue)
92
+ self.stats['queue_history'].append(len(self.manual_queue))
93
+ self.stats['business_queue_history'].append(len(self.business_queue))
94
+ self.stats['specialist_busy'].append(minute_results['specialists_busy'])
95
+ self.stats['business_specialist_busy'].append(minute_results['business_specialists_busy'])
96
+ return minute_results
97
+
98
+ # 2. Превращаем батч в DataFrame для удобства
99
+ df = pd.DataFrame(applications_batch)
100
+
101
+ # 3. Применяем бизнес-правила ко всем заявкам (БАТЧЕВО)
102
+ manual_mask, auto_reject_mask, messages, auto_decisions = check_business_rules(df)
103
+
104
+ # Сохраняем статистику по бизнес-правилам
105
+ business_manual_count = manual_mask.sum()
106
+ business_auto_count = auto_reject_mask.sum()
107
+
108
+ # Инициализируем
109
+ n = len(applications_batch)
110
+ model_indices = []
111
+
112
+ # 4. Обрабатываем результаты бизнес-правил
113
+ for idx in range(n):
114
+ if manual_mask[idx]:
115
+ # Ручной разбор по бизнес-правилам - в отдельную очередь
116
+ self.business_queue.append({
117
+ 'app': applications_batch[idx],
118
+ 'arrival_time': current_time,
119
+ 'reason': 'business_rules',
120
+ 'message': messages[idx],
121
+ 'lr_proba': None,
122
+ 'second_proba': None
123
+ })
124
+ minute_results['new_business_manual'] += 1
125
+ minute_results['business_rules'] += 1
126
+ self.stats['business_rules_manual'] += 1
127
+ self.stats['business_manual_sent'] += 1
128
+
129
+ elif auto_reject_mask[idx]:
130
+ # Автоматический отказ по бизнес-правилам
131
+ decision = {
132
+ 'final_decision': auto_decisions[idx], # всегда 1
133
+ 'model_used': 'Business Rules',
134
+ 'probability': 1.0,
135
+ 'needs_review': False,
136
+ 'message': messages[idx]
137
+ }
138
+ minute_results['auto_decisions'].append(decision)
139
+ self.stats['auto_declined'] += 1
140
+ self.stats['business_rules_auto'] += 1
141
+ self.stats['total_processed'] += 1
142
+
143
+ else:
144
+ # Заявка идет в модели
145
+ model_indices.append(idx)
146
+
147
+ # Инициализируем переменные для статистики моделей
148
+ lr_confident_count = 0
149
+ second_confident_count = 0
150
+ second_uncertain_count = 0
151
+
152
+ # 5. Батчевая обработка моделей
153
+ if model_indices:
154
+ # Берём только заявки, которые прошли бизнес-правила
155
+ df_models = df.iloc[model_indices].copy()
156
+
157
+ # Формируем DataFrame для моделей
158
+ model_df = pd.DataFrame({
159
+ 'RevolvingUtilizationOfUnsecuredLines': df_models['RevolvingUtilizationOfUnsecuredLines'],
160
+ 'age': df_models['age'],
161
+ 'NumberOfTime30-59DaysPastDueNotWorse': df_models['NumberOfTime30-59DaysPastDueNotWorse'],
162
+ 'DebtRatio': df_models['DebtRatio'].fillna(0),
163
+ 'MonthlyIncome': df_models['MonthlyIncome'].fillna(0),
164
+ 'NumberOfOpenCreditLinesAndLoans': df_models['NumberOfOpenCreditLinesAndLoans'],
165
+ 'NumberOfTimes90DaysLate': df_models['NumberOfTimes90DaysLate'],
166
+ 'NumberRealEstateLoansOrLines': df_models['NumberRealEstateLoansOrLines'],
167
+ 'NumberOfTime60-89DaysPastDueNotWorse': df_models['NumberOfTime60-89DaysPastDueNotWorse'],
168
+ 'NumberOfDependents': df_models['NumberOfDependents'].fillna(0)
169
+ })
170
+
171
+ # Вызываем escalation_decision для всего батча
172
+ batch_decisions, batch_manual_mask, stats = escalation_decision(
173
+ model_df,
174
+ self.lr_model,
175
+ self.second_model,
176
+ self.second_model_name,
177
+ threshold=threshold,
178
+ lr_margins=lr_margins,
179
+ second_margins=second_margins,
180
+ preprocessor=preprocessor,
181
+ scaler=scaler
182
+ )
183
+
184
+ # Сохраняем статистику из escalation_decision
185
+ lr_confident_count = stats['lr_confident']
186
+ second_confident_count = stats['second_confident']
187
+ second_uncertain_count = stats['second_uncertain']
188
+
189
+ # print(f"Статистика батча: бизнес-ручной={business_manual_count}, "
190
+ # f"бизнес-отказ={business_auto_count}, "
191
+ # f"LR уверен={lr_confident_count}, "
192
+ # f"вторая уверен={second_confident_count}, "
193
+ # f"вторая не уверен={second_uncertain_count}")
194
+
195
+ # Распределяем результаты по исходным индексам
196
+ for local_idx, orig_idx in enumerate(model_indices):
197
+ decision = batch_decisions[local_idx]
198
+
199
+ if decision['needs_review']:
200
+ self.manual_queue.append({
201
+ 'app': applications_batch[orig_idx],
202
+ 'arrival_time': current_time,
203
+ 'reason': 'model_uncertainty',
204
+ 'decision': decision,
205
+ 'lr_proba': decision.get('lr_proba'),
206
+ 'second_proba': decision.get('second_proba')
207
+ })
208
+ minute_results['new_manual'] += 1
209
+ self.stats['manual_sent'] += 1
210
+ else:
211
+ minute_results['auto_decisions'].append(decision)
212
+ if decision['final_decision'] == 0:
213
+ self.stats['auto_approved'] += 1
214
+ else:
215
+ self.stats['auto_declined'] += 1
216
+
217
+ self.stats['total_processed'] += 1
218
+
219
+ # Сохраняем общую статистику батча
220
+ self.batch_stats.append({
221
+ 'time': current_time,
222
+ 'business_manual': business_manual_count,
223
+ 'business_auto': business_auto_count,
224
+ 'lr_confident': lr_confident_count,
225
+ 'second_confident': second_confident_count,
226
+ 'second_uncertain': second_uncertain_count,
227
+ 'total_in_batch': len(applications_batch),
228
+ 'new_manual': minute_results['new_manual'],
229
+ 'new_business_manual': minute_results['new_business_manual'],
230
+ 'auto_total': len(minute_results['auto_decisions'])
231
+ })
232
+
233
+ # 6. Распределяем заявки из бизнес-очереди по свободным экспертам
234
+ for i in range(self.business_specialists_count):
235
+ if self.business_specialists[i] <= 0 and self.business_queue:
236
+ next_app = self.business_queue.pop(0)
237
+
238
+ wait_time = current_time - next_app['arrival_time']
239
+ self.stats['business_wait_times'].append(wait_time)
240
+
241
+ # Эксперты обрабатывают бизнес-правила
242
+ proc_time = self.business_processing_time
243
+
244
+ self.business_specialists[i] = proc_time
245
+ minute_results['processed_business_manual'] += 1
246
+ self.stats['business_manual_processed'] += 1
247
+
248
+ # 7. Распределяем заявки из основной очереди по свободным специалистам
249
+ for i in range(self.specialists_count):
250
+ if self.specialists[i] <= 0 and self.manual_queue:
251
+ next_app = self.manual_queue.pop(0)
252
+
253
+ wait_time = current_time - next_app['arrival_time']
254
+ self.stats['wait_times'].append(wait_time)
255
+
256
+ if next_app['reason'] == 'business_rules':
257
+ proc_time = self.business_processing_time
258
+ else:
259
+ # Используем функцию processing_time_function
260
+ proc_time = processing_time_function(
261
+ lr_proba=next_app.get('lr_proba', 0.5),
262
+ second_proba=next_app.get('second_proba', 0.5),
263
+ threshold=threshold,
264
+ base_time=self.base_processing_time,
265
+ lr_weight=self.lr_weight,
266
+ second_weight=self.second_weight
267
+ )
268
+
269
+ self.specialists[i] = proc_time
270
+ minute_results['processed_manual'] += 1
271
+ self.stats['manual_processed'] += 1
272
+
273
+ minute_results['queue_size'] = len(self.manual_queue)
274
+ minute_results['business_queue_size'] = len(self.business_queue)
275
+ self.stats['queue_history'].append(len(self.manual_queue))
276
+ self.stats['business_queue_history'].append(len(self.business_queue))
277
+ self.stats['specialist_busy'].append(minute_results['specialists_busy'])
278
+ self.stats['business_specialist_busy'].append(minute_results['business_specialists_busy'])
279
+
280
+ return minute_results
281
+
282
def load_test_dataset(self, filepath):
    """Load applications from a CSV file as a list of per-row dictionaries.

    The ``SeriousDlqin2yrs`` column is removed when the file contains it;
    all other columns are returned unchanged.

    Args:
        filepath: Path (or any source accepted by ``pd.read_csv``) of the CSV.

    Returns:
        A list with one ``{column: value}`` dict per CSV row.
    """
    label_column = 'SeriousDlqin2yrs'
    applications = pd.read_csv(filepath)
    # errors='ignore' keeps files without the column working as before.
    applications = applications.drop(columns=[label_column], errors='ignore')
    return applications.to_dict('records')
287
+
288
def get_queue_stats(self):
    """Summarise the current queues and the accumulated waiting statistics.

    Returns:
        A dict with the current length of both queues, the average and
        maximum recorded wait for each queue (0 when nothing was recorded),
        the stored history lists, and the manual/auto split of
        business-rule decisions.
    """
    collected = self.stats

    def _mean_and_max(samples):
        # An empty history reports 0 for both values instead of calling NumPy.
        if not samples:
            return 0, 0
        return np.mean(samples), np.max(samples)

    avg_wait, max_wait = _mean_and_max(collected['wait_times'])
    avg_business_wait, max_business_wait = _mean_and_max(collected['business_wait_times'])

    summary = {
        'current_queue': len(self.manual_queue),
        'current_business_queue': len(self.business_queue),
        'avg_wait_minutes': avg_wait,
        'max_wait_minutes': max_wait,
        'avg_business_wait_minutes': avg_business_wait,
        'max_business_wait_minutes': max_business_wait,
    }
    # History series are passed through by reference, exactly as stored.
    for key in ('queue_history', 'business_queue_history',
                'specialist_busy', 'business_specialist_busy'):
        summary[key] = collected[key]
    summary['business_rules_split'] = {
        'manual': collected['business_rules_manual'],
        'auto': collected['business_rules_auto'],
    }
    return summary
317
+
318
+ # def reset(self):
319
+ # self.specialists = [0] * self.specialists_count
320
+ # self.business_specialists = [0] * self.business_specialists_count
321
+ # self.manual_queue = []
322
+ # self.business_queue = []
323
+ # self.stats = {
324
+ # 'total_processed': 0,
325
+ # 'auto_approved': 0,
326
+ # 'auto_declined': 0,
327
+ # 'manual_sent': 0,
328
+ # 'manual_processed': 0,
329
+ # 'business_manual_sent': 0,
330
+ # 'business_manual_processed': 0,
331
+ # 'queue_history': [],
332
+ # 'business_queue_history': [],
333
+ # 'wait_times': [],
334
+ # 'business_wait_times': [],
335
+ # 'specialist_busy': [],
336
+ # 'business_specialist_busy': [],
337
+ # 'business_rules_manual': 0,
338
+ # 'business_rules_auto': 0
339
+ # }
app/simulation/core/traffic_generator.py ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import matplotlib.pyplot as plt
3
+ from datetime import datetime
4
+
5
+
6
class TrafficGenerator:
    """Build a synthetic 24-hour profile of incoming applications.

    The intensity curve is a constant background plus two Gaussian peaks
    (midday and evening) minus a Gaussian post-lunch dip. It is optionally
    multiplied by uniform random noise and by short user-defined impulses.
    """

    def __init__(self, total_applications=101503, random_seed=42):
        """Store the daily total and seed the random generator.

        Args:
            total_applications: Number of applications spread over 24 hours.
            random_seed: Seed passed to ``np.random.seed``.
        """
        self.total = total_applications
        # NOTE: this seeds the process-wide NumPy RNG, not a private generator.
        np.random.seed(random_seed)

        # Intensity parameters, including the post-lunch dip.
        self.intensity_params = {
            'background': 0.1,
            # Daytime peak (kept fairly narrow).
            'day_center': 13, 'day_amplitude': 0.9, 'day_width': 2.5,
            # Evening peak (earlier and wider).
            'evening_center': 19.5, 'evening_amplitude': 1.3, 'evening_width': 2.2,
            # Post-lunch dip.
            'afternoon_dip_center': 15.5, 'afternoon_dip_strength': 0.3,
            'afternoon_dip_width': 1.5,
            'noise_level': 0.1,
        }

    @staticmethod
    def _circular_distance(t_cycle, center):
        """Return the shortest distance in hours between two times on a 24h clock."""
        diff = np.abs(t_cycle - center)
        # Going "the other way round" the clock is 24 - diff; take the shorter.
        # The previous one-sided form (|t - c + 24|) only wrapped correctly
        # when t was earlier than the centre.
        return np.minimum(diff, 24 - diff)

    def _time_to_hours(self, time_tuple):
        """Convert an ``(hours, minutes)`` tuple to fractional hours."""
        return time_tuple[0] + time_tuple[1] / 60

    def loan_intensity_periodic(self, t, impulses=None):
        """Return the relative arrival intensity at time ``t``.

        Args:
            t: Time in hours (may be fractional); it is reduced modulo 24.
            impulses: Optional list of dicts such as
                ``[{'time': (16, 37), 'strength': 2.0}, ...]`` where ``time``
                is ``(hours, minutes)``.

        Returns:
            The intensity value (unnormalised, floored at 0.05 before noise
            and impulses are applied).
        """
        params = self.intensity_params
        t_cycle = t % 24

        bg = params['background']

        # Daytime peak (13:00); intentionally not wrapped around midnight.
        day = params['day_amplitude'] * np.exp(
            -(t_cycle - params['day_center']) ** 2 /
            (2 * params['day_width'] ** 2)
        )

        # Evening peak (19:30), wrapped around midnight.
        evening_diff = self._circular_distance(t_cycle, params['evening_center'])
        evening = params['evening_amplitude'] * np.exp(
            -(evening_diff) ** 2 / (2 * params['evening_width'] ** 2)
        )

        # Post-lunch dip (15:30).
        dip_diff = self._circular_distance(t_cycle, params['afternoon_dip_center'])
        dip = -params['afternoon_dip_strength'] * np.exp(
            -(dip_diff) ** 2 / (2 * params['afternoon_dip_width'] ** 2)
        )

        intensity = bg + day + evening + dip
        intensity = np.maximum(intensity, 0.05)  # never below the floor

        # Multiplicative uniform noise, one draw per call.
        if params['noise_level'] > 0:
            noise = 1.0 + np.random.uniform(
                -params['noise_level'],
                params['noise_level']
            )
            intensity *= noise

        # Impulses: Gaussian bumps with sigma = 0.25 h around each impulse time.
        if impulses:
            for imp in impulses:
                imp_time = self._time_to_hours(imp['time']) % 24
                imp_diff = self._circular_distance(t_cycle, imp_time)
                imp_factor = 1.0 + imp['strength'] * np.exp(-(imp_diff) ** 2 / (2 * 0.25 ** 2))
                intensity *= imp_factor

        return intensity

    def generate_minute_counts(self, start_hour=None, start_minute=0, impulses=None):
        """Return the number of applications for each of 1440 minutes.

        Args:
            start_hour: Hour at which the profile starts; when ``None`` the
                current local hour *and minute* are used.
            start_minute: Minute at which the profile starts.
            impulses: Optional impulses, e.g.
                ``[{'time': (5, 30), 'strength': 2.0}, ...]``.

        Returns:
            An integer NumPy array of length 1440 whose sum is ``self.total``.
        """
        if start_hour is None:
            now = datetime.now()
            start_hour = now.hour
            start_minute = now.minute

        start_time = start_hour + start_minute / 60

        # Offsets (in hours) of every minute from start_time to start_time + 24.
        minutes = np.arange(0, 24, 1 / 60)
        # Evaluated one by one so that every minute gets its own noise draw.
        intensity_values = np.array([
            self.loan_intensity_periodic(start_time + m, impulses)
            for m in minutes
        ])

        total_intensity = np.sum(intensity_values)
        scale_factor = self.total / total_intensity

        minute_counts = np.floor(intensity_values * scale_factor).astype(int)

        # Hand the rounding remainder to the busiest minutes so the total matches.
        total_assigned = np.sum(minute_counts)
        if total_assigned < self.total:
            remainder = self.total - total_assigned
            top_minutes = np.argsort(intensity_values)[-remainder:]
            minute_counts[top_minutes] += 1

        return minute_counts

    def generate_hourly_counts(self, start_hour=None, start_minute=0, impulses=None):
        """Return the number of applications in each of 24 consecutive hours."""
        minute_counts = self.generate_minute_counts(start_hour, start_minute, impulses)
        hourly_counts = [np.sum(minute_counts[i * 60:(i + 1) * 60]) for i in range(24)]
        return hourly_counts

    def generate_random_impulses(self, n_impulses=1, min_strength=1.5, max_strength=3.0):
        """Return ``n_impulses`` impulses with random time of day and strength."""
        impulses = []
        for _ in range(n_impulses):
            hour = np.random.randint(0, 24)
            minute = np.random.randint(0, 60)
            strength = np.random.uniform(min_strength, max_strength)
            impulses.append({'time': (hour, minute), 'strength': strength})
        return impulses

    def plot_distribution(self, start_hour=None, start_minute=0, impulses=None):
        """Plot the hourly distribution and print summary statistics.

        Returns:
            ``(hours_sorted, counts_sorted)``: hours of day in ascending order
            and the matching hourly counts.
        """
        # Resolve "now" once, so the data and the title use the same start time.
        if start_hour is None:
            now = datetime.now()
            start_hour = now.hour
            start_minute = now.minute

        hourly_counts = self.generate_hourly_counts(start_hour, start_minute, impulses)

        hours = [(start_hour + i) % 24 for i in range(24)]
        sorted_pairs = sorted(zip(hours, hourly_counts))
        hours_sorted, counts_sorted = zip(*sorted_pairs)

        plt.figure(figsize=(14, 6))

        # Bar colour depends on the time of day.
        colors = []
        for h in hours_sorted:
            if 0 <= h <= 5:
                colors.append('#2c3e50')  # night
            elif 6 <= h <= 11:
                colors.append('#3498db')  # morning
            elif 12 <= h <= 16:
                colors.append('#f39c12')  # day (with the dip)
            else:
                colors.append('#e67e22')  # evening

        plt.bar([str(h) for h in hours_sorted], counts_sorted,
                alpha=0.8, color=colors, edgecolor='black', linewidth=1)

        # Mean line.
        mean_val = np.mean(counts_sorted)
        plt.axhline(y=mean_val, color='red', linestyle='--',
                    alpha=0.7, linewidth=2, label=f'Среднее: {mean_val:.0f}')

        # Mark every impulse on the nearest hour and label it in the legend
        # (the label used to depend on the bar index, so it was usually lost).
        if impulses:
            for imp in impulses:
                imp_hours = self._time_to_hours(imp['time']) % 24
                closest_hour = min(hours_sorted, key=lambda x: abs(x - imp_hours))
                idx = list(hours_sorted).index(closest_hour)
                plt.plot(idx, counts_sorted[idx], 'g*', markersize=15,
                         label=f'Импульс {imp["strength"]:.1f}x')

        # Shade the post-lunch dip.
        dip_idx = [i for i, h in enumerate(hours_sorted) if 14 <= h <= 16]
        if dip_idx:
            plt.axvspan(dip_idx[0] - 0.4, dip_idx[-1] + 0.4, alpha=0.2, color='gray',
                        label='Послеобеденный спад')

        plt.xlabel('Час', fontsize=12)
        plt.ylabel('Количество заявок', fontsize=12)
        plt.title(f'Распределение заявок по часам (старт в {start_hour:02d}:{start_minute:02d})',
                  fontsize=14, fontweight='bold')
        plt.grid(True, alpha=0.3, axis='y')
        plt.legend(loc='upper right')
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.show()

        # Summary statistics.
        print("\n📊 Статистика распределения:")
        print(f" Всего заявок: {sum(counts_sorted)}")
        print(f" Среднее: {mean_val:.0f} заявок/час")
        print(f" Максимум: {max(counts_sorted)} заявок")
        print(f" Минимум: {min(counts_sorted)} заявок")

        return hours_sorted, counts_sorted
210
+
211
+
212
+ # Пример использования
213
+ # if __name__ == "__main__":
214
+ # # Создаём генератор
215
+ # gen = TrafficGenerator(total_applications=110000)
216
+ #
217
+ # # 1. Без импульсов
218
+ # print("Без импульсов:")
219
+ # counts = gen.generate_minute_counts(start_hour=17)
220
+ # print(f"Всего минут: {len(counts)}")
221
+ # print(f"Всего заявок: {sum(counts)}")
222
+ #
223
+ # # 2. С импульсом в 5:30 утра
224
+ # impulses = [{'time': (5, 30), 'strength': 2.0}]
225
+ # print("\nС импульсом в 5:30:")
226
+ # counts = gen.generate_minute_counts(start_hour=17, impulses=impulses)
227
+ #
228
+ # # 3. Построить график
229
+ # gen.plot_distribution(start_hour=17, impulses=impulses)
230
+ #
231
+ # # 4. Случайные импульсы
232
+ # random_impulses = gen.generate_random_impulses(n_impulses=2)
233
+ # print("\nСлучайные импульсы:", random_impulses)
234
+ # gen.plot_distribution(start_hour=17, impulses=random_impulses)
app/simulation/visualization/__init__.py ADDED
File without changes
app/simulation/visualization/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (194 Bytes). View file
 
app/simulation/visualization/__pycache__/animation.cpython-311.pyc ADDED
Binary file (15.7 kB). View file
 
app/simulation/visualization/__pycache__/plots.cpython-311.pyc ADDED
Binary file (20.8 kB). View file
 
app/simulation/visualization/animation.py ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import plotly.graph_objects as go
2
+ from plotly.subplots import make_subplots
3
+ import numpy as np
4
+
5
+
6
def minutes_to_time(minutes, start_time="00:00"):
    """Return the ``HH:MM`` clock time reached ``minutes`` after ``start_time``.

    Args:
        minutes: Offset in minutes. Float values (including NumPy floats) are
            accepted and truncated to whole minutes.
        start_time: Start of the timeline as an ``"HH:MM"`` string.

    Returns:
        The wall-clock time as a zero-padded ``"HH:MM"`` string; hours wrap
        around after 24.
    """
    start_hour, start_min = map(int, start_time.split(':'))
    # int() keeps the ':02d' format below from failing on float offsets.
    total_minutes = int(start_hour * 60 + start_min + minutes)
    hour, minute = divmod(total_minutes, 60)
    return f"{hour % 24:02d}:{minute:02d}"
12
+
13
+
14
def create_animation_frame_plotly(frame_data, specialists_count, second_model_name="XGBoost",
                                  max_inflow=60):
    """Build the Plotly dashboard figure for one simulation frame.

    Args:
        frame_data: Dict describing the frame. Keys read here:
            ``specialist_states``, ``cumulative``, ``queue``, ``time_str`` and
            ``time`` (required); ``inflow_history``, ``load_history``,
            ``business_queue`` and ``avg_wait`` (optional).
        specialists_count: Number of specialists; determines how many rows of
            20 cells the heatmap has.
        second_model_name: Accepted for interface compatibility; not used by
            this function.
        max_inflow: Fixed upper bound of the inflow chart's y-axis.

    Returns:
        A ``plotly.graph_objects.Figure`` with charts, heatmap, table and
        status card.
    """
    # Fixed x-axis for the time-series charts: a tick every 3 hours.
    time_ticks = list(range(0, 1441, 180))
    time_labels = [minutes_to_time(t, "00:00") for t in time_ticks]

    fig = make_subplots(
        rows=3, cols=2,
        subplot_titles=('📈 Динамика входящего потока', '⚙️ Загрузка специалистов (%)',
                        '👥 МОНИТОРИНГ РАБОТЫ СПЕЦИАЛИСТОВ', '',
                        '📊 Сводная статистика обработки', '🎯 Оперативные показатели'),
        specs=[
            [{'type': 'scatter'}, {'type': 'scatter'}],
            [{'type': 'heatmap', 'colspan': 2}, None],
            [{'type': 'table'}, {'type': 'scatter'}]
        ],
        row_heights=[0.25, 0.40, 0.35],
        vertical_spacing=0.1,
    )

    # --- ROW 1: time-series charts ---
    inflow_h = frame_data.get('inflow_history', [])
    load_h = frame_data.get('load_history', [])

    fig.add_trace(go.Scatter(y=inflow_h, fill='tozeroy', line=dict(color='#4361ee', width=2)), row=1, col=1)
    fig.add_trace(go.Scatter(y=[l * 100 for l in load_h], fill='tozeroy', line=dict(color='#4cc9f0', width=2)),
                  row=1, col=2)

    for col in [1, 2]:
        fig.update_xaxes(range=[0, 1440], tickvals=time_ticks, ticktext=time_labels, row=1, col=col)
        fig.update_yaxes(rangemode="tozero", row=1, col=col)

    # --- ROW 2: heatmap, exactly 20 cells wide ---
    states = np.array(frame_data['specialist_states'])
    cols = 20
    rows = int(np.ceil(specialists_count / cols))

    # Cells without a specialist stay NaN so that they are not coloured.
    z_matrix = np.full((rows, cols), np.nan)
    # Slice to the grid capacity so extra states cannot index past the matrix.
    for i, val in enumerate(states[:rows * cols]):
        r, c = divmod(i, cols)
        if val <= 0:
            # Free. "<= 0" matches the free counter below; 0.0 is the exact
            # colorscale stop for the legend's free colour.
            z_matrix[r, c] = 0.0
        elif val <= 3:
            z_matrix[r, c] = 0.4
        elif val <= 7:
            z_matrix[r, c] = 0.7
        else:
            z_matrix[r, c] = 1.0

    colorscale = [
        [0.0, '#66ccff'],  # free
        [0.4, '#4ade80'],  # 1-3 min
        [0.7, '#facc15'],  # 4-7 min
        [1.0, '#f87171']   # 8+ min
    ]

    fig.add_trace(go.Heatmap(
        z=z_matrix, colorscale=colorscale, showscale=False,
        xgap=2, ygap=2, zmin=0, zmax=1, hoverinfo='none'
    ), row=2, col=1)

    # Legend drawn above the heatmap.
    free = sum(1 for t in states if t <= 0)
    legend = (f"Свободно: <b>{free}</b> | <span style='color:#66ccff'>■</span> Свободен "
              f"<span style='color:#4ade80'>■</span> 1-3м <span style='color:#facc15'>■</span> 4-7м "
              f"<span style='color:#f87171'>■</span> 8м+")
    fig.add_annotation(text=legend, xref="paper", yref="paper", x=0.5, y=0.70, showarrow=False, font=dict(size=14))

    # --- ROW 3: summary table ---
    cum = frame_data['cumulative']
    fig.add_trace(go.Table(
        header=dict(values=['Параметр', 'Значение'], fill_color='#1e293b', font=dict(color='white', size=15),
                    height=35),
        cells=dict(values=[
            ['✅ Авто-одобрено', '❌ Авто-отказы', '👤 На рассмотрении (Manual)', '<b>ИТОГО ОБРАБОТАНО</b>'],
            [cum['auto_approved'], cum['auto_declined'],
             cum['manual_processed'] + cum['business_manual_processed'], f"<b>{cum['total_processed']}</b>"]
        ], align='left', font=dict(size=14), height=35, fill_color='#f8f9fa')
    ), row=3, col=1)

    # --- Status card ---
    q_models = frame_data['queue']  # queue waiting for specialists
    q_business = frame_data.get('business_queue', 0)  # business-rules queue
    avg_w = frame_data.get('avg_wait', 0)

    status_card = (
        f"<span style='font-size:22px; font-weight:bold;'>МОНИТОРИНГ</span><br><br>"
        f"<span style='background-color:#dcfce7; color:#166534; padding:8px; border-radius:5px;'>"
        f"<b>👤 ОЧЕРЕДЬ (СПЕЦ): {q_models}</b></span><br><br>"
        f"<span style='font-size:18px; color:#666;'>"
        f"⚙️ Бизнес-правила: {q_business}</span><br><br>"
        f"🕒 Время: <b>{frame_data['time_str']}</b><br>"
        f"⏳ Ожидание: <b>{avg_w:.1f} мин</b>"
    )

    fig.add_trace(go.Scatter(x=[0], y=[0], mode='text', text=[status_card], textfont=dict(size=16)), row=3, col=2)

    # Hide the axes of the heatmap and of the status card.
    fig.update_xaxes(visible=False, row=2, col=1)
    fig.update_yaxes(visible=False, row=2, col=1)
    fig.update_xaxes(visible=False, row=3, col=2)
    fig.update_yaxes(visible=False, row=3, col=2)

    # Fixed y-ranges keep the charts from rescaling between frames.
    fig.update_yaxes(range=[0, max_inflow], row=1, col=1)
    fig.update_yaxes(range=[0, 105], row=1, col=2)  # load is a percentage

    fig.update_layout(
        height=950,
        margin=dict(t=80, b=40, l=50, r=50),
        template="plotly_white",
        showlegend=False,
        # Transitions are disabled to avoid a blinking effect between frames.
        transition_duration=0,
        hovermode=False
    )

    # NOTE(review): datarevision is set per frame, presumably so the front end
    # refreshes data without a full redraw; confirm against the caller.
    fig.layout.datarevision = frame_data['time']
    return fig
138
+
139
+
140
+ from matplotlib.animation import FFMpegWriter
141
+
142
+ import matplotlib.pyplot as plt
143
+ import matplotlib.animation as animation
144
+ import tempfile
145
+ import numpy as np
146
+
147
+ import matplotlib.pyplot as plt
148
+ import matplotlib.animation as animation
149
+ import tempfile
150
+ import numpy as np
151
+ import os
152
+
153
+
154
+ # Внести изменения в функцию create_simulation_video в animation.py
155
def create_simulation_video(frames, specialists_count, second_model_name, fps=24):
    """Render simulation frames into an MP4 file and return its path.

    Args:
        frames: Sequence of frame dicts. Each frame provides
            ``inflow_history``, ``load_history``, ``specialist_states``,
            ``queue``, ``cumulative`` and ``time_str``; ``business_queue`` is
            optional.
        specialists_count: Number of specialists shown on the heatmap.
        second_model_name: Model name printed in the statistics panel.
        fps: Frames per second of the resulting video.

    Returns:
        Path of the temporary ``.mp4`` file, or ``None`` when ``frames`` is
        empty. The file is not deleted automatically.
    """
    if not frames:
        return None

    # NOTE: plt.style.use changes the global Matplotlib style for the process.
    plt.style.use('seaborn-v0_8-whitegrid')
    fig, axes = plt.subplots(2, 2, figsize=(16, 10), facecolor='#f8f9fa')
    plt.subplots_adjust(hspace=0.4, wspace=0.25)
    # Closed so the figure is not displayed; the animation still draws on it.
    plt.close(fig)

    def update(i):
        data = frames[i]
        for ax in axes.flatten():
            ax.clear()
            ax.set_facecolor('white')

        # 1. Inflow dynamics.
        y_inflow = data['inflow_history']
        axes[0, 0].fill_between(range(len(y_inflow)), y_inflow, color='#4361ee', alpha=0.3)
        axes[0, 0].plot(range(len(y_inflow)), y_inflow, color='#4361ee', linewidth=2)
        axes[0, 0].set_xlim(0, 1440)  # fixed time axis
        axes[0, 0].set_title("ДИНАМИКА ПОТОКА (заявок/мин)", fontsize=12, fontweight='bold')
        axes[0, 0].set_xlabel("Минуты симуляции")

        # 2. Specialist load.
        y_load = [v * 100 for v in data['load_history']]
        # An empty history has no "current" value; show 0 instead of failing.
        current_load = y_load[-1] if y_load else 0.0
        axes[0, 1].fill_between(range(len(y_load)), y_load, color='#4cc9f0', alpha=0.3)
        axes[0, 1].plot(range(len(y_load)), y_load, color='#4cc9f0', linewidth=2)
        axes[0, 1].axhline(y=80, color='#f72585', linestyle='--', alpha=0.6)
        axes[0, 1].set_xlim(0, 1440)
        axes[0, 1].set_ylim(0, 110)
        axes[0, 1].set_title(f"ЗАГРУЖЕННОСТЬ СПЕЦИАЛИСТОВ %: {current_load:.1f}%", fontsize=12, fontweight='bold')

        # 3. Heatmap of specialist states, 20 cells per row.
        states = np.array(data['specialist_states'])
        cols = 20
        rows = int(np.ceil(specialists_count / cols))
        z = np.zeros((rows, cols))
        for idx, val in enumerate(states[:rows * cols]):
            z[idx // cols, idx % cols] = val

        axes[1, 0].imshow(z, cmap='RdYlGn_r', aspect='auto', vmin=0, vmax=10)
        axes[1, 0].set_title(f"МОНИТОРИНГ: {specialists_count} СПЕЦИАЛИСТОВ", fontsize=12, fontweight='bold')
        axes[1, 0].axis('off')

        # Text legend under the heatmap.
        legend_text = "Цвета: Зеленый (Свободен) → Желтый (3-5 мин) → Красный (8+ мин)"
        axes[1, 0].text(0.5, -0.1, legend_text, ha='center', transform=axes[1, 0].transAxes, fontsize=10)

        # 4. Separate queues and summary statistics.
        ax_stat = axes[1, 1]
        ax_stat.axis('off')

        # Queue numbers turn red once the queue exceeds 50.
        q_mod_color = '#991b1b' if data['queue'] > 50 else '#166534'
        q_biz_color = '#991b1b' if data.get('business_queue', 0) > 50 else '#1e293b'

        ax_stat.text(0.25, 0.9, "ОЧЕРЕДЬ\n(МОДЕЛИ)", fontsize=10, ha='center', fontweight='bold')
        ax_stat.text(0.25, 0.78, f"{data['queue']}", fontsize=26, ha='center', fontweight='bold', color=q_mod_color)

        ax_stat.text(0.75, 0.9, "ОЧЕРЕДЬ\n(БИЗНЕС ПРАВИЛА)", fontsize=10, ha='center', fontweight='bold')
        ax_stat.text(0.75, 0.78, f"{data.get('business_queue', 0)}", fontsize=26, ha='center', fontweight='bold',
                     color=q_biz_color)

        # Summary table below the queue counters.
        cum = data['cumulative']
        stats_text = (
            f"Итоговые показатели к {data['time_str']}\n"
            f"--------------------------------------\n"
            f"ОБРАБОТАНО ВСЕГО: {cum['total_processed']}\n"
            f"Авто-одобрено: {cum['auto_approved']}\n"
            f"Авто-отказы: {cum['auto_declined']}\n"
            f"Ручной разбор (модель): {cum['manual_processed']}\n"
            f"Ручной разбор (бизнес правила): {cum['business_manual_processed']}\n"
            f"--------------------------------------\n"
            f"Используемая модель: {second_model_name}"
        )

        ax_stat.text(0.5, 0.3, stats_text, fontsize=10, fontfamily='monospace',
                     ha='center', va='center', transform=ax_stat.transAxes,
                     bbox=dict(facecolor='#f8f9fa', alpha=1, boxstyle='round,pad=1', edgecolor='#dee2e6'))

        return axes.flatten()

    ani = animation.FuncAnimation(fig, update, frames=len(frames), interval=1000 / fps)

    # Reserve a path and close our handle before the writer opens the file:
    # this avoids leaking the descriptor and lets ffmpeg write to it.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmp_file:
        video_path = tmp_file.name

    writer = animation.FFMpegWriter(fps=fps, bitrate=2000, extra_args=['-vcodec', 'libx264', '-pix_fmt', 'yuv420p'])
    ani.save(video_path, writer=writer)
    return video_path
app/simulation/visualization/plots.py ADDED
@@ -0,0 +1,374 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib.pyplot as plt
2
+ import matplotlib.animation as animation
3
+ import numpy as np
4
+
5
+
6
+
7
def minutes_to_time(minutes, start_time="00:00"):
    """Convert minutes elapsed since ``start_time`` into an ``HH:MM`` string.

    Args:
        minutes: Offset in minutes. Float values (including NumPy floats) are
            accepted and truncated to whole minutes.
        start_time: Start of the timeline as an ``"HH:MM"`` string.

    Returns:
        The wall-clock time as a zero-padded ``"HH:MM"`` string; hours wrap
        around after 24.
    """
    start_hour, start_min = map(int, start_time.split(':'))
    # int() keeps the ':02d' format below from failing on float offsets.
    total_minutes = int(start_hour * 60 + start_min + minutes)
    hour, minute = divmod(total_minutes, 60)
    return f"{hour % 24:02d}:{minute:02d}"
14
+
15
+
16
def plot_queue_dynamics(queue_history, business_queue_history=None, start_time="00:00"):
    """Draw the model queue and the business-rules queue side by side.

    Args:
        queue_history: Per-minute sizes of the model queue.
        business_queue_history: Optional per-minute sizes of the
            business-rules queue (list or NumPy array). When missing or
            empty, a "no data" placeholder is drawn instead.
        start_time: Clock time of minute 0 as ``"HH:MM"``; used for tick labels.

    Returns:
        The ``matplotlib.pyplot`` module, with the new figure current.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

    # One tick label per hour.
    total_minutes = len(queue_history)
    hours = range(0, total_minutes, 60)
    hour_labels = [minutes_to_time(m, start_time) for m in hours]

    # Chart 1: model queue.
    ax1.plot(range(total_minutes), queue_history, 'b-', linewidth=1.5)
    ax1.set_xticks(hours)
    ax1.set_xticklabels(hour_labels, rotation=45)
    ax1.set_xlabel('Время')
    ax1.set_ylabel('Размер очереди')
    ax1.set_title('Очередь моделей')
    ax1.grid(True, alpha=0.3)

    # Chart 2: business-rules queue.
    # An explicit None/len check avoids the ambiguous truth value of arrays.
    has_business_data = business_queue_history is not None and len(business_queue_history) > 0
    if has_business_data:
        # Use the series' own length so differing lengths do not break plot().
        ax2.plot(range(len(business_queue_history)), business_queue_history, 'orange', linewidth=1.5)
        ax2.set_xticks(hours)
        ax2.set_xticklabels(hour_labels, rotation=45)
        ax2.set_xlabel('Время')
        ax2.set_ylabel('Размер очереди')
        ax2.set_title('Очередь бизнес-правил')
        ax2.grid(True, alpha=0.3)
    else:
        ax2.text(0.5, 0.5, 'Нет данных', ha='center', va='center', transform=ax2.transAxes)
        ax2.set_title('Очередь бизнес-правил')
        ax2.set_xlabel('Время')

    plt.tight_layout()
    return plt
52
+
53
+
54
def plot_specialist_load(specialist_busy_history, specialists_count, start_time="00:00"):
    """Plot the share of busy specialists over time, in percent.

    Args:
        specialist_busy_history: Number of busy specialists for every minute.
        specialists_count: Total number of specialists (the 100% level).
        start_time: Clock time of minute 0 as ``"HH:MM"``; used for tick labels.

    Returns:
        The ``matplotlib.pyplot`` module, with the new figure current.
    """
    load_percent = []
    for busy in specialist_busy_history:
        load_percent.append(busy / specialists_count * 100)

    n_points = len(load_percent)
    tick_positions = range(0, n_points, 60)  # one tick per hour
    tick_labels = [minutes_to_time(pos, start_time) for pos in tick_positions]

    fig, ax = plt.subplots(figsize=(10, 4))
    ax.plot(range(n_points), load_percent, 'g-', linewidth=1.5)

    # Reference lines: full capacity and the 80% target.
    for level, colour, caption in ((100, 'r', 'Максимум'), (80, 'b', 'Цель 80%')):
        ax.axhline(y=level, color=colour, linestyle='--', alpha=0.5, label=caption)

    ax.set_xticks(tick_positions)
    ax.set_xticklabels(tick_labels, rotation=45)
    ax.set_xlabel('Время')
    ax.set_ylabel('Загрузка (%)')
    ax.set_title('Загрузка специалистов')
    ax.legend()
    ax.grid(True, alpha=0.3)
    ax.set_ylim(0, 110)

    plt.tight_layout()
    return plt
79
+
80
+
81
def plot_inflow(minute_counts, start_time="00:00"):
    """Plot the incoming application flow as a filled curve.

    A 30-minute moving average is added when more than 30 points exist, and
    a dashed horizontal line marks the overall mean.

    Args:
        minute_counts: Number of incoming applications for every minute.
        start_time: Clock time of minute 0 as ``"HH:MM"``; used for tick labels.

    Returns:
        The ``matplotlib.pyplot`` module, with the new figure current.
    """
    fig, ax = plt.subplots(figsize=(14, 5))

    total_minutes = len(minute_counts)
    minutes = range(total_minutes)

    # Filled area under the curve, with the line drawn on top of it.
    ax.fill_between(minutes, minute_counts, alpha=0.3, color='blue', label='Общий поток')
    ax.plot(minutes, minute_counts, 'b-', linewidth=1.5, alpha=0.8)

    # Moving average; mode='valid' yields total_minutes - window + 1 points.
    window = 30
    if total_minutes > window:
        smoothed = np.convolve(minute_counts, np.ones(window) / window, mode='valid')
        ax.plot(range(window - 1, total_minutes), smoothed,
                'r-', linewidth=2.5, label=f'Среднее за {window} мин')

    # Horizontal line at the overall mean.
    mean_value = np.mean(minute_counts)
    ax.axhline(y=mean_value, color='gray', linestyle='--', alpha=0.7,
               label=f'Среднее: {mean_value:.1f}')

    # One tick label per hour.
    hours = range(0, total_minutes, 60)
    hour_labels = [minutes_to_time(m, start_time) for m in hours]

    ax.set_xticks(hours)
    ax.set_xticklabels(hour_labels, rotation=45)
    ax.set_xlabel('Время')
    ax.set_ylabel('Количество заявок')
    ax.set_title('Входящий поток заявок')
    # The legend is built last so that it includes the mean line added above.
    ax.legend()
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    return plt
125
+
126
+
127
def minutes_to_time(minutes, start_time="00:00"):
    """Convert an offset in minutes from ``start_time`` into an "HH:MM" string.

    The hour component wraps around at 24, so offsets that run past midnight
    roll over to the next day's clock time.
    """
    base_hour, base_minute = (int(part) for part in start_time.split(':'))
    elapsed = base_hour * 60 + base_minute + minutes
    whole_hours, minute = divmod(elapsed, 60)
    return f"{whole_hours % 24:02d}:{minute:02d}"
134
+
135
+
136
def plot_detailed_decisions(batch_stats, second_model_name="XGBoost", start_time="00:00"):
    """Draw a 3x2 grid of per-decision-type time series with HH:MM ticks.

    Five panels show one series each (line plus a translucent fill down to
    zero); the sixth overlays the two manual-review sources for comparison.

    Args:
        batch_stats: Sequence of dicts. Each entry is read for the keys
            'time', 'business_manual', 'business_auto', 'lr_confident',
            'second_confident' and 'second_uncertain'.
        second_model_name: Display name of the second model, used in a title.
        start_time: Clock time of minute 0 in "HH:MM" format.

    Returns:
        The ``plt`` module object, or ``None`` when ``batch_stats`` is empty.
    """
    if not batch_stats:
        return None

    fig, axes = plt.subplots(3, 2, figsize=(14, 10))

    times = [entry['time'] for entry in batch_stats]  # minutes
    last_minute = max(times) if times else 0

    # One tick per hour, extended one hour past the last sample.
    tick_positions = range(0, last_minute + 60, 60)
    tick_labels = [minutes_to_time(m, start_time) for m in tick_positions]

    def series(key):
        # Values of one statistic across all batches, in batch order.
        return [entry[key] for entry in batch_stats]

    def decorate(ax, title):
        # Title, hourly ticks and axis labels shared by every panel.
        ax.set_title(title, fontweight='bold')
        ax.set_xticks(tick_positions)
        ax.set_xticklabels(tick_labels, rotation=45)
        ax.set_xlabel('Время')
        ax.set_ylabel('Заявок')

    # (axes position, stats key, line format, fill colour, title)
    single_panels = (
        ((0, 0), 'business_manual', 'r-', 'red',
         'Ручной разбор: бизнес-правила'),
        ((0, 1), 'business_auto', 'darkred', 'darkred',
         'Авто отказ: бизнес-правила'),
        ((1, 0), 'lr_confident', 'blue', 'blue',
         'Уверенные решения: Logistic Regression'),
        ((1, 1), 'second_confident', 'green', 'green',
         f'Уверенные решения: {second_model_name}'),
        ((2, 0), 'second_uncertain', 'orange', 'orange',
         'Ручной разбор: модели неуверенны'),
    )
    for position, key, line_format, fill_color, title in single_panels:
        ax = axes[position]
        values = series(key)
        ax.plot(times, values, line_format, linewidth=1.5)
        ax.fill_between(times, 0, values, alpha=0.2, color=fill_color)
        decorate(ax, title)
        ax.grid(True, alpha=0.3)

    # Comparison panel: both sources of manual review on the same axes.
    comparison = axes[2, 1]
    comparison.plot(times, series('business_manual'),
                    'r-', linewidth=1.5, label='Бизнес-правила', alpha=0.7)
    comparison.plot(times, series('second_uncertain'),
                    'orange', linewidth=1.5, label='Модели неуверенны', alpha=0.7)
    decorate(comparison, 'Сравнение источников ручного разбора')
    comparison.legend()
    comparison.grid(True, alpha=0.3)

    plt.suptitle('Детальный анализ решений', fontsize=14, fontweight='bold')
    plt.tight_layout()
    return plt
228
+
229
def plot_parameters_history(pid_history, second_model_name="XGBoost", start_time="00:00"):
    """Plot how the controller parameters changed over the run.

    Three stacked panels: the low/high margins of Logistic Regression, the
    low/high margins of the second model, and the load error together with
    the controller output.

    Args:
        pid_history: Table-like object exposing ``.empty``, ``len()`` and the
            columns 'lr_low', 'lr_high', 'second_low', 'second_high',
            'error_load' and 'output' (presumably a pandas DataFrame with one
            row per minute - confirm against the caller).
        second_model_name: Display name of the second model.
        start_time: Clock time of row 0 in "HH:MM" format.

    Returns:
        The ``plt`` module object, or ``None`` when there is no history.
    """
    if pid_history is None or pid_history.empty:
        return None

    fig, axes = plt.subplots(3, 1, figsize=(12, 12))

    n_points = len(pid_history)
    x_values = range(n_points)

    # Hourly time labels; row position is used as the minute offset.
    tick_positions = range(0, n_points, 60)
    tick_labels = [minutes_to_time(m, start_time) for m in tick_positions]

    # Panels 1 and 2: low/high margins of each model.
    margin_panels = (
        (axes[0], 'lr_low', 'lr_high', 'LR',
         'Отступы Logistic Regression'),
        (axes[1], 'second_low', 'second_high', second_model_name,
         f'Отступы {second_model_name}'),
    )
    for ax, low_column, high_column, legend_prefix, title in margin_panels:
        ax.plot(x_values, pid_history[low_column], 'g-', linewidth=2,
                label=f'{legend_prefix} Low')
        ax.plot(x_values, pid_history[high_column], 'r-', linewidth=2,
                label=f'{legend_prefix} High')
        ax.set_ylabel('Отступ')
        ax.set_title(title)
        ax.legend()
        ax.grid(True, alpha=0.3)
        ax.set_xticks(tick_positions)
        ax.set_xticklabels(tick_labels, rotation=45)

    # Panel 3: load error and controller output around a zero reference line.
    signal_ax = axes[2]
    signal_ax.plot(x_values, pid_history['error_load'], 'b-',
                   label='Error load', alpha=0.7, linewidth=1.5)
    signal_ax.plot(x_values, pid_history['output'], 'r-',
                   label='Output', linewidth=2, alpha=0.7)
    signal_ax.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
    signal_ax.set_xlabel('Время')
    signal_ax.set_ylabel('Значение')
    signal_ax.set_title('Ошибка загрузки и выход регулятора')
    signal_ax.legend()
    signal_ax.grid(True, alpha=0.3)
    signal_ax.set_xticks(tick_positions)
    signal_ax.set_xticklabels(tick_labels, rotation=45)

    plt.tight_layout()
    return plt
277
+
278
+
279
+ # def plot_summary(processor):
280
+ # """Сводный дашборд"""
281
+ # fig, axes = plt.subplots(2, 3, figsize=(15, 10))
282
+ #
283
+ # stats = processor.stats
284
+ #
285
+ # # 1. Динамика очередей
286
+ # axes[0, 0].plot(stats['queue_history'], 'b-', linewidth=1.5, label='Очередь моделей')
287
+ # if 'business_queue_history' in stats:
288
+ # axes[0, 0].plot(stats['business_queue_history'], 'orange', linewidth=1.5, label='Очередь бизнес-правил')
289
+ # axes[0, 0].set_title('Динамика очередей')
290
+ # axes[0, 0].set_xlabel('Минута')
291
+ # axes[0, 0].set_ylabel('Заявок')
292
+ # axes[0, 0].legend()
293
+ # axes[0, 0].grid(True, alpha=0.3)
294
+ #
295
+ # # 2. Загрузка специалистов (модели)
296
+ # load = [b / processor.specialists_count * 100 for b in stats['specialist_busy']]
297
+ # axes[0, 1].plot(load, 'g-', linewidth=1.5, label='Основные специалисты')
298
+ # axes[0, 1].axhline(y=100, color='r', linestyle='--', alpha=0.5, label='Максимум')
299
+ # if hasattr(processor, 'target_load'):
300
+ # axes[0, 1].axhline(y=processor.target_load * 100, color='b', linestyle='--',
301
+ # alpha=0.5, label=f'Цель {processor.target_load * 100:.0f}%')
302
+ # axes[0, 1].set_title('Загрузка специалистов (модели)')
303
+ # axes[0, 1].set_xlabel('Минута')
304
+ # axes[0, 1].set_ylabel('%')
305
+ # axes[0, 1].legend()
306
+ # axes[0, 1].grid(True, alpha=0.3)
307
+ #
308
+ # # 3. Загрузка экспертов
309
+ # if 'business_specialist_busy' in stats and stats['business_specialist_busy']:
310
+ # business_load = [b / processor.business_specialists_count * 100 for b in stats['business_specialist_busy']]
311
+ # axes[1, 0].plot(business_load, 'orange', linewidth=1.5, label='Эксперты')
312
+ # axes[1, 0].axhline(y=100, color='r', linestyle='--', alpha=0.5, label='Максимум')
313
+ # axes[1, 0].set_title('Загрузка экспертов (бизнес-правила)')
314
+ # axes[1, 0].set_xlabel('Минута')
315
+ # axes[1, 0].set_ylabel('%')
316
+ # axes[1, 0].legend()
317
+ # axes[1, 0].grid(True, alpha=0.3)
318
+ # else:
319
+ # axes[1, 0].text(0.5, 0.5, 'Нет данных по экспертам', ha='center', va='center')
320
+ # axes[1, 0].set_title('Загрузка экспертов')
321
+ #
322
+ # # 4. Распределение решений
323
+ # sizes = [
324
+ # stats['auto_approved'],
325
+ # stats['auto_declined'],
326
+ # stats['manual_processed'],
327
+ # stats.get('business_manual_processed', 0)
328
+ # ]
329
+ # labels = ['Одобрено авто', 'Отказ авто', 'Ручной (модели)', 'Ручной (бизнес)']
330
+ # colors = ['#2ecc71', '#e74c3c', '#3498db', '#f39c12']
331
+ #
332
+ # if sum(sizes) > 0:
333
+ # wedges, texts, autotexts = axes[1, 1].pie(sizes, labels=labels, autopct='%1.1f%%',
334
+ # colors=colors, startangle=90)
335
+ # for autotext in autotexts:
336
+ # autotext.set_color('white')
337
+ # autotext.set_fontweight('bold')
338
+ # axes[1, 1].set_title('Итоговые решения')
339
+ #
340
+ # # 5. Ключевые метрики (освободилось место)
341
+ # total = stats['total_processed']
342
+ # if total > 0:
343
+ # avg_wait = np.mean(stats['wait_times']) if stats['wait_times'] else 0
344
+ # avg_business_wait = np.mean(stats.get('business_wait_times', [0])) if stats.get('business_wait_times') else 0
345
+ #
346
+ # metrics_text = f"""
347
+ # Всего заявок: {total:,}
348
+ # Одобрено авто: {stats['auto_approved']:,} ({stats['auto_approved'] / total * 100:.1f}%)
349
+ # Отказ авто: {stats['auto_declined']:,} ({stats['auto_declined'] / total * 100:.1f}%)
350
+ #
351
+ # Ручной разбор (модели): {stats['manual_processed']:,} ({stats['manual_processed'] / total * 100:.1f}%)
352
+ # Ручной разбор (бизнес): {stats.get('business_manual_processed', 0):,}
353
+ #
354
+ # Среднее время ожидания (модели): {avg_wait:.1f} мин
355
+ # Среднее время ожидания (бизнес): {avg_business_wait:.1f} мин
356
+ #
357
+ # Средняя загрузка специалистов: {np.mean(load):.1f}%
358
+ # """
359
+ # else:
360
+ # metrics_text = "Нет данных"
361
+ #
362
+ # axes[0, 2].text(0.1, 0.5, metrics_text, transform=axes[0, 2].transAxes,
363
+ # fontsize=10, verticalalignment='center', fontfamily='monospace')
364
+ # axes[0, 2].axis('off')
365
+ # axes[0, 2].set_title('Ключевые метрики')
366
+ #
367
+ # # 6. Пустой график или можно что-то еще
368
+ # axes[1, 2].axis('off')
369
+ #
370
+ # plt.suptitle('Сводная статистика симуляции', fontsize=14, fontweight='bold')
371
+ # plt.tight_layout()
372
+ # return plt
373
+
374
+
app/simulation/visualization/simulation_20:11.gif ADDED
app/simulation/visualization/simulation_20:19.gif ADDED
app/simulation/visualization/simulation_20:25.gif ADDED
app/simulation/visualization/simulation_20:30.gif ADDED
app/utils/__pycache__/credit_preprocessor.cpython-311.pyc ADDED
Binary file (14.5 kB). View file
 
app/utils/__pycache__/data_loader.cpython-311.pyc ADDED
Binary file (1.51 kB). View file
 
app/utils/credit_preprocessor.py ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import joblib
4
+ from sklearn.base import BaseEstimator, TransformerMixin
5
+
6
+
7
class CreditDataPreprocessor(BaseEstimator, TransformerMixin):
    """Full feature-engineering step for the credit-scoring table.

    ``fit`` learns nothing; ``transform`` fills missing values, clips
    outliers, adds 0/1 indicator columns for clipped or special values,
    combines the three past-due counters into one weighted score and
    one-hot encodes the loan-count groups. ``clean_train`` is a separate
    helper that removes outlier rows from a training set.

    Constructor parameters are stored unchanged under the same names.
    """

    def __init__(self,
                 NumberOfDependents_fill_value=0,
                 NumberOfDependents_up_threshold=10,
                 MonthlyIncome_fill_value=0,
                 RevolvingUtilizationOfUnsecuredLines_drop_threshold=2,
                 age_low_drop_threshold=18,
                 age_up_drop_threshold=80,
                 DebtRatio_up_threshold=5,
                 # Tuple instead of a list: a mutable default would be shared
                 # by every instance created without this argument.
                 PastDueRiskScore_weights=(1.0, 1.2, 1.3),
                 NumberRealEstateLoansOrLines_drop_threshold=20,
                 drop_special_codes=False):
        self.NumberOfDependents_fill_value = NumberOfDependents_fill_value
        self.NumberOfDependents_up_threshold = NumberOfDependents_up_threshold

        self.MonthlyIncome_fill_value = MonthlyIncome_fill_value

        self.RevolvingUtilizationOfUnsecuredLines_drop_threshold = RevolvingUtilizationOfUnsecuredLines_drop_threshold

        self.age_low_drop_threshold = age_low_drop_threshold
        self.age_up_drop_threshold = age_up_drop_threshold

        self.DebtRatio_up_threshold = DebtRatio_up_threshold

        self.PastDueRiskScore_weights = PastDueRiskScore_weights

        self.NumberRealEstateLoansOrLines_drop_threshold = NumberRealEstateLoansOrLines_drop_threshold

        self.drop_special_codes = drop_special_codes

    def fit(self, X, y=None):
        """Do nothing and return ``self``; the transformer has no fitted state."""
        return self

    def transform(self, X):
        """Return an engineered copy of ``X``; the input frame is not modified.

        Columns are added and dropped in a fixed order, so the output column
        order is stable for a given input layout.
        """
        X_copy = X.copy()

        # Dependents: fill gaps, then cap at the configured maximum.
        dependents = X_copy['NumberOfDependents'].fillna(value=self.NumberOfDependents_fill_value)
        X_copy['NumberOfDependents'] = dependents.clip(0, self.NumberOfDependents_up_threshold)

        # Monthly income: remember which rows were missing, then fill them.
        # Read from the working copy like every other step (the original read
        # from X here, which gave the same values but was inconsistent).
        X_copy['MonthlyIncomeIsMissing'] = X_copy['MonthlyIncome'].isna().astype(int)
        X_copy['MonthlyIncome'] = X_copy['MonthlyIncome'].fillna(value=self.MonthlyIncome_fill_value)

        # Revolving utilization: flag values above 1, then clip into [0, 1].
        X_copy['RevolvingUtilizationOverOne'] = (
            X_copy['RevolvingUtilizationOfUnsecuredLines'] > 1).astype(float)
        X_copy['RevolvingUtilizationOfUnsecuredLines'] = (
            X_copy['RevolvingUtilizationOfUnsecuredLines'].clip(0, 1))

        # Debt payments: DebtRatio * MonthlyIncome, except that rows with zero
        # income take DebtRatio itself as the payment value.
        zero_income = X_copy['MonthlyIncome'] == 0
        X_copy['DebtPayments'] = 0.0
        X_copy.loc[zero_income, 'DebtPayments'] = X_copy.loc[zero_income, 'DebtRatio']
        X_copy.loc[~zero_income, 'DebtPayments'] = (
            X_copy.loc[~zero_income, 'DebtRatio'] * X_copy.loc[~zero_income, 'MonthlyIncome'])
        X_copy['DebtRatio'] = X_copy['DebtRatio'].clip(0, self.DebtRatio_up_threshold)

        X_copy['DebtPayments_over_10k'] = (X_copy['DebtPayments'] > 10000).astype(float)
        X_copy['DebtPayments'] = X_copy['DebtPayments'].clip(0, 10000)

        X_copy['MonthlyIncome_over_20k'] = (X_copy['MonthlyIncome'] >= 20000).astype(float)
        X_copy['MonthlyIncome'] = X_copy['MonthlyIncome'].clip(0, 20000)

        # Special codes 96 / 98 are detected on the 30-59 days counter only.
        past_due_30_59 = X_copy['NumberOfTime30-59DaysPastDueNotWorse']
        is_code_96 = past_due_30_59 == 96
        is_code_98 = past_due_30_59 == 98
        X_copy['Code96'] = is_code_96.astype(float)
        X_copy['Code98'] = is_code_98.astype(float)

        # Weighted past-due score; rows carrying a special code keep the raw
        # code value instead of the weighted sum.
        w_30_59, w_60_89, w_90 = (self.PastDueRiskScore_weights[i] for i in range(3))
        X_copy['PastDueRiskScore'] = (
            w_30_59 * X_copy['NumberOfTime30-59DaysPastDueNotWorse'] +
            w_60_89 * X_copy['NumberOfTime60-89DaysPastDueNotWorse'] +
            w_90 * X_copy['NumberOfTimes90DaysLate'])
        X_copy.loc[is_code_96, 'PastDueRiskScore'] = 96
        X_copy.loc[is_code_98, 'PastDueRiskScore'] = 98
        X_copy = X_copy.drop(columns=['NumberOfTime30-59DaysPastDueNotWorse',
                                      'NumberOfTime60-89DaysPastDueNotWorse',
                                      'NumberOfTimes90DaysLate'])

        # Loan counters: flag values above the cap, then clip.
        X_copy['NumberOfOpenCreditLinesAndLoans_over_30'] = (
            X_copy['NumberOfOpenCreditLinesAndLoans'] > 30).astype(float)
        X_copy['NumberOfOpenCreditLinesAndLoans'] = X_copy['NumberOfOpenCreditLinesAndLoans'].clip(0, 30)

        X_copy['NumberRealEstateLoansOrLines_over_5'] = (
            X_copy['NumberRealEstateLoansOrLines'] > 5).astype(float)
        X_copy['NumberRealEstateLoansOrLines'] = X_copy['NumberRealEstateLoansOrLines'].clip(0, 5)

        # NOTE(review): pd.cut uses right-closed bins by default, so these
        # edges produce (0, 1], (1, 2], (2, 6], (6, 15], (15, 31]. A count of
        # 1 is therefore labelled '0_loans', and a count of 0 falls outside
        # every bin (all Consumer_* dummies are 0 for it). The labels look
        # shifted by one relative to the bins. Left unchanged on purpose:
        # any already-trained model presumably expects this exact encoding -
        # confirm and retrain before correcting the bins.
        X_copy['ConsumerCredit_Group'] = pd.cut(X_copy['NumberOfOpenCreditLinesAndLoans'],
                                                bins=[0, 1, 2, 6, 15, 31],
                                                labels=[
                                                    '0_loans',
                                                    '1_loans',
                                                    '2-5_loans',
                                                    '6-14_loans',
                                                    '16-30_loans'
                                                ])
        consumer_dummy = pd.get_dummies(X_copy['ConsumerCredit_Group'],
                                        prefix='Consumer', drop_first=False).astype('float')

        X_copy['RealEstateLoans_Group'] = pd.cut(X_copy['NumberRealEstateLoansOrLines'],
                                                 bins=[-1, 0, 3, 100],
                                                 labels=[
                                                     '0_loans',
                                                     '1-3_loans',
                                                     '4+_loans',
                                                 ])
        estate_dummy = pd.get_dummies(X_copy['RealEstateLoans_Group'],
                                      prefix='RealEstateLoans', drop_first=False).astype('float')

        X_copy = pd.concat([X_copy, consumer_dummy, estate_dummy], axis=1)

        # Helper group columns, the two reference dummy levels, and the
        # features that are not passed on to the models.
        X_copy = X_copy.drop(columns=['ConsumerCredit_Group',
                                      'RealEstateLoans_Group',
                                      'Consumer_6-14_loans',
                                      'RealEstateLoans_0_loans',
                                      'NumberOfOpenCreditLinesAndLoans',
                                      'NumberRealEstateLoansOrLines',
                                      'MonthlyIncomeIsMissing',
                                      'MonthlyIncome_over_20k',
                                      'Consumer_0_loans',
                                      'NumberOfOpenCreditLinesAndLoans_over_30'])

        if self.drop_special_codes:
            X_copy = X_copy.drop(columns=['Code96', 'Code98'])

        return X_copy

    def fit_transform(self, X, y=None):
        """Fit (a no-op) and transform ``X`` in one call."""
        return self.fit(X, y).transform(X)

    def clean_train(self, X, y=None):
        """Drop outlier rows from a training set.

        A row is kept when its revolving utilization, age and number of
        real-estate loans are all within the configured drop thresholds.

        Returns:
            ``X_clean`` alone, or ``(X_clean, y_clean)`` when ``y`` is given
            (``y`` is filtered with the same boolean mask).
        """
        mask = (
            (X['RevolvingUtilizationOfUnsecuredLines'] <= self.RevolvingUtilizationOfUnsecuredLines_drop_threshold) &
            (X['age'] >= self.age_low_drop_threshold) &
            (X['age'] <= self.age_up_drop_threshold) &
            (X['NumberRealEstateLoansOrLines'] <= self.NumberRealEstateLoansOrLines_drop_threshold)
        )

        X_clean = X[mask].copy()

        if y is not None:
            y_clean = y[mask].copy()
            return X_clean, y_clean

        return X_clean
157
+
158
+ from sklearn.base import BaseEstimator, TransformerMixin
159
+ from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler, MaxAbsScaler
160
+
161
class CreditScaler(BaseEstimator, TransformerMixin):
    """
    Scales only the non-boolean columns of a frame.
    The scaling method is selectable through ``scaler_type``.
    """

    def __init__(self, scaler_type='standard'):
        """
        ``scaler_type`` selects the underlying scaler.

        Available types:
        - 'standard': StandardScaler (mean=0, variance=1)
        - 'robust': RobustScaler (robust to outliers)
        - 'minmax': MinMaxScaler (maps to [0, 1])
        - 'maxabs': MaxAbsScaler (maps to [-1, 1])
        """

        # Indicator columns that are passed through without scaling.
        self.boolean_columns = [
            'RevolvingUtilizationOverOne',
            'DebtPayments_over_10k',
            'Code96',
            'Code98',
            'NumberRealEstateLoansOrLines_over_5',
            'Consumer_1_loans',
            'Consumer_2-5_loans',
            'Consumer_16-30_loans',
            'RealEstateLoans_1-3_loans',
            'RealEstateLoans_4+_loans'
        ]

        self.scaler_type = scaler_type
        self._create_scaler()

        # Populated by fit().
        self.columns_to_scale_ = None
        self.n_features_in_ = None
        self.feature_names_in_ = None

    def _create_scaler(self):
        """Instantiate ``self.scaler`` for the current ``scaler_type``."""
        known_scalers = (
            ('standard', StandardScaler),
            ('robust', RobustScaler),
            ('minmax', MinMaxScaler),
            ('maxabs', MaxAbsScaler),
        )
        for type_name, scaler_class in known_scalers:
            if self.scaler_type == type_name:
                self.scaler = scaler_class()
                return

        raise ValueError(
            f"Unknown scaler_type: {self.scaler_type}. "
            f"Available: standard, robust, minmax, maxabs"
        )

    def fit(self, X, y=None):
        """
        Record the input columns, pick those to scale (everything that is
        not listed in ``boolean_columns``) and fit the scaler on them.
        """
        column_names = X.columns.tolist()

        self.feature_names_in_ = column_names
        self.n_features_in_ = len(column_names)

        self.columns_to_scale_ = [
            name for name in column_names
            if name not in self.boolean_columns
        ]

        self.scaler.fit(X[self.columns_to_scale_])
        return self

    def transform(self, X, y=None):
        """
        Return a copy of ``X`` in which only the non-boolean columns are scaled.
        """
        scaled = X.copy()
        targets = self.columns_to_scale_

        scaled[targets] = self.scaler.transform(scaled[targets])

        return scaled

    def fit_transform(self, X, y=None):
        """Fit on ``X`` and return its scaled copy."""
        return self.fit(X, y).transform(X, y)

    def get_feature_names_out(self, input_features=None):
        """For sklearn compatibility: output names equal input names."""
        if input_features is not None:
            return input_features
        if self.feature_names_in_ is None:
            return []
        return self.feature_names_in_

    def set_params(self, **params):
        """For GridSearchCV compatibility: rebuild the scaler when its type changes."""
        if 'scaler_type' in params:
            requested_type = params['scaler_type']
            if requested_type != self.scaler_type:
                self.scaler_type = requested_type
                self._create_scaler()
        return super().set_params(**params)
257
+
258
+
259
def check_business_rules(age, monthly_income, monthly_debt, debt_ratio,
                         late_90, late_60_89, late_30_59, credit_lines,
                         real_estate, utilization, dependents):
    """Apply the hard business rules to one application before any model runs.

    Rules are checked in a fixed order and the first one that fires decides
    the outcome:

    1. age below 18 -> automatic decline (``decision`` = 1);
    2. special code 98, then special code 96, in any late-payment counter
       -> manual review;
    3. age above 80, income or payments above 1,000,000, utilization above 2,
       more than 20 real-estate loans -> manual review.

    ``debt_ratio``, ``credit_lines`` and ``dependents`` are accepted but not
    used by any rule.

    Returns:
        A dict with 'needs_manual' and 'decision'. When a rule fired it also
        has a 'message'; when every check passed there is no 'message' key
        and 'decision' is ``None``.
    """
    # Critical rule: immediate decline.
    if age < 18:
        return {
            'needs_manual': False,
            'message': 'Возраст менее 18 лет - кредит не выдаётся',
            'decision': 1  # decline
        }

    late_counters = (late_90, late_60_89, late_30_59)

    # Conditions are wrapped in lambdas so that each one is evaluated only
    # if no earlier rule has fired.
    manual_review_rules = (
        # Special bank codes.
        (lambda: any(value == 98 for value in late_counters),
         'Код 98: Списание долга как безнадежного'),
        (lambda: any(value == 96 for value in late_counters),
         'Код 96: Изъятие залога или реализация имущества'),
        # Critical thresholds.
        (lambda: age > 80,
         'Возраст > 80 лет - требуется ручной разбор (индивидуальные условия)'),
        (lambda: monthly_income > 1000000,
         'Доход свыше 1,000,000 $ - требуется ручной разбор'),
        (lambda: monthly_debt > 1000000,
         'Платежи свыше 1,000,000 $ - требуется ручной разбор'),
        (lambda: utilization > 2,
         'Использование кредитных средств превышает 200%'),
        (lambda: real_estate > 20,
         'Количество кредитов под залог недвижимости слишком велико - ручной разбор'),
    )
    for rule_fires, message in manual_review_rules:
        if rule_fires():
            return {
                'needs_manual': True,
                'message': message,
                'decision': None
            }

    # Every check passed: the application may be scored automatically.
    return {
        'needs_manual': False,
        'decision': None,
    }
327
+
328
+
329
+
app/utils/data_loader.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import joblib
3
+ import os
4
+
5
+
6
@st.cache_resource
def load_artifacts(models_path, preprocessor_path):
    """Load the preprocessor, the scaler and every model file that exists.

    The preprocessor and scaler files are required (a missing file makes
    ``joblib.load`` fail); model files are optional and silently skipped
    when absent.

    Args:
        models_path: Directory that holds the model ``.pkl`` files.
        preprocessor_path: Directory that holds 'preprocessor_150.pkl' and
            'scaler_150.pkl'.

    Returns:
        ``(preprocessor, scaler, models)`` where ``models`` maps a display
        name to the loaded model, in the order listed below.
    """
    # NOTE: joblib.load unpickles its input, which can execute arbitrary
    # code - only point these paths at trusted artifact directories.
    def load_preprocessing_artifact(filename):
        return joblib.load(os.path.join(preprocessor_path, filename))

    preprocessor = load_preprocessing_artifact('preprocessor_150.pkl')
    scaler = load_preprocessing_artifact('scaler_150.pkl')

    model_files = {
        'Logistic Regression': 'logreg_150_model.pkl',
        'XGBoost': 'xgb_150_model.pkl',
        'LightGBM': 'lgbm_150_model.pkl',
        'CatBoost': 'catboost_150_model.pkl',
        'Random Forest': 'rfc_150_model.pkl'
    }
    model_paths = {
        name: os.path.join(models_path, filename)
        for name, filename in model_files.items()
    }

    # Only models whose file is present on disk are loaded.
    models = {
        name: joblib.load(path)
        for name, path in model_paths.items()
        if os.path.exists(path)
    }

    return preprocessor, scaler, models
catboost_info/catboost_training.json ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "meta":{"test_sets":[],"test_metrics":[],"learn_metrics":[{"best_value":"Min","name":"Logloss"}],"launch_mode":"Train","parameters":"","iteration_count":100,"learn_sets":["learn"],"name":"experiment"},
3
+ "iterations":[
4
+ {"learn":[0.6582255385],"iteration":0,"passed_time":0.1064302509,"remaining_time":10.53659484},
5
+ {"learn":[0.628929721],"iteration":1,"passed_time":0.1525804859,"remaining_time":7.476443808},
6
+ {"learn":[0.6055823656],"iteration":2,"passed_time":0.1890582018,"remaining_time":6.112881857},
7
+ {"learn":[0.585745295],"iteration":3,"passed_time":0.2252067115,"remaining_time":5.404961076},
8
+ {"learn":[0.5691497866],"iteration":4,"passed_time":0.2406276923,"remaining_time":4.571926155},
9
+ {"learn":[0.5553995801],"iteration":5,"passed_time":0.2564603788,"remaining_time":4.017879268},
10
+ {"learn":[0.5431466822],"iteration":6,"passed_time":0.2927427626,"remaining_time":3.889296704},
11
+ {"learn":[0.5321745596],"iteration":7,"passed_time":0.326945827,"remaining_time":3.75987701},
12
+ {"learn":[0.5230197248],"iteration":8,"passed_time":0.3622708005,"remaining_time":3.662960316},
13
+ {"learn":[0.5150673326],"iteration":9,"passed_time":0.3964141569,"remaining_time":3.567727412},
14
+ {"learn":[0.5085723427],"iteration":10,"passed_time":0.4321486694,"remaining_time":3.496475598},
15
+ {"learn":[0.5029521178],"iteration":11,"passed_time":0.4666833149,"remaining_time":3.422344309},
16
+ {"learn":[0.4982952699],"iteration":12,"passed_time":0.4887040018,"remaining_time":3.270557551},
17
+ {"learn":[0.4940193081],"iteration":13,"passed_time":0.5244568892,"remaining_time":3.221663748},
18
+ {"learn":[0.4903079864],"iteration":14,"passed_time":0.5588645355,"remaining_time":3.166899035},
19
+ {"learn":[0.4877126951],"iteration":15,"passed_time":0.5737366867,"remaining_time":3.012117605},
20
+ {"learn":[0.4849442288],"iteration":16,"passed_time":0.594951629,"remaining_time":2.904763836},
21
+ {"learn":[0.4823937275],"iteration":17,"passed_time":0.6301627283,"remaining_time":2.870741318},
22
+ {"learn":[0.4803765605],"iteration":18,"passed_time":0.6663060298,"remaining_time":2.840567811},
23
+ {"learn":[0.4781523185],"iteration":19,"passed_time":0.687587555,"remaining_time":2.75035022},
24
+ {"learn":[0.4767059201],"iteration":20,"passed_time":0.7219467433,"remaining_time":2.715894891},
25
+ {"learn":[0.475163465],"iteration":21,"passed_time":0.7562504736,"remaining_time":2.681251679},
26
+ {"learn":[0.4741219727],"iteration":22,"passed_time":0.777308792,"remaining_time":2.602294651},
27
+ {"learn":[0.473008657],"iteration":23,"passed_time":0.8123926421,"remaining_time":2.5725767},
28
+ {"learn":[0.4722618181],"iteration":24,"passed_time":0.8479381142,"remaining_time":2.543814343},
29
+ {"learn":[0.4714753648],"iteration":25,"passed_time":0.8643292969,"remaining_time":2.460014153},
30
+ {"learn":[0.470390758],"iteration":26,"passed_time":0.904391114,"remaining_time":2.445205605},
31
+ {"learn":[0.4696201438],"iteration":27,"passed_time":0.9384147212,"remaining_time":2.413066426},
32
+ {"learn":[0.46859249],"iteration":28,"passed_time":0.9729809498,"remaining_time":2.382125774},
33
+ {"learn":[0.4677104182],"iteration":29,"passed_time":1.007459887,"remaining_time":2.350739737},
34
+ {"learn":[0.4673133885],"iteration":30,"passed_time":1.019293767,"remaining_time":2.268750643},
35
+ {"learn":[0.4667841252],"iteration":31,"passed_time":1.031243396,"remaining_time":2.191392216},
36
+ {"learn":[0.4664536674],"iteration":32,"passed_time":1.066954367,"remaining_time":2.166240684},
37
+ {"learn":[0.4662622609],"iteration":33,"passed_time":1.076521387,"remaining_time":2.089717986},
38
+ {"learn":[0.466086662],"iteration":34,"passed_time":1.088162018,"remaining_time":2.020872318},
39
+ {"learn":[0.4657380808],"iteration":35,"passed_time":1.122871037,"remaining_time":1.996215177},
40
+ {"learn":[0.4651284039],"iteration":36,"passed_time":1.157755597,"remaining_time":1.971313584},
41
+ {"learn":[0.4646531445],"iteration":37,"passed_time":1.195417263,"remaining_time":1.95041764},
42
+ {"learn":[0.4641257326],"iteration":38,"passed_time":1.230054783,"remaining_time":1.92393184},
43
+ {"learn":[0.4637898175],"iteration":39,"passed_time":1.246455715,"remaining_time":1.869683573},
44
+ {"learn":[0.4633285186],"iteration":40,"passed_time":1.280777487,"remaining_time":1.843070043},
45
+ {"learn":[0.4628428368],"iteration":41,"passed_time":1.315556006,"remaining_time":1.816720199},
46
+ {"learn":[0.4626124403],"iteration":42,"passed_time":1.349455031,"remaining_time":1.788812483},
47
+ {"learn":[0.4623858706],"iteration":43,"passed_time":1.362016739,"remaining_time":1.73347585},
48
+ {"learn":[0.4621286714],"iteration":44,"passed_time":1.398965243,"remaining_time":1.709846409},
49
+ {"learn":[0.4617974001],"iteration":45,"passed_time":1.433543555,"remaining_time":1.682855478},
50
+ {"learn":[0.4614775166],"iteration":46,"passed_time":1.466964625,"remaining_time":1.654236705},
51
+ {"learn":[0.46142579],"iteration":47,"passed_time":1.479586041,"remaining_time":1.602884878},
52
+ {"learn":[0.4614011205],"iteration":48,"passed_time":1.486797243,"remaining_time":1.547482845},
53
+ {"learn":[0.4611845342],"iteration":49,"passed_time":1.521749302,"remaining_time":1.521749302},
54
+ {"learn":[0.4609852804],"iteration":50,"passed_time":1.557818146,"remaining_time":1.496727238},
55
+ {"learn":[0.4604321277],"iteration":51,"passed_time":1.596752012,"remaining_time":1.473924934},
56
+ {"learn":[0.4601645791],"iteration":52,"passed_time":1.646375224,"remaining_time":1.459993123},
57
+ {"learn":[0.459804458],"iteration":53,"passed_time":1.683746309,"remaining_time":1.434302411},
58
+ {"learn":[0.4592589475],"iteration":54,"passed_time":1.71827358,"remaining_time":1.405860201},
59
+ {"learn":[0.4589643366],"iteration":55,"passed_time":1.752749892,"remaining_time":1.377160629},
60
+ {"learn":[0.4585201818],"iteration":56,"passed_time":1.787086539,"remaining_time":1.348153003},
61
+ {"learn":[0.4582657803],"iteration":57,"passed_time":1.822507553,"remaining_time":1.319746849},
62
+ {"learn":[0.4580557799],"iteration":58,"passed_time":1.844325617,"remaining_time":1.281650005},
63
+ {"learn":[0.457864554],"iteration":59,"passed_time":1.877865186,"remaining_time":1.251910124},
64
+ {"learn":[0.4576167412],"iteration":60,"passed_time":1.915169063,"remaining_time":1.224452351},
65
+ {"learn":[0.4575874936],"iteration":61,"passed_time":1.924591958,"remaining_time":1.17958862},
66
+ {"learn":[0.457362279],"iteration":62,"passed_time":1.960633385,"remaining_time":1.151483099},
67
+ {"learn":[0.4572946663],"iteration":63,"passed_time":1.972484848,"remaining_time":1.109522727},
68
+ {"learn":[0.4569830294],"iteration":64,"passed_time":2.007666031,"remaining_time":1.08105094},
69
+ {"learn":[0.456610445],"iteration":65,"passed_time":2.042187927,"remaining_time":1.052036205},
70
+ {"learn":[0.4560918865],"iteration":66,"passed_time":2.076725989,"remaining_time":1.022865039},
71
+ {"learn":[0.4558479503],"iteration":67,"passed_time":2.110631805,"remaining_time":0.9932384965},
72
+ {"learn":[0.455740418],"iteration":68,"passed_time":2.124730753,"remaining_time":0.9545891789},
73
+ {"learn":[0.455501269],"iteration":69,"passed_time":2.159214566,"remaining_time":0.9253776709},
74
+ {"learn":[0.4554787935],"iteration":70,"passed_time":2.168732127,"remaining_time":0.8858201647},
75
+ {"learn":[0.4552744806],"iteration":71,"passed_time":2.205001553,"remaining_time":0.8575006039},
76
+ {"learn":[0.455234248],"iteration":72,"passed_time":2.220812656,"remaining_time":0.8213964619},
77
+ {"learn":[0.455137986],"iteration":73,"passed_time":2.255120136,"remaining_time":0.7923395074},
78
+ {"learn":[0.4549484305],"iteration":74,"passed_time":2.28992153,"remaining_time":0.7633071767},
79
+ {"learn":[0.4548062199],"iteration":75,"passed_time":2.324904798,"remaining_time":0.7341804624},
80
+ {"learn":[0.4546474797],"iteration":76,"passed_time":2.360039856,"remaining_time":0.7049469699},
81
+ {"learn":[0.4545581835],"iteration":77,"passed_time":2.372090859,"remaining_time":0.6690512679},
82
+ {"learn":[0.4544265313],"iteration":78,"passed_time":2.40573901,"remaining_time":0.6395002433},
83
+ {"learn":[0.4544030978],"iteration":79,"passed_time":2.413539667,"remaining_time":0.6033849167},
84
+ {"learn":[0.4543650724],"iteration":80,"passed_time":2.422935896,"remaining_time":0.568342988},
85
+ {"learn":[0.4542698101],"iteration":81,"passed_time":2.455982135,"remaining_time":0.5391180296},
86
+ {"learn":[0.4540294101],"iteration":82,"passed_time":2.490015159,"remaining_time":0.5100031048},
87
+ {"learn":[0.4539463005],"iteration":83,"passed_time":2.501245001,"remaining_time":0.4764276192},
88
+ {"learn":[0.4537784829],"iteration":84,"passed_time":2.53640835,"remaining_time":0.4476014736},
89
+ {"learn":[0.4536943889],"iteration":85,"passed_time":2.572798317,"remaining_time":0.4188276329},
90
+ {"learn":[0.4536386999],"iteration":86,"passed_time":2.608432288,"remaining_time":0.3897657442},
91
+ {"learn":[0.4533342039],"iteration":87,"passed_time":2.644785463,"remaining_time":0.3606525632},
92
+ {"learn":[0.4531946585],"iteration":88,"passed_time":2.679509774,"remaining_time":0.3311753653},
93
+ {"learn":[0.4529846134],"iteration":89,"passed_time":2.713719672,"remaining_time":0.301524408},
94
+ {"learn":[0.4529583581],"iteration":90,"passed_time":2.728664447,"remaining_time":0.2698679124},
95
+ {"learn":[0.4528171854],"iteration":91,"passed_time":2.765771076,"remaining_time":0.2405018327},
96
+ {"learn":[0.4526575987],"iteration":92,"passed_time":2.800900134,"remaining_time":0.2108204402},
97
+ {"learn":[0.4526170824],"iteration":93,"passed_time":2.822734239,"remaining_time":0.1801745259},
98
+ {"learn":[0.4525149982],"iteration":94,"passed_time":2.857135885,"remaining_time":0.1503755729},
99
+ {"learn":[0.4524663385],"iteration":95,"passed_time":2.892015112,"remaining_time":0.1205006297},
100
+ {"learn":[0.4524315166],"iteration":96,"passed_time":2.908353878,"remaining_time":0.08994908901},
101
+ {"learn":[0.4523241677],"iteration":97,"passed_time":2.943535644,"remaining_time":0.060072156},
102
+ {"learn":[0.4523010903],"iteration":98,"passed_time":2.964563379,"remaining_time":0.02994508464},
103
+ {"learn":[0.4522791181],"iteration":99,"passed_time":2.980352983,"remaining_time":0}
104
+ ]}
catboost_info/learn/events.out.tfevents ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1fe5092567732e395a3420a1205c4190f0e63d50edc678509bd4104fc34a503
3
+ size 5398
catboost_info/learn_error.tsv ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ iter Logloss
2
+ 0 0.6582255385
3
+ 1 0.628929721
4
+ 2 0.6055823656
5
+ 3 0.585745295
6
+ 4 0.5691497866
7
+ 5 0.5553995801
8
+ 6 0.5431466822
9
+ 7 0.5321745596
10
+ 8 0.5230197248
11
+ 9 0.5150673326
12
+ 10 0.5085723427
13
+ 11 0.5029521178
14
+ 12 0.4982952699
15
+ 13 0.4940193081
16
+ 14 0.4903079864
17
+ 15 0.4877126951
18
+ 16 0.4849442288
19
+ 17 0.4823937275
20
+ 18 0.4803765605
21
+ 19 0.4781523185
22
+ 20 0.4767059201
23
+ 21 0.475163465
24
+ 22 0.4741219727
25
+ 23 0.473008657
26
+ 24 0.4722618181
27
+ 25 0.4714753648
28
+ 26 0.470390758
29
+ 27 0.4696201438
30
+ 28 0.46859249
31
+ 29 0.4677104182
32
+ 30 0.4673133885
33
+ 31 0.4667841252
34
+ 32 0.4664536674
35
+ 33 0.4662622609
36
+ 34 0.466086662
37
+ 35 0.4657380808
38
+ 36 0.4651284039
39
+ 37 0.4646531445
40
+ 38 0.4641257326
41
+ 39 0.4637898175
42
+ 40 0.4633285186
43
+ 41 0.4628428368
44
+ 42 0.4626124403
45
+ 43 0.4623858706
46
+ 44 0.4621286714
47
+ 45 0.4617974001
48
+ 46 0.4614775166
49
+ 47 0.46142579
50
+ 48 0.4614011205
51
+ 49 0.4611845342
52
+ 50 0.4609852804
53
+ 51 0.4604321277
54
+ 52 0.4601645791
55
+ 53 0.459804458
56
+ 54 0.4592589475
57
+ 55 0.4589643366
58
+ 56 0.4585201818
59
+ 57 0.4582657803
60
+ 58 0.4580557799
61
+ 59 0.457864554
62
+ 60 0.4576167412
63
+ 61 0.4575874936
64
+ 62 0.457362279
65
+ 63 0.4572946663
66
+ 64 0.4569830294
67
+ 65 0.456610445
68
+ 66 0.4560918865
69
+ 67 0.4558479503
70
+ 68 0.455740418
71
+ 69 0.455501269
72
+ 70 0.4554787935
73
+ 71 0.4552744806
74
+ 72 0.455234248
75
+ 73 0.455137986
76
+ 74 0.4549484305
77
+ 75 0.4548062199
78
+ 76 0.4546474797
79
+ 77 0.4545581835
80
+ 78 0.4544265313
81
+ 79 0.4544030978
82
+ 80 0.4543650724
83
+ 81 0.4542698101
84
+ 82 0.4540294101
85
+ 83 0.4539463005
86
+ 84 0.4537784829
87
+ 85 0.4536943889
88
+ 86 0.4536386999
89
+ 87 0.4533342039
90
+ 88 0.4531946585
91
+ 89 0.4529846134
92
+ 90 0.4529583581
93
+ 91 0.4528171854
94
+ 92 0.4526575987
95
+ 93 0.4526170824
96
+ 94 0.4525149982
97
+ 95 0.4524663385
98
+ 96 0.4524315166
99
+ 97 0.4523241677
100
+ 98 0.4523010903
101
+ 99 0.4522791181
catboost_info/time_left.tsv ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ iter Passed Remaining
2
+ 0 106 10536
3
+ 1 152 7476
4
+ 2 189 6112
5
+ 3 225 5404
6
+ 4 240 4571
7
+ 5 256 4017
8
+ 6 292 3889
9
+ 7 326 3759
10
+ 8 362 3662
11
+ 9 396 3567
12
+ 10 432 3496
13
+ 11 466 3422
14
+ 12 488 3270
15
+ 13 524 3221
16
+ 14 558 3166
17
+ 15 573 3012
18
+ 16 594 2904
19
+ 17 630 2870
20
+ 18 666 2840
21
+ 19 687 2750
22
+ 20 721 2715
23
+ 21 756 2681
24
+ 22 777 2602
25
+ 23 812 2572
26
+ 24 847 2543
27
+ 25 864 2460
28
+ 26 904 2445
29
+ 27 938 2413
30
+ 28 972 2382
31
+ 29 1007 2350
32
+ 30 1019 2268
33
+ 31 1031 2191
34
+ 32 1066 2166
35
+ 33 1076 2089
36
+ 34 1088 2020
37
+ 35 1122 1996
38
+ 36 1157 1971
39
+ 37 1195 1950
40
+ 38 1230 1923
41
+ 39 1246 1869
42
+ 40 1280 1843
43
+ 41 1315 1816
44
+ 42 1349 1788
45
+ 43 1362 1733
46
+ 44 1398 1709
47
+ 45 1433 1682
48
+ 46 1466 1654
49
+ 47 1479 1602
50
+ 48 1486 1547
51
+ 49 1521 1521
52
+ 50 1557 1496
53
+ 51 1596 1473
54
+ 52 1646 1459
55
+ 53 1683 1434
56
+ 54 1718 1405
57
+ 55 1752 1377
58
+ 56 1787 1348
59
+ 57 1822 1319
60
+ 58 1844 1281
61
+ 59 1877 1251
62
+ 60 1915 1224
63
+ 61 1924 1179
64
+ 62 1960 1151
65
+ 63 1972 1109
66
+ 64 2007 1081
67
+ 65 2042 1052
68
+ 66 2076 1022
69
+ 67 2110 993
70
+ 68 2124 954
71
+ 69 2159 925
72
+ 70 2168 885
73
+ 71 2205 857
74
+ 72 2220 821
75
+ 73 2255 792
76
+ 74 2289 763
77
+ 75 2324 734
78
+ 76 2360 704
79
+ 77 2372 669
80
+ 78 2405 639
81
+ 79 2413 603
82
+ 80 2422 568
83
+ 81 2455 539
84
+ 82 2490 510
85
+ 83 2501 476
86
+ 84 2536 447
87
+ 85 2572 418
88
+ 86 2608 389
89
+ 87 2644 360
90
+ 88 2679 331
91
+ 89 2713 301
92
+ 90 2728 269
93
+ 91 2765 240
94
+ 92 2800 210
95
+ 93 2822 180
96
+ 94 2857 150
97
+ 95 2892 120
98
+ 96 2908 89
99
+ 97 2943 60
100
+ 98 2964 29
101
+ 99 2980 0
catboost_info/tmp/cat_feature_index.17f2f383-11eeddeb-4a42fe68-9d5bbcbb.tmp ADDED
Binary file (4 Bytes). View file
 
datasets/.DS_Store ADDED
Binary file (8.2 kB). View file