Artem Zmailov committed on
Commit
af457d2
·
1 Parent(s): a81fc0a

Final cleanup: all heavy files moved to LFS

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .DS_Store +0 -0
  2. .gitattributes +1 -0
  3. app/.DS_Store +0 -0
  4. app/.streamlit/config.toml +2 -0
  5. app/__init__.py +0 -0
  6. app/__pycache__/__init__.cpython-311.pyc +0 -0
  7. app/main.py +33 -0
  8. app/models/__pycache__/escalation.cpython-311.pyc +0 -0
  9. app/models/__pycache__/interpretation.cpython-311.pyc +0 -0
  10. app/models/escalation.py +267 -0
  11. app/models/interpretation.py +194 -0
  12. app/pages/__pycache__/application.cpython-311.pyc +0 -0
  13. app/pages/__pycache__/simulation.cpython-311.pyc +0 -0
  14. app/pages/application.py +329 -0
  15. app/pages/simulation.py +345 -0
  16. app/simulation/.DS_Store +0 -0
  17. app/simulation/__init__.py +0 -0
  18. app/simulation/__pycache__/__init__.cpython-311.pyc +0 -0
  19. app/simulation/controllers/__init__.py +0 -0
  20. app/simulation/controllers/__pycache__/__init__.cpython-311.pyc +0 -0
  21. app/simulation/controllers/__pycache__/base.cpython-311.pyc +0 -0
  22. app/simulation/controllers/__pycache__/pid.cpython-311.pyc +0 -0
  23. app/simulation/controllers/base.py +28 -0
  24. app/simulation/controllers/pid.py +129 -0
  25. app/simulation/core/__init__.py +0 -0
  26. app/simulation/core/__pycache__/__init__.cpython-311.pyc +0 -0
  27. app/simulation/core/__pycache__/processor.cpython-311.pyc +0 -0
  28. app/simulation/core/__pycache__/traffic_generator.cpython-311.pyc +0 -0
  29. app/simulation/core/processor.py +339 -0
  30. app/simulation/core/traffic_generator.py +234 -0
  31. app/simulation/visualization/__init__.py +0 -0
  32. app/simulation/visualization/__pycache__/__init__.cpython-311.pyc +0 -0
  33. app/simulation/visualization/__pycache__/animation.cpython-311.pyc +0 -0
  34. app/simulation/visualization/__pycache__/plots.cpython-311.pyc +0 -0
  35. app/simulation/visualization/animation.py +246 -0
  36. app/simulation/visualization/plots.py +374 -0
  37. app/simulation/visualization/simulation_20:11.gif +0 -0
  38. app/simulation/visualization/simulation_20:19.gif +0 -0
  39. app/simulation/visualization/simulation_20:25.gif +0 -0
  40. app/simulation/visualization/simulation_20:30.gif +0 -0
  41. app/utils/__pycache__/credit_preprocessor.cpython-311.pyc +0 -0
  42. app/utils/__pycache__/data_loader.cpython-311.pyc +0 -0
  43. app/utils/credit_preprocessor.py +329 -0
  44. app/utils/data_loader.py +26 -0
  45. catboost_info/catboost_training.json +104 -0
  46. catboost_info/learn/events.out.tfevents +3 -0
  47. catboost_info/learn_error.tsv +101 -0
  48. catboost_info/time_left.tsv +101 -0
  49. catboost_info/tmp/cat_feature_index.17f2f383-11eeddeb-4a42fe68-9d5bbcbb.tmp +0 -0
  50. datasets/.DS_Store +0 -0
.DS_Store ADDED
Binary file (12.3 kB). View file
 
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.csv filter=lfs diff=lfs merge=lfs -text
app/.DS_Store ADDED
Binary file (10.2 kB). View file
 
app/.streamlit/config.toml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [client]
2
+ showSidebarNavigation = false
app/__init__.py ADDED
File without changes
app/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (169 Bytes). View file
 
app/main.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import sys
4
+
5
# Put the directory above `app/` on the import path (page scripts in this
# project import modules as `app.<...>`).
sys.path.append(os.path.dirname(os.path.dirname(__file__)))

# Page chrome; the sidebar starts collapsed.
st.set_page_config(
    page_title="GiveMeSomeCredit",
    page_icon="🏦",
    layout="wide",
    initial_sidebar_state="collapsed",
)

st.title("🏦 GiveMeSomeCredit - Кредитный скоринг")
st.markdown("---")

# One landing-page column per destination: (heading, button label, page script).
_NAV_ENTRIES = (
    ("📝 Анкета", "Перейти к анкете", "pages/application.py"),
    ("📊 Симуляция", "Перейти к симуляции", "pages/simulation.py"),
)

for _column, (_heading, _button_label, _page) in zip(st.columns(2), _NAV_ENTRIES):
    with _column:
        st.subheader(_heading)
        if st.button(_button_label):
            # Hands control over to the selected page script.
            st.switch_page(_page)

st.markdown("---")

# Launch with: streamlit run app/main.py
33
+
app/models/__pycache__/escalation.cpython-311.pyc ADDED
Binary file (10.9 kB). View file
 
app/models/__pycache__/interpretation.cpython-311.pyc ADDED
Binary file (14.9 kB). View file
 
app/models/escalation.py ADDED
@@ -0,0 +1,267 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+
4
def check_business_rules(df):
    """Run the hard business rules over a batch of applications.

    Missing values in every inspected column are treated as 0.

    Args:
        df: DataFrame with the raw application columns ``age``,
            ``MonthlyIncome``, ``DebtRatio``, the three past-due counters,
            ``NumberRealEstateLoansOrLines`` and
            ``RevolvingUtilizationOfUnsecuredLines``.

    Returns:
        A 4-tuple of arrays, each of length ``len(df)``:

        * ``manual_mask`` - True where the application must go to manual review.
        * ``auto_reject_mask`` - True where the application is rejected outright.
        * ``messages`` - object array with the reason text ('' when no rule fired).
        * ``auto_decisions`` - 1 for auto-rejected rows, 0 otherwise.

        The two masks are mutually exclusive: an auto-rejected row is never
        also flagged for manual review, and it keeps the rejection message.
    """
    n = len(df)
    manual_mask = np.zeros(n, dtype=bool)
    auto_reject_mask = np.zeros(n, dtype=bool)
    # Object array (not a list fed to np.where) so an empty batch works and
    # message length is never limited by a fixed-width string dtype.
    messages = np.full(n, '', dtype=object)
    auto_decisions = np.zeros(n, dtype=int)

    def column(name):
        return df[name].fillna(0).values

    age = column('age')
    monthly_income = column('MonthlyIncome')
    debt_ratio = column('DebtRatio')
    # With no income on record DebtRatio is used as the payment amount itself.
    monthly_debt = np.where(monthly_income > 0,
                            debt_ratio * monthly_income,
                            debt_ratio)

    late_90 = column('NumberOfTimes90DaysLate')
    late_60_89 = column('NumberOfTime60-89DaysPastDueNotWorse')
    late_30_59 = column('NumberOfTime30-59DaysPastDueNotWorse')

    real_estate = column('NumberRealEstateLoansOrLines')
    utilization = column('RevolvingUtilizationOfUnsecuredLines')

    # Manual-review rules, applied in order; when several fire for one row
    # the message of the last one is kept.
    manual_rules = [
        # Special bank codes in the past-due counters.
        ((late_90 == 98) | (late_60_89 == 98) | (late_30_59 == 98),
         'Код 98: Списание долга как безнадежного'),
        ((late_90 == 96) | (late_60_89 == 96) | (late_30_59 == 96),
         'Код 96: Изъятие залога или реализация имущества'),
        # Out-of-range applicant parameters.
        (age > 80,
         'Возраст > 80 лет - требуется ручной разбор (индивидуальные условия)'),
        (monthly_income > 1000000,
         'Доход свыше 1,000,000 $ - требуется ручной разбор'),
        (monthly_debt > 1000000,
         'Платежи свыше 1,000,000 $ - требуется ручной разбор'),
        (utilization > 2,
         'Использование кредитных средств превышает 200%'),
        (real_estate > 20,
         'Количество кредитов под залог недвижимости слишком велико - ручной разбор'),
    ]
    for rule_mask, text in manual_rules:
        rule_mask = np.asarray(rule_mask, dtype=bool)
        manual_mask[rule_mask] = True
        messages[rule_mask] = text

    # Immediate rejection is applied last so that it wins over any manual
    # rule that fired for the same row (mask, decision and message).
    underage = np.asarray(age < 18, dtype=bool)
    auto_reject_mask[underage] = True
    auto_decisions[underage] = 1
    messages[underage] = 'Возраст менее 18 лет - кредит не выдаётся'
    manual_mask[underage] = False

    return manual_mask, auto_reject_mask, messages, auto_decisions
78
+
79
+
80
def _margin_pair(margins):
    """Return ``(low, high)`` margins from a scalar or a 1-/2-element sequence."""
    if np.isscalar(margins):
        return margins, margins
    if len(margins) == 1:
        return margins[0], margins[0]
    return margins[0], margins[1]


def _confidence(probas, threshold, low, high):
    """Return ``(is_confident, margin)`` arrays for probabilities around *threshold*."""
    probas = np.asarray(probas, dtype=float)
    below = probas < threshold
    margin = np.where(below, threshold - probas, probas - threshold)
    # Below the threshold the distance is compared with `low`, otherwise with `high`.
    return margin >= np.where(below, low, high), margin


def escalation_decision(applications_df, lr_model, second_model, second_model_name,
                        threshold=0.5, lr_margins=(0.35,), second_margins=(0.4,),
                        preprocessor=None, scaler=None):
    """Decide a batch of applications with a three-stage escalation chain.

    1. Business rules (``check_business_rules``) send some applications
       straight to manual review or to an automatic rejection.
    2. The remaining applications are scored by the logistic regression.
    3. Where the logistic regression is not confident, the second model is
       asked; where it is not confident either, the application goes to
       manual review.

    A model is "confident" when its class-1 probability is at least the
    relevant margin away from ``threshold``.

    Args:
        applications_df: raw applications, one row per application.
        lr_model: first-stage model exposing ``predict_proba``.
        second_model: second-stage model exposing ``predict_proba``.
        second_model_name: display name of the second model.
        threshold: probability at or above which ``final_decision`` is 1.
        lr_margins: one margin, or ``(below, above)`` margins, for the LR.
        second_margins: same for the second model.
        preprocessor: object with ``transform``; required once at least one
            application reaches the models.
        scaler: object with ``transform`` applied after ``preprocessor``.

    Returns:
        ``(decisions, manual_mask, stats)`` where ``decisions`` is a list with
        one dict per application, ``manual_mask`` is a boolean array marking
        applications that need manual review, and ``stats`` holds the
        per-stage counters (each application is counted in exactly one).

    Raises:
        ValueError: if applications reach the models but ``preprocessor`` or
            ``scaler`` was not supplied.
    """
    n = len(applications_df)
    decisions = [None] * n
    manual_mask = np.zeros(n, dtype=bool)

    stats = {
        'business_manual': 0,   # manual review forced by business rules
        'business_auto': 0,     # automatic rejection by business rules
        'lr_confident': 0,      # decided confidently by the LR
        'second_confident': 0,  # decided confidently by the second model
        'second_uncertain': 0,  # second model unsure -> manual review
        'total': n
    }

    # 1. Business rules.
    bus_manual_mask, bus_reject_mask, bus_messages, _ = check_business_rules(applications_df)

    for i in range(n):
        if bus_manual_mask[i]:
            # If a row is flagged for both outcomes, manual review is the one
            # that ends up in `decisions` (as before); it is now counted once
            # instead of in both counters.
            stats['business_manual'] += 1
            manual_mask[i] = True
            decisions[i] = {
                'final_decision': None,
                'model_used': 'Business Rules',
                'needs_review': True,
                'probability': None,
                'message': bus_messages[i],
                'lr_proba': None,
                'second_proba': None,
                'decision_path': [f"⚠️ Бизнес-правила: {bus_messages[i]}"]
            }
        elif bus_reject_mask[i]:
            stats['business_auto'] += 1
            decisions[i] = {
                'final_decision': 1,
                'model_used': 'Business Rules',
                'needs_review': False,
                'probability': 1.0,
                'message': bus_messages[i],
                'lr_proba': None,
                'second_proba': None,
                'decision_path': [f"❌ Бизнес-правила: {bus_messages[i]}"]
            }

    # 2. Applications not settled by the business rules go to the models.
    model_indices = [i for i, decision in enumerate(decisions) if decision is None]
    if not model_indices:
        return decisions, manual_mask, stats

    if preprocessor is None or scaler is None:
        raise ValueError("preprocessor and scaler are required to score applications")

    # 3. Preprocess once and score the whole batch with the LR.
    processed = preprocessor.transform(applications_df.iloc[model_indices])
    processed_scaled = scaler.transform(processed)

    lr_probas = np.asarray(lr_model.predict_proba(processed_scaled))[:, 1]
    lr_low, lr_high = _margin_pair(lr_margins)
    lr_confident, lr_margin_values = _confidence(lr_probas, threshold, lr_low, lr_high)

    for j, idx in enumerate(model_indices):
        if not lr_confident[j]:
            continue
        stats['lr_confident'] += 1
        decisions[idx] = {
            'final_decision': int(lr_probas[j] >= threshold),
            'probability': lr_probas[j],
            'model_used': 'Logistic Regression',
            'needs_review': False,
            'lr_proba': lr_probas[j],
            'second_proba': None,
            'lr_margin': lr_margin_values[j],
            'lr_confident': True,
            'second_used': False,
            'decision_path': [
                f"1️⃣ Logistic Regression: {lr_probas[j]:.1%} (отступ: {lr_margin_values[j]:.1%})",
                f" ✅ LR уверена - финальное решение"
            ]
        }

    # Positions (within the model batch) the LR was not confident about.
    uncertain_positions = [j for j, sure in enumerate(lr_confident) if not sure]
    if not uncertain_positions:
        return decisions, manual_mask, stats

    # The scaler may hand back a DataFrame or a plain array.
    if hasattr(processed_scaled, 'iloc'):
        processed_uncertain_scaled = processed_scaled.iloc[uncertain_positions]
    else:
        processed_uncertain_scaled = processed_scaled[uncertain_positions]

    second_probas = np.asarray(second_model.predict_proba(processed_uncertain_scaled))[:, 1]
    second_low, second_high = _margin_pair(second_margins)
    second_sure, second_margin_values = _confidence(
        second_probas, threshold, second_low, second_high)

    for k, pos in enumerate(uncertain_positions):
        idx = model_indices[pos]
        proba = second_probas[k]
        second_margin = second_margin_values[k]

        path = [
            f"1️⃣ Logistic Regression: {lr_probas[pos]:.1%} (отступ: {lr_margin_values[pos]:.1%})",
            f" ⚠️ LR не уверена → вызываем {second_model_name}",
            f"2️⃣ {second_model_name}: {proba:.1%} (отступ: {second_margin:.1%})"
        ]

        if second_sure[k]:
            stats['second_confident'] += 1
            path.append(f" ✅ {second_model_name} уверен - финальное решение")
            decisions[idx] = {
                'final_decision': int(proba >= threshold),
                'probability': proba,
                'model_used': second_model_name,
                'needs_review': False,
                'lr_proba': lr_probas[pos],
                'second_proba': proba,
                'lr_margin': lr_margin_values[pos],
                'second_margin': second_margin,
                'lr_confident': False,
                'second_confident': True,
                'second_used': True,
                'decision_path': path
            }
        else:
            stats['second_uncertain'] += 1
            path.append(f" ⚠️ {second_model_name} не уверен → ручной разбор")
            manual_mask[idx] = True
            decisions[idx] = {
                'final_decision': None,
                'probability': proba,
                'model_used': 'Manual Review',
                'needs_review': True,
                'lr_proba': lr_probas[pos],
                'second_proba': proba,
                'lr_margin': lr_margin_values[pos],
                'second_margin': second_margin,
                'lr_confident': False,
                'second_confident': False,
                'second_used': True,
                'message': 'Модели не уверены в решении',
                'decision_path': path
            }

    return decisions, manual_mask, stats
app/models/interpretation.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+
6
+ FEATURE_DESCRIPTIONS = { ... }
7
+
8
def get_feature_display_name(feature_name):
    """Return a human-readable label for a feature.

    A name present in ``FEATURE_DESCRIPTIONS`` maps to its stored description.
    Any other name is prettified: underscores become spaces, words are
    title-cased, 'Over' becomes '>' and 'Loans' becomes 'Кредитов'.
    """
    if feature_name not in FEATURE_DESCRIPTIONS:
        pretty = feature_name.replace('_', ' ').title()
        return pretty.replace('Over', '>').replace('Loans', 'Кредитов')
    return FEATURE_DESCRIPTIONS[feature_name]
15
+
16
+
17
def interpret_lr(features, lr_model, feature_names):
    """Explain a logistic-regression prediction for the first row of *features*.

    Args:
        features: DataFrame or 2-D ndarray of model inputs; only row 0 is
            explained. An ndarray is wrapped in a DataFrame with
            ``feature_names`` as columns.
        lr_model: fitted model exposing ``coef_``, ``intercept_`` and
            ``predict_proba``.
        feature_names: feature names in column order.

    Returns:
        dict with:

        * ``logit_contributions`` - DataFrame (feature, coefficient, value,
          logit_contribution, abs_logit) sorted by ``abs_logit`` descending.
        * ``marginal_effects`` - DataFrame (feature, marginal_effect,
          abs_marginal) sorted by ``abs_marginal`` descending, where
          ``marginal_effect`` is the class-1 probability minus the probability
          obtained with that single feature set to 0.
        * ``probability`` - sigmoid of ``logit``.
        * ``logit`` - intercept plus the sum of the contributions.
        * ``intercept`` - the model intercept.
    """
    if isinstance(features, np.ndarray):
        features = pd.DataFrame(features, columns=feature_names)

    coefficients = lr_model.coef_[0]
    intercept = lr_model.intercept_[0]

    importance_df = pd.DataFrame({
        'feature': feature_names,
        'coefficient': coefficients,
        'value': features.iloc[0].values
    })
    importance_df['logit_contribution'] = importance_df['coefficient'] * importance_df['value']
    importance_df['abs_logit'] = importance_df['logit_contribution'].abs()
    importance_df = importance_df.sort_values('abs_logit', ascending=False)

    # Score everything in one batched call instead of one call per feature:
    # row 0 is the untouched sample, row i + 1 has feature i zeroed out.
    # The batch is passed in the same container type as `features`, so the
    # model sees one consistent input type for every row.
    n_features = len(feature_names)
    probe = np.tile(features.values[0], (n_features + 1, 1))
    probe[np.arange(1, n_features + 1), np.arange(n_features)] = 0
    probe_df = pd.DataFrame(probe, columns=features.columns)
    probas = np.asarray(lr_model.predict_proba(probe_df))[:, 1]

    marginal = probas[0] - probas[1:]
    marginal_df = pd.DataFrame({
        'feature': list(feature_names),
        'marginal_effect': marginal,
        'abs_marginal': np.abs(marginal)
    }).sort_values('abs_marginal', ascending=False)

    logit = intercept + importance_df['logit_contribution'].sum()
    proba = 1 / (1 + np.exp(-logit))

    return {
        'logit_contributions': importance_df,
        'marginal_effects': marginal_df,
        'probability': proba,
        'logit': logit,
        'intercept': intercept
    }
60
+
61
def plot_feature_importance_sns(importance_df, value_col='logit_contribution', title="Вклад признаков в логит"):
    """Draw a horizontal bar chart of the first 10 rows of *importance_df*.

    Args:
        importance_df: DataFrame with a ``feature`` column and *value_col*;
            only its first 10 rows are plotted, sorted by *value_col*.
        value_col: column holding the bar lengths.
        title: chart title.

    Returns:
        The matplotlib Figure. Positive bars are red, negative green, zero
        pale yellow; bars longer than 0.02 in absolute value get a label.
    """
    df = importance_df.head(10).copy()
    df = df.sort_values(value_col, ascending=True)

    fig, ax = plt.subplots(figsize=(10, 6), facecolor='#f8f9fa')
    ax.set_facecolor('#f8f9fa')

    values = df[value_col].to_numpy()
    # Bars sit on explicit numeric positions so the tick positions can be
    # pinned before the display labels are assigned.
    positions = list(range(len(df)))

    colors = ['#d7191c' if x > 0 else '#1a9641' if x < 0 else '#ffffbf' for x in values]
    bars = ax.barh(positions, values, color=colors, edgecolor='white', linewidth=1.5, alpha=0.9)

    for bar, val in zip(bars, values):
        if abs(val) > 0.02:
            # Put the value just inside the end of the bar.
            x_pos = val - 0.02 if val > 0 else val + 0.02
            ha = 'right' if val > 0 else 'left'
            ax.text(x_pos, bar.get_y() + bar.get_height() / 2, f'{val:.3f}', ha=ha, va='center', fontsize=9)

    ax.axvline(x=0, color='#495057', linestyle='-', linewidth=1, alpha=0.3)
    ax.grid(axis='x', alpha=0.15, linestyle='--', color='#adb5bd')
    ax.set_axisbelow(True)
    ax.set_xlabel('Вклад в логит', fontsize=11)
    ax.set_ylabel('')
    ax.set_title(title, fontsize=12, fontweight='bold', pad=15)
    # Fix the tick positions first, then label them once (the label call was
    # previously issued twice and without fixed positions).
    ax.set_yticks(positions)
    ax.set_yticklabels([get_feature_display_name(x) for x in df['feature']], fontsize=10)
    sns.despine(top=True, right=True, left=False, bottom=False)
    plt.tight_layout()
    return fig
88
+
89
def plot_marginal_effects_sns(marginal_df, title="Влияние на вероятность дефолта"):
    """Draw a horizontal bar chart of the first 10 rows of *marginal_df*.

    Expects ``feature`` and ``marginal_effect`` columns. Bars are sorted by
    effect, coloured by sign (red positive, green negative, pale yellow zero)
    and labelled as percentages when the effect exceeds 0.01 in absolute
    value. Returns the matplotlib Figure.
    """
    background = '#f8f9fa'
    top_rows = marginal_df.head(10).copy().sort_values('marginal_effect', ascending=True)
    effects = top_rows['marginal_effect']

    fig, ax = plt.subplots(figsize=(10, 6), facecolor=background)
    ax.set_facecolor(background)

    def _bar_color(effect):
        if effect > 0:
            return '#d7191c'
        if effect < 0:
            return '#1a9641'
        return '#ffffbf'

    bars = ax.barh(
        top_rows['feature'],
        effects,
        color=[_bar_color(effect) for effect in effects],
        edgecolor='white',
        linewidth=1.5,
        alpha=0.9,
    )

    for patch, effect in zip(bars, effects):
        if not abs(effect) > 0.01:
            continue  # too short to carry a readable label
        if effect > 0:
            label_x, align = effect - 0.01, 'right'
        else:
            label_x, align = effect + 0.01, 'left'
        label_y = patch.get_y() + patch.get_height() / 2
        ax.text(label_x, label_y, f'{effect:.1%}', ha=align, va='center', fontsize=9)

    ax.axvline(x=0, color='#495057', linestyle='-', linewidth=1, alpha=0.3)
    ax.grid(axis='x', alpha=0.15, linestyle='--', color='#adb5bd')
    ax.set_axisbelow(True)
    ax.set_xlabel('Изменение вероятности', fontsize=11)
    ax.set_ylabel('')
    ax.set_title(title, fontsize=12, fontweight='bold', pad=15)
    # Show the x axis as whole percentages.
    ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{x:.0%}'))
    display_names = [get_feature_display_name(name) for name in top_rows['feature']]
    ax.set_yticklabels(display_names, fontsize=10)
    sns.despine(top=True, right=True, left=False, bottom=False)
    plt.tight_layout()
    return fig
116
+
117
+
118
def plot_shap_analysis(second_model, processed_scaled, feature_names, second_model_name):
    """Render a SHAP explanation of the first row of *processed_scaled* in Streamlit.

    Shows a waterfall plot, a short reading guide, the top risk-raising and
    risk-lowering features, and a table with every SHAP value.

    Args:
        second_model: model handed to ``shap.TreeExplainer``.
        processed_scaled: model inputs (DataFrame or 2-D array); row 0 is explained.
        feature_names: feature names in column order.
        second_model_name: display name used in the section heading.

    Returns:
        None. Failures are reported on the page instead of being raised; the
        "install shap" hint is shown only when the import itself fails.
    """
    import streamlit as st
    st.markdown("---")
    st.subheader(f"⚡ Детальный анализ: {second_model_name} (SHAP)")

    try:
        import shap
    except ImportError as e:
        st.error(f"❌ Ошибка SHAP: {e}")
        st.info("Установите shap: `pip install shap`")
        return

    with st.spinner("🔄 Рассчитываем SHAP значения..."):
        try:
            explainer = shap.TreeExplainer(second_model)
            shap_values = explainer.shap_values(processed_scaled)

            # Per-class output comes either as a list of arrays or as a 3-D
            # array with the class on the last axis; keep class 1 in both cases.
            if isinstance(shap_values, list):
                shap_values = shap_values[1]
            shap_values = np.asarray(shap_values)
            if shap_values.ndim == 3:
                shap_values = shap_values[:, :, 1]
            row_shap = shap_values[0]

            # expected_value can likewise hold one entry per class; the
            # waterfall plot needs the single value matching class 1.
            expected = np.ravel(explainer.expected_value)
            base_value = float(expected[1] if expected.size > 1 else expected[0])

            # Works for both DataFrame and ndarray inputs.
            row_data = np.asarray(processed_scaled)[0]

            # 1. Waterfall plot (drawn on the figure created here, then released).
            fig, _ = plt.subplots(figsize=(12, 7))
            try:
                shap.waterfall_plot(
                    shap.Explanation(
                        values=row_shap,
                        base_values=base_value,
                        data=row_data,
                        feature_names=feature_names
                    ),
                    show=False,
                )
                plt.tight_layout()
                st.pyplot(fig)
            finally:
                plt.close(fig)

            # 2. How to read the chart.
            with st.expander("📋 Как читать SHAP график?"):
                st.markdown("""
                - **f(x)** = итоговое предсказание модели
                - **base value** = среднее предсказание по всем клиентам
                - 🔴 Красное → признаки, повышающие риск
                - 🔵 Синее → признаки, снижающие риск
                """)

            # 3. SHAP values ordered by absolute impact.
            shap_df = pd.DataFrame({
                'feature': feature_names,
                'shap_value': row_shap,
                'abs_shap': np.abs(row_shap)
            }).sort_values('abs_shap', ascending=False)

            shap_df['description'] = shap_df['feature'].apply(get_feature_display_name)

            st.markdown("### 📋 Факторы, влияющие на решение:")

            col1, col2 = st.columns(2)

            with col1:
                pos = shap_df[shap_df['shap_value'] > 0].head(5)
                if len(pos) > 0:
                    st.markdown("**🔴 Повышают риск:**")
                    for _, row in pos.iterrows():
                        st.markdown(f"- {row['description']}: +{row['shap_value']:.3f}")

            with col2:
                neg = shap_df[shap_df['shap_value'] < 0].head(5)
                if len(neg) > 0:
                    st.markdown("**🟢 Снижают риск:**")
                    for _, row in neg.iterrows():
                        st.markdown(f"- {row['description']}: {row['shap_value']:.3f}")

            with st.expander("📋 Все SHAP значения"):
                display_df = shap_df[['feature', 'description', 'shap_value']].copy()
                display_df.columns = ['Признак', 'Описание', 'SHAP']
                display_df['SHAP'] = display_df['SHAP'].round(3)
                st.dataframe(display_df.sort_values('SHAP', ascending=False), width='stretch')

        except Exception as e:
            # Best-effort section: report the failure without breaking the page.
            st.error(f"❌ Ошибка SHAP: {e}")
app/pages/__pycache__/application.cpython-311.pyc ADDED
Binary file (22.7 kB). View file
 
app/pages/__pycache__/simulation.cpython-311.pyc ADDED
Binary file (8.93 kB). View file
 
app/pages/application.py ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import os
4
+ from app.utils.data_loader import load_artifacts
5
+ from app.models.escalation import escalation_decision
6
+ from app.models.interpretation import (
7
+ interpret_lr, plot_feature_importance_sns,
8
+ plot_marginal_effects_sns, plot_shap_analysis,
9
+ get_feature_display_name
10
+ )
11
+ from app.utils.credit_preprocessor import check_business_rules
12
+
13
+ # Пути
14
+ PROJECT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
15
+ MODELS_PATH = os.path.join(PROJECT_PATH, 'models/best/train_150/')
16
+ PREPROCESSOR_PATH = os.path.join(PROJECT_PATH, 'preprocessors/')
17
+
18
+
19
+ def main():
20
+ st.title("🏦 Кредитный скоринг - Анкета")
21
+
22
+ # Загрузка артефактов
23
+ preprocessor, scaler, models = load_artifacts(MODELS_PATH, PREPROCESSOR_PATH)
24
+
25
+ # Инициализация статистики
26
+ if 'stats' not in st.session_state:
27
+ st.session_state.stats = {
28
+ 'total': 0,
29
+ 'manual': 0,
30
+ 'lr_confident': 0,
31
+ 'second_used': 0,
32
+ 'second_confident': 0,
33
+ 'approved': 0,
34
+ 'declined': 0
35
+ }
36
+
37
+ if 'step' not in st.session_state:
38
+ st.session_state.step = 'input'
39
+
40
+ # ВВОД ДАННЫХ
41
+
42
+ if st.session_state.step == 'input':
43
+ st.header("📋 Анкета заемщика")
44
+
45
+ with st.form("credit_form"):
46
+ st.subheader("👤 Личная информация")
47
+ col1, col2 = st.columns(2)
48
+ with col1:
49
+ age = st.number_input("Возраст", 0, 150, 35)
50
+ with col2:
51
+ dependents = st.number_input("Иждивенцы", 0, 20, 0)
52
+
53
+ st.subheader("💰 Ежемесячный доход")
54
+ income_method = st.radio("Способ указания дохода", ["Слайдер (до 20,000$)", "Точное значение"],
55
+ horizontal=True)
56
+
57
+ st.subheader("💳 Ежемесячные платежи")
58
+ debt_method = st.radio("Способ указания платежей", ["Слайдер (до 10,000$)", "Точное значение"],
59
+ horizontal=True)
60
+
61
+ st.subheader("📊 Кредитная история")
62
+ credit_lines = st.number_input("Открытых кредитов и карт", 0, 100, 5)
63
+ real_estate = st.number_input("Кредитов под залог недвижимости", 0, 100, 1)
64
+
65
+ st.subheader("📈 Использование лимитов")
66
+ util_method = st.radio("Уровень использования",
67
+ ["Норма (0-100%)", "Овердрафт (100-200%)", "Экстремальный (>200%)"], horizontal=True)
68
+
69
+ st.subheader("⏱️ Просрочки за последние 2 года")
70
+ col1, col2, col3 = st.columns(3)
71
+ with col1:
72
+ late_30_59 = st.number_input("30-59 дней", 0, 100, 0)
73
+ with col2:
74
+ late_60_89 = st.number_input("60-89 дней", 0, 100, 0)
75
+ with col3:
76
+ late_90 = st.number_input("90+ дней", 0, 100, 0)
77
+
78
+ submitted = st.form_submit_button("➡️ Далее: указать точные значения")
79
+
80
+ if submitted:
81
+ st.session_state.update({
82
+ 'age': age, 'dependents': dependents, 'income_method': income_method,
83
+ 'debt_method': debt_method, 'credit_lines': credit_lines,
84
+ 'real_estate': real_estate, 'util_method': util_method,
85
+ 'late_30_59': late_30_59, 'late_60_89': late_60_89, 'late_90': late_90
86
+ })
87
+ st.session_state.step = 'values'
88
+ st.rerun()
89
+
90
+
91
+ # ВВОД ТОЧНЫХ ЗНАЧЕНИЙ
92
+
93
+ elif st.session_state.step == 'values':
94
+ st.header("💰 Укажите точные значения")
95
+
96
+ with st.form("values_form"):
97
+ col1, col2 = st.columns(2)
98
+ with col1:
99
+ st.subheader("Доход")
100
+ if st.session_state.income_method == "Слайдер (до 20,000$)":
101
+ monthly_income = st.slider("Ежемесячный доход ($)", 0, 20000, 5000)
102
+ else:
103
+ monthly_income = st.number_input("Ежемесячный доход ($)", 0, 1000000, 5000)
104
+
105
+ with col2:
106
+ st.subheader("Платежи")
107
+ if st.session_state.debt_method == "Слайдер (до 10,000$)":
108
+ monthly_debt = st.slider("Ежемесячные платежи ($)", 0, 10000, 1500)
109
+ else:
110
+ monthly_debt = st.number_input("Ежемесячные платежи ($)", 0, 1000000, 1500)
111
+
112
+ st.subheader("📈 Использование лимитов")
113
+ if st.session_state.util_method == "Норма (0-100%)":
114
+ util_value = st.slider("Процент использования", 0, 100, 20)
115
+ utilization = util_value / 100
116
+ elif st.session_state.util_method == "Овердрафт (100-200%)":
117
+ util_value = st.slider("Процент использования", 100, 200, 120)
118
+ utilization = util_value / 100
119
+ else:
120
+ st.warning("Экстремальное использование (>200%) - автоматический ручной разбор")
121
+ utilization = st.number_input("Процент использования", 200, 1000, 200) / 100
122
+
123
+ submitted = st.form_submit_button("✅ Получить решение")
124
+
125
+ # САЙДБАР
126
+ with st.sidebar:
127
+ st.markdown("---")
128
+ st.subheader("⚙️ Настройки")
129
+
130
+ with st.expander("🎯 Пороги уверенности", expanded=False):
131
+ threshold = st.slider("Порог одобрения", 0.3, 0.7, 0.5, 0.05)
132
+ lr_margin = st.slider("Отступ LR", 0.2, 0.5, 0.35, 0.05)
133
+ second_margin = st.slider("Отступ второй модели", 0.2, 0.5, 0.4, 0.05)
134
+
135
+ with st.expander("🤖 Выбор модели", expanded=False):
136
+ available_models = [name for name in models.keys() if name != 'Logistic Regression']
137
+ second_model_name = st.selectbox("Модель для эскалации", available_models)
138
+
139
+ with st.expander("📊 Статистика", expanded=False):
140
+ stats = st.session_state.stats
141
+ if stats['total'] > 0:
142
+ st.metric("Всего заявок", stats['total'])
143
+ st.metric("Ручной разбор", f"{stats['manual'] / stats['total']:.1%}")
144
+ st.metric("LR уверена", f"{stats['lr_confident'] / stats['total']:.1%}")
145
+ if stats['second_used'] > 0:
146
+ st.metric("Вторая модель уверена",
147
+ f"{stats['second_confident'] / stats['second_used']:.1%}")
148
+
149
+ if st.button("🔄 Сброс"):
150
+ st.session_state.stats = {'total': 0, 'manual': 0, 'lr_confident': 0,
151
+ 'second_used': 0, 'second_confident': 0,
152
+ 'approved': 0, 'declined': 0}
153
+ st.rerun()
154
+ else:
155
+ st.info("Нет данных")
156
+
157
+ with st.expander("ℹ️ О проекте", expanded=False):
158
+ st.markdown(f"""
159
+ **Модели:**
160
+ - Logistic Regression
161
+ - {', '.join(available_models)}
162
+
163
+ **AUC:** 0.8578 (LR), ~0.87 (остальные)
164
+ """)
165
+
166
+ st.session_state.threshold = threshold
167
+ st.session_state.lr_margin = lr_margin
168
+ st.session_state.second_margin = second_margin
169
+ st.session_state.second_model_name = second_model_name
170
+
171
+ if submitted:
172
+ debt_ratio = monthly_debt / monthly_income if monthly_income > 0 else monthly_debt
173
+
174
+ # Подготовка данных (ОДИН РАЗ)
175
+ input_data = pd.DataFrame([{
176
+ 'RevolvingUtilizationOfUnsecuredLines': utilization,
177
+ 'age': st.session_state.age,
178
+ 'NumberOfTime30-59DaysPastDueNotWorse': st.session_state.late_30_59,
179
+ 'DebtRatio': debt_ratio,
180
+ 'MonthlyIncome': monthly_income,
181
+ 'NumberOfOpenCreditLinesAndLoans': st.session_state.credit_lines,
182
+ 'NumberOfTimes90DaysLate': st.session_state.late_90,
183
+ 'NumberRealEstateLoansOrLines': st.session_state.real_estate,
184
+ 'NumberOfTime60-89DaysPastDueNotWorse': st.session_state.late_60_89,
185
+ 'NumberOfDependents': st.session_state.dependents
186
+ }])
187
+
188
+ st.markdown("---")
189
+
190
+ with st.spinner("🔄 Анализ заявки..."):
191
+ lr_model = models['Logistic Regression']
192
+ second_model = models[second_model_name]
193
+
194
+ # Единый вызов эскалации (включает бизнес-правила)
195
+ decisions, manual_mask, task = escalation_decision(
196
+ input_data,
197
+ lr_model,
198
+ second_model,
199
+ second_model_name,
200
+ threshold=st.session_state.threshold,
201
+ lr_margins=[st.session_state.lr_margin],
202
+ second_margins=[st.session_state.second_margin],
203
+ preprocessor=preprocessor,
204
+ scaler=scaler
205
+ )
206
+ decision = decisions[0]
207
+
208
+ # Для интерпретации LR нужны обработанные данные
209
+ processed = preprocessor.transform(input_data)
210
+ processed_scaled = scaler.transform(processed)
211
+
212
+ # Обновление статистики
213
+ st.session_state.stats['total'] += 1
214
+ if decision['needs_review']:
215
+ st.session_state.stats['manual'] += 1
216
+ else:
217
+ if decision['final_decision'] == 0:
218
+ st.session_state.stats['approved'] += 1
219
+ else:
220
+ st.session_state.stats['declined'] += 1
221
+
222
+ if decision.get('lr_confident', False):
223
+ st.session_state.stats['lr_confident'] += 1
224
+
225
+ if decision.get('second_used', False):
226
+ st.session_state.stats['second_used'] += 1
227
+ if decision.get('second_confident', False):
228
+ st.session_state.stats['second_confident'] += 1
229
+
230
+ # ОТОБРАЖЕНИЕ РЕЗУЛЬТАТОВ
231
+ st.subheader("🔄 Цепочка принятия решения")
232
+ for step in decision['decision_path']:
233
+ st.write(step)
234
+
235
+ col1, col2 = st.columns(2)
236
+ with col1:
237
+ st.markdown("**🏦 Logistic Regression**")
238
+ st.metric("Вероятность", f"{decision['lr_proba']:.1%}")
239
+ st.write(f"Отступ: {decision['lr_margin']:.1%}")
240
+ if decision['lr_confident']:
241
+ st.success("✅ Уверена")
242
+ else:
243
+ st.warning("⚠️ Не уверена")
244
+
245
+ with col2:
246
+ st.markdown(f"**⚡ {second_model_name}**")
247
+ if decision['second_used']:
248
+ st.metric("Вероятность", f"{decision['second_proba']:.1%}")
249
+ st.write(f"Отступ: {decision['second_margin']:.1%}")
250
+ if decision['second_confident']:
251
+ st.success("✅ Уверен")
252
+ else:
253
+ st.warning("⚠️ Не уверен")
254
+ else:
255
+ st.info("⏳ Не вызывался")
256
+
257
+ st.markdown("---")
258
+ if decision['needs_review']:
259
+ st.warning("👨‍💼 **РУЧНОЙ РАЗБОР**")
260
+ st.info("Модели не уверены - требуется проверка специалистом")
261
+ else:
262
+ col1, col2 = st.columns(2)
263
+ with col1:
264
+ if decision['final_decision'] == 0:
265
+ st.success("✅ **КРЕДИТ ОДОБРЕН**")
266
+ else:
267
+ st.error("❌ **КРЕДИТ НЕ ОДОБРЕН**")
268
+ with col2:
269
+ st.metric("Модель", decision['model_used'])
270
+
271
+ # ДЕТАЛЬНЫЙ АНАЛИЗ LR
272
+ st.markdown("---")
273
+ st.subheader("🔍 Детальный анализ: Logistic Regression")
274
+
275
+ feature_names = processed_scaled.columns.tolist()
276
+ interpretation = interpret_lr(processed_scaled, lr_model, feature_names)
277
+
278
+ tab1, tab2 = st.tabs(["📊 Вклад в логит", "📈 Влияние на вероятность"])
279
+
280
+ with tab1:
281
+ st.markdown("🔴 Положительный вклад = ↑ риск, 🟢 Отрицательный = ↓ риск")
282
+ fig1 = plot_feature_importance_sns(interpretation['logit_contributions'])
283
+ st.pyplot(fig1)
284
+
285
+ with st.expander("📋 Все вклады"):
286
+ display_df = interpretation['logit_contributions'][
287
+ ['feature', 'value', 'coefficient', 'logit_contribution']].copy()
288
+ display_df['Описание'] = display_df['feature'].apply(get_feature_display_name)
289
+ display_df = display_df[['Описание', 'value', 'coefficient', 'logit_contribution']]
290
+ display_df.columns = ['Признак', 'Значение', 'Коэф', 'Вклад']
291
+ display_df = display_df.round(3)
292
+ st.dataframe(display_df)
293
+
294
+ with tab2:
295
+ st.markdown("🔴 Положительное = фактор ↑ риск, 🟢 Отрицательное = ↓ риск")
296
+ fig2 = plot_marginal_effects_sns(interpretation['marginal_effects'])
297
+ st.pyplot(fig2)
298
+
299
+ with st.expander("📋 Все эффекты"):
300
+ display_df = interpretation['marginal_effects'][['feature', 'marginal_effect']].copy()
301
+ display_df['Описание'] = display_df['feature'].apply(get_feature_display_name)
302
+ display_df = display_df[['Описание', 'marginal_effect']]
303
+ display_df.columns = ['Признак', 'Влияние']
304
+ display_df['Влияние'] = display_df['Влияние'].map('{:.1%}'.format)
305
+ st.dataframe(display_df)
306
+
307
+ st.info(f"Итоговая вероятность дефолта (LR): {interpretation['probability']:.1%}")
308
+
309
+ # ДЕТАЛЬНЫЙ АНАЛИЗ ВТОРОЙ МОДЕЛИ (SHAP для tree-based)
310
+ if decision['second_used'] and second_model_name in ['XGBoost', 'LightGBM', 'Random Forest', 'CatBoost']:
311
+ plot_shap_analysis(second_model, processed_scaled, feature_names, second_model_name)
312
+
313
+ # КНОПКА НАЗАД
314
+ if st.button("◀️ Вернуться к выбору способов"):
315
+ st.session_state.step = 'input'
316
+ st.rerun()
317
+
318
+ st.markdown("---")
319
+ col1, col2, col3 = st.columns([1, 2, 1])
320
+ with col2:
321
+ if st.button("🏠 На главную", use_container_width=True):
322
+ st.switch_page("main.py")
323
+
324
+ st.markdown("---")
325
+ st.caption("🏦 GiveMeSomeCredit - Интерпретируемый кредитный скоринг | Модели: Logistic Regression + выбор")
326
+
327
+
328
+ if __name__ == "__main__":
329
+ main()
app/pages/simulation.py ADDED
@@ -0,0 +1,345 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import os
5
+ import sys
6
+ import tempfile
7
+ import time
8
+ from datetime import datetime
9
+ from PIL import Image
10
+ import matplotlib.pyplot as plt
11
+
12
+ # Остальные импорты...
13
+
14
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
15
+ from app.utils.data_loader import load_artifacts
16
+ from app.simulation.core.traffic_generator import TrafficGenerator
17
+ from app.simulation.core.processor import ApplicationProcessor
18
+ from app.simulation.controllers.pid import PIDController
19
+ from app.simulation.visualization.plots import (
20
+ plot_queue_dynamics,
21
+ plot_specialist_load,
22
+ plot_inflow,
23
+ plot_parameters_history,
24
+ plot_detailed_decisions
25
+ )
26
+ # ============================================================================
27
+ # БЛОК АНИМАЦИИ: Импорт функций для визуализации
28
+ # ============================================================================
29
+ from app.simulation.visualization.animation import create_simulation_video
30
+
31
+ # ============================================================================
32
+
33
+
34
def minutes_to_time(minutes, start_time="00:00"):
    """Convert an offset in minutes from a start time into an ``HH:MM`` string.

    Args:
        minutes: Offset from ``start_time`` in minutes. Fractional values are
            floored to whole minutes; negative values count backwards.
        start_time: Wall-clock start in ``"HH:MM"`` format.

    Returns:
        The resulting time of day as a zero-padded ``"HH:MM"`` string; the hour
        wraps around every 24 hours.

    Raises:
        ValueError: If ``start_time`` is not two integers separated by ``:``.
    """
    start_hour, start_min = map(int, start_time.split(':'))
    # ``// 1`` floors floats (and is a no-op for ints) so that the ``02d``
    # format below never receives a non-integer value.
    total_minutes = int((start_hour * 60 + start_min + minutes) // 1)
    hour, minute = divmod(total_minutes, 60)
    return f"{hour % 24:02d}:{minute:02d}"
41
+
42
+
43
def main():
    """Render the Streamlit page that simulates a day of application processing.

    The page has three parts:

    1. Sidebar widgets that configure the specialists pool, the approval
       threshold, the initial uncertainty margins and the PID controller.
    2. A "run" button that loads the test dataset, spreads the applications
       over the minute counts produced by ``TrafficGenerator`` and feeds them
       minute by minute through ``ApplicationProcessor`` (optionally adapting
       the margins with ``PIDController``).
    3. A results section (metrics, plots, optional video) that is driven only
       by ``st.session_state`` so it survives Streamlit reruns.
    """
    st.title("📊 Симуляция работы системы")

    # Artifacts are located relative to the directory three levels above this file.
    PROJECT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
    MODELS_PATH = os.path.join(PROJECT_PATH, 'models/best/train_150/')
    PREPROCESSOR_PATH = os.path.join(PROJECT_PATH, 'preprocessors/')
    TEST_DATA_PATH = os.path.join(PROJECT_PATH, 'datasets/cs-test.csv')

    preprocessor, scaler, models = load_artifacts(MODELS_PATH, PREPROCESSOR_PATH)

    # Logistic Regression is always the first stage; any other model may be the second.
    available_models = [name for name in models.keys() if name != 'Logistic Regression']

    # --- Sidebar: model choice -------------------------------------------------
    st.sidebar.subheader("🤖 Выбор модели")
    second_model_name = st.sidebar.selectbox(
        "Вторая модель для эскалации",
        available_models,
        index=0
    )

    # --- Sidebar: simulation parameters ---------------------------------------
    st.sidebar.header("⚙️ Параметры")
    # Upper limit of 400 matches the 20x20 specialists grid used by the animation.
    specialists_count = st.sidebar.slider("Количество специалистов (модели)", 10, 400, 100, 10)
    business_specialists_count = st.sidebar.slider("Количество экспертов (бизнес-правила)", 1, 100, 30, 1)

    business_time = st.sidebar.slider("Время обработки бизнес правил(мин)", 5, 30, 15, 5)
    base_time = st.sidebar.slider("Базовое время обработки (мин)", 2, 15, 5)

    target_load = st.sidebar.slider(
        "Целевая загрузка специалистов", 0.5, 1.0, 0.8, 0.05,
        help="0.8 = 80% - оставляем запас на пики")

    st.sidebar.subheader("🎯 Порог одобрения")
    fixed_threshold = st.sidebar.slider(
        "Порог (фиксированный)",
        0.3, 0.7, 0.5, 0.05,
        help="Порог одобрения - стратегический параметр, не меняется PID"
    )

    st.sidebar.subheader("🎯 Начальные отступы (%)")

    lr_low_pct = st.sidebar.slider("LR нижний отступ (% от порога)", 0, 100, 20, 5,
                                   help="% от расстояния между 0 и порогом")
    lr_high_pct = st.sidebar.slider("LR верхний отступ (% от 1-порога)", 0, 100, 20, 5,
                                    help="% от расстояния между порогом и 1")
    second_low_pct = st.sidebar.slider("Вторая модель нижний (%)", 0, 100, 20, 5)
    second_high_pct = st.sidebar.slider("Вторая модель верхний (%)", 0, 100, 20, 5)

    # Convert the percentages into absolute probability margins around the threshold.
    init_lr_low = fixed_threshold * lr_low_pct / 100
    init_lr_high = (1 - fixed_threshold) * lr_high_pct / 100
    init_second_low = fixed_threshold * second_low_pct / 100
    init_second_high = (1 - fixed_threshold) * second_high_pct / 100

    # --- Sidebar: PID controller and animation --------------------------------
    st.sidebar.subheader("🎛️ PID регулятор")
    use_pid = st.sidebar.checkbox("Включить PID", value=True)

    st.sidebar.subheader("🎬 Анимация")
    # NOTE(review): these two values are not consumed anywhere in this function;
    # the widgets are kept so the sidebar layout stays unchanged.
    create_gif = st.sidebar.checkbox("Создать GIF после симуляции", value=False)
    gif_fps = st.sidebar.slider("FPS для GIF", 5, 30, 10, 5)

    if use_pid:
        kp = st.sidebar.slider("P (пропорциональный)", 0.0, 1.0, 0.33)
        ki = st.sidebar.slider("I (интегральный)", 0.0, 1.0, 0.03)
        kd = st.sidebar.slider("D (дифференциальный)", 0.0, 1.0, 0.22)
        w_load = st.sidebar.slider("Вес загрузки", 0.0, 1.0, 0.3)

    # --- Run the simulation ----------------------------------------------------
    if st.button("🎬 Запустить симуляцию 24 часа"):
        with st.spinner("Загрузка данных и симуляция..."):
            # 1. Load the test dataset; the target column is dropped if present.
            test_df = pd.read_csv(TEST_DATA_PATH)
            if 'SeriousDlqin2yrs' in test_df.columns:
                test_df = test_df.drop(columns=['SeriousDlqin2yrs'])
            test_pool = test_df.to_dict('records')

            # 2. Distribute the applications over minutes, starting from "now".
            current_time = datetime.now()
            start_hour = current_time.hour
            start_minute = current_time.minute

            gen = TrafficGenerator(total_applications=len(test_pool))
            minute_counts = gen.generate_minute_counts(start_hour=start_hour, start_minute=start_minute)

            # Remember everything the results section needs. Streamlit reruns the
            # script on every widget change, so the sidebar values seen later may
            # differ from the ones this run actually used.
            st.session_state.start_time = f"{start_hour:02d}:{start_minute:02d}"
            st.session_state.minute_counts = minute_counts
            st.session_state.sim_specialists_count = specialists_count
            st.session_state.sim_second_model_name = second_model_name

            # 3. Build the processor.
            processor = ApplicationProcessor(
                lr_model=models['Logistic Regression'],
                second_model=models[second_model_name],
                second_model_name=second_model_name,
                specialists_count=specialists_count,
                business_specialists_count=business_specialists_count,
                base_processing_time=base_time,
                business_processing_time=business_time
            )

            # 4. Build the PID controller when enabled.
            if use_pid:
                pid = PIDController(
                    init_threshold=fixed_threshold,
                    kp_load=kp, ki_load=ki, kd_load=kd,
                    load_weight=w_load,
                    init_lr_low=init_lr_low,
                    init_lr_high=init_lr_high,
                    init_second_low=init_second_low,
                    init_second_high=init_second_high,
                    target_load=target_load
                )
            else:
                pid = None

            # 5. Minute-by-minute simulation.
            idx = 0
            progress_bar = st.progress(0)
            n_steps = len(minute_counts)
            animation_frames = []  # one frame per simulated minute

            for step, n_apps in enumerate(minute_counts):
                # Take the next n_apps applications from the pool (slicing does not mutate it).
                batch = test_pool[idx:idx + n_apps]
                idx += n_apps

                # Current decision parameters; the threshold itself is never adapted.
                if pid is not None:
                    margins = pid.get_margins()
                    lr_margins = [margins['lr_low'], margins['lr_high']]
                    second_margins = [margins['second_low'], margins['second_high']]
                else:
                    # NOTE(review): with PID disabled the "initial margins" sliders
                    # are ignored in favour of these constants — confirm intent.
                    lr_margins = [0.35]
                    second_margins = [0.4]
                threshold = fixed_threshold

                result = processor.process_batch(
                    batch, preprocessor, scaler,
                    threshold=threshold,
                    lr_margins=lr_margins,
                    second_margins=second_margins,
                    current_time=step
                )

                # Load is known regardless of PID, so frames always carry the real value.
                load = result['specialists_busy'] / specialists_count
                if pid is not None:
                    pid.update(load)

                # Record a frame for every minute; down-sampling happens later
                # via the "frame step" slider when the video is rendered.
                animation_frames.append({
                    'time': step,
                    'step': step,  # duplicate of 'time', kept for the video code
                    'time_str': minutes_to_time(step, st.session_state.start_time),
                    'inflow': n_apps,
                    'inflow_history': minute_counts[:step + 1],
                    'load_history': [v / specialists_count for v in processor.stats['specialist_busy'][:step + 1]],
                    'queue': result['queue_size'],
                    'business_queue': result.get('business_queue_size', 0),
                    'load': load,
                    'specialist_states': processor.specialists.copy(),
                    'cumulative': {
                        'total_processed': processor.stats['total_processed'],
                        'auto_approved': processor.stats['auto_approved'],
                        'auto_declined': processor.stats['auto_declined'],
                        'manual_processed': processor.stats['manual_processed'],
                        'business_manual_processed': processor.stats.get('business_manual_processed', 0)
                    }
                })

                progress_bar.progress((step + 1) / n_steps)

            # 6. Persist the results for the display section below.
            st.session_state.processor = processor
            st.session_state.pid_history = pid.get_history() if pid is not None else None
            st.session_state.simulation_done = True
            st.session_state.batch_stats = processor.batch_stats
            st.session_state.animation_frames = animation_frames

    # --- Results ---------------------------------------------------------------
    if st.session_state.get('simulation_done', False):
        st.success("✅ Симуляция завершена!")

        stats = st.session_state.processor.stats
        # Use the values of the finished run, not whatever the sidebar shows now.
        run_specialists = st.session_state.get('sim_specialists_count', specialists_count)
        run_second_model = st.session_state.get('sim_second_model_name', second_model_name)
        run_start_time = st.session_state.get('start_time', '00:00')

        # Quick summary metrics.
        col1, col2, col3, col4, col5 = st.columns(5)
        col1.metric("Всего заявок", stats['total_processed'])
        col2.metric("Одобрено авто", stats['auto_approved'])
        col3.metric("Отказ авто", stats['auto_declined'])
        col4.metric("Ручной разбор", stats['manual_processed'])
        manual_rate = stats['manual_sent'] / stats['total_processed'] * 100 if stats['total_processed'] > 0 else 0
        col5.metric("Ручной разбор %", f"{manual_rate:.1f}%")

        # Plots: only calls into plots.py, each figure is closed after rendering.
        st.subheader("📈 Графики")

        st.pyplot(plot_queue_dynamics(
            queue_history=stats['queue_history'],
            business_queue_history=stats.get('business_queue_history'),
            start_time=run_start_time
        ))
        plt.close()

        st.pyplot(plot_specialist_load(
            specialist_busy_history=stats['specialist_busy'],
            specialists_count=run_specialists,
            start_time=run_start_time
        ))
        plt.close()

        st.pyplot(plot_inflow(
            minute_counts=st.session_state.minute_counts,
            start_time=run_start_time
        ))
        plt.close()

        st.pyplot(plot_detailed_decisions(
            batch_stats=st.session_state.batch_stats,
            second_model_name=run_second_model,
            start_time=run_start_time
        ))
        plt.close()

        # There is no parameter history to draw when the run had PID disabled.
        pid_history = st.session_state.pid_history
        if pid_history is not None:
            st.pyplot(plot_parameters_history(
                pid_history=pid_history,
                second_model_name=run_second_model,
                start_time=run_start_time
            ))
            plt.close()

        # Video rendering is on demand because it is slow.
        if st.session_state.get('animation_frames'):
            st.divider()
            st.subheader("🎥 Настройки видео-отчета")

            col_v1, col_v2 = st.columns(2)
            with col_v1:
                # Frame stride: 1 keeps every simulated minute.
                v_step = st.slider("Шаг кадров (1 = каждая минута)", 1, 30, 1,
                                   help="Чем меньше шаг, тем плавнее видео, но дольше рендеринг")
            with col_v2:
                v_fps = st.slider("Скорость видео (FPS)", 10, 60, 24,
                                  help="Количество кадров в секунду")

            if st.button("🎬 Сгенерировать видео", type="primary", use_container_width=True):
                with st.spinner("Рендеринг видео..."):
                    video_path = create_simulation_video(
                        st.session_state.animation_frames[::v_step],
                        run_specialists,
                        run_second_model,
                        fps=v_fps
                    )
                    st.video(video_path)
                    st.success("✅ Видео готово! Вы можете его скачать или перематывать.")

    # --- Navigation back to the landing page ------------------------------------
    st.write("")
    col1, col2, col3 = st.columns([1, 2, 1])
    with col2:
        if st.button("🏠 На главную", use_container_width=True):
            st.switch_page("main.py")


if __name__ == "__main__":
    main()
app/simulation/.DS_Store ADDED
Binary file (8.2 kB). View file
 
app/simulation/__init__.py ADDED
File without changes
app/simulation/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (180 Bytes). View file
 
app/simulation/controllers/__init__.py ADDED
File without changes
app/simulation/controllers/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (192 Bytes). View file
 
app/simulation/controllers/__pycache__/base.cpython-311.pyc ADDED
Binary file (1.72 kB). View file
 
app/simulation/controllers/__pycache__/pid.cpython-311.pyc ADDED
Binary file (5.3 kB). View file
 
app/simulation/controllers/base.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+
3
+
4
class BaseController(ABC):
    """Abstract base for controllers that tune the decision parameters.

    Every controller carries a display ``name`` and a ``history`` list that
    subclasses may append to.
    """

    def __init__(self, name="Base"):
        self.history = []
        self.name = name

    @abstractmethod
    def update(self, current_state, target_state, dt=1.0):
        """Compute new control parameters.

        Args:
            current_state: Current state of the system (queue, load).
            target_state: Desired state.
            dt: Time step.

        Returns:
            New thresholds and margins (defined by the concrete subclass;
            this base implementation returns ``None``).
        """

    def get_margins(self, hour=None):
        """Return the current margins for LR and the second model.

        The base implementation has nothing to report and returns ``None``;
        subclasses are expected to override it.
        """
        return None
app/simulation/controllers/pid.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ from .base import BaseController
4
+
5
+
6
class PIDController(BaseController):
    """PID controller that adapts the uncertainty margins to specialist load.

    The controlled variable is the fraction of busy specialists (0-1). On each
    ``update`` the PID output is scaled and added to all four margins
    (``lr_low``, ``lr_high``, ``second_low``, ``second_high``), which are then
    clipped to ``bounds``. The approval threshold itself is stored but never
    modified by the controller.
    """

    # Names of the adjustable margins; each is an attribute and a key of ``bounds``.
    _MARGIN_NAMES = ('lr_low', 'lr_high', 'second_low', 'second_high')

    def __init__(self, name="PID",
                 kp_load=0.1, ki_load=0.01, kd_load=0.05,
                 load_weight=1.0,
                 # Initial parameter values
                 init_threshold=0.5,
                 init_lr_low=0.3, init_lr_high=0.4,
                 init_second_low=0.35, init_second_high=0.45,
                 target_load=0.8):
        """Create a controller.

        Args:
            name: Display name passed to ``BaseController``.
            kp_load: Proportional gain for the load error.
            ki_load: Integral gain for the load error.
            kd_load: Derivative gain for the load error.
            load_weight: Multiplier applied to the summed PID output.
            init_threshold: Approval threshold; also defines the margin bounds.
            init_lr_low: Initial lower margin for Logistic Regression.
            init_lr_high: Initial upper margin for Logistic Regression.
            init_second_low: Initial lower margin for the second model.
            init_second_high: Initial upper margin for the second model.
            target_load: Desired specialist load (0-1).
        """
        super().__init__(name)

        # PID gains for the load error
        self.kp_load = kp_load
        self.ki_load = ki_load
        self.kd_load = kd_load

        self.load_weight = load_weight
        self.target_load = target_load

        # PID state
        self.prev_error_load = 0
        self.integral_load = 0

        # Initial parameters (kept for reference)
        self.init_threshold = init_threshold
        self.init_lr_low = init_lr_low
        self.init_lr_high = init_lr_high
        self.init_second_low = init_second_low
        self.init_second_high = init_second_high

        # Current parameters (margins)
        self.threshold = init_threshold
        self.lr_low = init_lr_low
        self.lr_high = init_lr_high
        self.second_low = init_second_low
        self.second_high = init_second_high

        # Margin bounds: at least 0.05, and never closer than 0.05 to 0 / 1
        # once subtracted from / added to the threshold.
        self.bounds = {
            'lr_low': (0.05, self.threshold - 0.05),
            'lr_high': (0.05, 1 - self.threshold - 0.05),
            'second_low': (0.05, self.threshold - 0.05),
            'second_high': (0.05, 1 - self.threshold - 0.05)
        }

        # Anti-windup limit for the integral term
        self.integral_limit = 1.0

    def update(self, current_load, target_state=None, dt=1.0):
        """Advance the controller by one step and return the new margins.

        The signature is compatible with ``BaseController.update``; calling
        ``update(current_load)`` behaves exactly as before.

        Args:
            current_load: Current specialist load (0-1).
            target_state: Optional target load for this step; defaults to
                ``self.target_load`` when ``None``.
            dt: Time step used to scale the integral and derivative terms.

        Returns:
            Dict with the current ``lr_low``, ``lr_high``, ``second_low`` and
            ``second_high`` margins.

        Raises:
            ValueError: If ``dt`` is not positive.
        """
        if dt <= 0:
            raise ValueError("dt must be positive")

        target_load = self.target_load if target_state is None else target_state

        # Positive error means specialists are under-loaded relative to the target.
        error_load = target_load - current_load

        # PID terms for the load
        P_load = self.kp_load * error_load
        self.integral_load += error_load * dt
        self.integral_load = np.clip(self.integral_load, -self.integral_limit, self.integral_limit)
        I_load = self.ki_load * self.integral_load
        D_load = self.kd_load * (error_load - self.prev_error_load) / dt
        self.prev_error_load = error_load

        # Controller output
        output_load = P_load + I_load + D_load
        output = self.load_weight * output_load

        # Adapt the margins
        self._update_parameters(output)

        # Record the step for later visualisation
        self.history.append({
            'time': len(self.history),
            'error_load': error_load,
            'output': output,
            'threshold': self.threshold,
            'lr_low': self.lr_low,
            'lr_high': self.lr_high,
            'second_low': self.second_low,
            'second_high': self.second_high,
            'load': current_load,
        })

        return self.get_margins()

    def _update_parameters(self, output):
        """Shift every margin by ``output * 0.1`` and clip it to its bounds."""
        delta = output * 0.1
        for margin_name in self._MARGIN_NAMES:
            lower, upper = self.bounds[margin_name]
            setattr(self, margin_name,
                    np.clip(getattr(self, margin_name) + delta, lower, upper))

    def get_margins(self, hour=None):
        """Return the current margins; ``hour`` is accepted but not used."""
        return {
            'lr_low': self.lr_low,
            'lr_high': self.lr_high,
            'second_low': self.second_low,
            'second_high': self.second_high
        }

    def get_history(self):
        """Return the recorded update history as a DataFrame for visualisation."""
        return pd.DataFrame(self.history)
app/simulation/core/__init__.py ADDED
File without changes
app/simulation/core/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (185 Bytes). View file
 
app/simulation/core/__pycache__/processor.cpython-311.pyc ADDED
Binary file (13.8 kB). View file
 
app/simulation/core/__pycache__/traffic_generator.cpython-311.pyc ADDED
Binary file (13.4 kB). View file
 
app/simulation/core/processor.py ADDED
@@ -0,0 +1,339 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ from app.models.escalation import escalation_decision
4
+ from app.models.escalation import check_business_rules
5
+
6
+
7
def processing_time_function(lr_proba, second_proba, threshold=0.5, base_time=5,
                             lr_weight=1.0, second_weight=1.5):
    """Sample a processing time for an application sent to manual review.

    The two model probabilities are combined into a weighted average; the
    closer that average is to ``threshold``, the higher the uncertainty and
    the longer the expected handling time. The actual time is drawn from an
    exponential distribution with that mean and is never shorter than 1.

    Args:
        lr_proba: Probability from Logistic Regression, or ``None`` if absent.
        second_proba: Probability from the second model, or ``None`` if absent.
        threshold: Decision threshold the probabilities are compared against.
        base_time: Mean processing time for a fully certain application.
        lr_weight: Weight of ``lr_proba`` in the average.
        second_weight: Weight of ``second_proba`` in the average.

    Returns:
        The sampled processing time (at least 1), in the units of ``base_time``.

    Raises:
        ZeroDivisionError: If the weights of the available probabilities sum to zero.
    """
    # Queue items may carry ``None`` for a model that did not score the
    # application, so average only over the probabilities that exist.
    scored = [(proba, weight)
              for proba, weight in ((lr_proba, lr_weight), (second_proba, second_weight))
              if proba is not None]

    if scored:
        total_weight = sum(weight for _, weight in scored)
        proba = sum(p * weight for p, weight in scored) / total_weight

        margin = abs(proba - threshold)
        max_margin = max(threshold, 1 - threshold)
        uncertainty = 1 - (margin / max_margin)
    else:
        # No model signal at all: treat the case as maximally uncertain.
        uncertainty = 1.0

    # Mean grows linearly from base_time (certain) to 4 * base_time (uncertain).
    mean_time = base_time * (1 + 3 * uncertainty)
    processing_time = np.random.exponential(scale=mean_time)

    return max(1, processing_time)
23
+
24
+
25
+ class ApplicationProcessor:
26
+ def __init__(self, lr_model, second_model, second_model_name,
27
+ specialists_count=5, # основные специалисты (модели)
28
+ business_specialists_count=2, # эксперты (бизнес-правила)
29
+ base_processing_time=5,
30
+ business_processing_time=10, # эксперты дольше копаются
31
+ lr_weight=1.0, second_weight=1.5):
32
+ self.lr_model = lr_model
33
+ self.second_model = second_model
34
+ self.second_model_name = second_model_name
35
+ self.specialists_count = specialists_count
36
+ self.business_specialists_count = business_specialists_count
37
+ self.base_processing_time = base_processing_time
38
+ self.business_processing_time = business_processing_time
39
+ self.lr_weight = lr_weight
40
+ self.second_weight = second_weight
41
+
42
+ self.specialists = [0] * specialists_count
43
+ self.business_specialists = [0] * business_specialists_count # отдельный пул
44
+ self.manual_queue = [] # очередь от моделей
45
+ self.business_queue = [] # очередь от бизнес-правил
46
+
47
+ self.stats = {
48
+ 'total_processed': 0,
49
+ 'auto_approved': 0,
50
+ 'auto_declined': 0,
51
+ 'manual_sent': 0,
52
+ 'manual_processed': 0,
53
+ 'business_manual_sent': 0,
54
+ 'business_manual_processed': 0,
55
+ 'queue_history': [],
56
+ 'business_queue_history': [],
57
+ 'wait_times': [],
58
+ 'business_wait_times': [],
59
+ 'specialist_busy': [],
60
+ 'business_specialist_busy': [],
61
+ 'business_rules_manual': 0,
62
+ 'business_rules_auto': 0
63
+ }
64
+ self.batch_stats = []
65
+
66
+ def process_batch(self, applications_batch, preprocessor, scaler,
67
+ threshold, lr_margins, second_margins, current_time):
68
+ """
69
+ Обрабатывает батч заявок за текущую минуту (батчевая версия)
70
+ """
71
+ minute_results = {
72
+ 'new_apps': len(applications_batch),
73
+ 'auto_decisions': [],
74
+ 'new_manual': 0,
75
+ 'new_business_manual': 0,
76
+ 'processed_manual': 0,
77
+ 'processed_business_manual': 0,
78
+ 'queue_size': 0,
79
+ 'business_queue_size': 0,
80
+ 'specialists_busy': sum(1 for s in self.specialists if s > 0),
81
+ 'business_specialists_busy': sum(1 for s in self.business_specialists if s > 0),
82
+ 'business_rules': 0
83
+ }
84
+
85
+ # 1. Уменьшаем время работы специалистов
86
+ self.specialists = [max(0, s - 1) for s in self.specialists]
87
+ self.business_specialists = [max(0, s - 1) for s in self.business_specialists]
88
+
89
+ if not applications_batch:
90
+ minute_results['queue_size'] = len(self.manual_queue)
91
+ minute_results['business_queue_size'] = len(self.business_queue)
92
+ self.stats['queue_history'].append(len(self.manual_queue))
93
+ self.stats['business_queue_history'].append(len(self.business_queue))
94
+ self.stats['specialist_busy'].append(minute_results['specialists_busy'])
95
+ self.stats['business_specialist_busy'].append(minute_results['business_specialists_busy'])
96
+ return minute_results
97
+
98
+ # 2. Превращаем батч в DataFrame для удобства
99
+ df = pd.DataFrame(applications_batch)
100
+
101
+ # 3. Применяем бизнес-правила ко всем заявкам (БАТЧЕВО)
102
+ manual_mask, auto_reject_mask, messages, auto_decisions = check_business_rules(df)
103
+
104
+ # Сохраняем статистику по бизнес-правилам
105
+ business_manual_count = manual_mask.sum()
106
+ business_auto_count = auto_reject_mask.sum()
107
+
108
+ # Инициализируем
109
+ n = len(applications_batch)
110
+ model_indices = []
111
+
112
+ # 4. Обрабатываем результаты бизнес-правил
113
+ for idx in range(n):
114
+ if manual_mask[idx]:
115
+ # Ручной разбор по бизнес-правилам - в отдельную очередь
116
+ self.business_queue.append({
117
+ 'app': applications_batch[idx],
118
+ 'arrival_time': current_time,
119
+ 'reason': 'business_rules',
120
+ 'message': messages[idx],
121
+ 'lr_proba': None,
122
+ 'second_proba': None
123
+ })
124
+ minute_results['new_business_manual'] += 1
125
+ minute_results['business_rules'] += 1
126
+ self.stats['business_rules_manual'] += 1
127
+ self.stats['business_manual_sent'] += 1
128
+
129
+ elif auto_reject_mask[idx]:
130
+ # Автоматический отказ по бизнес-правилам
131
+ decision = {
132
+ 'final_decision': auto_decisions[idx], # всегда 1
133
+ 'model_used': 'Business Rules',
134
+ 'probability': 1.0,
135
+ 'needs_review': False,
136
+ 'message': messages[idx]
137
+ }
138
+ minute_results['auto_decisions'].append(decision)
139
+ self.stats['auto_declined'] += 1
140
+ self.stats['business_rules_auto'] += 1
141
+ self.stats['total_processed'] += 1
142
+
143
+ else:
144
+ # Заявка идет в модели
145
+ model_indices.append(idx)
146
+
147
+ # Инициализируем переменные для статистики моделей
148
+ lr_confident_count = 0
149
+ second_confident_count = 0
150
+ second_uncertain_count = 0
151
+
152
+ # 5. Батчевая обработка моделей
153
+ if model_indices:
154
+ # Берём только заявки, которые прошли бизнес-правила
155
+ df_models = df.iloc[model_indices].copy()
156
+
157
+ # Формируем DataFrame для моделей
158
+ model_df = pd.DataFrame({
159
+ 'RevolvingUtilizationOfUnsecuredLines': df_models['RevolvingUtilizationOfUnsecuredLines'],
160
+ 'age': df_models['age'],
161
+ 'NumberOfTime30-59DaysPastDueNotWorse': df_models['NumberOfTime30-59DaysPastDueNotWorse'],
162
+ 'DebtRatio': df_models['DebtRatio'].fillna(0),
163
+ 'MonthlyIncome': df_models['MonthlyIncome'].fillna(0),
164
+ 'NumberOfOpenCreditLinesAndLoans': df_models['NumberOfOpenCreditLinesAndLoans'],
165
+ 'NumberOfTimes90DaysLate': df_models['NumberOfTimes90DaysLate'],
166
+ 'NumberRealEstateLoansOrLines': df_models['NumberRealEstateLoansOrLines'],
167
+ 'NumberOfTime60-89DaysPastDueNotWorse': df_models['NumberOfTime60-89DaysPastDueNotWorse'],
168
+ 'NumberOfDependents': df_models['NumberOfDependents'].fillna(0)
169
+ })
170
+
171
+ # Вызываем escalation_decision для всего батча
172
+ batch_decisions, batch_manual_mask, stats = escalation_decision(
173
+ model_df,
174
+ self.lr_model,
175
+ self.second_model,
176
+ self.second_model_name,
177
+ threshold=threshold,
178
+ lr_margins=lr_margins,
179
+ second_margins=second_margins,
180
+ preprocessor=preprocessor,
181
+ scaler=scaler
182
+ )
183
+
184
+ # Сохраняем статистику из escalation_decision
185
+ lr_confident_count = stats['lr_confident']
186
+ second_confident_count = stats['second_confident']
187
+ second_uncertain_count = stats['second_uncertain']
188
+
189
+ # print(f"Статистика батча: бизнес-ручной={business_manual_count}, "
190
+ # f"бизнес-отказ={business_auto_count}, "
191
+ # f"LR уверен={lr_confident_count}, "
192
+ # f"вторая уверен={second_confident_count}, "
193
+ # f"вторая не уверен={second_uncertain_count}")
194
+
195
+ # Распределяем результаты по исходным индексам
196
+ for local_idx, orig_idx in enumerate(model_indices):
197
+ decision = batch_decisions[local_idx]
198
+
199
+ if decision['needs_review']:
200
+ self.manual_queue.append({
201
+ 'app': applications_batch[orig_idx],
202
+ 'arrival_time': current_time,
203
+ 'reason': 'model_uncertainty',
204
+ 'decision': decision,
205
+ 'lr_proba': decision.get('lr_proba'),
206
+ 'second_proba': decision.get('second_proba')
207
+ })
208
+ minute_results['new_manual'] += 1
209
+ self.stats['manual_sent'] += 1
210
+ else:
211
+ minute_results['auto_decisions'].append(decision)
212
+ if decision['final_decision'] == 0:
213
+ self.stats['auto_approved'] += 1
214
+ else:
215
+ self.stats['auto_declined'] += 1
216
+
217
+ self.stats['total_processed'] += 1
218
+
219
+ # Сохраняем общую статистику батча
220
+ self.batch_stats.append({
221
+ 'time': current_time,
222
+ 'business_manual': business_manual_count,
223
+ 'business_auto': business_auto_count,
224
+ 'lr_confident': lr_confident_count,
225
+ 'second_confident': second_confident_count,
226
+ 'second_uncertain': second_uncertain_count,
227
+ 'total_in_batch': len(applications_batch),
228
+ 'new_manual': minute_results['new_manual'],
229
+ 'new_business_manual': minute_results['new_business_manual'],
230
+ 'auto_total': len(minute_results['auto_decisions'])
231
+ })
232
+
233
+ # 6. Распределяем заявки из бизнес-очереди по свободным экспертам
234
+ for i in range(self.business_specialists_count):
235
+ if self.business_specialists[i] <= 0 and self.business_queue:
236
+ next_app = self.business_queue.pop(0)
237
+
238
+ wait_time = current_time - next_app['arrival_time']
239
+ self.stats['business_wait_times'].append(wait_time)
240
+
241
+ # Эксперты обрабатывают бизнес-правила
242
+ proc_time = self.business_processing_time
243
+
244
+ self.business_specialists[i] = proc_time
245
+ minute_results['processed_business_manual'] += 1
246
+ self.stats['business_manual_processed'] += 1
247
+
248
+ # 7. Распределяем заявки из основной очереди по свободным специалистам
249
+ for i in range(self.specialists_count):
250
+ if self.specialists[i] <= 0 and self.manual_queue:
251
+ next_app = self.manual_queue.pop(0)
252
+
253
+ wait_time = current_time - next_app['arrival_time']
254
+ self.stats['wait_times'].append(wait_time)
255
+
256
+ if next_app['reason'] == 'business_rules':
257
+ proc_time = self.business_processing_time
258
+ else:
259
+ # Используем функцию processing_time_function
260
+ proc_time = processing_time_function(
261
+ lr_proba=next_app.get('lr_proba', 0.5),
262
+ second_proba=next_app.get('second_proba', 0.5),
263
+ threshold=threshold,
264
+ base_time=self.base_processing_time,
265
+ lr_weight=self.lr_weight,
266
+ second_weight=self.second_weight
267
+ )
268
+
269
+ self.specialists[i] = proc_time
270
+ minute_results['processed_manual'] += 1
271
+ self.stats['manual_processed'] += 1
272
+
273
+ minute_results['queue_size'] = len(self.manual_queue)
274
+ minute_results['business_queue_size'] = len(self.business_queue)
275
+ self.stats['queue_history'].append(len(self.manual_queue))
276
+ self.stats['business_queue_history'].append(len(self.business_queue))
277
+ self.stats['specialist_busy'].append(minute_results['specialists_busy'])
278
+ self.stats['business_specialist_busy'].append(minute_results['business_specialists_busy'])
279
+
280
+ return minute_results
281
+
282
def load_test_dataset(self, filepath):
    """Read applications from a CSV file and return them as row dicts.

    Args:
        filepath: Path (or any source accepted by ``pandas.read_csv``) of
            the CSV file with applications.

    Returns:
        A list with one ``{column: value}`` dict per CSV row. The target
        column ``SeriousDlqin2yrs`` is removed when the file contains it.
    """
    frame = pd.read_csv(filepath)
    # errors='ignore' turns the drop into a no-op when the column is absent.
    features = frame.drop(columns=['SeriousDlqin2yrs'], errors='ignore')
    return features.to_dict('records')
287
+
288
def get_queue_stats(self):
    """Summarise the current queues, waiting times and recorded histories.

    Returns:
        A dict with the current length of both queues, the average and
        maximum recorded waiting time for each queue (``0`` when nothing
        has been recorded yet), the history lists stored in
        ``self.stats`` and the manual/auto split of business-rule
        decisions.
    """
    stats = self.stats

    def _mean_and_max(samples):
        # No samples yet: report plain zeros instead of calling NumPy
        # reductions on an empty sequence.
        if not samples:
            return 0, 0
        return np.mean(samples), np.max(samples)

    avg_wait, max_wait = _mean_and_max(stats['wait_times'])
    avg_business_wait, max_business_wait = _mean_and_max(stats['business_wait_times'])

    summary = {}
    summary['current_queue'] = len(self.manual_queue)
    summary['current_business_queue'] = len(self.business_queue)
    summary['avg_wait_minutes'] = avg_wait
    summary['max_wait_minutes'] = max_wait
    summary['avg_business_wait_minutes'] = avg_business_wait
    summary['max_business_wait_minutes'] = max_business_wait
    # History lists are handed out as-is (no copies).
    for key in ('queue_history', 'business_queue_history',
                'specialist_busy', 'business_specialist_busy'):
        summary[key] = stats[key]
    summary['business_rules_split'] = {
        'manual': stats['business_rules_manual'],
        'auto': stats['business_rules_auto'],
    }
    return summary
317
+
318
+ # def reset(self):
319
+ # self.specialists = [0] * self.specialists_count
320
+ # self.business_specialists = [0] * self.business_specialists_count
321
+ # self.manual_queue = []
322
+ # self.business_queue = []
323
+ # self.stats = {
324
+ # 'total_processed': 0,
325
+ # 'auto_approved': 0,
326
+ # 'auto_declined': 0,
327
+ # 'manual_sent': 0,
328
+ # 'manual_processed': 0,
329
+ # 'business_manual_sent': 0,
330
+ # 'business_manual_processed': 0,
331
+ # 'queue_history': [],
332
+ # 'business_queue_history': [],
333
+ # 'wait_times': [],
334
+ # 'business_wait_times': [],
335
+ # 'specialist_busy': [],
336
+ # 'business_specialist_busy': [],
337
+ # 'business_rules_manual': 0,
338
+ # 'business_rules_auto': 0
339
+ # }
app/simulation/core/traffic_generator.py ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import matplotlib.pyplot as plt
3
+ from datetime import datetime
4
+
5
+
6
class TrafficGenerator:
    """Generate a synthetic 24-hour stream of incoming applications.

    The intensity profile is a constant background plus a Gaussian day
    peak and a Gaussian evening peak, minus a Gaussian afternoon dip,
    multiplied by uniform noise and by optional short "impulses".
    """

    def __init__(self, total_applications=101503, random_seed=42):
        """Store the daily total and seed NumPy's global random generator.

        Args:
            total_applications: Number of applications to spread over the
                24-hour window.
            random_seed: Seed passed to ``np.random.seed`` (this affects
                the process-wide NumPy random state).
        """
        self.total = total_applications
        np.random.seed(random_seed)

        # Intensity parameters, including the after-lunch dip.
        self.intensity_params = {
            'background': 0.1,
            # Day peak (narrower).
            'day_center': 13, 'day_amplitude': 0.9, 'day_width': 2.5,
            # Evening peak (earlier and wider).
            'evening_center': 19.5, 'evening_amplitude': 1.3, 'evening_width': 2.2,
            # After-lunch dip.
            'afternoon_dip_center': 15.5, 'afternoon_dip_strength': 0.3, 'afternoon_dip_width': 1.5,
            'noise_level': 0.1
        }

    @staticmethod
    def _circular_distance(t, center, period=24):
        """Return the shortest distance between two times on a ``period``-hour clock."""
        diff = np.abs(t - center) % period
        return np.minimum(diff, period - diff)

    def _time_to_hours(self, time_tuple):
        """Convert an ``(hours, minutes)`` tuple into fractional hours."""
        return time_tuple[0] + time_tuple[1] / 60

    def loan_intensity_periodic(self, t, impulses=None):
        """Return the (unnormalised) arrival intensity at time ``t``.

        Args:
            t: Time in hours; may be fractional and may exceed 24 (it is
                wrapped onto a 24-hour cycle).
            impulses: Optional list of dicts such as
                ``[{'time': (16, 37), 'strength': 2.0}, ...]`` where
                ``time`` is ``(hours, minutes)``. Each impulse multiplies
                the intensity by ``1 + strength * gaussian`` centred on
                its time (sigma of 0.25 h).

        Returns:
            The intensity value. One uniform noise sample is drawn from
            the global NumPy generator per call when ``noise_level > 0``.
        """
        params = self.intensity_params
        t_cycle = t % 24

        bg = params['background']

        # Day peak (13:00).
        day = params['day_amplitude'] * np.exp(
            -(t_cycle - params['day_center']) ** 2 /
            (2 * params['day_width'] ** 2)
        )

        # Evening peak (19:30), measured on the 24-hour circle.
        evening_diff = self._circular_distance(t_cycle, params['evening_center'])
        evening = params['evening_amplitude'] * np.exp(
            -(evening_diff) ** 2 / (2 * params['evening_width'] ** 2)
        )

        # After-lunch dip (15:30).
        dip_diff = self._circular_distance(t_cycle, params['afternoon_dip_center'])
        dip = -params['afternoon_dip_strength'] * np.exp(
            -(dip_diff) ** 2 / (2 * params['afternoon_dip_width'] ** 2)
        )

        intensity = bg + day + evening + dip
        intensity = np.maximum(intensity, 0.05)  # never below the floor

        # Multiplicative noise.
        if params['noise_level'] > 0:
            noise = 1.0 + np.random.uniform(
                -params['noise_level'],
                params['noise_level']
            )
            intensity *= noise

        # Impulses.
        if impulses:
            for imp in impulses:
                imp_time = self._time_to_hours(imp['time']) % 24
                # The distance must wrap in BOTH directions; otherwise an
                # impulse just after midnight never reaches the minutes
                # just before midnight.
                imp_diff = self._circular_distance(t_cycle, imp_time)
                imp_factor = 1.0 + imp['strength'] * np.exp(-(imp_diff) ** 2 / (2 * 0.25 ** 2))
                intensity *= imp_factor

        return intensity

    def generate_minute_counts(self, start_hour=None, start_minute=0, impulses=None):
        """Return the number of applications for each minute (1440 values).

        Args:
            start_hour: Hour at which the 24-hour window starts. When
                ``None`` the current local time is used for both hour
                and minute (``start_minute`` is then ignored).
            start_minute: Minute at which the window starts.
            impulses: Optional impulses, e.g.
                ``[{'time': (5, 30), 'strength': 2.0}, ...]``.

        Returns:
            Integer NumPy array whose sum equals ``self.total``.
        """
        if start_hour is None:
            now = datetime.now()
            start_hour = now.hour
            start_minute = now.minute

        start_time = start_hour + start_minute / 60

        # Minute offsets (in hours) from start_time to start_time + 24.
        minutes = np.arange(0, 24, 1 / 60)
        intensity_values = np.array([
            self.loan_intensity_periodic(start_time + m, impulses)
            for m in minutes
        ])

        total_intensity = np.sum(intensity_values)
        scale_factor = self.total / total_intensity

        minute_counts = np.floor(intensity_values * scale_factor).astype(int)

        # Flooring loses less than one application per minute; hand the
        # remainder to the busiest minutes so the total matches exactly.
        total_assigned = np.sum(minute_counts)
        if total_assigned < self.total:
            remainder = self.total - total_assigned
            top_minutes = np.argsort(intensity_values)[-remainder:]
            minute_counts[top_minutes] += 1

        return minute_counts

    def generate_hourly_counts(self, start_hour=None, start_minute=0, impulses=None):
        """Return the number of applications per hour (24 values)."""
        minute_counts = self.generate_minute_counts(start_hour, start_minute, impulses)
        hourly_counts = [np.sum(minute_counts[i * 60:(i + 1) * 60]) for i in range(24)]
        return hourly_counts

    def generate_random_impulses(self, n_impulses=1, min_strength=1.5, max_strength=3.0):
        """Return ``n_impulses`` impulses with random time and strength."""
        impulses = []
        for _ in range(n_impulses):
            hour = np.random.randint(0, 24)
            minute = np.random.randint(0, 60)
            strength = np.random.uniform(min_strength, max_strength)
            impulses.append({'time': (hour, minute), 'strength': strength})
        return impulses

    def plot_distribution(self, start_hour=None, start_minute=0, impulses=None):
        """Plot the hourly distribution and print a short summary.

        Returns:
            ``(hours_sorted, counts_sorted)``: the hour labels in
            ascending order and the matching hourly counts.
        """
        # Resolve "now" once so that the generated data and the title
        # describe the same start time.
        if start_hour is None:
            now = datetime.now()
            start_hour = now.hour
            start_minute = now.minute

        hourly_counts = self.generate_hourly_counts(start_hour, start_minute, impulses)

        hours = [(start_hour + i) % 24 for i in range(24)]
        sorted_pairs = sorted(zip(hours, hourly_counts))
        hours_sorted, counts_sorted = zip(*sorted_pairs)

        plt.figure(figsize=(14, 6))

        # Bar colour depends on the time of day.
        colors = []
        for h in hours_sorted:
            if 0 <= h <= 5:
                colors.append('#2c3e50')  # night
            elif 6 <= h <= 11:
                colors.append('#3498db')  # morning
            elif 12 <= h <= 16:
                colors.append('#f39c12')  # day (with the dip)
            else:
                colors.append('#e67e22')  # evening

        plt.bar([str(h) for h in hours_sorted], counts_sorted,
                alpha=0.8, color=colors, edgecolor='black', linewidth=1)

        # Mean line.
        mean_val = np.mean(counts_sorted)
        plt.axhline(y=mean_val, color='red', linestyle='--',
                    alpha=0.7, linewidth=2, label=f'Среднее: {mean_val:.0f}')

        # Mark the impulses on the chart.
        if impulses:
            for imp_no, imp in enumerate(impulses):
                imp_hours = self._time_to_hours(imp['time']) % 24
                # Nearest hour bar.
                closest_hour = min(hours_sorted, key=lambda x: abs(x - imp_hours))
                idx = list(hours_sorted).index(closest_hour)
                # Only the first impulse gets a legend entry, so the
                # legend is not filled with one line per marker.
                plt.plot(idx, counts_sorted[idx], 'g*', markersize=15,
                         label=f'Импульс {imp["strength"]:.1f}x' if imp_no == 0 else '')

        # Highlight the after-lunch dip.
        dip_idx = [i for i, h in enumerate(hours_sorted) if 14 <= h <= 16]
        if dip_idx:
            plt.axvspan(dip_idx[0] - 0.4, dip_idx[-1] + 0.4, alpha=0.2, color='gray',
                        label='Послеобеденный спад')

        plt.xlabel('Час', fontsize=12)
        plt.ylabel('Количество заявок', fontsize=12)
        plt.title(f'Распределение заявок по часам (старт в {start_hour:02d}:{start_minute:02d})',
                  fontsize=14, fontweight='bold')
        plt.grid(True, alpha=0.3, axis='y')
        plt.legend(loc='upper right')
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.show()

        # Summary.
        print("\n📊 Статистика распределения:")
        print(f" Всего заявок: {sum(counts_sorted)}")
        print(f" Среднее: {mean_val:.0f} заявок/час")
        print(f" Максимум: {max(counts_sorted)} заявок")
        print(f" Минимум: {min(counts_sorted)} заявок")

        return hours_sorted, counts_sorted
210
+
211
+
212
+ # Пример использования
213
+ # if __name__ == "__main__":
214
+ # # Создаём генератор
215
+ # gen = TrafficGenerator(total_applications=110000)
216
+ #
217
+ # # 1. Без импульсов
218
+ # print("Без импульсов:")
219
+ # counts = gen.generate_minute_counts(start_hour=17)
220
+ # print(f"Всего минут: {len(counts)}")
221
+ # print(f"Всего заявок: {sum(counts)}")
222
+ #
223
+ # # 2. С импульсом в 5:30 утра
224
+ # impulses = [{'time': (5, 30), 'strength': 2.0}]
225
+ # print("\nС импульсом в 5:30:")
226
+ # counts = gen.generate_minute_counts(start_hour=17, impulses=impulses)
227
+ #
228
+ # # 3. Построить график
229
+ # gen.plot_distribution(start_hour=17, impulses=impulses)
230
+ #
231
+ # # 4. Случайные импульсы
232
+ # random_impulses = gen.generate_random_impulses(n_impulses=2)
233
+ # print("\nСлучайные импульсы:", random_impulses)
234
+ # gen.plot_distribution(start_hour=17, impulses=random_impulses)
app/simulation/visualization/__init__.py ADDED
File without changes
app/simulation/visualization/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (194 Bytes). View file
 
app/simulation/visualization/__pycache__/animation.cpython-311.pyc ADDED
Binary file (15.7 kB). View file
 
app/simulation/visualization/__pycache__/plots.cpython-311.pyc ADDED
Binary file (20.8 kB). View file
 
app/simulation/visualization/animation.py ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import plotly.graph_objects as go
2
+ from plotly.subplots import make_subplots
3
+ import numpy as np
4
+
5
+
6
def minutes_to_time(minutes, start_time="00:00"):
    """Return the ``HH:MM`` clock time ``minutes`` after ``start_time``.

    Args:
        minutes: Whole-minute offset from the start.
        start_time: Start of the timeline as an ``"HH:MM"`` string.

    Returns:
        Zero-padded ``"HH:MM"`` string; hours wrap around after 24.
    """
    base_hour, base_minute = (int(part) for part in start_time.split(':'))
    elapsed_hours, minute = divmod(base_hour * 60 + base_minute + minutes, 60)
    return f"{elapsed_hours % 24:02d}:{minute:02d}"
12
+
13
+
14
def _specialist_heatmap_matrix(states, specialists_count, cols=20):
    """Bucket per-specialist remaining minutes into colour levels on a grid.

    Cells without a specialist stay NaN so that they are not coloured.
    """
    rows = int(np.ceil(specialists_count / cols))
    z_matrix = np.full((rows, cols), np.nan)
    # Ignore states beyond the grid instead of raising IndexError.
    for i, val in enumerate(states[:rows * cols]):
        r, c = divmod(i, cols)
        if val <= 0:
            # Same "free" test as the counter in the caller. Level 0.0 is
            # an exact colorscale stop, so the cell gets the legend colour
            # rather than a blend of two neighbouring stops.
            z_matrix[r, c] = 0.0
        elif val <= 3:
            z_matrix[r, c] = 0.4
        elif val <= 7:
            z_matrix[r, c] = 0.7
        else:
            z_matrix[r, c] = 1.0
    return z_matrix


def create_animation_frame_plotly(frame_data, specialists_count, second_model_name="XGBoost"):
    """Build one Plotly dashboard figure for a single simulation frame.

    Args:
        frame_data: Dict describing the frame. Required keys read here:
            ``specialist_states``, ``cumulative`` (with ``auto_approved``,
            ``auto_declined``, ``manual_processed``,
            ``business_manual_processed``, ``total_processed``),
            ``queue``, ``time_str`` and ``time``. Optional keys:
            ``inflow_history``, ``load_history``, ``business_queue``,
            ``avg_wait``.
        specialists_count: Number of specialists shown on the heat map
            (20 cells per row).
        second_model_name: Accepted for interface compatibility; not
            used by this function.

    Returns:
        The assembled ``plotly.graph_objects.Figure``.
    """
    # Fixed X axis for the charts: one tick every three hours.
    time_ticks = list(range(0, 1441, 180))
    time_labels = [minutes_to_time(t, "00:00") for t in time_ticks]

    fig = make_subplots(
        rows=3, cols=2,
        subplot_titles=('📈 Динамика входящего потока', '⚙️ Загрузка специалистов (%)',
                        '👥 МОНИТОРИНГ РАБОТЫ СПЕЦИАЛИСТОВ', '',
                        '📊 Сводная статистика обработки', '🎯 Оперативные показатели'),
        specs=[
            [{'type': 'scatter'}, {'type': 'scatter'}],
            [{'type': 'heatmap', 'colspan': 2}, None],
            [{'type': 'table'}, {'type': 'scatter'}]
        ],
        row_heights=[0.25, 0.40, 0.35],
        vertical_spacing=0.1,
    )

    # --- ROW 1: CHARTS ---
    inflow_h = frame_data.get('inflow_history', [])
    load_h = frame_data.get('load_history', [])

    fig.add_trace(go.Scatter(y=inflow_h, fill='tozeroy', line=dict(color='#4361ee', width=2)), row=1, col=1)
    fig.add_trace(go.Scatter(y=[load * 100 for load in load_h], fill='tozeroy',
                             line=dict(color='#4cc9f0', width=2)), row=1, col=2)

    for col in [1, 2]:
        fig.update_xaxes(range=[0, 1440], tickvals=time_ticks, ticktext=time_labels, row=1, col=col)
        fig.update_yaxes(rangemode="tozero", row=1, col=col)

    # --- ROW 2: HEATMAP (exactly 20 cells wide) ---
    states = np.array(frame_data['specialist_states'])
    z_matrix = _specialist_heatmap_matrix(states, specialists_count, cols=20)

    # Colour stops; NaN cells are left uncoloured.
    colorscale = [
        [0.0, '#66ccff'],  # free (0)
        [0.4, '#4ade80'],  # 1-3 min
        [0.7, '#facc15'],  # 4-7 min
        [1.0, '#f87171']   # 8+ min
    ]

    fig.add_trace(go.Heatmap(
        z=z_matrix, colorscale=colorscale, showscale=False,
        xgap=2, ygap=2, zmin=0, zmax=1, hoverinfo='none'
    ), row=2, col=1)

    # Legend above the heat map.
    free = sum(1 for t in states if t <= 0)
    legend = (f"Свободно: <b>{free}</b> | <span style='color:#66ccff'>■</span> Свободен "
              f"<span style='color:#4ade80'>■</span> 1-3м <span style='color:#facc15'>■</span> 4-7м "
              f"<span style='color:#f87171'>■</span> 8м+")
    fig.add_annotation(text=legend, xref="paper", yref="paper", x=0.5, y=0.70, showarrow=False, font=dict(size=14))

    # --- ROW 3: SUMMARY TABLE ---
    cum = frame_data['cumulative']
    fig.add_trace(go.Table(
        header=dict(values=['Параметр', 'Значение'], fill_color='#1e293b', font=dict(color='white', size=15),
                    height=35),
        cells=dict(values=[
            ['✅ Авто-одобрено', '❌ Авто-отказы', '👤 На рассмотрении (Manual)', '<b>ИТОГО ОБРАБОТАНО</b>'],
            [cum['auto_approved'], cum['auto_declined'],
             cum['manual_processed'] + cum['business_manual_processed'], f"<b>{cum['total_processed']}</b>"]
        ], align='left', font=dict(size=14), height=35, fill_color='#f8f9fa')
    ), row=3, col=1)

    # --- LIVE INDICATORS ---
    q_models = frame_data['queue']  # queue to the specialists
    q_business = frame_data.get('business_queue', 0)  # business-rules queue

    # Average wait as supplied by the caller (0 when absent).
    avg_w = frame_data.get('avg_wait', 0)

    status_card = (
        f"<span style='font-size:22px; font-weight:bold;'>МОНИТОРИНГ</span><br><br>"
        f"<span style='background-color:#dcfce7; color:#166534; padding:8px; border-radius:5px;'>"
        f"<b>👤 ОЧЕРЕДЬ (СПЕЦ): {q_models}</b></span><br><br>"
        f"<span style='font-size:18px; color:#666;'>"
        f"⚙️ Бизнес-правила: {q_business}</span><br><br>"
        f"🕒 Время: <b>{frame_data['time_str']}</b><br>"
        f"⏳ Ожидание: <b>{avg_w:.1f} мин</b>"
    )

    fig.add_trace(go.Scatter(x=[0], y=[0], mode='text', text=[status_card], textfont=dict(size=16)), row=3, col=2)

    # Hide the axes of the heat map and of the text card.
    fig.update_xaxes(visible=False, row=2, col=1)
    fig.update_yaxes(visible=False, row=2, col=1)
    fig.update_xaxes(visible=False, row=3, col=2)
    fig.update_yaxes(visible=False, row=3, col=2)

    # Pin the Y ranges so the charts do not rescale between frames
    # (rescaling is the main cause of flicker).
    fig.update_yaxes(range=[0, 60], row=1, col=1)  # 60 = assumed max inflow; adjust to the real peak
    fig.update_yaxes(range=[0, 105], row=1, col=2)  # load never exceeds 100%

    fig.update_layout(
        height=950,
        margin=dict(t=80, b=40, l=50, r=50),
        template="plotly_white",
        showlegend=False,
        # Disable transition animations, which produce a blinking effect.
        transition_duration=0,
        hovermode=False
    )

    # Tag the layout with the frame time so a changed value signals new data.
    fig.layout.datarevision = frame_data['time']
    return fig
138
+
139
+
140
+ from matplotlib.animation import FFMpegWriter
141
+
142
+ import matplotlib.pyplot as plt
143
+ import matplotlib.animation as animation
144
+ import tempfile
145
+ import numpy as np
146
+
147
+ import matplotlib.pyplot as plt
148
+ import matplotlib.animation as animation
149
+ import tempfile
150
+ import numpy as np
151
+ import os
152
+
153
+
154
+ # Внести изменения в функцию create_simulation_video в animation.py
155
def create_simulation_video(frames, specialists_count, second_model_name, fps=24):
    """Render the simulation frames into an MP4 file and return its path.

    Args:
        frames: Sequence of frame dicts. Each frame must provide
            ``inflow_history``, ``load_history``, ``specialist_states``,
            ``queue``, ``cumulative`` and ``time_str``; ``business_queue``
            is optional.
        specialists_count: Number of specialists shown on the heat map
            (20 cells per row).
        second_model_name: Model name printed in the summary box.
        fps: Frames per second of the resulting video.

    Returns:
        Path of the created temporary ``.mp4`` file (the caller is
        responsible for deleting it), or ``None`` when ``frames`` is empty.
    """
    if not frames:
        return None

    # Styling.
    plt.style.use('seaborn-v0_8-whitegrid')
    fig, axes = plt.subplots(2, 2, figsize=(16, 10), facecolor='#f8f9fa')
    plt.subplots_adjust(hspace=0.4, wspace=0.25)
    # Detach the figure from pyplot so it is not displayed on its own;
    # the Figure object itself stays usable for the animation.
    plt.close(fig)

    def update(i):
        data = frames[i]
        for ax in axes.flatten():
            ax.clear()
            ax.set_facecolor('white')

        # 1. INFLOW DYNAMICS
        y_inflow = data['inflow_history']
        axes[0, 0].fill_between(range(len(y_inflow)), y_inflow, color='#4361ee', alpha=0.3)
        axes[0, 0].plot(range(len(y_inflow)), y_inflow, color='#4361ee', linewidth=2)
        axes[0, 0].set_xlim(0, 1440)  # fixed time axis
        axes[0, 0].set_title("ДИНАМИКА ПОТОКА (заявок/мин)", fontsize=12, fontweight='bold')
        axes[0, 0].set_xlabel("Минуты симуляции")

        # 2. SYSTEM LOAD
        y_load = [v * 100 for v in data['load_history']]
        # An empty history has no "current" value; show 0 instead of failing.
        current_load = y_load[-1] if y_load else 0.0
        axes[0, 1].fill_between(range(len(y_load)), y_load, color='#4cc9f0', alpha=0.3)
        axes[0, 1].plot(range(len(y_load)), y_load, color='#4cc9f0', linewidth=2)
        axes[0, 1].axhline(y=80, color='#f72585', linestyle='--', alpha=0.6)
        axes[0, 1].set_xlim(0, 1440)
        axes[0, 1].set_ylim(0, 110)
        axes[0, 1].set_title(f"ЗАГРУЖЕННОСТЬ СПЕЦИАЛИСТОВ %: {current_load:.1f}%", fontsize=12, fontweight='bold')

        # 3. HEATMAP AND LEGEND
        states = np.array(data['specialist_states'])
        cols = 20
        rows = int(np.ceil(specialists_count / cols))
        z = np.zeros((rows, cols))
        # States beyond the grid are ignored.
        for idx, val in enumerate(states[:rows * cols]):
            z[idx // cols, idx % cols] = val

        axes[1, 0].imshow(z, cmap='RdYlGn_r', aspect='auto', vmin=0, vmax=10)
        axes[1, 0].set_title(f"МОНИТОРИНГ: {specialists_count} СПЕЦИАЛИСТОВ", fontsize=12, fontweight='bold')
        axes[1, 0].axis('off')

        # Text legend under the heat map.
        legend_text = "Цвета: Зеленый (Свободен) → Желтый (3-5 мин) → Красный (8+ мин)"
        axes[1, 0].text(0.5, -0.1, legend_text, ha='center', transform=axes[1, 0].transAxes, fontsize=10)

        # --- 4. SEPARATE QUEUES AND STATISTICS ---
        ax_stat = axes[1, 1]
        ax_stat.axis('off')

        # Queue counters turn red once a queue exceeds 50.
        q_mod_color = '#991b1b' if data['queue'] > 50 else '#166534'
        q_biz_color = '#991b1b' if data.get('business_queue', 0) > 50 else '#1e293b'

        # Two queue captions at the top.
        ax_stat.text(0.25, 0.9, "ОЧЕРЕДЬ\n(МОДЕЛИ)", fontsize=10, ha='center', fontweight='bold')
        ax_stat.text(0.25, 0.78, f"{data['queue']}", fontsize=26, ha='center', fontweight='bold', color=q_mod_color)

        ax_stat.text(0.75, 0.9, "ОЧЕРЕДЬ\n(БИЗНЕС ПРАВИЛА)", fontsize=10, ha='center', fontweight='bold')
        ax_stat.text(0.75, 0.78, f"{data.get('business_queue', 0)}", fontsize=26, ha='center', fontweight='bold',
                     color=q_biz_color)

        # Summary box below.
        cum = data['cumulative']
        stats_text = (
            f"Итоговые показатели к {data['time_str']}\n"
            f"--------------------------------------\n"
            f"ОБРАБОТАНО ВСЕГО: {cum['total_processed']}\n"
            f"Авто-одобрено: {cum['auto_approved']}\n"
            f"Авто-отказы: {cum['auto_declined']}\n"
            f"Ручной разбор (модель): {cum['manual_processed']}\n"
            f"Ручной разбор (бизнес правила): {cum['business_manual_processed']}\n"
            f"--------------------------------------\n"
            f"Используемая модель: {second_model_name}"
        )

        ax_stat.text(0.5, 0.3, stats_text, fontsize=10, fontfamily='monospace',
                     ha='center', va='center', transform=ax_stat.transAxes,
                     bbox=dict(facecolor='#f8f9fa', alpha=1, boxstyle='round,pad=1', edgecolor='#dee2e6'))

        return axes.flatten()

    ani = animation.FuncAnimation(fig, update, frames=len(frames), interval=1000 / fps)

    # Only the unique name is needed. Close the handle right away so the
    # descriptor is not leaked and the encoder can write to the path while
    # no other handle holds it open.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmp_file:
        video_path = tmp_file.name

    writer = animation.FFMpegWriter(fps=fps, bitrate=2000, extra_args=['-vcodec', 'libx264', '-pix_fmt', 'yuv420p'])
    ani.save(video_path, writer=writer)
    return video_path
app/simulation/visualization/plots.py ADDED
@@ -0,0 +1,374 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib.pyplot as plt
2
+ import matplotlib.animation as animation
3
+ import numpy as np
4
+
5
+
6
+
7
def minutes_to_time(minutes, start_time="00:00"):
    """Convert minutes elapsed since ``start_time`` into an ``HH:MM`` string.

    Args:
        minutes: Whole-minute offset from the start.
        start_time: Start of the timeline as an ``"HH:MM"`` string.

    Returns:
        Zero-padded ``"HH:MM"`` string; hours wrap around after 24.
    """
    base_hour, base_minute = (int(part) for part in start_time.split(':'))
    elapsed_hours, minute = divmod(base_hour * 60 + base_minute + minutes, 60)
    return f"{elapsed_hours % 24:02d}:{minute:02d}"
14
+
15
+
16
def plot_queue_dynamics(queue_history, business_queue_history=None, start_time="00:00"):
    """Draw the two queues side by side with an HH:MM time axis.

    Args:
        queue_history: Per-minute size of the model-uncertainty queue.
        business_queue_history: Optional per-minute size of the
            business-rules queue (list or array). When missing or empty,
            the right panel shows a "no data" message.
        start_time: Clock time of minute 0 as ``"HH:MM"``.

    Returns:
        The ``matplotlib.pyplot`` module, with the new figure current.
    """
    _, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

    def _draw_series(ax, series, color):
        # Each panel builds its X values and hourly ticks from its own
        # series length, so histories of different lengths still plot.
        n_points = len(series)
        hours = range(0, n_points, 60)  # one tick per hour
        ax.plot(range(n_points), series, color=color, linewidth=1.5)
        ax.set_xticks(hours)
        ax.set_xticklabels([minutes_to_time(m, start_time) for m in hours], rotation=45)
        ax.set_ylabel('Размер очереди')
        ax.grid(True, alpha=0.3)

    # Panel 1: model queue.
    _draw_series(ax1, queue_history, 'b')
    ax1.set_xlabel('Время')
    ax1.set_title('Очередь моделей')

    # Panel 2: business-rules queue. An explicit None/len check is used
    # because the truth value of a NumPy array is ambiguous.
    if business_queue_history is not None and len(business_queue_history) > 0:
        _draw_series(ax2, business_queue_history, 'orange')
    else:
        ax2.text(0.5, 0.5, 'Нет данных', ha='center', va='center', transform=ax2.transAxes)
    ax2.set_title('Очередь бизнес-правил')
    ax2.set_xlabel('Время')

    plt.tight_layout()
    return plt
52
+
53
+
54
def plot_specialist_load(specialist_busy_history, specialists_count, start_time="00:00"):
    """Plot the specialist utilisation (in percent) over an HH:MM time axis.

    Args:
        specialist_busy_history: Number of busy specialists per minute.
        specialists_count: Total number of specialists (100% level).
        start_time: Clock time of minute 0 as ``"HH:MM"``.

    Returns:
        The ``matplotlib.pyplot`` module, with the new figure current.
    """
    load_percent = []
    for busy in specialist_busy_history:
        load_percent.append(busy / specialists_count * 100)

    _, ax = plt.subplots(figsize=(10, 4))

    n_minutes = len(load_percent)
    hour_marks = range(0, n_minutes, 60)  # one tick per hour
    tick_text = [minutes_to_time(mark, start_time) for mark in hour_marks]

    ax.plot(range(n_minutes), load_percent, 'g-', linewidth=1.5)
    # Reference levels: full capacity and the 80% target.
    for level, colour, caption in ((100, 'r', 'Максимум'), (80, 'b', 'Цель 80%')):
        ax.axhline(y=level, color=colour, linestyle='--', alpha=0.5, label=caption)

    ax.set_xticks(hour_marks)
    ax.set_xticklabels(tick_text, rotation=45)
    ax.set_xlabel('Время')
    ax.set_ylabel('Загрузка (%)')
    ax.set_title('Загрузка специалистов')
    ax.legend()
    ax.grid(True, alpha=0.3)
    ax.set_ylim(0, 110)

    plt.tight_layout()
    return plt
79
+
80
+
81
def plot_inflow(minute_counts, start_time="00:00"):
    """Plot the per-minute inflow of applications as a filled curve.

    The chart also shows a 30-minute moving average (when there are more
    than 30 points) and a horizontal line at the overall mean.

    Args:
        minute_counts: Number of incoming applications per minute.
        start_time: Clock time of minute 0 as ``"HH:MM"``.

    Returns:
        The ``matplotlib.pyplot`` module, with the new figure current.
    """
    _, ax = plt.subplots(figsize=(14, 5))

    total_minutes = len(minute_counts)
    minutes = range(total_minutes)

    # Filled area under the curve.
    ax.fill_between(minutes, minute_counts, alpha=0.3, color='blue', label='Общий поток')

    # Main line on top of the fill.
    ax.plot(minutes, minute_counts, 'b-', linewidth=1.5, alpha=0.8)

    # Moving average; 'valid' mode yields total_minutes - window + 1
    # points, aligned with the right edge of each window.
    window = 30
    if total_minutes > window:
        smoothed = np.convolve(minute_counts, np.ones(window) / window, mode='valid')
        ax.plot(range(window - 1, total_minutes), smoothed,
                'r-', linewidth=2.5, label='Среднее за 30 мин')

    # Overall mean. This is drawn BEFORE ax.legend() so that its label
    # is actually included in the legend.
    mean_value = np.mean(minute_counts)
    ax.axhline(y=mean_value, color='gray', linestyle='--', alpha=0.7,
               label=f'Среднее: {mean_value:.1f}')

    # Hourly time labels.
    hours = range(0, total_minutes, 60)
    hour_labels = [minutes_to_time(m, start_time) for m in hours]

    ax.set_xticks(hours)
    ax.set_xticklabels(hour_labels, rotation=45)
    ax.set_xlabel('Время')
    ax.set_ylabel('Количество заявок')
    ax.set_title('Входящий поток заявок')
    ax.legend()
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    return plt
125
+
126
+
127
def minutes_to_time(minutes, start_time="00:00"):
    """Convert a minute offset from ``start_time`` into an "HH:MM" string.

    Args:
        minutes: Integer number of minutes elapsed since ``start_time``.
        start_time: Starting clock time formatted as "HH:MM".

    Returns:
        The resulting clock time as a zero-padded "HH:MM" string; hours
        wrap around after 24.
    """
    base_hour, base_minute = (int(part) for part in start_time.split(':'))
    elapsed_hours, minute = divmod(base_hour * 60 + base_minute + minutes, 60)
    hour = elapsed_hours % 24
    return f"{hour:02d}:{minute:02d}"
134
+
135
+
136
def plot_detailed_decisions(batch_stats, second_model_name="XGBoost", start_time="00:00"):
    """Plot one panel per decision type on a shared HH:MM time axis.

    The 3x2 grid contains five filled line charts (manual review by business
    rules, auto-decline by business rules, confident Logistic Regression
    decisions, confident second-model decisions, manual review because the
    models were uncertain) and a sixth panel comparing the two sources of
    manual review.

    Args:
        batch_stats: Sequence of dicts. Each dict is read for the keys
            'time', 'business_manual', 'business_auto', 'lr_confident',
            'second_confident' and 'second_uncertain'.
        second_model_name: Display name of the second model, used in a title.
        start_time: Clock time ("HH:MM") corresponding to minute 0.

    Returns:
        The ``matplotlib.pyplot`` module with the new figure current, or
        ``None`` when ``batch_stats`` is empty.
    """
    if not batch_stats:
        return None

    fig, axes = plt.subplots(3, 2, figsize=(14, 10))

    times = [stat['time'] for stat in batch_stats]  # minutes
    # batch_stats is non-empty here, so max() is always defined. int() keeps
    # range() working if the timestamps are not plain integers.
    total_minutes = int(max(times))

    # Hourly tick positions and their HH:MM labels, shared by every panel.
    hours = range(0, total_minutes + 60, 60)
    hour_labels = [minutes_to_time(m, start_time) for m in hours]

    def series(key):
        """Collect one statistic across all batches."""
        return [stat[key] for stat in batch_stats]

    def decorate(ax, title):
        """Apply the title, time axis, labels and grid common to all panels."""
        ax.set_title(title, fontweight='bold')
        ax.set_xticks(hours)
        ax.set_xticklabels(hour_labels, rotation=45)
        ax.set_xlabel('Время')
        ax.set_ylabel('Заявок')
        ax.grid(True, alpha=0.3)

    # (axis, stats key, line format, fill colour, title) for the five
    # single-series panels.
    panels = [
        (axes[0, 0], 'business_manual', 'r-', 'red',
         'Ручной разбор: бизнес-правила'),
        (axes[0, 1], 'business_auto', 'darkred', 'darkred',
         'Авто отказ: бизнес-правила'),
        (axes[1, 0], 'lr_confident', 'blue', 'blue',
         'Уверенные решения: Logistic Regression'),
        (axes[1, 1], 'second_confident', 'green', 'green',
         f'Уверенные решения: {second_model_name}'),
        (axes[2, 0], 'second_uncertain', 'orange', 'orange',
         'Ручной разбор: модели неуверенны'),
    ]
    for ax, key, line_format, fill_color, title in panels:
        values = series(key)
        ax.plot(times, values, line_format, linewidth=1.5)
        ax.fill_between(times, 0, values, alpha=0.2, color=fill_color)
        decorate(ax, title)

    # Comparison of the two sources of manual review.
    comparison = axes[2, 1]
    comparison.plot(times, series('business_manual'),
                    'r-', linewidth=1.5, label='Бизнес-правила', alpha=0.7)
    comparison.plot(times, series('second_uncertain'),
                    'orange', linewidth=1.5, label='Модели неуверенны', alpha=0.7)
    decorate(comparison, 'Сравнение источников ручного разбора')
    comparison.legend()

    plt.suptitle('Детальный анализ решений', fontsize=14, fontweight='bold')
    plt.tight_layout()
    return plt
228
+
229
def plot_parameters_history(pid_history, second_model_name="XGBoost", start_time="00:00"):
    """Plot how the controller parameters changed over time.

    Three stacked panels are drawn: the Logistic Regression low/high margins,
    the second model's low/high margins, and the load error together with the
    controller output.

    Args:
        pid_history: Table-like object exposing ``.empty``, ``len()`` and the
            columns 'lr_low', 'lr_high', 'second_low', 'second_high',
            'error_load' and 'output' (presumably a pandas DataFrame).
        second_model_name: Display name of the second model.
        start_time: Clock time ("HH:MM") corresponding to row 0.

    Returns:
        The ``matplotlib.pyplot`` module with the new figure current, or
        ``None`` when ``pid_history`` is ``None`` or empty.
    """
    if pid_history is None or pid_history.empty:
        return None

    fig, axes = plt.subplots(3, 1, figsize=(12, 12))
    lr_ax, second_ax, control_ax = axes

    point_count = len(pid_history)
    x_values = range(point_count)

    # Hourly tick positions and their HH:MM labels.
    tick_positions = range(0, point_count, 60)
    tick_labels = [minutes_to_time(position, start_time) for position in tick_positions]

    # 1. Logistic Regression margins.
    lr_ax.plot(x_values, pid_history['lr_low'], 'g-', linewidth=2, label='LR Low')
    lr_ax.plot(x_values, pid_history['lr_high'], 'r-', linewidth=2, label='LR High')
    lr_ax.set_ylabel('Отступ')
    lr_ax.set_title('Отступы Logistic Regression')

    # 2. Second model margins (labelled with the supplied model name).
    second_ax.plot(x_values, pid_history['second_low'], 'g-', linewidth=2,
                   label=f'{second_model_name} Low')
    second_ax.plot(x_values, pid_history['second_high'], 'r-', linewidth=2,
                   label=f'{second_model_name} High')
    second_ax.set_ylabel('Отступ')
    second_ax.set_title(f'Отступы {second_model_name}')

    # 3. Load error and controller output, with a zero reference line.
    control_ax.plot(x_values, pid_history['error_load'], 'b-', label='Error load',
                    alpha=0.7, linewidth=1.5)
    control_ax.plot(x_values, pid_history['output'], 'r-', label='Output',
                    linewidth=2, alpha=0.7)
    control_ax.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
    control_ax.set_xlabel('Время')
    control_ax.set_ylabel('Значение')
    control_ax.set_title('Ошибка загрузки и выход регулятора')

    # Legend, grid and time axis are identical on all three panels.
    for panel in axes:
        panel.legend()
        panel.grid(True, alpha=0.3)
        panel.set_xticks(tick_positions)
        panel.set_xticklabels(tick_labels, rotation=45)

    plt.tight_layout()
    return plt
277
+
278
+
279
+ # def plot_summary(processor):
280
+ # """Сводный дашборд"""
281
+ # fig, axes = plt.subplots(2, 3, figsize=(15, 10))
282
+ #
283
+ # stats = processor.stats
284
+ #
285
+ # # 1. Динамика очередей
286
+ # axes[0, 0].plot(stats['queue_history'], 'b-', linewidth=1.5, label='Очередь моделей')
287
+ # if 'business_queue_history' in stats:
288
+ # axes[0, 0].plot(stats['business_queue_history'], 'orange', linewidth=1.5, label='Очередь бизнес-правил')
289
+ # axes[0, 0].set_title('Динамика очередей')
290
+ # axes[0, 0].set_xlabel('Минута')
291
+ # axes[0, 0].set_ylabel('Заявок')
292
+ # axes[0, 0].legend()
293
+ # axes[0, 0].grid(True, alpha=0.3)
294
+ #
295
+ # # 2. Загрузка специалистов (модели)
296
+ # load = [b / processor.specialists_count * 100 for b in stats['specialist_busy']]
297
+ # axes[0, 1].plot(load, 'g-', linewidth=1.5, label='Основные специалисты')
298
+ # axes[0, 1].axhline(y=100, color='r', linestyle='--', alpha=0.5, label='Максимум')
299
+ # if hasattr(processor, 'target_load'):
300
+ # axes[0, 1].axhline(y=processor.target_load * 100, color='b', linestyle='--',
301
+ # alpha=0.5, label=f'Цель {processor.target_load * 100:.0f}%')
302
+ # axes[0, 1].set_title('Загрузка специалистов (модели)')
303
+ # axes[0, 1].set_xlabel('Минута')
304
+ # axes[0, 1].set_ylabel('%')
305
+ # axes[0, 1].legend()
306
+ # axes[0, 1].grid(True, alpha=0.3)
307
+ #
308
+ # # 3. Загрузка экспертов
309
+ # if 'business_specialist_busy' in stats and stats['business_specialist_busy']:
310
+ # business_load = [b / processor.business_specialists_count * 100 for b in stats['business_specialist_busy']]
311
+ # axes[1, 0].plot(business_load, 'orange', linewidth=1.5, label='Эксперты')
312
+ # axes[1, 0].axhline(y=100, color='r', linestyle='--', alpha=0.5, label='Максимум')
313
+ # axes[1, 0].set_title('Загрузка экспертов (бизнес-правила)')
314
+ # axes[1, 0].set_xlabel('Минута')
315
+ # axes[1, 0].set_ylabel('%')
316
+ # axes[1, 0].legend()
317
+ # axes[1, 0].grid(True, alpha=0.3)
318
+ # else:
319
+ # axes[1, 0].text(0.5, 0.5, 'Нет данных по экспертам', ha='center', va='center')
320
+ # axes[1, 0].set_title('Загрузка экспертов')
321
+ #
322
+ # # 4. Распределение решений
323
+ # sizes = [
324
+ # stats['auto_approved'],
325
+ # stats['auto_declined'],
326
+ # stats['manual_processed'],
327
+ # stats.get('business_manual_processed', 0)
328
+ # ]
329
+ # labels = ['Одобрено авто', 'Отказ авто', 'Ручной (модели)', 'Ручной (бизнес)']
330
+ # colors = ['#2ecc71', '#e74c3c', '#3498db', '#f39c12']
331
+ #
332
+ # if sum(sizes) > 0:
333
+ # wedges, texts, autotexts = axes[1, 1].pie(sizes, labels=labels, autopct='%1.1f%%',
334
+ # colors=colors, startangle=90)
335
+ # for autotext in autotexts:
336
+ # autotext.set_color('white')
337
+ # autotext.set_fontweight('bold')
338
+ # axes[1, 1].set_title('Итоговые решения')
339
+ #
340
+ # # 5. Ключевые метрики (освободилось место)
341
+ # total = stats['total_processed']
342
+ # if total > 0:
343
+ # avg_wait = np.mean(stats['wait_times']) if stats['wait_times'] else 0
344
+ # avg_business_wait = np.mean(stats.get('business_wait_times', [0])) if stats.get('business_wait_times') else 0
345
+ #
346
+ # metrics_text = f"""
347
+ # Всего заявок: {total:,}
348
+ # Одобрено авто: {stats['auto_approved']:,} ({stats['auto_approved'] / total * 100:.1f}%)
349
+ # Отказ авто: {stats['auto_declined']:,} ({stats['auto_declined'] / total * 100:.1f}%)
350
+ #
351
+ # Ручной разбор (модели): {stats['manual_processed']:,} ({stats['manual_processed'] / total * 100:.1f}%)
352
+ # Ручной разбор (бизнес): {stats.get('business_manual_processed', 0):,}
353
+ #
354
+ # Среднее время ожидания (модели): {avg_wait:.1f} мин
355
+ # Среднее время ожидания (бизнес): {avg_business_wait:.1f} мин
356
+ #
357
+ # Средняя загрузка специалистов: {np.mean(load):.1f}%
358
+ # """
359
+ # else:
360
+ # metrics_text = "Нет данных"
361
+ #
362
+ # axes[0, 2].text(0.1, 0.5, metrics_text, transform=axes[0, 2].transAxes,
363
+ # fontsize=10, verticalalignment='center', fontfamily='monospace')
364
+ # axes[0, 2].axis('off')
365
+ # axes[0, 2].set_title('Ключевые метрики')
366
+ #
367
+ # # 6. Пустой график или можно что-то еще
368
+ # axes[1, 2].axis('off')
369
+ #
370
+ # plt.suptitle('Сводная статистика симуляции', fontsize=14, fontweight='bold')
371
+ # plt.tight_layout()
372
+ # return plt
373
+
374
+
app/simulation/visualization/simulation_20:11.gif ADDED
app/simulation/visualization/simulation_20:19.gif ADDED
app/simulation/visualization/simulation_20:25.gif ADDED
app/simulation/visualization/simulation_20:30.gif ADDED
app/utils/__pycache__/credit_preprocessor.cpython-311.pyc ADDED
Binary file (14.5 kB). View file
 
app/utils/__pycache__/data_loader.cpython-311.pyc ADDED
Binary file (1.51 kB). View file
 
app/utils/credit_preprocessor.py ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import joblib
4
+ from sklearn.base import BaseEstimator, TransformerMixin
5
+
6
+
7
class CreditDataPreprocessor(BaseEstimator, TransformerMixin):
    """Full feature-engineering step for the credit application data.

    ``transform`` fills missing values, clips outliers, adds indicator
    columns for clipped values and special codes, combines the three
    past-due counters into a single risk score and one-hot encodes grouped
    loan counts. ``clean_train`` is a separate helper that filters rows of a
    training set; it is not invoked by ``fit`` or ``transform``.

    All constructor arguments are stored unchanged as attributes of the same
    name. The ``*_drop_threshold`` arguments are only read by
    ``clean_train``.
    """

    def __init__(self,
                 NumberOfDependents_fill_value=0,
                 NumberOfDependents_up_threshold=10,
                 MonthlyIncome_fill_value=0,
                 RevolvingUtilizationOfUnsecuredLines_drop_threshold=2,
                 age_low_drop_threshold=18,
                 age_up_drop_threshold=80,
                 DebtRatio_up_threshold=5,
                 PastDueRiskScore_weights=(1.0, 1.2, 1.3),
                 NumberRealEstateLoansOrLines_drop_threshold=20,
                 drop_special_codes=False):
        # The default weights are an immutable tuple so that instances built
        # with the default cannot share (and accidentally mutate) one list.
        self.NumberOfDependents_fill_value = NumberOfDependents_fill_value
        self.NumberOfDependents_up_threshold = NumberOfDependents_up_threshold

        self.MonthlyIncome_fill_value = MonthlyIncome_fill_value

        self.RevolvingUtilizationOfUnsecuredLines_drop_threshold = RevolvingUtilizationOfUnsecuredLines_drop_threshold

        self.age_low_drop_threshold = age_low_drop_threshold
        self.age_up_drop_threshold = age_up_drop_threshold

        self.DebtRatio_up_threshold = DebtRatio_up_threshold

        self.PastDueRiskScore_weights = PastDueRiskScore_weights

        self.NumberRealEstateLoansOrLines_drop_threshold = NumberRealEstateLoansOrLines_drop_threshold

        self.drop_special_codes = drop_special_codes

    def fit(self, X, y=None):
        """Do nothing and return ``self``; the transformer learns no state."""
        return self

    def transform(self, X):
        """Return an engineered copy of ``X``; the input is not modified.

        ``X`` must contain the columns 'NumberOfDependents', 'MonthlyIncome',
        'RevolvingUtilizationOfUnsecuredLines', 'DebtRatio',
        'NumberOfTime30-59DaysPastDueNotWorse',
        'NumberOfTime60-89DaysPastDueNotWorse', 'NumberOfTimes90DaysLate',
        'NumberOfOpenCreditLinesAndLoans' and 'NumberRealEstateLoansOrLines'.
        Any other columns are passed through unchanged.
        """
        X_copy = X.copy()

        # Dependents: fill missing values, then clip to [0, upper threshold].
        X_copy['NumberOfDependents'] = X_copy['NumberOfDependents'].fillna(value=self.NumberOfDependents_fill_value)
        X_copy['NumberOfDependents'] = X_copy['NumberOfDependents'].clip(0, self.NumberOfDependents_up_threshold)

        # Monthly income: record missingness before filling it in.
        X_copy['MonthlyIncomeIsMissing'] = 0
        X_copy.loc[X_copy['MonthlyIncome'].isna(), 'MonthlyIncomeIsMissing'] = 1
        X_copy['MonthlyIncome'] = X_copy['MonthlyIncome'].fillna(value=self.MonthlyIncome_fill_value)

        # Revolving utilization: flag values above 1, then clip to [0, 1].
        X_copy['RevolvingUtilizationOverOne'] = 0.0
        X_copy.loc[X_copy['RevolvingUtilizationOfUnsecuredLines'] > 1, 'RevolvingUtilizationOverOne'] = 1.0
        X_copy['RevolvingUtilizationOfUnsecuredLines'] = X_copy['RevolvingUtilizationOfUnsecuredLines'].clip(0, 1)

        # Debt payments: DebtRatio * MonthlyIncome, except that rows with
        # zero income take DebtRatio itself as the payment amount. Must be
        # computed before DebtRatio is clipped.
        no_income = X_copy['MonthlyIncome'] == 0
        has_income = X_copy['MonthlyIncome'] != 0
        X_copy['DebtPayments'] = 0.0
        X_copy.loc[no_income, 'DebtPayments'] = X_copy.loc[no_income, 'DebtRatio']
        X_copy.loc[has_income, 'DebtPayments'] = (
            X_copy.loc[has_income, 'DebtRatio'] * X_copy.loc[has_income, 'MonthlyIncome']
        )
        X_copy['DebtRatio'] = X_copy['DebtRatio'].clip(0, self.DebtRatio_up_threshold)

        X_copy['DebtPayments_over_10k'] = 0.0
        X_copy.loc[X_copy['DebtPayments'] > 10000, 'DebtPayments_over_10k'] = 1.0
        X_copy['DebtPayments'] = X_copy['DebtPayments'].clip(0, 10000)

        X_copy['MonthlyIncome_over_20k'] = 0.0
        X_copy.loc[X_copy['MonthlyIncome'] >= 20000, 'MonthlyIncome_over_20k'] = 1.0
        X_copy['MonthlyIncome'] = X_copy['MonthlyIncome'].clip(0, 20000)

        # Special codes 96 / 98 are detected in the 30-59 days counter only.
        X_copy['Code96'] = 0.0
        X_copy['Code98'] = 0.0
        X_copy.loc[X_copy['NumberOfTime30-59DaysPastDueNotWorse'] == 96, 'Code96'] = 1.0
        X_copy.loc[X_copy['NumberOfTime30-59DaysPastDueNotWorse'] == 98, 'Code98'] = 1.0

        # Weighted sum of the three past-due counters; rows carrying a
        # special code keep the raw code as their score instead.
        X_copy['PastDueRiskScore'] = (
                self.PastDueRiskScore_weights[0] * X_copy['NumberOfTime30-59DaysPastDueNotWorse'] +
                self.PastDueRiskScore_weights[1] * X_copy['NumberOfTime60-89DaysPastDueNotWorse'] +
                self.PastDueRiskScore_weights[2] * X_copy['NumberOfTimes90DaysLate'])
        X_copy.loc[X_copy['NumberOfTime30-59DaysPastDueNotWorse'] == 96, 'PastDueRiskScore'] = 96
        X_copy.loc[X_copy['NumberOfTime30-59DaysPastDueNotWorse'] == 98, 'PastDueRiskScore'] = 98
        X_copy = X_copy.drop(columns=['NumberOfTime30-59DaysPastDueNotWorse', 'NumberOfTime60-89DaysPastDueNotWorse',
                                      'NumberOfTimes90DaysLate'])

        X_copy['NumberOfOpenCreditLinesAndLoans_over_30'] = 0.0
        X_copy.loc[X_copy['NumberOfOpenCreditLinesAndLoans'] > 30, 'NumberOfOpenCreditLinesAndLoans_over_30'] = 1.0
        X_copy['NumberOfOpenCreditLinesAndLoans'] = X_copy['NumberOfOpenCreditLinesAndLoans'].clip(0, 30)

        X_copy['NumberRealEstateLoansOrLines_over_5'] = 0.0
        X_copy.loc[X_copy['NumberRealEstateLoansOrLines'] > 5, 'NumberRealEstateLoansOrLines_over_5'] = 1.0
        X_copy['NumberRealEstateLoansOrLines'] = X_copy['NumberRealEstateLoansOrLines'].clip(0, 5)

        # NOTE(review): pd.cut uses right-closed intervals by default, so the
        # bins below are (0, 1], (1, 2], (2, 6], (6, 15], (15, 31]. A value of
        # 0 therefore falls into no bin (all Consumer_* dummies are 0) and
        # each label is shifted by one relative to the counts it names (e.g.
        # '1_loans' marks the value 2). Left unchanged on purpose: models
        # trained on this output may depend on the exact encoding - confirm
        # before correcting.
        X_copy['ConsumerCredit_Group'] = pd.cut(X_copy['NumberOfOpenCreditLinesAndLoans'],
                                                bins=[0, 1, 2, 6, 15, 31],
                                                labels=[
                                                    '0_loans',
                                                    '1_loans',
                                                    '2-5_loans',
                                                    '6-14_loans',
                                                    '16-30_loans'
                                                ])
        consumer_dummy = pd.get_dummies(X_copy['ConsumerCredit_Group'], prefix='Consumer', drop_first=False).astype(
            'float')

        X_copy['RealEstateLoans_Group'] = pd.cut(X_copy['NumberRealEstateLoansOrLines'],
                                                 bins=[-1, 0, 3, 100],
                                                 labels=[
                                                     '0_loans',
                                                     '1-3_loans',
                                                     '4+_loans',
                                                 ])
        estate_dummy = pd.get_dummies(X_copy['RealEstateLoans_Group'], prefix='RealEstateLoans',
                                      drop_first=False).astype('float')

        X_copy = pd.concat([X_copy, consumer_dummy, estate_dummy], axis=1)
        X_copy = X_copy.drop(columns=['ConsumerCredit_Group',
                                      'RealEstateLoans_Group'])

        # One dummy per group is dropped.
        X_copy = X_copy.drop(columns=['Consumer_6-14_loans',
                                      'RealEstateLoans_0_loans'])

        # Raw counts and several helper indicators are not part of the output.
        X_copy = X_copy.drop(columns=['NumberOfOpenCreditLinesAndLoans',
                                      'NumberRealEstateLoansOrLines',
                                      'MonthlyIncomeIsMissing',
                                      'MonthlyIncome_over_20k',
                                      'Consumer_0_loans',
                                      'NumberOfOpenCreditLinesAndLoans_over_30'])

        if self.drop_special_codes:
            X_copy = X_copy.drop(columns=['Code96', 'Code98'])

        return X_copy

    def fit_transform(self, X, y=None):
        """Fit (a no-op) and transform ``X`` in one call."""
        return self.fit(X, y).transform(X)

    def clean_train(self, X, y=None):
        """Filter rows that fall outside the configured drop thresholds.

        A row is kept when its revolving utilization and real-estate loan
        count do not exceed their drop thresholds and its 'age' lies within
        [age_low_drop_threshold, age_up_drop_threshold].

        Returns:
            ``X_clean`` alone when ``y`` is ``None``, otherwise the tuple
            ``(X_clean, y_clean)`` filtered with the same row mask.
        """
        mask = (
                (X[
                     'RevolvingUtilizationOfUnsecuredLines'] <= self.RevolvingUtilizationOfUnsecuredLines_drop_threshold) &
                (X['age'] >= self.age_low_drop_threshold) &
                (X['age'] <= self.age_up_drop_threshold) &
                (X['NumberRealEstateLoansOrLines'] <= self.NumberRealEstateLoansOrLines_drop_threshold)
        )

        X_clean = X[mask].copy()

        if y is not None:
            y_clean = y[mask].copy()
            return X_clean, y_clean

        return X_clean
157
+
158
+ from sklearn.base import BaseEstimator, TransformerMixin
159
+ from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler, MaxAbsScaler
160
+
161
class CreditScaler(BaseEstimator, TransformerMixin):
    """Scale only the non-boolean columns of a DataFrame.

    The scaling method is selected with ``scaler_type``:

    - 'standard': StandardScaler (zero mean, unit variance)
    - 'robust': RobustScaler (robust to outliers)
    - 'minmax': MinMaxScaler (maps to [0, 1])
    - 'maxabs': MaxAbsScaler (maps to [-1, 1])
    """

    def __init__(self, scaler_type='standard'):
        """Create the scaler selected by ``scaler_type``.

        Raises:
            ValueError: If ``scaler_type`` is not one of the supported names.
        """
        # Indicator / one-hot columns that are passed through unscaled.
        self.boolean_columns = [
            'RevolvingUtilizationOverOne',
            'DebtPayments_over_10k',
            'Code96',
            'Code98',
            'NumberRealEstateLoansOrLines_over_5',
            'Consumer_1_loans',
            'Consumer_2-5_loans',
            'Consumer_16-30_loans',
            'RealEstateLoans_1-3_loans',
            'RealEstateLoans_4+_loans'
        ]

        self.scaler_type = scaler_type
        self._create_scaler()

        # Populated by fit().
        self.columns_to_scale_ = None
        self.n_features_in_ = None
        self.feature_names_in_ = None

    @staticmethod
    def _build_scaler(scaler_type):
        """Return a new, unfitted scaler for ``scaler_type``.

        Raises:
            ValueError: If ``scaler_type`` is not a supported name.
        """
        factories = {
            'standard': StandardScaler,
            'robust': RobustScaler,
            'minmax': MinMaxScaler,
            'maxabs': MaxAbsScaler,
        }
        try:
            factory = factories[scaler_type]
        except (KeyError, TypeError):
            # TypeError covers unhashable values, which are equally invalid.
            raise ValueError(
                f"Unknown scaler_type: {scaler_type}. "
                f"Available: standard, robust, minmax, maxabs"
            ) from None
        return factory()

    def _create_scaler(self):
        """Create the scaler matching the current ``scaler_type``."""
        self.scaler = self._build_scaler(self.scaler_type)

    def fit(self, X, y=None):
        """Select the columns to scale (all but the boolean ones) and fit.

        Also records the input column names and their count.
        """
        self.feature_names_in_ = X.columns.tolist()
        self.n_features_in_ = len(self.feature_names_in_)

        self.columns_to_scale_ = [
            col for col in self.feature_names_in_
            if col not in self.boolean_columns
        ]

        self.scaler.fit(X[self.columns_to_scale_])
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with the non-boolean columns scaled."""
        X_copy = X.copy()

        X_copy[self.columns_to_scale_] = self.scaler.transform(X_copy[self.columns_to_scale_])

        return X_copy

    def fit_transform(self, X, y=None):
        """Fit on ``X`` and return its scaled copy."""
        return self.fit(X, y).transform(X, y)

    def get_feature_names_out(self, input_features=None):
        """Return output feature names (for sklearn compatibility).

        Returns ``input_features`` when given, otherwise the column names
        seen during ``fit`` (an empty list before fitting).
        """
        if input_features is not None:
            return input_features
        return self.feature_names_in_ if self.feature_names_in_ is not None else []

    def set_params(self, **params):
        """Set parameters, rebuilding the scaler when ``scaler_type`` changes.

        Needed for GridSearchCV compatibility. The new scaler is built (and
        the type thereby validated) before any attribute is modified, so an
        invalid ``scaler_type`` raises ``ValueError`` without leaving the
        estimator with a ``scaler_type`` that does not match its ``scaler``.
        """
        if 'scaler_type' in params and params['scaler_type'] != self.scaler_type:
            replacement = self._build_scaler(params['scaler_type'])
            self.scaler_type = params['scaler_type']
            self.scaler = replacement
        return super().set_params(**params)
257
+
258
+
259
def check_business_rules(age, monthly_income, monthly_debt, debt_ratio,
                         late_90, late_60_89, late_30_59, credit_lines,
                         real_estate, utilization, dependents):
    """Apply the hard business rules that run before model scoring.

    Rules are checked in order and the first match decides the result:
    automatic refusal for applicants under 18, then manual review for
    special codes 98 and 96 in any late-payment counter, age above 80,
    income or payments above 1,000,000, utilization above 2 and more than
    20 real-estate loans.

    ``debt_ratio``, ``credit_lines`` and ``dependents`` are accepted but not
    inspected by any rule.

    Returns:
        A dict with 'needs_manual' (bool) and 'decision' (1 for refusal,
        otherwise ``None``). A 'message' key is present only when a rule
        fired; the pass-through result has no 'message'.
    """
    def manual_review(reason):
        # Result shape shared by every rule that routes to a human.
        return {
            'needs_manual': True,
            'message': reason,
            'decision': None
        }

    late_counters = (late_90, late_60_89, late_30_59)

    # Critical rule: immediate refusal.
    if age < 18:
        return {
            'needs_manual': False,
            'message': 'Возраст менее 18 лет - кредит не выдаётся',
            'decision': 1  # refusal
        }

    # Special bank codes: straight to manual review.
    if any(counter == 98 for counter in late_counters):
        return manual_review('Код 98: Списание долга как безнадежного')

    if any(counter == 96 for counter in late_counters):
        return manual_review('Код 96: Изъятие залога или реализация имущества')

    # Critical rules: straight to manual review.
    if age > 80:
        return manual_review('Возраст > 80 лет - требуется ручной разбор (индивидуальные условия)')

    if monthly_income > 1000000:
        return manual_review('Доход свыше 1,000,000 $ - требуется ручной разбор')

    if monthly_debt > 1000000:
        return manual_review('Платежи свыше 1,000,000 $ - требуется ручной разбор')

    if utilization > 2:
        return manual_review('Использование кредитных средств превышает 200%')

    if real_estate > 20:
        return manual_review('Количество кредитов под залог недвижимости слишком велико - ручной разбор')

    # Every check passed: the application may be scored by the model.
    return {
        'needs_manual': False,
        'decision': None,
    }
327
+
328
+
329
+
app/utils/data_loader.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import joblib
3
+ import os
4
+
5
+
6
@st.cache_resource
def load_artifacts(models_path, preprocessor_path):
    """Load the preprocessor, the scaler and every available model.

    The preprocessor and scaler files are loaded unconditionally from
    ``preprocessor_path``. Model files are looked up in ``models_path``;
    a model whose file does not exist is omitted from the result.

    Returns:
        A tuple ``(preprocessor, scaler, models)`` where ``models`` maps a
        display name to the loaded model object.
    """
    preprocessor = joblib.load(os.path.join(preprocessor_path, 'preprocessor_150.pkl'))
    scaler = joblib.load(os.path.join(preprocessor_path, 'scaler_150.pkl'))

    # Display name -> artifact file name, in the order they are probed.
    known_models = (
        ('Logistic Regression', 'logreg_150_model.pkl'),
        ('XGBoost', 'xgb_150_model.pkl'),
        ('LightGBM', 'lgbm_150_model.pkl'),
        ('CatBoost', 'catboost_150_model.pkl'),
        ('Random Forest', 'rfc_150_model.pkl'),
    )

    models = {}
    for display_name, file_name in known_models:
        artifact_path = os.path.join(models_path, file_name)
        if not os.path.exists(artifact_path):
            continue
        models[display_name] = joblib.load(artifact_path)

    return preprocessor, scaler, models
catboost_info/catboost_training.json ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "meta":{"test_sets":[],"test_metrics":[],"learn_metrics":[{"best_value":"Min","name":"Logloss"}],"launch_mode":"Train","parameters":"","iteration_count":100,"learn_sets":["learn"],"name":"experiment"},
3
+ "iterations":[
4
+ {"learn":[0.6582255385],"iteration":0,"passed_time":0.1064302509,"remaining_time":10.53659484},
5
+ {"learn":[0.628929721],"iteration":1,"passed_time":0.1525804859,"remaining_time":7.476443808},
6
+ {"learn":[0.6055823656],"iteration":2,"passed_time":0.1890582018,"remaining_time":6.112881857},
7
+ {"learn":[0.585745295],"iteration":3,"passed_time":0.2252067115,"remaining_time":5.404961076},
8
+ {"learn":[0.5691497866],"iteration":4,"passed_time":0.2406276923,"remaining_time":4.571926155},
9
+ {"learn":[0.5553995801],"iteration":5,"passed_time":0.2564603788,"remaining_time":4.017879268},
10
+ {"learn":[0.5431466822],"iteration":6,"passed_time":0.2927427626,"remaining_time":3.889296704},
11
+ {"learn":[0.5321745596],"iteration":7,"passed_time":0.326945827,"remaining_time":3.75987701},
12
+ {"learn":[0.5230197248],"iteration":8,"passed_time":0.3622708005,"remaining_time":3.662960316},
13
+ {"learn":[0.5150673326],"iteration":9,"passed_time":0.3964141569,"remaining_time":3.567727412},
14
+ {"learn":[0.5085723427],"iteration":10,"passed_time":0.4321486694,"remaining_time":3.496475598},
15
+ {"learn":[0.5029521178],"iteration":11,"passed_time":0.4666833149,"remaining_time":3.422344309},
16
+ {"learn":[0.4982952699],"iteration":12,"passed_time":0.4887040018,"remaining_time":3.270557551},
17
+ {"learn":[0.4940193081],"iteration":13,"passed_time":0.5244568892,"remaining_time":3.221663748},
18
+ {"learn":[0.4903079864],"iteration":14,"passed_time":0.5588645355,"remaining_time":3.166899035},
19
+ {"learn":[0.4877126951],"iteration":15,"passed_time":0.5737366867,"remaining_time":3.012117605},
20
+ {"learn":[0.4849442288],"iteration":16,"passed_time":0.594951629,"remaining_time":2.904763836},
21
+ {"learn":[0.4823937275],"iteration":17,"passed_time":0.6301627283,"remaining_time":2.870741318},
22
+ {"learn":[0.4803765605],"iteration":18,"passed_time":0.6663060298,"remaining_time":2.840567811},
23
+ {"learn":[0.4781523185],"iteration":19,"passed_time":0.687587555,"remaining_time":2.75035022},
24
+ {"learn":[0.4767059201],"iteration":20,"passed_time":0.7219467433,"remaining_time":2.715894891},
25
+ {"learn":[0.475163465],"iteration":21,"passed_time":0.7562504736,"remaining_time":2.681251679},
26
+ {"learn":[0.4741219727],"iteration":22,"passed_time":0.777308792,"remaining_time":2.602294651},
27
+ {"learn":[0.473008657],"iteration":23,"passed_time":0.8123926421,"remaining_time":2.5725767},
28
+ {"learn":[0.4722618181],"iteration":24,"passed_time":0.8479381142,"remaining_time":2.543814343},
29
+ {"learn":[0.4714753648],"iteration":25,"passed_time":0.8643292969,"remaining_time":2.460014153},
30
+ {"learn":[0.470390758],"iteration":26,"passed_time":0.904391114,"remaining_time":2.445205605},
31
+ {"learn":[0.4696201438],"iteration":27,"passed_time":0.9384147212,"remaining_time":2.413066426},
32
+ {"learn":[0.46859249],"iteration":28,"passed_time":0.9729809498,"remaining_time":2.382125774},
33
+ {"learn":[0.4677104182],"iteration":29,"passed_time":1.007459887,"remaining_time":2.350739737},
34
+ {"learn":[0.4673133885],"iteration":30,"passed_time":1.019293767,"remaining_time":2.268750643},
35
+ {"learn":[0.4667841252],"iteration":31,"passed_time":1.031243396,"remaining_time":2.191392216},
36
+ {"learn":[0.4664536674],"iteration":32,"passed_time":1.066954367,"remaining_time":2.166240684},
37
+ {"learn":[0.4662622609],"iteration":33,"passed_time":1.076521387,"remaining_time":2.089717986},
38
+ {"learn":[0.466086662],"iteration":34,"passed_time":1.088162018,"remaining_time":2.020872318},
39
+ {"learn":[0.4657380808],"iteration":35,"passed_time":1.122871037,"remaining_time":1.996215177},
40
+ {"learn":[0.4651284039],"iteration":36,"passed_time":1.157755597,"remaining_time":1.971313584},
41
+ {"learn":[0.4646531445],"iteration":37,"passed_time":1.195417263,"remaining_time":1.95041764},
42
+ {"learn":[0.4641257326],"iteration":38,"passed_time":1.230054783,"remaining_time":1.92393184},
43
+ {"learn":[0.4637898175],"iteration":39,"passed_time":1.246455715,"remaining_time":1.869683573},
44
+ {"learn":[0.4633285186],"iteration":40,"passed_time":1.280777487,"remaining_time":1.843070043},
45
+ {"learn":[0.4628428368],"iteration":41,"passed_time":1.315556006,"remaining_time":1.816720199},
46
+ {"learn":[0.4626124403],"iteration":42,"passed_time":1.349455031,"remaining_time":1.788812483},
47
+ {"learn":[0.4623858706],"iteration":43,"passed_time":1.362016739,"remaining_time":1.73347585},
48
+ {"learn":[0.4621286714],"iteration":44,"passed_time":1.398965243,"remaining_time":1.709846409},
49
+ {"learn":[0.4617974001],"iteration":45,"passed_time":1.433543555,"remaining_time":1.682855478},
50
+ {"learn":[0.4614775166],"iteration":46,"passed_time":1.466964625,"remaining_time":1.654236705},
51
+ {"learn":[0.46142579],"iteration":47,"passed_time":1.479586041,"remaining_time":1.602884878},
52
+ {"learn":[0.4614011205],"iteration":48,"passed_time":1.486797243,"remaining_time":1.547482845},
53
+ {"learn":[0.4611845342],"iteration":49,"passed_time":1.521749302,"remaining_time":1.521749302},
54
+ {"learn":[0.4609852804],"iteration":50,"passed_time":1.557818146,"remaining_time":1.496727238},
55
+ {"learn":[0.4604321277],"iteration":51,"passed_time":1.596752012,"remaining_time":1.473924934},
56
+ {"learn":[0.4601645791],"iteration":52,"passed_time":1.646375224,"remaining_time":1.459993123},
57
+ {"learn":[0.459804458],"iteration":53,"passed_time":1.683746309,"remaining_time":1.434302411},
58
+ {"learn":[0.4592589475],"iteration":54,"passed_time":1.71827358,"remaining_time":1.405860201},
59
+ {"learn":[0.4589643366],"iteration":55,"passed_time":1.752749892,"remaining_time":1.377160629},
60
+ {"learn":[0.4585201818],"iteration":56,"passed_time":1.787086539,"remaining_time":1.348153003},
61
+ {"learn":[0.4582657803],"iteration":57,"passed_time":1.822507553,"remaining_time":1.319746849},
62
+ {"learn":[0.4580557799],"iteration":58,"passed_time":1.844325617,"remaining_time":1.281650005},
63
+ {"learn":[0.457864554],"iteration":59,"passed_time":1.877865186,"remaining_time":1.251910124},
64
+ {"learn":[0.4576167412],"iteration":60,"passed_time":1.915169063,"remaining_time":1.224452351},
65
+ {"learn":[0.4575874936],"iteration":61,"passed_time":1.924591958,"remaining_time":1.17958862},
66
+ {"learn":[0.457362279],"iteration":62,"passed_time":1.960633385,"remaining_time":1.151483099},
67
+ {"learn":[0.4572946663],"iteration":63,"passed_time":1.972484848,"remaining_time":1.109522727},
68
+ {"learn":[0.4569830294],"iteration":64,"passed_time":2.007666031,"remaining_time":1.08105094},
69
+ {"learn":[0.456610445],"iteration":65,"passed_time":2.042187927,"remaining_time":1.052036205},
70
+ {"learn":[0.4560918865],"iteration":66,"passed_time":2.076725989,"remaining_time":1.022865039},
71
+ {"learn":[0.4558479503],"iteration":67,"passed_time":2.110631805,"remaining_time":0.9932384965},
72
+ {"learn":[0.455740418],"iteration":68,"passed_time":2.124730753,"remaining_time":0.9545891789},
73
+ {"learn":[0.455501269],"iteration":69,"passed_time":2.159214566,"remaining_time":0.9253776709},
74
+ {"learn":[0.4554787935],"iteration":70,"passed_time":2.168732127,"remaining_time":0.8858201647},
75
+ {"learn":[0.4552744806],"iteration":71,"passed_time":2.205001553,"remaining_time":0.8575006039},
76
+ {"learn":[0.455234248],"iteration":72,"passed_time":2.220812656,"remaining_time":0.8213964619},
77
+ {"learn":[0.455137986],"iteration":73,"passed_time":2.255120136,"remaining_time":0.7923395074},
78
+ {"learn":[0.4549484305],"iteration":74,"passed_time":2.28992153,"remaining_time":0.7633071767},
79
+ {"learn":[0.4548062199],"iteration":75,"passed_time":2.324904798,"remaining_time":0.7341804624},
80
+ {"learn":[0.4546474797],"iteration":76,"passed_time":2.360039856,"remaining_time":0.7049469699},
81
+ {"learn":[0.4545581835],"iteration":77,"passed_time":2.372090859,"remaining_time":0.6690512679},
82
+ {"learn":[0.4544265313],"iteration":78,"passed_time":2.40573901,"remaining_time":0.6395002433},
83
+ {"learn":[0.4544030978],"iteration":79,"passed_time":2.413539667,"remaining_time":0.6033849167},
84
+ {"learn":[0.4543650724],"iteration":80,"passed_time":2.422935896,"remaining_time":0.568342988},
85
+ {"learn":[0.4542698101],"iteration":81,"passed_time":2.455982135,"remaining_time":0.5391180296},
86
+ {"learn":[0.4540294101],"iteration":82,"passed_time":2.490015159,"remaining_time":0.5100031048},
87
+ {"learn":[0.4539463005],"iteration":83,"passed_time":2.501245001,"remaining_time":0.4764276192},
88
+ {"learn":[0.4537784829],"iteration":84,"passed_time":2.53640835,"remaining_time":0.4476014736},
89
+ {"learn":[0.4536943889],"iteration":85,"passed_time":2.572798317,"remaining_time":0.4188276329},
90
+ {"learn":[0.4536386999],"iteration":86,"passed_time":2.608432288,"remaining_time":0.3897657442},
91
+ {"learn":[0.4533342039],"iteration":87,"passed_time":2.644785463,"remaining_time":0.3606525632},
92
+ {"learn":[0.4531946585],"iteration":88,"passed_time":2.679509774,"remaining_time":0.3311753653},
93
+ {"learn":[0.4529846134],"iteration":89,"passed_time":2.713719672,"remaining_time":0.301524408},
94
+ {"learn":[0.4529583581],"iteration":90,"passed_time":2.728664447,"remaining_time":0.2698679124},
95
+ {"learn":[0.4528171854],"iteration":91,"passed_time":2.765771076,"remaining_time":0.2405018327},
96
+ {"learn":[0.4526575987],"iteration":92,"passed_time":2.800900134,"remaining_time":0.2108204402},
97
+ {"learn":[0.4526170824],"iteration":93,"passed_time":2.822734239,"remaining_time":0.1801745259},
98
+ {"learn":[0.4525149982],"iteration":94,"passed_time":2.857135885,"remaining_time":0.1503755729},
99
+ {"learn":[0.4524663385],"iteration":95,"passed_time":2.892015112,"remaining_time":0.1205006297},
100
+ {"learn":[0.4524315166],"iteration":96,"passed_time":2.908353878,"remaining_time":0.08994908901},
101
+ {"learn":[0.4523241677],"iteration":97,"passed_time":2.943535644,"remaining_time":0.060072156},
102
+ {"learn":[0.4523010903],"iteration":98,"passed_time":2.964563379,"remaining_time":0.02994508464},
103
+ {"learn":[0.4522791181],"iteration":99,"passed_time":2.980352983,"remaining_time":0}
104
+ ]}
catboost_info/learn/events.out.tfevents ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1fe5092567732e395a3420a1205c4190f0e63d50edc678509bd4104fc34a503
3
+ size 5398
catboost_info/learn_error.tsv ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ iter Logloss
2
+ 0 0.6582255385
3
+ 1 0.628929721
4
+ 2 0.6055823656
5
+ 3 0.585745295
6
+ 4 0.5691497866
7
+ 5 0.5553995801
8
+ 6 0.5431466822
9
+ 7 0.5321745596
10
+ 8 0.5230197248
11
+ 9 0.5150673326
12
+ 10 0.5085723427
13
+ 11 0.5029521178
14
+ 12 0.4982952699
15
+ 13 0.4940193081
16
+ 14 0.4903079864
17
+ 15 0.4877126951
18
+ 16 0.4849442288
19
+ 17 0.4823937275
20
+ 18 0.4803765605
21
+ 19 0.4781523185
22
+ 20 0.4767059201
23
+ 21 0.475163465
24
+ 22 0.4741219727
25
+ 23 0.473008657
26
+ 24 0.4722618181
27
+ 25 0.4714753648
28
+ 26 0.470390758
29
+ 27 0.4696201438
30
+ 28 0.46859249
31
+ 29 0.4677104182
32
+ 30 0.4673133885
33
+ 31 0.4667841252
34
+ 32 0.4664536674
35
+ 33 0.4662622609
36
+ 34 0.466086662
37
+ 35 0.4657380808
38
+ 36 0.4651284039
39
+ 37 0.4646531445
40
+ 38 0.4641257326
41
+ 39 0.4637898175
42
+ 40 0.4633285186
43
+ 41 0.4628428368
44
+ 42 0.4626124403
45
+ 43 0.4623858706
46
+ 44 0.4621286714
47
+ 45 0.4617974001
48
+ 46 0.4614775166
49
+ 47 0.46142579
50
+ 48 0.4614011205
51
+ 49 0.4611845342
52
+ 50 0.4609852804
53
+ 51 0.4604321277
54
+ 52 0.4601645791
55
+ 53 0.459804458
56
+ 54 0.4592589475
57
+ 55 0.4589643366
58
+ 56 0.4585201818
59
+ 57 0.4582657803
60
+ 58 0.4580557799
61
+ 59 0.457864554
62
+ 60 0.4576167412
63
+ 61 0.4575874936
64
+ 62 0.457362279
65
+ 63 0.4572946663
66
+ 64 0.4569830294
67
+ 65 0.456610445
68
+ 66 0.4560918865
69
+ 67 0.4558479503
70
+ 68 0.455740418
71
+ 69 0.455501269
72
+ 70 0.4554787935
73
+ 71 0.4552744806
74
+ 72 0.455234248
75
+ 73 0.455137986
76
+ 74 0.4549484305
77
+ 75 0.4548062199
78
+ 76 0.4546474797
79
+ 77 0.4545581835
80
+ 78 0.4544265313
81
+ 79 0.4544030978
82
+ 80 0.4543650724
83
+ 81 0.4542698101
84
+ 82 0.4540294101
85
+ 83 0.4539463005
86
+ 84 0.4537784829
87
+ 85 0.4536943889
88
+ 86 0.4536386999
89
+ 87 0.4533342039
90
+ 88 0.4531946585
91
+ 89 0.4529846134
92
+ 90 0.4529583581
93
+ 91 0.4528171854
94
+ 92 0.4526575987
95
+ 93 0.4526170824
96
+ 94 0.4525149982
97
+ 95 0.4524663385
98
+ 96 0.4524315166
99
+ 97 0.4523241677
100
+ 98 0.4523010903
101
+ 99 0.4522791181
catboost_info/time_left.tsv ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ iter Passed Remaining
2
+ 0 106 10536
3
+ 1 152 7476
4
+ 2 189 6112
5
+ 3 225 5404
6
+ 4 240 4571
7
+ 5 256 4017
8
+ 6 292 3889
9
+ 7 326 3759
10
+ 8 362 3662
11
+ 9 396 3567
12
+ 10 432 3496
13
+ 11 466 3422
14
+ 12 488 3270
15
+ 13 524 3221
16
+ 14 558 3166
17
+ 15 573 3012
18
+ 16 594 2904
19
+ 17 630 2870
20
+ 18 666 2840
21
+ 19 687 2750
22
+ 20 721 2715
23
+ 21 756 2681
24
+ 22 777 2602
25
+ 23 812 2572
26
+ 24 847 2543
27
+ 25 864 2460
28
+ 26 904 2445
29
+ 27 938 2413
30
+ 28 972 2382
31
+ 29 1007 2350
32
+ 30 1019 2268
33
+ 31 1031 2191
34
+ 32 1066 2166
35
+ 33 1076 2089
36
+ 34 1088 2020
37
+ 35 1122 1996
38
+ 36 1157 1971
39
+ 37 1195 1950
40
+ 38 1230 1923
41
+ 39 1246 1869
42
+ 40 1280 1843
43
+ 41 1315 1816
44
+ 42 1349 1788
45
+ 43 1362 1733
46
+ 44 1398 1709
47
+ 45 1433 1682
48
+ 46 1466 1654
49
+ 47 1479 1602
50
+ 48 1486 1547
51
+ 49 1521 1521
52
+ 50 1557 1496
53
+ 51 1596 1473
54
+ 52 1646 1459
55
+ 53 1683 1434
56
+ 54 1718 1405
57
+ 55 1752 1377
58
+ 56 1787 1348
59
+ 57 1822 1319
60
+ 58 1844 1281
61
+ 59 1877 1251
62
+ 60 1915 1224
63
+ 61 1924 1179
64
+ 62 1960 1151
65
+ 63 1972 1109
66
+ 64 2007 1081
67
+ 65 2042 1052
68
+ 66 2076 1022
69
+ 67 2110 993
70
+ 68 2124 954
71
+ 69 2159 925
72
+ 70 2168 885
73
+ 71 2205 857
74
+ 72 2220 821
75
+ 73 2255 792
76
+ 74 2289 763
77
+ 75 2324 734
78
+ 76 2360 704
79
+ 77 2372 669
80
+ 78 2405 639
81
+ 79 2413 603
82
+ 80 2422 568
83
+ 81 2455 539
84
+ 82 2490 510
85
+ 83 2501 476
86
+ 84 2536 447
87
+ 85 2572 418
88
+ 86 2608 389
89
+ 87 2644 360
90
+ 88 2679 331
91
+ 89 2713 301
92
+ 90 2728 269
93
+ 91 2765 240
94
+ 92 2800 210
95
+ 93 2822 180
96
+ 94 2857 150
97
+ 95 2892 120
98
+ 96 2908 89
99
+ 97 2943 60
100
+ 98 2964 29
101
+ 99 2980 0
catboost_info/tmp/cat_feature_index.17f2f383-11eeddeb-4a42fe68-9d5bbcbb.tmp ADDED
Binary file (4 Bytes). View file
 
datasets/.DS_Store ADDED
Binary file (8.2 kB). View file