Spaces:
Sleeping
Sleeping
HardWorkingStation committed on
Commit ·
0546d21
1
Parent(s): f496f49
Initial commit
Browse files
- src/model_predictions/{ct_cbc.csv → catboost/ct_cbc.csv} +0 -0
- src/model_predictions/{sm_cbc.csv → catboost/sm_cbc.csv} +0 -0
- src/model_predictions/{tm_dependend_cbc.csv → catboost/tm_dependend_cbc.csv} +0 -0
- src/model_predictions/{tm_independend_cbc.csv → catboost/tm_independend_cbc.csv} +0 -0
- src/model_predictions/random_forest/tm_rfc.csv +0 -0
- src/model_predictions/xgboost/sm_xgb.csv +0 -0
- src/test.ipynb +0 -0
- src/web_app.py +144 -16
src/model_predictions/{ct_cbc.csv → catboost/ct_cbc.csv}
RENAMED
|
File without changes
|
src/model_predictions/{sm_cbc.csv → catboost/sm_cbc.csv}
RENAMED
|
File without changes
|
src/model_predictions/{tm_dependend_cbc.csv → catboost/tm_dependend_cbc.csv}
RENAMED
|
File without changes
|
src/model_predictions/{tm_independend_cbc.csv → catboost/tm_independend_cbc.csv}
RENAMED
|
File without changes
|
src/model_predictions/random_forest/tm_rfc.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
src/model_predictions/xgboost/sm_xgb.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
src/test.ipynb
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
src/web_app.py
CHANGED
|
@@ -13,10 +13,14 @@ import tools
|
|
| 13 |
dataset, target, treatment = tools.get_data()
|
| 14 |
|
| 15 |
# загрузим предикты моделей
|
| 16 |
-
ct_cbc = pd.read_csv('src/model_predictions/ct_cbc.csv', index_col='Unnamed: 0')
|
| 17 |
-
sm_cbc = pd.read_csv('src/model_predictions/sm_cbc.csv', index_col='Unnamed: 0')
|
| 18 |
-
tm_dependend_cbc = pd.read_csv('src/model_predictions/tm_dependend_cbc.csv', index_col='Unnamed: 0')
|
| 19 |
-
tm_independend_cbc = pd.read_csv('src/model_predictions/tm_independend_cbc.csv', index_col='Unnamed: 0')
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
# загрузим данные
|
| 22 |
data_train_index = pd.read_csv('data/data_train_index.csv')
|
|
@@ -242,36 +246,160 @@ if show_ml_reasons:
|
|
| 242 |
catboost_uplift_by_percentile = uplift_by_percentile(target_filtered, final_uplift, treatment_filtered)
|
| 243 |
catboost_qini_auc_score = qini_auc_score(target_filtered, final_uplift, treatment_filtered)
|
| 244 |
catboost_weighted_average_uplift = tools.get_weighted_average_uplift(target_filtered, final_uplift, treatment_filtered)
|
| 245 |
-
qini_curve_score = qini_curve(target_filtered, final_uplift, treatment_filtered)
|
| 246 |
|
| 247 |
# отображаем метрики
|
| 248 |
col1, col2, col3 = st.columns(3)
|
| 249 |
-
col1.metric(
|
| 250 |
-
|
| 251 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
|
| 253 |
st.write('Uplift по процентилям')
|
| 254 |
st.write(catboost_uplift_by_percentile)
|
| 255 |
st.form_submit_button('Обновить графики', help='При изменении флагов')
|
|
|
|
| 256 |
perfect_qini = st.checkbox('Отрисовать идеальную метрику qini')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
|
|
|
|
| 258 |
# получаем координаты пользовательской метрики для точки на графике
|
| 259 |
x, y = qini_curve_user_score[0][1], qini_curve_user_score[1][1]
|
| 260 |
# получаем объект UpliftCurveDisplay с осями и графиком matplotlib
|
| 261 |
-
|
| 262 |
# добавляем пользовательскую метрику на оси графика
|
| 263 |
-
|
| 264 |
# добавляем обозначение метрики пользователя в легенду
|
| 265 |
-
|
| 266 |
-
st.pyplot(
|
|
|
|
| 267 |
prefect_uplift = st.checkbox('Отрисовать идеальную метрику uplift')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
# получаем координаты пользовательской метрики для точки на графике
|
| 270 |
x, y = uplift_curve_user_score[0][1], uplift_curve_user_score[1][1]
|
| 271 |
# получаем объект UpliftCurveDisplay с осями и графиком matplotlib
|
| 272 |
-
|
| 273 |
# добавляем пользовательскую метрику на оси графика
|
| 274 |
-
|
| 275 |
# добавляем обозначение метрики пользователя в легенду
|
| 276 |
-
|
| 277 |
-
st.pyplot(
|
|
|
|
| 13 |
dataset, target, treatment = tools.get_data()
|
| 14 |
|
| 15 |
# загрузим предикты моделей
|
| 16 |
+
ct_cbc = pd.read_csv('src/model_predictions/catboost/ct_cbc.csv', index_col='Unnamed: 0')
|
| 17 |
+
sm_cbc = pd.read_csv('src/model_predictions/catboost/sm_cbc.csv', index_col='Unnamed: 0')
|
| 18 |
+
tm_dependend_cbc = pd.read_csv('src/model_predictions/catboost/tm_dependend_cbc.csv', index_col='Unnamed: 0')
|
| 19 |
+
tm_independend_cbc = pd.read_csv('src/model_predictions/catboost/tm_independend_cbc.csv', index_col='Unnamed: 0')
|
| 20 |
+
|
| 21 |
+
tm_rfc = pd.read_csv('src/model_predictions/random_forest/tm_rfc.csv', index_col='Unnamed: 0')
|
| 22 |
+
|
| 23 |
+
sm_xgboost = pd.read_csv('src/model_predictions/xgboost/sm_xgb.csv', index_col='Unnamed: 0')
|
| 24 |
|
| 25 |
# загрузим данные
|
| 26 |
data_train_index = pd.read_csv('data/data_train_index.csv')
|
|
|
|
| 246 |
catboost_uplift_by_percentile = uplift_by_percentile(target_filtered, final_uplift, treatment_filtered)
|
| 247 |
catboost_qini_auc_score = qini_auc_score(target_filtered, final_uplift, treatment_filtered)
|
| 248 |
catboost_weighted_average_uplift = tools.get_weighted_average_uplift(target_filtered, final_uplift, treatment_filtered)
|
|
|
|
| 249 |
|
| 250 |
# отображаем метрики
|
| 251 |
col1, col2, col3 = st.columns(3)
|
| 252 |
+
col1.metric(
|
| 253 |
+
label=f'Uplift для {k}% пользователей',
|
| 254 |
+
value=f'{catboost_uplift_at_k:.4f}',
|
| 255 |
+
delta=f'{catboost_uplift_at_k - user_metric_uplift_at_k:.4f}'
|
| 256 |
+
)
|
| 257 |
+
col2.metric(
|
| 258 |
+
label=f'Qini AUC score',
|
| 259 |
+
value=f'{catboost_qini_auc_score:.4f}',
|
| 260 |
+
help='Всегда будет 0 для пользователя',
|
| 261 |
+
delta=f'{catboost_qini_auc_score - user_metric_qini_auc_score:.4f}'
|
| 262 |
+
)
|
| 263 |
+
col3.metric(
|
| 264 |
+
label=f'Weighted average uplift',
|
| 265 |
+
value=f'{catboost_weighted_average_uplift:.4f}',
|
| 266 |
+
delta=f'{catboost_weighted_average_uplift - user_metric_weighted_average_uplift:.4f}'
|
| 267 |
+
)
|
| 268 |
|
| 269 |
st.write('Uplift по процентилям')
|
| 270 |
st.write(catboost_uplift_by_percentile)
|
| 271 |
st.form_submit_button('Обновить графики', help='При изменении флагов')
|
| 272 |
+
|
| 273 |
perfect_qini = st.checkbox('Отрисовать идеальную метрику qini')
|
| 274 |
+
# получаем координаты пользовательской метрики для точки на графике
|
| 275 |
+
x, y = qini_curve_user_score[0][1], qini_curve_user_score[1][1]
|
| 276 |
+
# получаем объект UpliftCurveDisplay с осями и графиком matplotlib
|
| 277 |
+
qini_fig = plot_qini_curve(target_test, sm_cbc['0'], treatment_test, perfect=perfect_qini)
|
| 278 |
+
# добавляем пользовательскую метрику на оси графика
|
| 279 |
+
qini_fig.ax_.plot(x, y, 'ro', markersize=3, label='Analitic qini')
|
| 280 |
+
# добавляем обозначение метрики пользователя в легенду
|
| 281 |
+
qini_fig.ax_.legend(loc=u'upper left', bbox_to_anchor=(1, 1))
|
| 282 |
+
st.pyplot(qini_fig.figure_)
|
| 283 |
+
|
| 284 |
+
prefect_uplift = st.checkbox('Отрисовать идеальную метрику uplift')
|
| 285 |
+
# получаем координаты пользовательской метрики для точки на графике
|
| 286 |
+
x, y = uplift_curve_user_score[0][1], uplift_curve_user_score[1][1]
|
| 287 |
+
# получаем объект UpliftCurveDisplay с осями и графиком matplotlib
|
| 288 |
+
uplift_fig = plot_uplift_curve(target_test, sm_cbc['0'], treatment_test, perfect=prefect_uplift)
|
| 289 |
+
# добавляем пользовательскую метрику на оси графика
|
| 290 |
+
uplift_fig.ax_.plot(x, y, 'ro', markersize=3, label='Analitic qini')
|
| 291 |
+
# добавляем обозначение метрики пользователя в легенду
|
| 292 |
+
uplift_fig.ax_.legend(loc=u'upper left', bbox_to_anchor=(1, 1))
|
| 293 |
+
st.pyplot(uplift_fig.figure_)
|
| 294 |
+
|
| 295 |
+
with st.expander('Решение с помощью Random forest (sklearn)'):
|
| 296 |
+
with st.form(key='sklearn_metricks'):
|
| 297 |
+
|
| 298 |
+
final_rf_uplift = tm_rfc.loc[filtered_dataset.index]['0']
|
| 299 |
+
|
| 300 |
+
# считаем метрики для ML
|
| 301 |
+
random_forest_uplift_at_k = uplift_at_k(target_filtered, final_rf_uplift, treatment_filtered, strategy='overall', k=k)
|
| 302 |
+
random_forest_uplift_by_percentile = uplift_by_percentile(target_filtered, final_rf_uplift, treatment_filtered)
|
| 303 |
+
random_forest_qini_auc_score = qini_auc_score(target_filtered, final_rf_uplift, treatment_filtered)
|
| 304 |
+
random_forest_weighted_average_uplift = tools.get_weighted_average_uplift(target_filtered, final_rf_uplift, treatment_filtered)
|
| 305 |
+
|
| 306 |
+
# отображаем метрики
|
| 307 |
+
col1, col2, col3 = st.columns(3)
|
| 308 |
+
col1.metric(
|
| 309 |
+
label=f'Uplift для {k}% пользователей',
|
| 310 |
+
value=f'{random_forest_uplift_at_k:.4f}',
|
| 311 |
+
delta=f'{random_forest_uplift_at_k - user_metric_uplift_at_k:.4f}'
|
| 312 |
+
)
|
| 313 |
+
col2.metric(
|
| 314 |
+
label=f'Qini AUC score',
|
| 315 |
+
value=f'{random_forest_qini_auc_score:.4f}',
|
| 316 |
+
help='Всегда будет 0 для пользователя',
|
| 317 |
+
delta=f'{random_forest_qini_auc_score - user_metric_qini_auc_score:.4f}'
|
| 318 |
+
)
|
| 319 |
+
col3.metric(
|
| 320 |
+
label=f'Weighted average uplift',
|
| 321 |
+
value=f'{random_forest_weighted_average_uplift:.4f}',
|
| 322 |
+
delta=f'{random_forest_weighted_average_uplift - user_metric_weighted_average_uplift:.4f}'
|
| 323 |
+
)
|
| 324 |
+
|
| 325 |
+
st.write('Uplift по процентилям')
|
| 326 |
+
st.write(random_forest_uplift_by_percentile)
|
| 327 |
+
st.form_submit_button('Обновить графики', help='При изменении флагов')
|
| 328 |
|
| 329 |
+
perfect_qini = st.checkbox('Отрисовать идеальную метрику qini')
|
| 330 |
# получаем координаты пользовательской метрики для точки на графике
|
| 331 |
x, y = qini_curve_user_score[0][1], qini_curve_user_score[1][1]
|
| 332 |
# получаем объект UpliftCurveDisplay с осями и графиком matplotlib
|
| 333 |
+
qini_fig = plot_qini_curve(target_test, tm_rfc['0'], treatment_test, perfect=perfect_qini)
|
| 334 |
# добавляем пользовательскую метрику на оси графика
|
| 335 |
+
qini_fig.ax_.plot(x, y, 'ro', markersize=3, label='Analitic qini')
|
| 336 |
# добавляем обозначение метрики пользователя в легенду
|
| 337 |
+
qini_fig.ax_.legend(loc=u'upper left', bbox_to_anchor=(1, 1))
|
| 338 |
+
st.pyplot(qini_fig.figure_)
|
| 339 |
+
|
| 340 |
prefect_uplift = st.checkbox('Отрисовать идеальную метрику uplift')
|
| 341 |
+
# получаем координаты пользовательской метрики для точки на графике
|
| 342 |
+
x, y = uplift_curve_user_score[0][1], uplift_curve_user_score[1][1]
|
| 343 |
+
# получаем объект UpliftCurveDisplay с осями и графиком matplotlib
|
| 344 |
+
uplift_fig = plot_uplift_curve(target_test, tm_rfc['0'], treatment_test, perfect=prefect_uplift)
|
| 345 |
+
# добавляем пользовательскую метрику на оси графика
|
| 346 |
+
uplift_fig.ax_.plot(x, y, 'ro', markersize=3, label='Analitic qini')
|
| 347 |
+
# добавляем обозначение метрики пользователя в легенду
|
| 348 |
+
uplift_fig.ax_.legend(loc=u'upper left', bbox_to_anchor=(1, 1))
|
| 349 |
+
st.pyplot(uplift_fig.figure_)
|
| 350 |
|
| 351 |
+
with st.expander('Решение с помощью XGBoost'):
|
| 352 |
+
with st.form(key='xgboost_metricks'):
|
| 353 |
+
|
| 354 |
+
final_xgboost_uplift = sm_xgboost.loc[filtered_dataset.index]['0']
|
| 355 |
+
|
| 356 |
+
# считаем метрики для ML
|
| 357 |
+
xgboost_uplift_at_k = uplift_at_k(target_filtered, final_xgboost_uplift, treatment_filtered, strategy='overall', k=k)
|
| 358 |
+
xgboost_uplift_by_percentile = uplift_by_percentile(target_filtered, final_xgboost_uplift, treatment_filtered)
|
| 359 |
+
xgboost_qini_auc_score = qini_auc_score(target_filtered, final_xgboost_uplift, treatment_filtered)
|
| 360 |
+
xgboost_weighted_average_uplift = tools.get_weighted_average_uplift(target_filtered, final_xgboost_uplift, treatment_filtered)
|
| 361 |
+
|
| 362 |
+
# отображаем метрики
|
| 363 |
+
col1, col2, col3 = st.columns(3)
|
| 364 |
+
col1.metric(
|
| 365 |
+
label=f'Uplift для {k}% пользователей',
|
| 366 |
+
value=f'{xgboost_uplift_at_k:.4f}',
|
| 367 |
+
delta=f'{xgboost_uplift_at_k - user_metric_uplift_at_k:.4f}'
|
| 368 |
+
)
|
| 369 |
+
col2.metric(
|
| 370 |
+
label=f'Qini AUC score',
|
| 371 |
+
value=f'{xgboost_qini_auc_score:.4f}',
|
| 372 |
+
help='Всегда будет 0 для пользователя',
|
| 373 |
+
delta=f'{xgboost_qini_auc_score - user_metric_qini_auc_score:.4f}'
|
| 374 |
+
)
|
| 375 |
+
col3.metric(
|
| 376 |
+
label=f'Weighted average uplift',
|
| 377 |
+
value=f'{xgboost_weighted_average_uplift:.4f}',
|
| 378 |
+
delta=f'{xgboost_weighted_average_uplift - user_metric_weighted_average_uplift:.4f}'
|
| 379 |
+
)
|
| 380 |
+
|
| 381 |
+
st.write('Uplift по процентилям')
|
| 382 |
+
st.write(xgboost_uplift_by_percentile)
|
| 383 |
+
st.form_submit_button('Обновить графики', help='При изменении флагов')
|
| 384 |
+
|
| 385 |
+
perfect_qini = st.checkbox('Отрисовать идеальную метрику qini')
|
| 386 |
+
# получаем координаты пользовательской метрики для точки на графике
|
| 387 |
+
x, y = qini_curve_user_score[0][1], qini_curve_user_score[1][1]
|
| 388 |
+
# получаем объект UpliftCurveDisplay с осями и графиком matplotlib
|
| 389 |
+
qini_fig = plot_qini_curve(target_test, sm_xgboost['0'], treatment_test, perfect=perfect_qini)
|
| 390 |
+
# добавляем пользовательскую метрику на оси графика
|
| 391 |
+
qini_fig.ax_.plot(x, y, 'ro', markersize=3, label='Analitic qini')
|
| 392 |
+
# добавляем обозначение метрики пользователя в легенду
|
| 393 |
+
qini_fig.ax_.legend(loc=u'upper left', bbox_to_anchor=(1, 1))
|
| 394 |
+
st.pyplot(qini_fig.figure_)
|
| 395 |
+
|
| 396 |
+
prefect_uplift = st.checkbox('Отрисовать идеальную метрику uplift')
|
| 397 |
# получаем координаты пользовательской метрики для точки на графике
|
| 398 |
x, y = uplift_curve_user_score[0][1], uplift_curve_user_score[1][1]
|
| 399 |
# получаем объект UpliftCurveDisplay с осями и графиком matplotlib
|
| 400 |
+
uplift_fig = plot_uplift_curve(target_test, sm_xgboost['0'], treatment_test, perfect=prefect_uplift)
|
| 401 |
# добавляем пользовательскую метрику на оси графика
|
| 402 |
+
uplift_fig.ax_.plot(x, y, 'ro', markersize=3, label='Analitic qini')
|
| 403 |
# добавляем обозначение метрики пользователя в легенду
|
| 404 |
+
uplift_fig.ax_.legend(loc=u'upper left', bbox_to_anchor=(1, 1))
|
| 405 |
+
st.pyplot(uplift_fig.figure_)
|