JonnyBP committed on
Commit ·
c5d0f1a
1
Parent(s): 3c446c6
feat: add ensemble and optuna. #5 #6 #9
Browse files
- .gitignore +3 -0
- configs/best_params.yaml +15 -0
- configs/models.yaml +7 -3
- notebooks/04_baseline_v2.ipynb +98 -58
- notebooks/05_ensemble_v2.ipynb +0 -0
- notebooks/06_Optuna_v2_final.ipynb +1032 -0
- reports/v2/12_ensemble_comparativa.png +3 -0
- reports/v2/13_best_model_test.png +3 -0
- reports/v2/14_optuna_comparativa.png +3 -0
- reports/v2/14_optuna_convergencia.png +3 -0
- reports/v2/15_optuna_convergencia.png +3 -0
.gitignore
CHANGED
|
@@ -54,3 +54,6 @@ mlruns/
|
|
| 54 |
mlartifacts/
|
| 55 |
|
| 56 |
#jony
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
mlartifacts/
|
| 55 |
|
| 56 |
#jony
|
| 57 |
+
06_Optuna_v2.ipynb
|
| 58 |
+
optunaas.ipynb
|
| 59 |
+
07_data_augmentation_v2.ipynb
|
configs/best_params.yaml
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
winner: LR tuned
|
| 2 |
+
hyperparameters:
|
| 3 |
+
ngram_range: '1_2'
|
| 4 |
+
max_features: 4045
|
| 5 |
+
min_df: 2
|
| 6 |
+
sublinear_tf: false
|
| 7 |
+
C: 0.3235215031170205
|
| 8 |
+
results:
|
| 9 |
+
f1_test: 0.7579
|
| 10 |
+
f1_train: 0.8987
|
| 11 |
+
train_test_gap_pp: 14.07
|
| 12 |
+
cv_test_gap_pp: 4.76
|
| 13 |
+
roc_auc: 0.81
|
| 14 |
+
fp: 18
|
| 15 |
+
fn: 30
|
configs/models.yaml
CHANGED
|
@@ -7,17 +7,21 @@ models:
|
|
| 7 |
|
| 8 |
random_forest:
|
| 9 |
n_estimators: 100
|
| 10 |
-
max_depth:
|
| 11 |
-
min_samples_split:
|
|
|
|
|
|
|
| 12 |
class_weight: balanced
|
| 13 |
n_jobs: -1
|
| 14 |
|
| 15 |
xgboost:
|
| 16 |
n_estimators: 100
|
| 17 |
-
max_depth:
|
| 18 |
learning_rate: 0.1
|
| 19 |
subsample: 0.8
|
| 20 |
colsample_bytree: 0.8
|
|
|
|
|
|
|
| 21 |
scale_pos_weight: 1
|
| 22 |
|
| 23 |
evaluation:
|
|
|
|
| 7 |
|
| 8 |
random_forest:
|
| 9 |
n_estimators: 100
|
| 10 |
+
max_depth: 10
|
| 11 |
+
min_samples_split: 10
|
| 12 |
+
min_samples_leaf: 5
|
| 13 |
+
max_features: sqrt
|
| 14 |
class_weight: balanced
|
| 15 |
n_jobs: -1
|
| 16 |
|
| 17 |
xgboost:
|
| 18 |
n_estimators: 100
|
| 19 |
+
max_depth: 3
|
| 20 |
learning_rate: 0.1
|
| 21 |
subsample: 0.8
|
| 22 |
colsample_bytree: 0.8
|
| 23 |
+
min_child_weight: 5
|
| 24 |
+
reg_lambda: 1
|
| 25 |
scale_pos_weight: 1
|
| 26 |
|
| 27 |
evaluation:
|
notebooks/04_baseline_v2.ipynb
CHANGED
|
@@ -39,7 +39,7 @@
|
|
| 39 |
},
|
| 40 |
{
|
| 41 |
"cell_type": "code",
|
| 42 |
-
"execution_count":
|
| 43 |
"metadata": {},
|
| 44 |
"outputs": [
|
| 45 |
{
|
|
@@ -56,9 +56,17 @@
|
|
| 56 |
},
|
| 57 |
{
|
| 58 |
"cell_type": "code",
|
| 59 |
-
"execution_count":
|
| 60 |
"metadata": {},
|
| 61 |
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
{
|
| 63 |
"name": "stdout",
|
| 64 |
"output_type": "stream",
|
|
@@ -103,7 +111,7 @@
|
|
| 103 |
},
|
| 104 |
{
|
| 105 |
"cell_type": "code",
|
| 106 |
-
"execution_count":
|
| 107 |
"metadata": {},
|
| 108 |
"outputs": [
|
| 109 |
{
|
|
@@ -148,7 +156,7 @@
|
|
| 148 |
},
|
| 149 |
{
|
| 150 |
"cell_type": "code",
|
| 151 |
-
"execution_count":
|
| 152 |
"metadata": {},
|
| 153 |
"outputs": [
|
| 154 |
{
|
|
@@ -202,7 +210,7 @@
|
|
| 202 |
},
|
| 203 |
{
|
| 204 |
"cell_type": "code",
|
| 205 |
-
"execution_count":
|
| 206 |
"metadata": {},
|
| 207 |
"outputs": [
|
| 208 |
{
|
|
@@ -262,7 +270,7 @@
|
|
| 262 |
},
|
| 263 |
{
|
| 264 |
"cell_type": "code",
|
| 265 |
-
"execution_count":
|
| 266 |
"metadata": {},
|
| 267 |
"outputs": [
|
| 268 |
{
|
|
@@ -272,12 +280,12 @@
|
|
| 272 |
"Ejecutando 5-fold cross-validation...\n",
|
| 273 |
"Completado.\n",
|
| 274 |
"\n",
|
| 275 |
-
"Métrica Train Val
|
| 276 |
-
"-----------------------------------------------------------------\n",
|
| 277 |
-
" f1 0.8839 0.7015 0.
|
| 278 |
-
" precision 0.8841 0.7033 0.
|
| 279 |
-
" recall 0.8841 0.7025 0.
|
| 280 |
-
" roc_auc 0.9552 0.7737 0.
|
| 281 |
]
|
| 282 |
}
|
| 283 |
],
|
|
@@ -302,22 +310,22 @@
|
|
| 302 |
")\n",
|
| 303 |
"\n",
|
| 304 |
"print('Completado.\\n')\n",
|
| 305 |
-
"print(f\"{'Métrica':20} {'Train':>10} {'Val':>10} {'
|
| 306 |
-
"print('-' *
|
|
|
|
| 307 |
"for metric in ['f1', 'precision', 'recall', 'roc_auc']:\n",
|
| 308 |
" train_mean = cv_results[f'train_{metric}'].mean()\n",
|
| 309 |
" val_mean = cv_results[f'test_{metric}'].mean()\n",
|
| 310 |
" val_std = cv_results[f'test_{metric}'].std()\n",
|
| 311 |
-
"
|
| 312 |
-
"
|
| 313 |
-
"
|
| 314 |
-
"
|
| 315 |
-
" print(f' {\"\":18} std={val_std:.4f} ← alta varianza entre folds')"
|
| 316 |
]
|
| 317 |
},
|
| 318 |
{
|
| 319 |
"cell_type": "code",
|
| 320 |
-
"execution_count":
|
| 321 |
"metadata": {},
|
| 322 |
"outputs": [
|
| 323 |
{
|
|
@@ -398,7 +406,7 @@
|
|
| 398 |
},
|
| 399 |
{
|
| 400 |
"cell_type": "code",
|
| 401 |
-
"execution_count":
|
| 402 |
"metadata": {},
|
| 403 |
"outputs": [
|
| 404 |
{
|
|
@@ -408,10 +416,13 @@
|
|
| 408 |
"=======================================================\n",
|
| 409 |
"RESULTADOS FINALES — LOGISTIC REGRESSION BASELINE\n",
|
| 410 |
"=======================================================\n",
|
| 411 |
-
"F1-weighted
|
| 412 |
-
"F1-weighted
|
| 413 |
-
"
|
| 414 |
-
"
|
|
|
|
|
|
|
|
|
|
| 415 |
"\n",
|
| 416 |
" precision recall f1-score support\n",
|
| 417 |
"\n",
|
|
@@ -437,23 +448,37 @@
|
|
| 437 |
"# Métricas train vs test (chequeo de overfitting para rúbrica)\n",
|
| 438 |
"f1_train = f1_score(y_train, y_pred_train, average='weighted')\n",
|
| 439 |
"f1_test = f1_score(y_test, y_pred, average='weighted')\n",
|
| 440 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 441 |
"\n",
|
| 442 |
"print('=' * 55)\n",
|
| 443 |
"print('RESULTADOS FINALES — LOGISTIC REGRESSION BASELINE')\n",
|
| 444 |
"print('=' * 55)\n",
|
| 445 |
-
"
|
| 446 |
-
"print(f'F1-weighted
|
| 447 |
-
"print(f'
|
| 448 |
-
"
|
| 449 |
-
"
|
|
|
|
| 450 |
"print()\n",
|
| 451 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 452 |
]
|
| 453 |
},
|
| 454 |
{
|
| 455 |
"cell_type": "code",
|
| 456 |
-
"execution_count":
|
| 457 |
"metadata": {},
|
| 458 |
"outputs": [
|
| 459 |
{
|
|
@@ -519,7 +544,7 @@
|
|
| 519 |
},
|
| 520 |
{
|
| 521 |
"cell_type": "code",
|
| 522 |
-
"execution_count":
|
| 523 |
"metadata": {},
|
| 524 |
"outputs": [
|
| 525 |
{
|
|
@@ -595,7 +620,7 @@
|
|
| 595 |
},
|
| 596 |
{
|
| 597 |
"cell_type": "code",
|
| 598 |
-
"execution_count":
|
| 599 |
"metadata": {},
|
| 600 |
"outputs": [
|
| 601 |
{
|
|
@@ -676,7 +701,7 @@
|
|
| 676 |
},
|
| 677 |
{
|
| 678 |
"cell_type": "code",
|
| 679 |
-
"execution_count":
|
| 680 |
"metadata": {},
|
| 681 |
"outputs": [
|
| 682 |
{
|
|
@@ -769,15 +794,15 @@
|
|
| 769 |
},
|
| 770 |
{
|
| 771 |
"cell_type": "code",
|
| 772 |
-
"execution_count":
|
| 773 |
"metadata": {},
|
| 774 |
"outputs": [
|
| 775 |
{
|
| 776 |
"name": "stderr",
|
| 777 |
"output_type": "stream",
|
| 778 |
"text": [
|
| 779 |
-
"2026/05/
|
| 780 |
-
"2026/05/
|
| 781 |
]
|
| 782 |
},
|
| 783 |
{
|
|
@@ -785,7 +810,7 @@
|
|
| 785 |
"output_type": "stream",
|
| 786 |
"text": [
|
| 787 |
"MLflow registrado\n",
|
| 788 |
-
" Run ID :
|
| 789 |
" Experimento: Youtube_project_experiment\n",
|
| 790 |
" Run name : lr_baseline\n"
|
| 791 |
]
|
|
@@ -819,8 +844,14 @@
|
|
| 819 |
" # Metrics test final\n",
|
| 820 |
" mlflow.log_metric('test_f1', f1_test)\n",
|
| 821 |
" mlflow.log_metric('train_f1', f1_train)\n",
|
| 822 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 823 |
" mlflow.log_metric('test_roc_auc', roc_auc_score(y_test, y_pred_proba))\n",
|
|
|
|
| 824 |
" mlflow.log_metric('fp_count', len(fp))\n",
|
| 825 |
" mlflow.log_metric('fn_count', len(fn))\n",
|
| 826 |
"\n",
|
|
@@ -850,7 +881,7 @@
|
|
| 850 |
},
|
| 851 |
{
|
| 852 |
"cell_type": "code",
|
| 853 |
-
"execution_count":
|
| 854 |
"metadata": {},
|
| 855 |
"outputs": [
|
| 856 |
{
|
|
@@ -898,7 +929,7 @@
|
|
| 898 |
},
|
| 899 |
{
|
| 900 |
"cell_type": "code",
|
| 901 |
-
"execution_count":
|
| 902 |
"metadata": {},
|
| 903 |
"outputs": [
|
| 904 |
{
|
|
@@ -918,19 +949,21 @@
|
|
| 918 |
"Metricas test final:\n",
|
| 919 |
" F1-weighted : 0.7531\n",
|
| 920 |
" Train F1 : 0.8623\n",
|
| 921 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 922 |
"\n",
|
| 923 |
"Errores:\n",
|
| 924 |
-
"
|
| 925 |
-
"
|
| 926 |
"\n",
|
| 927 |
"Coeficientes sospechosos detectados:\n",
|
| 928 |
" Revisar si stefan/peggy/hubbard aparecen en top coef\n",
|
| 929 |
-
" Si
|
|
|
|
| 930 |
"\n",
|
| 931 |
-
"Siguiente: 05_ensemble.ipynb\n",
|
| 932 |
-
" Random Forest + XGBoost\n",
|
| 933 |
-
" Comparar contra este baseline con misma CV\n",
|
| 934 |
"\n"
|
| 935 |
]
|
| 936 |
}
|
|
@@ -939,9 +972,14 @@
|
|
| 939 |
"f1_cv = cv_results['test_f1'].mean()\n",
|
| 940 |
"roc_cv = cv_results['test_roc_auc'].mean()\n",
|
| 941 |
"\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 942 |
"print(f\"\"\"\n",
|
| 943 |
"CONCLUSIONES — BASELINE\n",
|
| 944 |
-
"
|
| 945 |
"Modelo : Logistic Regression + TF-IDF\n",
|
| 946 |
"Dataset: 800 train / 200 test / {CV_FOLDS}-fold CV\n",
|
| 947 |
"\n",
|
|
@@ -952,19 +990,21 @@
|
|
| 952 |
"Metricas test final:\n",
|
| 953 |
" F1-weighted : {f1_test:.4f}\n",
|
| 954 |
" Train F1 : {f1_train:.4f}\n",
|
| 955 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 956 |
"\n",
|
| 957 |
"Errores:\n",
|
| 958 |
-
"
|
| 959 |
-
"
|
| 960 |
"\n",
|
| 961 |
"Coeficientes sospechosos detectados:\n",
|
| 962 |
" Revisar si stefan/peggy/hubbard aparecen en top coef\n",
|
| 963 |
-
" Si
|
|
|
|
| 964 |
"\n",
|
| 965 |
-
"Siguiente: 05_ensemble.ipynb\n",
|
| 966 |
-
" Random Forest + XGBoost\n",
|
| 967 |
-
" Comparar contra este baseline con misma CV\n",
|
| 968 |
"\"\"\")"
|
| 969 |
]
|
| 970 |
}
|
|
|
|
| 39 |
},
|
| 40 |
{
|
| 41 |
"cell_type": "code",
|
| 42 |
+
"execution_count": 1,
|
| 43 |
"metadata": {},
|
| 44 |
"outputs": [
|
| 45 |
{
|
|
|
|
| 56 |
},
|
| 57 |
{
|
| 58 |
"cell_type": "code",
|
| 59 |
+
"execution_count": 2,
|
| 60 |
"metadata": {},
|
| 61 |
"outputs": [
|
| 62 |
+
{
|
| 63 |
+
"name": "stderr",
|
| 64 |
+
"output_type": "stream",
|
| 65 |
+
"text": [
|
| 66 |
+
"/home/under/miniconda3/envs/py310/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
| 67 |
+
" from .autonotebook import tqdm as notebook_tqdm\n"
|
| 68 |
+
]
|
| 69 |
+
},
|
| 70 |
{
|
| 71 |
"name": "stdout",
|
| 72 |
"output_type": "stream",
|
|
|
|
| 111 |
},
|
| 112 |
{
|
| 113 |
"cell_type": "code",
|
| 114 |
+
"execution_count": 3,
|
| 115 |
"metadata": {},
|
| 116 |
"outputs": [
|
| 117 |
{
|
|
|
|
| 156 |
},
|
| 157 |
{
|
| 158 |
"cell_type": "code",
|
| 159 |
+
"execution_count": 4,
|
| 160 |
"metadata": {},
|
| 161 |
"outputs": [
|
| 162 |
{
|
|
|
|
| 210 |
},
|
| 211 |
{
|
| 212 |
"cell_type": "code",
|
| 213 |
+
"execution_count": 5,
|
| 214 |
"metadata": {},
|
| 215 |
"outputs": [
|
| 216 |
{
|
|
|
|
| 270 |
},
|
| 271 |
{
|
| 272 |
"cell_type": "code",
|
| 273 |
+
"execution_count": 12,
|
| 274 |
"metadata": {},
|
| 275 |
"outputs": [
|
| 276 |
{
|
|
|
|
| 280 |
"Ejecutando 5-fold cross-validation...\n",
|
| 281 |
"Completado.\n",
|
| 282 |
"\n",
|
| 283 |
+
"Métrica Train Val Gap(pp) Std Val\n",
|
| 284 |
+
"----------------------------------------------------------------------\n",
|
| 285 |
+
" f1 0.8839 0.7015 18.24 pp 0.0312 ⚠️ Alto gap\n",
|
| 286 |
+
" precision 0.8841 0.7033 18.09 pp 0.0317 ⚠️ Alto gap\n",
|
| 287 |
+
" recall 0.8841 0.7025 18.16 pp 0.0308 ⚠️ Alto gap\n",
|
| 288 |
+
" roc_auc 0.9552 0.7737 18.15 pp 0.0200 ⚠️ Alto gap\n"
|
| 289 |
]
|
| 290 |
}
|
| 291 |
],
|
|
|
|
| 310 |
")\n",
|
| 311 |
"\n",
|
| 312 |
"print('Completado.\\n')\n",
|
| 313 |
+
"print(f\"{'Métrica':20} {'Train':>10} {'Val':>10} {'Gap(pp)':>15} {'Std Val':>10}\")\n",
|
| 314 |
+
"print('-' * 70)\n",
|
| 315 |
+
"\n",
|
| 316 |
"for metric in ['f1', 'precision', 'recall', 'roc_auc']:\n",
|
| 317 |
" train_mean = cv_results[f'train_{metric}'].mean()\n",
|
| 318 |
" val_mean = cv_results[f'test_{metric}'].mean()\n",
|
| 319 |
" val_std = cv_results[f'test_{metric}'].std()\n",
|
| 320 |
+
" gap_pp = abs(train_mean - val_mean) * 100\n",
|
| 321 |
+
"\n",
|
| 322 |
+
" flag = '⚠️ Alto gap' if gap_pp > 5 else '✅ Estable'\n",
|
| 323 |
+
" print(f' {metric:18} {train_mean:>10.4f} {val_mean:>10.4f} {gap_pp:>11.2f} pp {val_std:>10.4f} {flag:>10}')\n"
|
|
|
|
| 324 |
]
|
| 325 |
},
|
| 326 |
{
|
| 327 |
"cell_type": "code",
|
| 328 |
+
"execution_count": 13,
|
| 329 |
"metadata": {},
|
| 330 |
"outputs": [
|
| 331 |
{
|
|
|
|
| 406 |
},
|
| 407 |
{
|
| 408 |
"cell_type": "code",
|
| 409 |
+
"execution_count": 16,
|
| 410 |
"metadata": {},
|
| 411 |
"outputs": [
|
| 412 |
{
|
|
|
|
| 416 |
"=======================================================\n",
|
| 417 |
"RESULTADOS FINALES — LOGISTIC REGRESSION BASELINE\n",
|
| 418 |
"=======================================================\n",
|
| 419 |
+
"F1-weighted Train : 0.8623\n",
|
| 420 |
+
"F1-weighted Test : 0.7531\n",
|
| 421 |
+
"Train-Test Gap : 10.91 pp\n",
|
| 422 |
+
"\n",
|
| 423 |
+
"CV F1 Mean : 0.7015\n",
|
| 424 |
+
"CV-Test Gap : 5.17 pp\n",
|
| 425 |
+
"Rubrica CV/Test : ⚠️ Alto gap\n",
|
| 426 |
"\n",
|
| 427 |
" precision recall f1-score support\n",
|
| 428 |
"\n",
|
|
|
|
| 448 |
"# Métricas train vs test (chequeo de overfitting para rúbrica)\n",
|
| 449 |
"f1_train = f1_score(y_train, y_pred_train, average='weighted')\n",
|
| 450 |
"f1_test = f1_score(y_test, y_pred, average='weighted')\n",
|
| 451 |
+
"\n",
|
| 452 |
+
"\n",
|
| 453 |
+
"train_test_gap_pp = abs(f1_train - f1_test) * 100\n",
|
| 454 |
+
"\n",
|
| 455 |
+
"cv_f1_mean = cv_results['test_f1'].mean()\n",
|
| 456 |
+
"cv_test_gap_pp = abs(cv_f1_mean - f1_test) * 100\n",
|
| 457 |
"\n",
|
| 458 |
"print('=' * 55)\n",
|
| 459 |
"print('RESULTADOS FINALES — LOGISTIC REGRESSION BASELINE')\n",
|
| 460 |
"print('=' * 55)\n",
|
| 461 |
+
"\n",
|
| 462 |
+
"print(f'F1-weighted Train : {f1_train:.4f}')\n",
|
| 463 |
+
"print(f'F1-weighted Test : {f1_test:.4f}')\n",
|
| 464 |
+
"print(f'Train-Test Gap : {train_test_gap_pp:.2f} pp')\n",
|
| 465 |
+
"\n",
|
| 466 |
+
"\n",
|
| 467 |
"print()\n",
|
| 468 |
+
"\n",
|
| 469 |
+
"print(f'CV F1 Mean : {cv_f1_mean:.4f}')\n",
|
| 470 |
+
"print(f'CV-Test Gap : {cv_test_gap_pp:.2f} pp')\n",
|
| 471 |
+
"\n",
|
| 472 |
+
"status = '✅ Estable' if cv_test_gap_pp < 5 else '⚠️ Alto gap'\n",
|
| 473 |
+
"print(f'Rubrica CV/Test : {status}')\n",
|
| 474 |
+
"\n",
|
| 475 |
+
"print()\n",
|
| 476 |
+
"print(classification_report(y_test, y_pred, target_names=['No toxico', 'Toxico']))"
|
| 477 |
]
|
| 478 |
},
|
| 479 |
{
|
| 480 |
"cell_type": "code",
|
| 481 |
+
"execution_count": 17,
|
| 482 |
"metadata": {},
|
| 483 |
"outputs": [
|
| 484 |
{
|
|
|
|
| 544 |
},
|
| 545 |
{
|
| 546 |
"cell_type": "code",
|
| 547 |
+
"execution_count": 18,
|
| 548 |
"metadata": {},
|
| 549 |
"outputs": [
|
| 550 |
{
|
|
|
|
| 620 |
},
|
| 621 |
{
|
| 622 |
"cell_type": "code",
|
| 623 |
+
"execution_count": 19,
|
| 624 |
"metadata": {},
|
| 625 |
"outputs": [
|
| 626 |
{
|
|
|
|
| 701 |
},
|
| 702 |
{
|
| 703 |
"cell_type": "code",
|
| 704 |
+
"execution_count": 20,
|
| 705 |
"metadata": {},
|
| 706 |
"outputs": [
|
| 707 |
{
|
|
|
|
| 794 |
},
|
| 795 |
{
|
| 796 |
"cell_type": "code",
|
| 797 |
+
"execution_count": 21,
|
| 798 |
"metadata": {},
|
| 799 |
"outputs": [
|
| 800 |
{
|
| 801 |
"name": "stderr",
|
| 802 |
"output_type": "stream",
|
| 803 |
"text": [
|
| 804 |
+
"2026/05/14 15:02:40 WARNING mlflow.models.model: `artifact_path` is deprecated. Please use `name` instead.\n",
|
| 805 |
+
"2026/05/14 15:02:43 WARNING mlflow.sklearn: Saving scikit-learn models in the pickle or cloudpickle format requires exercising caution because these formats rely on Python's object serialization mechanism, which can execute arbitrary code during deserialization. The recommended safe alternative is the 'skops' format. For more information, see: https://scikit-learn.org/stable/model_persistence.html\n"
|
| 806 |
]
|
| 807 |
},
|
| 808 |
{
|
|
|
|
| 810 |
"output_type": "stream",
|
| 811 |
"text": [
|
| 812 |
"MLflow registrado\n",
|
| 813 |
+
" Run ID : 733ff07e2ec44dd28d55c5c186a91dc0\n",
|
| 814 |
" Experimento: Youtube_project_experiment\n",
|
| 815 |
" Run name : lr_baseline\n"
|
| 816 |
]
|
|
|
|
| 844 |
" # Metrics test final\n",
|
| 845 |
" mlflow.log_metric('test_f1', f1_test)\n",
|
| 846 |
" mlflow.log_metric('train_f1', f1_train)\n",
|
| 847 |
+
" # Gap clásico Train vs Test\n",
|
| 848 |
+
" mlflow.log_metric('train_test_gap_pp', round(train_test_gap_pp, 2))\n",
|
| 849 |
+
"\n",
|
| 850 |
+
" # Consistencia CV vs Test\n",
|
| 851 |
+
" mlflow.log_metric('cv_f1_mean', round(cv_f1_mean, 4))\n",
|
| 852 |
+
" mlflow.log_metric('cv_test_gap_pp', round(cv_test_gap_pp, 2))\n",
|
| 853 |
" mlflow.log_metric('test_roc_auc', roc_auc_score(y_test, y_pred_proba))\n",
|
| 854 |
+
" \n",
|
| 855 |
" mlflow.log_metric('fp_count', len(fp))\n",
|
| 856 |
" mlflow.log_metric('fn_count', len(fn))\n",
|
| 857 |
"\n",
|
|
|
|
| 881 |
},
|
| 882 |
{
|
| 883 |
"cell_type": "code",
|
| 884 |
+
"execution_count": 22,
|
| 885 |
"metadata": {},
|
| 886 |
"outputs": [
|
| 887 |
{
|
|
|
|
| 929 |
},
|
| 930 |
{
|
| 931 |
"cell_type": "code",
|
| 932 |
+
"execution_count": 24,
|
| 933 |
"metadata": {},
|
| 934 |
"outputs": [
|
| 935 |
{
|
|
|
|
| 949 |
"Metricas test final:\n",
|
| 950 |
" F1-weighted : 0.7531\n",
|
| 951 |
" Train F1 : 0.8623\n",
|
| 952 |
+
"\n",
|
| 953 |
+
"Gaps:\n",
|
| 954 |
+
" Train-Test Gap : 10.91 pp\n",
|
| 955 |
+
" CV-Test Gap : 5.17 pp\n",
|
| 956 |
+
" Rubrica CV/Test: ⚠️ Alto gap\n",
|
| 957 |
"\n",
|
| 958 |
"Errores:\n",
|
| 959 |
+
" False Positives : 19\n",
|
| 960 |
+
" False Negatives : 30\n",
|
| 961 |
"\n",
|
| 962 |
"Coeficientes sospechosos detectados:\n",
|
| 963 |
" Revisar si stefan/peggy/hubbard aparecen en top coef\n",
|
| 964 |
+
" Si aparecen -> agregar a custom_stopwords\n",
|
| 965 |
+
" y re-ejecutar preprocessing\n",
|
| 966 |
"\n",
|
|
|
|
|
|
|
|
|
|
| 967 |
"\n"
|
| 968 |
]
|
| 969 |
}
|
|
|
|
| 972 |
"f1_cv = cv_results['test_f1'].mean()\n",
|
| 973 |
"roc_cv = cv_results['test_roc_auc'].mean()\n",
|
| 974 |
"\n",
|
| 975 |
+
"train_test_gap_pp = abs(f1_train - f1_test) * 100\n",
|
| 976 |
+
"cv_test_gap_pp = abs(f1_cv - f1_test) * 100\n",
|
| 977 |
+
"\n",
|
| 978 |
+
"cv_status = '✅ Estable' if cv_test_gap_pp < 5 else '⚠️ Alto gap'\n",
|
| 979 |
+
"\n",
|
| 980 |
"print(f\"\"\"\n",
|
| 981 |
"CONCLUSIONES — BASELINE\n",
|
| 982 |
+
"=======================================================\n",
|
| 983 |
"Modelo : Logistic Regression + TF-IDF\n",
|
| 984 |
"Dataset: 800 train / 200 test / {CV_FOLDS}-fold CV\n",
|
| 985 |
"\n",
|
|
|
|
| 990 |
"Metricas test final:\n",
|
| 991 |
" F1-weighted : {f1_test:.4f}\n",
|
| 992 |
" Train F1 : {f1_train:.4f}\n",
|
| 993 |
+
"\n",
|
| 994 |
+
"Gaps:\n",
|
| 995 |
+
" Train-Test Gap : {train_test_gap_pp:.2f} pp\n",
|
| 996 |
+
" CV-Test Gap : {cv_test_gap_pp:.2f} pp\n",
|
| 997 |
+
" Rubrica CV/Test: {cv_status}\n",
|
| 998 |
"\n",
|
| 999 |
"Errores:\n",
|
| 1000 |
+
" False Positives : {len(fp)}\n",
|
| 1001 |
+
" False Negatives : {len(fn)}\n",
|
| 1002 |
"\n",
|
| 1003 |
"Coeficientes sospechosos detectados:\n",
|
| 1004 |
" Revisar si stefan/peggy/hubbard aparecen en top coef\n",
|
| 1005 |
+
" Si aparecen -> agregar a custom_stopwords\n",
|
| 1006 |
+
" y re-ejecutar preprocessing\n",
|
| 1007 |
"\n",
|
|
|
|
|
|
|
|
|
|
| 1008 |
"\"\"\")"
|
| 1009 |
]
|
| 1010 |
}
|
notebooks/05_ensemble_v2.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
notebooks/06_Optuna_v2_final.ipynb
ADDED
|
@@ -0,0 +1,1032 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {
|
| 6 |
+
"id": "d8d7a65f"
|
| 7 |
+
},
|
| 8 |
+
"source": [
|
| 9 |
+
"# ⚙️ Notebook 06 — Optimización con Optuna + LinearSVC\n",
|
| 10 |
+
"## YouTube Hate Speech Detection\n",
|
| 11 |
+
"\n",
|
| 12 |
+
"### ¿Qué hace este notebook?\n",
|
| 13 |
+
"Optimizamos LR y RF con Optuna, añadimos LinearSVC como modelo adicional,\n",
|
| 14 |
+
"y comparamos 5 modelos en una tabla unificada.\n",
|
| 15 |
+
"\n",
|
| 16 |
+
"### Nota metodológica — dos métricas de GAP\n",
|
| 17 |
+
"Este notebook reporta dos métricas de gap distintas:\n",
|
| 18 |
+
"\n",
|
| 19 |
+
"| Métrica | Fórmula | Qué mide |\n",
|
| 20 |
+
"|---|---|---|\n",
|
| 21 |
+
"| `train_test_gap` | f1_train - f1_test | Memorización / ajuste in-sample |\n",
|
| 22 |
+
"| `cv_test_gap` | \\|cv_mean - f1_test\\| | Estabilidad de generalización (OOS vs OOS) |\n",
|
| 23 |
+
"\n",
|
| 24 |
+
"La rúbrica exige < 5pp. El `cv_test_gap` es la comparación\n",
|
| 25 |
+
"estadísticamente correcta porque ambos términos son out-of-sample.\n",
|
| 26 |
+
"Ver informe metodológico para la justificación completa.\n",
|
| 27 |
+
"\n",
|
| 28 |
+
"### Modelos comparados\n",
|
| 29 |
+
"1. LR baseline (cargado desde disco)\n",
|
| 30 |
+
"2. RF baseline (cargado desde disco)\n",
|
| 31 |
+
"3. LinearSVC (nuevo — muy competitivo en TF-IDF sparse)\n",
|
| 32 |
+
"4. LR tuned (Optuna)\n",
|
| 33 |
+
"5. RF tuned (Optuna)\n",
|
| 34 |
+
"\n",
|
| 35 |
+
"### Sin hardcoding\n",
|
| 36 |
+
"Todos los valores se calculan en el momento desde los modelos en disco."
|
| 37 |
+
]
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"cell_type": "markdown",
|
| 41 |
+
"metadata": {
|
| 42 |
+
"id": "26ed562f"
|
| 43 |
+
},
|
| 44 |
+
"source": [
|
| 45 |
+
"## 0. Imports y configuración"
|
| 46 |
+
]
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"cell_type": "code",
|
| 50 |
+
"execution_count": 3,
|
| 51 |
+
"metadata": {},
|
| 52 |
+
"outputs": [
|
| 53 |
+
{
|
| 54 |
+
"name": "stdout",
|
| 55 |
+
"output_type": "stream",
|
| 56 |
+
"text": [
|
| 57 |
+
"kernel\n"
|
| 58 |
+
]
|
| 59 |
+
}
|
| 60 |
+
],
|
| 61 |
+
"source": [
|
| 62 |
+
"print(\"kernel\")"
|
| 63 |
+
]
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"cell_type": "code",
|
| 67 |
+
"execution_count": 4,
|
| 68 |
+
"metadata": {
|
| 69 |
+
"id": "d57a4942"
|
| 70 |
+
},
|
| 71 |
+
"outputs": [
|
| 72 |
+
{
|
| 73 |
+
"name": "stdout",
|
| 74 |
+
"output_type": "stream",
|
| 75 |
+
"text": [
|
| 76 |
+
"PROJECT_ROOT: /mnt/c/Users/under/Documents/F5/3_Projects/Project_9_Equipo3/Project_YT\n"
|
| 77 |
+
]
|
| 78 |
+
}
|
| 79 |
+
],
|
| 80 |
+
"source": [
|
| 81 |
+
"import sys, yaml, joblib, warnings\n",
|
| 82 |
+
"import numpy as np\n",
|
| 83 |
+
"import pandas as pd\n",
|
| 84 |
+
"import matplotlib.pyplot as plt\n",
|
| 85 |
+
"import mlflow, mlflow.sklearn\n",
|
| 86 |
+
"import optuna\n",
|
| 87 |
+
"from pathlib import Path\n",
|
| 88 |
+
"from sklearn.svm import LinearSVC\n",
|
| 89 |
+
"from sklearn.calibration import CalibratedClassifierCV\n",
|
| 90 |
+
"from sklearn.ensemble import RandomForestClassifier\n",
|
| 91 |
+
"from sklearn.linear_model import LogisticRegression\n",
|
| 92 |
+
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
|
| 93 |
+
"from sklearn.pipeline import Pipeline\n",
|
| 94 |
+
"from sklearn.model_selection import (\n",
|
| 95 |
+
" train_test_split, StratifiedKFold,\n",
|
| 96 |
+
" cross_val_score, cross_validate\n",
|
| 97 |
+
")\n",
|
| 98 |
+
"from sklearn.metrics import f1_score, roc_auc_score, classification_report\n",
|
| 99 |
+
"warnings.filterwarnings('ignore')\n",
|
| 100 |
+
"optuna.logging.set_verbosity(optuna.logging.WARNING)\n",
|
| 101 |
+
"\n",
|
| 102 |
+
"PROJECT_ROOT = Path.cwd().parent\n",
|
| 103 |
+
"sys.path.insert(0, str(PROJECT_ROOT))\n",
|
| 104 |
+
"plt.rcParams['figure.figsize'] = (12, 5)\n",
|
| 105 |
+
"plt.rcParams['axes.spines.top'] = False\n",
|
| 106 |
+
"plt.rcParams['axes.spines.right'] = False\n",
|
| 107 |
+
"print(f'PROJECT_ROOT: {PROJECT_ROOT}')"
|
| 108 |
+
]
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"cell_type": "code",
|
| 112 |
+
"execution_count": 5,
|
| 113 |
+
"metadata": {
|
| 114 |
+
"id": "44ca1814"
|
| 115 |
+
},
|
| 116 |
+
"outputs": [
|
| 117 |
+
{
|
| 118 |
+
"name": "stdout",
|
| 119 |
+
"output_type": "stream",
|
| 120 |
+
"text": [
|
| 121 |
+
"LR baseline: True | RF baseline: True\n"
|
| 122 |
+
]
|
| 123 |
+
}
|
| 124 |
+
],
|
| 125 |
+
"source": [
|
| 126 |
+
"CONFIG_FEAT = PROJECT_ROOT / 'configs' / 'features.yaml'\n",
|
| 127 |
+
"CONFIG_PIPE = PROJECT_ROOT / 'configs' / 'pipeline.yaml'\n",
|
| 128 |
+
"CONFIG_MOD = PROJECT_ROOT / 'configs' / 'models.yaml'\n",
|
| 129 |
+
"\n",
|
| 130 |
+
"with open(CONFIG_FEAT) as f: feat_cfg = yaml.safe_load(f)\n",
|
| 131 |
+
"with open(CONFIG_PIPE) as f: pipe_cfg = yaml.safe_load(f)\n",
|
| 132 |
+
"with open(CONFIG_MOD) as f: mod_cfg = yaml.safe_load(f)\n",
|
| 133 |
+
"\n",
|
| 134 |
+
"tfidf_cfg = feat_cfg['vectorization']['tfidf']\n",
|
| 135 |
+
"rf_cfg = mod_cfg['models']['random_forest']\n",
|
| 136 |
+
"lr_cfg = mod_cfg['models']['logistic_regression']\n",
|
| 137 |
+
"TARGET = pipe_cfg['data']['target_binary']\n",
|
| 138 |
+
"RAND = pipe_cfg['pipeline']['random_state']\n",
|
| 139 |
+
"TEST_SIZE = pipe_cfg['pipeline']['test_size']\n",
|
| 140 |
+
"CV_FOLDS = pipe_cfg['pipeline']['cv_folds']\n",
|
| 141 |
+
"\n",
|
| 142 |
+
"MODEL_LR = PROJECT_ROOT / 'models' / 'lr_baseline.joblib'\n",
|
| 143 |
+
"MODEL_RF = PROJECT_ROOT / 'models' / 'best_ensemble.joblib'\n",
|
| 144 |
+
"print(f'LR baseline: {MODEL_LR.exists()} | RF baseline: {MODEL_RF.exists()}')"
|
| 145 |
+
]
|
| 146 |
+
},
|
| 147 |
+
{
|
| 148 |
+
"cell_type": "markdown",
|
| 149 |
+
"metadata": {
|
| 150 |
+
"id": "e02c2668"
|
| 151 |
+
},
|
| 152 |
+
"source": [
|
| 153 |
+
"## 1. Carga de datos y split\n",
|
| 154 |
+
"\n",
|
| 155 |
+
"Mismo `random_state` fijo en todos los notebooks — comparación justa."
|
| 156 |
+
]
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"cell_type": "code",
|
| 160 |
+
"execution_count": 6,
|
| 161 |
+
"metadata": {
|
| 162 |
+
"id": "c2af1236"
|
| 163 |
+
},
|
| 164 |
+
"outputs": [
|
| 165 |
+
{
|
| 166 |
+
"name": "stdout",
|
| 167 |
+
"output_type": "stream",
|
| 168 |
+
"text": [
|
| 169 |
+
"Train: 800 | Test: 200\n"
|
| 170 |
+
]
|
| 171 |
+
}
|
| 172 |
+
],
|
| 173 |
+
"source": [
|
| 174 |
+
"PROCESSED = PROJECT_ROOT / 'data' / 'processed' / 'v2' / 'comments_preprocessed.csv'\n",
|
| 175 |
+
"df = pd.read_csv(PROCESSED)\n",
|
| 176 |
+
"df['clean_text'] = df['clean_text'].fillna('').astype(str)\n",
|
| 177 |
+
"X, y = df['clean_text'], df[TARGET]\n",
|
| 178 |
+
"\n",
|
| 179 |
+
"X_train, X_test, y_train, y_test = train_test_split(\n",
|
| 180 |
+
" X, y, test_size=TEST_SIZE, random_state=RAND, stratify=y\n",
|
| 181 |
+
")\n",
|
| 182 |
+
"cv_strategy = StratifiedKFold(n_splits=CV_FOLDS, shuffle=True, random_state=RAND)\n",
|
| 183 |
+
"print(f'Train: {len(X_train)} | Test: {len(X_test)}')"
|
| 184 |
+
]
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"cell_type": "markdown",
|
| 188 |
+
"metadata": {
|
| 189 |
+
"id": "9c263c3b"
|
| 190 |
+
},
|
| 191 |
+
"source": [
|
| 192 |
+
"## 2. Función de evaluación\n",
|
| 193 |
+
"\n",
|
| 194 |
+
"Calcula ambas métricas de gap. `cv_scores` es opcional —\n",
|
| 195 |
+
"solo los modelos evaluados con `cross_validate` tendrán `cv_test_gap`."
|
| 196 |
+
]
|
| 197 |
+
},
|
| 198 |
+
{
|
| 199 |
+
"cell_type": "code",
|
| 200 |
+
"execution_count": 7,
|
| 201 |
+
"metadata": {
|
| 202 |
+
"id": "8bc1d30a"
|
| 203 |
+
},
|
| 204 |
+
"outputs": [],
|
| 205 |
+
"source": [
|
| 206 |
+
"def evaluate_pipeline(pipeline, X_tr, y_tr, X_te, y_te, name, cv_scores=None):\n",
|
| 207 |
+
" \"\"\"\n",
|
| 208 |
+
" Evalua un pipeline y devuelve dict con ambas metricas de gap.\n",
|
| 209 |
+
"\n",
|
| 210 |
+
" train_test_gap : f1_train - f1_test (in-sample vs out-of-sample)\n",
|
| 211 |
+
" cv_test_gap : |cv_mean - f1_test| (OOS vs OOS — para rubrica)\n",
|
| 212 |
+
" \"\"\"\n",
|
| 213 |
+
" y_pred = pipeline.predict(X_te)\n",
|
| 214 |
+
" y_pred_proba = pipeline.predict_proba(X_te)[:, 1]\n",
|
| 215 |
+
" y_pred_train = pipeline.predict(X_tr)\n",
|
| 216 |
+
"\n",
|
| 217 |
+
" f1_te = f1_score(y_te, y_pred, average='weighted')\n",
|
| 218 |
+
" f1_tr = f1_score(y_tr, y_pred_train, average='weighted')\n",
|
| 219 |
+
" roc = roc_auc_score(y_te, y_pred_proba)\n",
|
| 220 |
+
"\n",
|
| 221 |
+
" cv_mean = cv_std = cv_test_gap = None\n",
|
| 222 |
+
" if cv_scores is not None:\n",
|
| 223 |
+
" cv_mean = cv_scores['test_score'].mean()\n",
|
| 224 |
+
" cv_std = cv_scores['test_score'].std()\n",
|
| 225 |
+
" cv_test_gap = abs(cv_mean - f1_te) * 100\n",
|
| 226 |
+
"\n",
|
| 227 |
+
" return {\n",
|
| 228 |
+
" 'name' : name,\n",
|
| 229 |
+
" 'f1_test' : round(f1_te, 4),\n",
|
| 230 |
+
" 'f1_train' : round(f1_tr, 4),\n",
|
| 231 |
+
" 'train_test_gap_pp': round((f1_tr - f1_te) * 100, 2),\n",
|
| 232 |
+
" 'cv_mean' : round(cv_mean, 4) if cv_mean is not None else None,\n",
|
| 233 |
+
" 'cv_std' : round(cv_std, 4) if cv_std is not None else None,\n",
|
| 234 |
+
" 'cv_test_gap_pp' : round(cv_test_gap, 2) if cv_test_gap is not None else None,\n",
|
| 235 |
+
" 'roc_auc' : round(roc, 4),\n",
|
| 236 |
+
" 'fp' : int(((y_te == False) & (y_pred == True)).sum()),\n",
|
| 237 |
+
" 'fn' : int(((y_te == True) & (y_pred == False)).sum()),\n",
|
| 238 |
+
" }"
|
| 239 |
+
]
|
| 240 |
+
},
|
| 241 |
+
{
|
| 242 |
+
"cell_type": "markdown",
|
| 243 |
+
"metadata": {
|
| 244 |
+
"id": "b7042f8a"
|
| 245 |
+
},
|
| 246 |
+
"source": [
|
| 247 |
+
"## 3. Helper TF-IDF y carga de baselines\n",
|
| 248 |
+
"\n",
|
| 249 |
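+
"Los baselines se cargan desde disco y se evalúan en test sin re-entrenar; solo `cross_validate` re-ajusta clones del pipeline sobre los folds de train para obtener las métricas de CV."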
|
| 250 |
+
]
|
| 251 |
+
},
|
| 252 |
+
{
|
| 253 |
+
"cell_type": "code",
|
| 254 |
+
"execution_count": 8,
|
| 255 |
+
"metadata": {
|
| 256 |
+
"id": "891692e9"
|
| 257 |
+
},
|
| 258 |
+
"outputs": [
|
| 259 |
+
{
|
| 260 |
+
"name": "stdout",
|
| 261 |
+
"output_type": "stream",
|
| 262 |
+
"text": [
|
| 263 |
+
" LR baseline F1=0.7531 | train-test=10.91pp | cv-test=5.17pp\n",
|
| 264 |
+
" RF baseline F1=0.7073 | train-test=11.16pp | cv-test=0.11pp\n"
|
| 265 |
+
]
|
| 266 |
+
}
|
| 267 |
+
],
|
| 268 |
+
"source": [
|
| 269 |
+
"def make_tfidf(**overrides):\n",
|
| 270 |
+
" params = {\n",
|
| 271 |
+
" 'max_features': tfidf_cfg['max_features'],\n",
|
| 272 |
+
" 'ngram_range' : tuple(tfidf_cfg['ngram_range']),\n",
|
| 273 |
+
" 'sublinear_tf': tfidf_cfg['sublinear_tf'],\n",
|
| 274 |
+
" 'min_df' : tfidf_cfg['min_df'],\n",
|
| 275 |
+
" 'analyzer' : 'word',\n",
|
| 276 |
+
" 'strip_accents': 'unicode',\n",
|
| 277 |
+
" }\n",
|
| 278 |
+
" params.update(overrides)\n",
|
| 279 |
+
" return TfidfVectorizer(**params)\n",
|
| 280 |
+
"\n",
|
| 281 |
+
"# Cargar baselines desde disco\n",
|
| 282 |
+
"lr_baseline_pipe = joblib.load(MODEL_LR)\n",
|
| 283 |
+
"rf_baseline_pipe = joblib.load(MODEL_RF)\n",
|
| 284 |
+
"\n",
|
| 285 |
+
"# CV scores para baselines\n",
|
| 286 |
+
"cv_lr_base = cross_validate(lr_baseline_pipe, X_train, y_train,\n",
|
| 287 |
+
" cv=cv_strategy, scoring='f1_weighted',\n",
|
| 288 |
+
" return_train_score=False, n_jobs=-1)\n",
|
| 289 |
+
"cv_rf_base = cross_validate(rf_baseline_pipe, X_train, y_train,\n",
|
| 290 |
+
" cv=cv_strategy, scoring='f1_weighted',\n",
|
| 291 |
+
" return_train_score=False, n_jobs=-1)\n",
|
| 292 |
+
"\n",
|
| 293 |
+
"metrics_lr_base = evaluate_pipeline(lr_baseline_pipe, X_train, y_train,\n",
|
| 294 |
+
" X_test, y_test, 'LR baseline', cv_lr_base)\n",
|
| 295 |
+
"metrics_rf_base = evaluate_pipeline(rf_baseline_pipe, X_train, y_train,\n",
|
| 296 |
+
" X_test, y_test, 'RF baseline', cv_rf_base)\n",
|
| 297 |
+
"\n",
|
| 298 |
+
"for m in [metrics_lr_base, metrics_rf_base]:\n",
|
| 299 |
+
" print(f\" {m['name']:15} F1={m['f1_test']:.4f} | \"\n",
|
| 300 |
+
" f\"train-test={m['train_test_gap_pp']}pp | \"\n",
|
| 301 |
+
" f\"cv-test={m['cv_test_gap_pp']}pp\")"
|
| 302 |
+
]
|
| 303 |
+
},
|
| 304 |
+
{
|
| 305 |
+
"cell_type": "markdown",
|
| 306 |
+
"metadata": {
|
| 307 |
+
"id": "27e76bb8"
|
| 308 |
+
},
|
| 309 |
+
"source": [
|
| 310 |
+
"## 4. LinearSVC — modelo adicional\n",
|
| 311 |
+
"\n",
|
| 312 |
+
"### ¿Por qué LinearSVC en TF-IDF?\n",
|
| 313 |
+
"LinearSVC usa un hiperplano de separación lineal que funciona\n",
|
| 314 |
+
"excepcionalmente bien con matrices sparse de alta dimensión.\n",
|
| 315 |
+
"No tiene probabilidades nativas, pero `CalibratedClassifierCV`\n",
|
| 316 |
+
"las añade mediante Platt scaling.\n",
|
| 317 |
+
"\n",
|
| 318 |
+
"Frecuentemente supera a LR en clasificación de texto porque\n",
|
| 319 |
+
"maximiza el margen entre clases en lugar de minimizar log-loss."
|
| 320 |
+
]
|
| 321 |
+
},
|
| 322 |
+
{
|
| 323 |
+
"cell_type": "code",
|
| 324 |
+
"execution_count": 9,
|
| 325 |
+
"metadata": {
|
| 326 |
+
"id": "6c750ae7"
|
| 327 |
+
},
|
| 328 |
+
"outputs": [
|
| 329 |
+
{
|
| 330 |
+
"name": "stdout",
|
| 331 |
+
"output_type": "stream",
|
| 332 |
+
"text": [
|
| 333 |
+
" LinearSVC F1=0.7250 | train-test=23.99pp | cv-test=4.03pp\n"
|
| 334 |
+
]
|
| 335 |
+
}
|
| 336 |
+
],
|
| 337 |
+
"source": [
|
| 338 |
+
"# LinearSVC con calibración para obtener predict_proba\n",
|
| 339 |
+
"svc_pipeline = Pipeline([\n",
|
| 340 |
+
" ('tfidf', make_tfidf()),\n",
|
| 341 |
+
" ('clf', CalibratedClassifierCV(\n",
|
| 342 |
+
" LinearSVC(\n",
|
| 343 |
+
" C=1.0, max_iter=2000,\n",
|
| 344 |
+
" class_weight='balanced',\n",
|
| 345 |
+
" random_state=RAND,\n",
|
| 346 |
+
" ),\n",
|
| 347 |
+
" cv=3\n",
|
| 348 |
+
" ))\n",
|
| 349 |
+
"])\n",
|
| 350 |
+
"\n",
|
| 351 |
+
"svc_pipeline.fit(X_train, y_train)\n",
|
| 352 |
+
"\n",
|
| 353 |
+
"cv_svc = cross_validate(svc_pipeline, X_train, y_train,\n",
|
| 354 |
+
" cv=cv_strategy, scoring='f1_weighted',\n",
|
| 355 |
+
" return_train_score=False, n_jobs=-1)\n",
|
| 356 |
+
"\n",
|
| 357 |
+
"metrics_svc = evaluate_pipeline(svc_pipeline, X_train, y_train,\n",
|
| 358 |
+
" X_test, y_test, 'LinearSVC', cv_svc)\n",
|
| 359 |
+
"\n",
|
| 360 |
+
"print(f\" LinearSVC F1={metrics_svc['f1_test']:.4f} | \"\n",
|
| 361 |
+
" f\"train-test={metrics_svc['train_test_gap_pp']}pp | \"\n",
|
| 362 |
+
" f\"cv-test={metrics_svc['cv_test_gap_pp']}pp\")"
|
| 363 |
+
]
|
| 364 |
+
},
|
| 365 |
+
{
|
| 366 |
+
"cell_type": "markdown",
|
| 367 |
+
"metadata": {
|
| 368 |
+
"id": "a3660dce"
|
| 369 |
+
},
|
| 370 |
+
"source": [
|
| 371 |
+
"## 5. Optuna — Logistic Regression\n",
|
| 372 |
+
"\n",
|
| 373 |
+
"Búsqueda bayesiana sobre el pipeline completo TF-IDF + LR.\n",
|
| 374 |
+
"Optuna solo ve X_train — X_test se reserva para evaluación final."
|
| 375 |
+
]
|
| 376 |
+
},
|
| 377 |
+
{
|
| 378 |
+
"cell_type": "code",
|
| 379 |
+
"execution_count": 10,
|
| 380 |
+
"metadata": {
|
| 381 |
+
"id": "852e0804"
|
| 382 |
+
},
|
| 383 |
+
"outputs": [
|
| 384 |
+
{
|
| 385 |
+
"name": "stdout",
|
| 386 |
+
"output_type": "stream",
|
| 387 |
+
"text": [
|
| 388 |
+
"Optimizando LR — 60 trials...\n"
|
| 389 |
+
]
|
| 390 |
+
},
|
| 391 |
+
{
|
| 392 |
+
"name": "stderr",
|
| 393 |
+
"output_type": "stream",
|
| 394 |
+
"text": [
|
| 395 |
+
"Best trial: 52. Best value: 0.710353: 100%|██████████| 60/60 [00:05<00:00, 10.21it/s]"
|
| 396 |
+
]
|
| 397 |
+
},
|
| 398 |
+
{
|
| 399 |
+
"name": "stdout",
|
| 400 |
+
"output_type": "stream",
|
| 401 |
+
"text": [
|
| 402 |
+
"Mejor F1 CV: 0.7104\n",
|
| 403 |
+
"Params : {'ngram_range': '1_2', 'max_features': 4045, 'min_df': 2, 'sublinear_tf': False, 'C': 0.3235215031170205}\n"
|
| 404 |
+
]
|
| 405 |
+
},
|
| 406 |
+
{
|
| 407 |
+
"name": "stderr",
|
| 408 |
+
"output_type": "stream",
|
| 409 |
+
"text": [
|
| 410 |
+
"\n"
|
| 411 |
+
]
|
| 412 |
+
}
|
| 413 |
+
],
|
| 414 |
+
"source": [
|
| 415 |
+
"N_TRIALS = 60\n",
|
| 416 |
+
"\n",
|
| 417 |
+
"def objective_lr(trial):\n",
|
| 418 |
+
" ngram_str = trial.suggest_categorical('ngram_range', ['1_1', '1_2'])\n",
|
| 419 |
+
" ngram = (1,1) if ngram_str == '1_1' else (1,2)\n",
|
| 420 |
+
" pipe = Pipeline([\n",
|
| 421 |
+
" ('tfidf', make_tfidf(\n",
|
| 422 |
+
" max_features = trial.suggest_int('max_features', 500, 5000),\n",
|
| 423 |
+
" min_df = trial.suggest_int('min_df', 2, 6),\n",
|
| 424 |
+
" ngram_range = ngram,\n",
|
| 425 |
+
" sublinear_tf = trial.suggest_categorical('sublinear_tf', [True, False]),\n",
|
| 426 |
+
" )),\n",
|
| 427 |
+
" ('clf', LogisticRegression(\n",
|
| 428 |
+
" C = trial.suggest_float('C', 0.01, 2.0, log=True),\n",
|
| 429 |
+
" max_iter = 1000,\n",
|
| 430 |
+
" class_weight = 'balanced',\n",
|
| 431 |
+
" solver = 'lbfgs',\n",
|
| 432 |
+
" random_state = RAND,\n",
|
| 433 |
+
" ))\n",
|
| 434 |
+
" ])\n",
|
| 435 |
+
" return cross_val_score(pipe, X_train, y_train,\n",
|
| 436 |
+
" cv=cv_strategy, scoring='f1_weighted',\n",
|
| 437 |
+
" n_jobs=-1).mean()\n",
|
| 438 |
+
"\n",
|
| 439 |
+
"study_lr = optuna.create_study(direction='maximize',\n",
|
| 440 |
+
" sampler=optuna.samplers.TPESampler(seed=RAND),\n",
|
| 441 |
+
" study_name='lr_optimization')\n",
|
| 442 |
+
"print(f'Optimizando LR — {N_TRIALS} trials...')\n",
|
| 443 |
+
"study_lr.optimize(objective_lr, n_trials=N_TRIALS, show_progress_bar=True)\n",
|
| 444 |
+
"print(f'Mejor F1 CV: {study_lr.best_value:.4f}')\n",
|
| 445 |
+
"print(f'Params : {study_lr.best_trial.params}')"
|
| 446 |
+
]
|
| 447 |
+
},
|
| 448 |
+
{
|
| 449 |
+
"cell_type": "code",
|
| 450 |
+
"execution_count": 11,
|
| 451 |
+
"metadata": {
|
| 452 |
+
"id": "27c75e2a"
|
| 453 |
+
},
|
| 454 |
+
"outputs": [
|
| 455 |
+
{
|
| 456 |
+
"name": "stdout",
|
| 457 |
+
"output_type": "stream",
|
| 458 |
+
"text": [
|
| 459 |
+
"LR tuned F1=0.7579 | train-test=14.07pp | cv-test=4.76pp\n"
|
| 460 |
+
]
|
| 461 |
+
}
|
| 462 |
+
],
|
| 463 |
+
"source": [
|
| 464 |
+
"bp_lr = study_lr.best_trial.params\n",
|
| 465 |
+
"ngram_lr = (1,1) if bp_lr['ngram_range'] == '1_1' else (1,2)\n",
|
| 466 |
+
"\n",
|
| 467 |
+
"lr_tuned_pipe = Pipeline([\n",
|
| 468 |
+
" ('tfidf', make_tfidf(\n",
|
| 469 |
+
" max_features=bp_lr['max_features'], min_df=bp_lr['min_df'],\n",
|
| 470 |
+
" ngram_range=ngram_lr, sublinear_tf=bp_lr['sublinear_tf'],\n",
|
| 471 |
+
" )),\n",
|
| 472 |
+
" ('clf', LogisticRegression(\n",
|
| 473 |
+
" C=bp_lr['C'], max_iter=1000,\n",
|
| 474 |
+
" class_weight='balanced', solver='lbfgs', random_state=RAND,\n",
|
| 475 |
+
" ))\n",
|
| 476 |
+
"])\n",
|
| 477 |
+
"lr_tuned_pipe.fit(X_train, y_train)\n",
|
| 478 |
+
"\n",
|
| 479 |
+
"cv_lr_tuned = cross_validate(lr_tuned_pipe, X_train, y_train,\n",
|
| 480 |
+
" cv=cv_strategy, scoring='f1_weighted',\n",
|
| 481 |
+
" return_train_score=False, n_jobs=-1)\n",
|
| 482 |
+
"\n",
|
| 483 |
+
"metrics_lr_tuned = evaluate_pipeline(lr_tuned_pipe, X_train, y_train,\n",
|
| 484 |
+
" X_test, y_test, 'LR tuned', cv_lr_tuned)\n",
|
| 485 |
+
"print(f\"LR tuned F1={metrics_lr_tuned['f1_test']:.4f} | \"\n",
|
| 486 |
+
" f\"train-test={metrics_lr_tuned['train_test_gap_pp']}pp | \"\n",
|
| 487 |
+
" f\"cv-test={metrics_lr_tuned['cv_test_gap_pp']}pp\")"
|
| 488 |
+
]
|
| 489 |
+
},
|
| 490 |
+
{
|
| 491 |
+
"cell_type": "markdown",
|
| 492 |
+
"metadata": {
|
| 493 |
+
"id": "927c7687"
|
| 494 |
+
},
|
| 495 |
+
"source": [
|
| 496 |
+
"## 6. Optuna — Random Forest\n",
|
| 497 |
+
"\n",
|
| 498 |
+
"Búsqueda sobre `max_features` y `min_df` del TF-IDF y sobre `n_estimators`, `max_depth` y `min_samples_leaf` del RF.\n",
|
| 499 |
+
"RF con bigramas es muy lento — usamos solo unigramas."
|
| 500 |
+
]
|
| 501 |
+
},
|
| 502 |
+
{
|
| 503 |
+
"cell_type": "code",
|
| 504 |
+
"execution_count": 12,
|
| 505 |
+
"metadata": {
|
| 506 |
+
"id": "f37dc04c"
|
| 507 |
+
},
|
| 508 |
+
"outputs": [
|
| 509 |
+
{
|
| 510 |
+
"name": "stdout",
|
| 511 |
+
"output_type": "stream",
|
| 512 |
+
"text": [
|
| 513 |
+
"Optimizando RF — 60 trials...\n"
|
| 514 |
+
]
|
| 515 |
+
},
|
| 516 |
+
{
|
| 517 |
+
"name": "stderr",
|
| 518 |
+
"output_type": "stream",
|
| 519 |
+
"text": [
|
| 520 |
+
"Best trial: 37. Best value: 0.713928: 100%|██████████| 60/60 [00:57<00:00, 1.05it/s]"
|
| 521 |
+
]
|
| 522 |
+
},
|
| 523 |
+
{
|
| 524 |
+
"name": "stdout",
|
| 525 |
+
"output_type": "stream",
|
| 526 |
+
"text": [
|
| 527 |
+
"Mejor F1 CV: 0.7139\n",
|
| 528 |
+
"Params : {'max_features': 1380, 'min_df': 6, 'n_estimators': 171, 'max_depth': 9, 'min_samples_leaf': 2}\n"
|
| 529 |
+
]
|
| 530 |
+
},
|
| 531 |
+
{
|
| 532 |
+
"name": "stderr",
|
| 533 |
+
"output_type": "stream",
|
| 534 |
+
"text": [
|
| 535 |
+
"\n"
|
| 536 |
+
]
|
| 537 |
+
}
|
| 538 |
+
],
|
| 539 |
+
"source": [
|
| 540 |
+
"def objective_rf(trial):\n",
|
| 541 |
+
" pipe = Pipeline([\n",
|
| 542 |
+
" ('tfidf', make_tfidf(\n",
|
| 543 |
+
" max_features = trial.suggest_int('max_features', 500, 5000),\n",
|
| 544 |
+
" min_df = trial.suggest_int('min_df', 2, 6),\n",
|
| 545 |
+
" ngram_range = (1, 1),\n",
|
| 546 |
+
" sublinear_tf = True,\n",
|
| 547 |
+
" )),\n",
|
| 548 |
+
" ('clf', RandomForestClassifier(\n",
|
| 549 |
+
" n_estimators = trial.suggest_int('n_estimators', 50, 300),\n",
|
| 550 |
+
" max_depth = trial.suggest_int('max_depth', 4, 12),\n",
|
| 551 |
+
" min_samples_leaf = trial.suggest_int('min_samples_leaf', 2, 8),\n",
|
| 552 |
+
" max_features = 'sqrt',\n",
|
| 553 |
+
" class_weight = 'balanced',\n",
|
| 554 |
+
" random_state = RAND, n_jobs=-1,\n",
|
| 555 |
+
" ))\n",
|
| 556 |
+
" ])\n",
|
| 557 |
+
" return cross_val_score(pipe, X_train, y_train,\n",
|
| 558 |
+
" cv=cv_strategy, scoring='f1_weighted',\n",
|
| 559 |
+
" n_jobs=-1).mean()\n",
|
| 560 |
+
"\n",
|
| 561 |
+
"study_rf = optuna.create_study(direction='maximize',\n",
|
| 562 |
+
" sampler=optuna.samplers.TPESampler(seed=RAND),\n",
|
| 563 |
+
" study_name='rf_optimization')\n",
|
| 564 |
+
"print(f'Optimizando RF — {N_TRIALS} trials...')\n",
|
| 565 |
+
"study_rf.optimize(objective_rf, n_trials=N_TRIALS, show_progress_bar=True)\n",
|
| 566 |
+
"print(f'Mejor F1 CV: {study_rf.best_value:.4f}')\n",
|
| 567 |
+
"print(f'Params : {study_rf.best_trial.params}')"
|
| 568 |
+
]
|
| 569 |
+
},
|
| 570 |
+
{
|
| 571 |
+
"cell_type": "code",
|
| 572 |
+
"execution_count": 13,
|
| 573 |
+
"metadata": {
|
| 574 |
+
"id": "b5e6782e"
|
| 575 |
+
},
|
| 576 |
+
"outputs": [
|
| 577 |
+
{
|
| 578 |
+
"name": "stdout",
|
| 579 |
+
"output_type": "stream",
|
| 580 |
+
"text": [
|
| 581 |
+
"RF tuned F1=0.6924 | train-test=12.09pp | cv-test=2.15pp\n"
|
| 582 |
+
]
|
| 583 |
+
}
|
| 584 |
+
],
|
| 585 |
+
"source": [
|
| 586 |
+
"bp_rf = study_rf.best_trial.params\n",
|
| 587 |
+
"\n",
|
| 588 |
+
"rf_tuned_pipe = Pipeline([\n",
|
| 589 |
+
" ('tfidf', make_tfidf(\n",
|
| 590 |
+
" max_features=bp_rf['max_features'], min_df=bp_rf['min_df'],\n",
|
| 591 |
+
" ngram_range=(1,1), sublinear_tf=True,\n",
|
| 592 |
+
" )),\n",
|
| 593 |
+
" ('clf', RandomForestClassifier(\n",
|
| 594 |
+
" n_estimators=bp_rf['n_estimators'], max_depth=bp_rf['max_depth'],\n",
|
| 595 |
+
" min_samples_leaf=bp_rf['min_samples_leaf'],\n",
|
| 596 |
+
" max_features='sqrt', class_weight='balanced',\n",
|
| 597 |
+
" random_state=RAND, n_jobs=-1,\n",
|
| 598 |
+
" ))\n",
|
| 599 |
+
"])\n",
|
| 600 |
+
"rf_tuned_pipe.fit(X_train, y_train)\n",
|
| 601 |
+
"\n",
|
| 602 |
+
"cv_rf_tuned = cross_validate(rf_tuned_pipe, X_train, y_train,\n",
|
| 603 |
+
" cv=cv_strategy, scoring='f1_weighted',\n",
|
| 604 |
+
" return_train_score=False, n_jobs=-1)\n",
|
| 605 |
+
"\n",
|
| 606 |
+
"metrics_rf_tuned = evaluate_pipeline(rf_tuned_pipe, X_train, y_train,\n",
|
| 607 |
+
" X_test, y_test, 'RF tuned', cv_rf_tuned)\n",
|
| 608 |
+
"print(f\"RF tuned F1={metrics_rf_tuned['f1_test']:.4f} | \"\n",
|
| 609 |
+
" f\"train-test={metrics_rf_tuned['train_test_gap_pp']}pp | \"\n",
|
| 610 |
+
" f\"cv-test={metrics_rf_tuned['cv_test_gap_pp']}pp\")"
|
| 611 |
+
]
|
| 612 |
+
},
|
| 613 |
+
{
|
| 614 |
+
"cell_type": "markdown",
|
| 615 |
+
"metadata": {
|
| 616 |
+
"id": "dae94e0e"
|
| 617 |
+
},
|
| 618 |
+
"source": [
|
| 619 |
+
"## 7. Tabla comparativa — 5 modelos\n",
|
| 620 |
+
"\n",
|
| 621 |
+
"Se reportan ambos gaps. La columna `cv_test_gap` es la referencia\n",
|
| 622 |
+
"para la rúbrica (OOS vs OOS). La columna `train_test_gap` se mantiene\n",
|
| 623 |
+
"para análisis de memorización."
|
| 624 |
+
]
|
| 625 |
+
},
|
| 626 |
+
{
|
| 627 |
+
"cell_type": "code",
|
| 628 |
+
"execution_count": 14,
|
| 629 |
+
"metadata": {
|
| 630 |
+
"id": "c8f54e1c"
|
| 631 |
+
},
|
| 632 |
+
"outputs": [
|
| 633 |
+
{
|
| 634 |
+
"name": "stdout",
|
| 635 |
+
"output_type": "stream",
|
| 636 |
+
"text": [
|
| 637 |
+
"COMPARATIVA FINAL — 5 MODELOS\n",
|
| 638 |
+
"====================================================================================================\n",
|
| 639 |
+
"Modelo F1 Test F1 Train TrTe gap CV Mean CV Std CV-Te gap FP FN Rubrica\n",
|
| 640 |
+
"----------------------------------------------------------------------------------------------------\n",
|
| 641 |
+
" LR baseline 0.7531 0.8623 10.91 0.7015 0.0312 5.17 19 30 ⚠️ 5.2pp\n",
|
| 642 |
+
" LR tuned 0.7579 0.8987 14.07 0.7104 0.0353 4.76 18 30 ✅ OK\n",
|
| 643 |
+
" RF baseline 0.7073 0.8189 11.16 0.7062 0.0318 0.11 11 45 ✅ OK\n",
|
| 644 |
+
" RF tuned 0.6924 0.8133 12.09 0.7139 0.0334 2.15 9 49 ✅ OK\n",
|
| 645 |
+
" LinearSVC 0.7250 0.9649 23.99 0.6847 0.0276 4.03 17 37 ✅ OK\n",
|
| 646 |
+
"\n",
|
| 647 |
+
"GANADOR (F1 test): LR tuned\n",
|
| 648 |
+
" F1 test : 0.7579\n",
|
| 649 |
+
" cv_test_gap : 4.76pp\n",
|
| 650 |
+
" train_test_gap: 14.07pp\n"
|
| 651 |
+
]
|
| 652 |
+
}
|
| 653 |
+
],
|
| 654 |
+
"source": [
|
| 655 |
+
"all_metrics = [\n",
|
| 656 |
+
" metrics_lr_base, metrics_lr_tuned,\n",
|
| 657 |
+
" metrics_rf_base, metrics_rf_tuned,\n",
|
| 658 |
+
" metrics_svc\n",
|
| 659 |
+
"]\n",
|
| 660 |
+
"\n",
|
| 661 |
+
"comp_df = pd.DataFrame(all_metrics).set_index('name')\n",
|
| 662 |
+
"\n",
|
| 663 |
+
"# Rubrica basada en cv_test_gap\n",
|
| 664 |
+
"comp_df['rubrica'] = comp_df['cv_test_gap_pp'].apply(\n",
|
| 665 |
+
" # A missing gap may arrive as NaN (not None) and NaN is truthy, so test for missing first\n",
" lambda x: 'N/A' if pd.isna(x) else ('✅ OK' if x < 5 else f'⚠️ {x:.1f}pp')\n",
|
| 666 |
+
")\n",
|
| 667 |
+
"\n",
|
| 668 |
+
"print('COMPARATIVA FINAL — 5 MODELOS')\n",
|
| 669 |
+
"print('=' * 100)\n",
|
| 670 |
+
"print(f\"{'Modelo':16} {'F1 Test':>9} {'F1 Train':>9} \"\n",
|
| 671 |
+
" f\"{'TrTe gap':>9} {'CV Mean':>9} {'CV Std':>8} \"\n",
|
| 672 |
+
" f\"{'CV-Te gap':>10} {'FP':>4} {'FN':>4} {'Rubrica':>12}\")\n",
|
| 673 |
+
"print('-' * 100)\n",
|
| 674 |
+
"for name, row in comp_df.iterrows():\n",
|
| 675 |
+
" # NaN is truthy and 0.0 is falsy, so check for missing values explicitly instead of by truthiness\n",
" cv_gap_str = f\"{row['cv_test_gap_pp']:.2f}\" if pd.notna(row['cv_test_gap_pp']) else 'N/A'\n",
|
| 676 |
+
" cv_mean_str = f\"{row['cv_mean']:.4f}\" if pd.notna(row['cv_mean']) else 'N/A'\n",
|
| 677 |
+
" cv_std_str = f\"{row['cv_std']:.4f}\" if pd.notna(row['cv_std']) else 'N/A'\n",
|
| 678 |
+
" print(f\" {name:14} {row['f1_test']:>9.4f} {row['f1_train']:>9.4f} \"\n",
|
| 679 |
+
" f\"{row['train_test_gap_pp']:>9.2f} {cv_mean_str:>9} {cv_std_str:>8} \"\n",
|
| 680 |
+
" f\"{cv_gap_str:>10} {row['fp']:>4} {row['fn']:>4} {row['rubrica']:>12}\")\n",
|
| 681 |
+
"\n",
|
| 682 |
+
"best_name = comp_df['f1_test'].idxmax()\n",
|
| 683 |
+
"print(f'\\nGANADOR (F1 test): {best_name}')\n",
|
| 684 |
+
"print(f\" F1 test : {comp_df.loc[best_name, 'f1_test']:.4f}\")\n",
|
| 685 |
+
"print(f\" cv_test_gap : {comp_df.loc[best_name, 'cv_test_gap_pp']:.2f}pp\")\n",
|
| 686 |
+
"print(f\" train_test_gap: {comp_df.loc[best_name, 'train_test_gap_pp']:.2f}pp\")"
|
| 687 |
+
]
|
| 688 |
+
},
|
| 689 |
+
{
|
| 690 |
+
"cell_type": "code",
|
| 691 |
+
"execution_count": 15,
|
| 692 |
+
"metadata": {
|
| 693 |
+
"id": "243374d0"
|
| 694 |
+
},
|
| 695 |
+
"outputs": [
|
| 696 |
+
{
|
| 697 |
+
"data": {
|
| 698 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAABW0AAAJOCAYAAADMCCWlAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAebBJREFUeJzs3Xd4FOXax/HfphPSqAklEECKCIRepKooRVEUFAGlHORIF3hVQKWqgChFEcWjAh4VRUQQpSgioSiKIMVCEem9JoFACsm8f+QwZslukg1JdpJ8P9c117U7c8/MPTu7s0/uPPuMzTAMQwAAAAAAAAAAS/BwdwIAAAAAAAAAgH9QtAUAAAAAAAAAC6FoCwAAAAAAAAAWQtEWAAAAAAAAACyEoi0AAAAAAAAAWAhFWwAAAAAAAACwEIq2AAAAAAAAAGAhFG0BAAAAAAAAwEIo2gIAAAAAAACAhVC0BQA3OHTokGw2m2w2m9q0aePudAAAALKENkzuOHbsmPz8/GSz2fTxxx/nyT4Nw1D16tVls9k0ePDgPNlnftOnTx/z/R4VFeXudPJMVFSUedx9+vS56e1x3QCyh6ItAMuZMGGC+aXuaAoJCbGLnz9/vh5++GGVK1fOLi4r0jZIMpsiIiJy/mDzsTZt2mT5tVuwYEGO73/BggWaMGGCJkyYoOjo6BzfPgAg/4mLi9PMmTPVqlUrlShRQn5+fqpUqZLuu+8+ffTRR0pMTHR3ivkC37F576WXXlJCQoLKli2rRx55JE/2abPZ9NRTT0mS3nvvPR09ejRP9gsgdx06dMi8hi9btszd6eAmeLk7AQC4Wa+//rp27tzp7jRcUqZMGW3cuFGSFBwc7OZs8qcFCxZo/fr1klJ7QdxYzAcAFC5//vmnOnXqpAMHDtjNP3TokA4dOqQVK1aoVq1aqlu3rnsSzEcy+o6lDZPzzp8/r/nz50uSevXqJW9v7zzb9+OPP66RI0cqISFBr7/+ul577bU82zcKD64beevQoUOaOHGiJKl3797q3LmzexNCtlG0BWBpHTp00HPPPWc3z8vL/tJVvXp1NWjQQI0aNdLAgQNd2n69evXMBoQkbd++XcOGDZMkhYWFafHixeYyPz8/h9tISUlRYmKi0+WO+Pr6qkWLFi7lajWzZ89WTEyM+Xzo0KHasWOHJOm5555Thw4dzGXVqlXL6/QAAIXIhQsX1KFDBx05ckSSVLZsWT3zzDOqXbu2Ll26pPXr15tFscIkO22UzBSENozVLFy40OwF3qVLlzzdd2BgoNq2basVK1bo448/1tSpU9O1tQuyuLg4FS1a1N1p5Cp3H+OVK1fk7+9f6K4b14/bGXefF+QTBgBYzPjx4w1JhiSjd+/eWV7v6tWr5nrZvbytW7fOXL9ixYpO83r//feNF1980ahQoYLh4eFhrFu3zrh8+bIxYMAAo0GDBkbp0qUNb29vIygoyGjatKnx3nvv2W3r4MGD5rZat27tcB/z5s0zZs6caVSpUsXw8fEx6tSpY6xduzbD/BMTE40SJUoYkozixYsbSUlJdsurVatmSDJ8fX2NCxcuGIZhGJ9//rnRvHlzIygoyPD29jZCQ0ON5s2bG88++6yRkpKS5deudevWZu7z589Pl9f06dON+vXrG/7+/oa/v7/RuHFj48MPP0y3nXXr1hl33XWXUaxYMcPLy8soWbKk0ahRI2PYsGFGdHS03TlyNB08eDDLOQMACoYxY8aY3wPBwcHGsWPH0sWcPn3aOH/+vPk8ISHBmDp1qhEZGWn4+/sbRYoUMerUqWNMmTLFSEhIsFu3YsWK5vYPHTpk3HvvvYa/v79RoUIFY86cOYZhpH5/NWzY0PD19TWqVq1qLFq0yG4bN37Hz5gxw6hcubLh6+tr1K9f3/j222/t4tevX2907drVuOWWW4zg4GDD29vbKFOmjPHwww8bO3fudLrtm2mjZOU71lEbZvr06ea8GTNm2OX28c
cfm8ueeeYZl48tI4sXLzZuu+02w9fX17jtttuMRYsW2b0Wadsj7733nnHPPfcY4eHhhr+/v+Hr62vccsstxpAhQ4yzZ8/abTdtm2bXrl3GoEGDjJIlSxr+/v7Gvffea+zfv98ufseOHcb9999vlCpVyvDy8jKKFy9uREZGGk8++aRx+PDhTI/jjjvuMNtuN3I1l7Tv1VOnThk9evQwgoODjaCgIKNHjx7G6dOn0+1j1qxZ5jobNmzINN/csGTJEjOHYcOG2S374YcfzGUPP/ywOf/SpUvG+PHjjdtuu83w8/MzAgMDjdatWxsrV660W//G9+z69euNpk2bGn5+fnZ/a8yePduoXLmy4efnZzRq1MhYu3at0bt3b3PddevW2W13w4YNRqdOnYySJUsa3t7eRkREhDFixAizjZ2Zw4cPGxs3bsx0+uWXXzLdVtrPbu/evY0lS5YYkZGRho+PjzF+/Hinf3sYhv17xtn21q5dazRq1Mjw9fU1IiIijJkzZ9ptY/78+Wb8+PHjjbffftuoVq2a4eXlZcyfPz/D/V+5csV4+eWXjXr16hlFixY1/P39jZo1axpjx441Y3LqmnHkyBFj8ODBRpUqVQxfX18jJCTEaNq0qfHpp5/axW3bts3o2rWrERoaav591KVLF2Pr1q03ddzO3ns7d+40Hn30USMsLMzw9vY2ypYta/Tr1884evRoumPI7PVKe824cbq+z5x6PZH7KNoCsJz8ULStXLmy3b7WrVtnnDx5MsM/dCZOnGhuKytF2xv3IckIDAzMtCE4YMAAMz7tH4A7d+405z/44IOGYRhGVFSU4eHh4TTnG4u+GXFWtE1MTDTuuusup/t49tlnzdg9e/YYRYoUcRr7119/UbQFAKST9jtzwoQJmcbHx8cbrVq1cvpd0qpVK7vCbdqiRpUqVdLFjx492vDx8bGb5+HhYezZs8fcRtrv+OrVq6fbhre3t13BbMqUKU7z8/f3N/7880+H276ZNkp2i7YnTpww2xO333673Wv94IMPmvHXiwGuHJszS5YsMWw2W7r1IyMjHbZH2rVr53Sft956q3H16lUzNm2bxtG5KleunHHu3DnDMAzj3LlzRqlSpZxue82aNRkeR1JSklG0aFFDknHXXXelW+5KLoZh/151FF+nTh0jPj7ebh8bNmwwl0+ZMiXT1z43xMfHGyEhIYYko3z58nYdB0aMGGHm9+WXXxqGYRjR0dFG7dq1nb7u1/+ZYhj27e6yZcsafn5+5vPrf2u8+uqrDj+Tt956q91n6bp3333XaRu6evXqWSrcpv3cZjTd+DeJI2k/u5UqVbL7bNxs0fbWW281vL290+WV9r2Stnh54zUoo6JtTEyMUbdu3UyPOyeuGdu3bzeKFy/ucBtp/+b88ssvHR7v9ffE9fegq8ft7L23cuVKw9fX1+H+wsLCjAMHDrj0emWlaJsTryfyBjciA2BpH3zwQbqbWuXEHUxv1oEDB9SzZ0+tWLFC//3vf1WuXDn5+/tr0qRJ+uyzz/Ttt99q3bp1+vTTT1W1alVJ0quvvurSDVAOHDigUaNGafny5YqMjJQkXbp0SQsXLsxwvccee8x8/Pnnnzt8fD3mq6++UkpKiiRp8uTJWrt2rT799FO98MILqlmzZpZv6JaR119/XWvXrpUkNW3aVEuXLtXnn3+u6tWrS5KmTZumn3/+WZK0Zs0aXb16VZL01FNPae3atfr888/10ksvqWHDhrLZbOaQFmnHJFy8eLE2btyojRs3qkyZMjedMwAg/7h8+bLdOLYtW7bMdJ1Zs2Zpw4YNkqTw8HAtXLhQn3zyiSpUqCBJ2rBhg2bOnOlwXU9PTy1dutS8gZMkTZ06VY0aNdJXX32lhx56SFLq0ATvvfeew23s379fkyZN0tdff6127dpJkpKSkjR8+HAzpnHjxpo9e7aWL1+udevWac2aNXrllVckpf7s1ll+N9NGye53bJkyZXTnnXdKkjZv3qwTJ0
5ISv357+rVqyVJtWvXVp06dW7q2K5LTk7W8OHDZRiGJOnhhx/WihUrNGzYMKf3OejWrZvmzZunFStWKCoqSitWrFCvXr0kSbt379YXX3zhcL3r480uXrxYlStXliQdP35ckydPNo/37NmzkqTu3btrzZo1WrZsmV577TW1bt1anp6eGR7LkSNHFBcXJ0m65ZZbMozNLJcbJSUladGiRVqwYIFKliwpSdq1a5f+85//2MWl3e+ff/6ZYQ65xdfXV127dpUkHTt2TD/99JO5bMmSJZKkEiVKmMNvPf/88/rtt98kSR07djTf72FhYZKkESNGOLyx2okTJ1S+fHl99NFHWrlypTp37qyLFy9q3LhxZszQoUO1YsUKdevWTbt37063jePHj2vIkCFKSUlRYGCgZs+erW+++UZ9+/aVJO3duzfd8G556eDBg2rYsKEWL16sZcuWZemamJHdu3ebn7ERI0aY8ydMmKBz586liz9w4IDatWunZcuW6bPPPtNtt93mdNvPP/+8Obxa8eLFNXPmTK1evVqzZ89WjRo1zLibvWYYhqFevXrpwoULkqRatWrpww8/1IoVKzRu3DiVKFFCUuo1q1+/fkpKSpIkDRw4UCtXrtSgQYMkpX6m+vXrZ35mXTluR++9K1euqHfv3kpISJCXl5defvllffvtt3r22WclSadOnTL3ndXXa/bs2XrjjTfMdTp06GBew59//vkceT2Rh9xdNQaAG2X2X2dnvW/zsqdt8+bNHa7/1VdfGXfffbdRsmRJw9PTM13u13uYZKWn7QMPPGDO//TTT835w4cPz/AYUlJSjEqVKhmSjNKlSxvXrl0zDMMwewqEhISYPSxGjx5tbnfx4sV2PTVc5aynbdoeL5999pn5U69JkyaZ84cMGWIYhmHMnTvXnDdr1izj5MmTWdofvWsBoPA6duyY3Xft7t27M12nTp06ZvxXX31lzv/qq6/M+ZGRkeb8tD3Rrv+K5ezZs3b7vf4z9V9++cWc17lzZ3Mbab/je/bsac6Pjo42/P39zWVHjhwxDMMw4uLijAkTJhi1a9e2W359qlevnsNt32wbxTAy/o511oZJ2+PsjTfeMAzDMBYtWmTOmzp1qhnryrE58vPPP9v1REtMTDSXNW3a1GF75MiRI0b//v2NSpUqOezVNmLECIfH/+6775rz16xZY9erzjAMY/Xq1ea8Z5991jhy5IhLw0ulPZbRo0enW+5KLoZh/15N28v33XffNeffeeeddvtI24bu0KFDhvlGR0dn6Sf9jqZ9+/ZluO207fCRI0eme30GDBhgGIZhJCcnG8WKFTMkGT4+PsZ3331n7mPQoEFm/GuvvWYYhv179sYe8IZh/z5t1KiROf/atWtGhQoVzGXXe9rOnDnTnNe3b19z3xs2bDDfz8HBwUZycnKGx5uT0r52AQEBdkPBGIbzz61hZN7TtkKFCubfE4ZhGM2bNzeX/fe//zUMw/7zX7FixXS/1nO0/+TkZLuer998843T47vZa8b27dvN2KCgIOPMmTMO47744gszrkGDBnbLGjRoYC5bunSpy8ft6L23dOlSu89e2s9LRESEIcmw2WzG2bNnXXq9bhzeIqdfT+SdwjPCOIB8ydGNyEJDQ92UzT/uu+++dPO++OKLTG8eER0dneV9tG7d2nx8/b+/WdmGzWZTjx499PLLL+vMmTPasGGDQkNDzZ4CXbt2la+vrySpZ8+emjlzphISEvTwww9LkkqXLq3mzZtr0KBBatu2bZbzdWbfvn3m40ceecRhzPXcHnjgAT3//PM6f/68hg8fruHDh6tYsWJq0qSJ/vWvf5k5AgBw3Y13Ij9x4oRdDy1H0n43NWnSxHzcuHFjhzFpXY8pXry4Oa9YsWKqUqWKJJk9GiXn39lp9xkcHKzq1atr+/btklJ7a4WHh6t79+5avny502Nwtu28aKM40qVLFw0aNEhXr17V559/rqFDh5q/8rneNrkuu8d2Xdqe1f
Xr15e3t7f5vFmzZna9NKXUXyrdfvvtOnbsmMv7dPb+OHTokAzDUMuWLVW1alX99ddfmjZtmqZNm6bAwEDVr19fPXv2VL9+/eThkbUfuBr/6znsTGa53PgLKWfxaV+/rOw3re3bt+uOO+7IcnxavXv31oIFC5wub926tcLDw3X06FEtWbJE06dPd/hLsXPnzunixYuSpMTERKftVUe9ZKtWrWr+2uu6tK9Ho0aNzMeenp5q0KCBeYPD69JeG+bPn+/wJocxMTFmz0pnjhw5km7bjvj5+alhw4aZxl3XvHlzu+vTzWrYsKFdj/HGjRvrhx9+kJT+vSRJ7du3z9LN7M6dO2f2fPX19c3w746bvWbceM0vVapUluLSaty4sbZt25Yu7rrMjtvRey/tdlatWqVVq1alW88wDO3Zs0fVqlXL8uuVmZt9PZF3GB4BgKWVLl1aLVq0sJuu/5TPnRwVjt98803zcZ8+ffTtt99q48aNuvvuu83514ciyIpixYqZj9M2ALLSsL5xiIS0Dd6ePXuaj2vVqqVt27Zp2LBhatKkiYKDg3XmzBktXbpU7dq1048//pjlfG/G9Z8YhYWFadu2bRo1apRatGihEiVK6OLFi1q9erUeeeQRffrpp3mSDwAg/wgICDB/Ki7JLCZkR1aGBbpeJE5biAsKCnIYm9Vi2I37PXLkiPkHdUBAgN566y1FRUUpKirKjHHWpsiLNoojgYGBuv/++yVJmzZt0sGDB7Vy5UpJUqtWrRQeHn7Tx+ZIVs7Z0qVLzYJtjRo1tGjRIm3cuNHuJ8BZ2aejffn7++uHH37QpEmTdOeddyosLEyXLl3S+vXr9e9//1vTpk3LcJtpi/zXC5FZ4eoQVhnFp91v2nzyWtri/uHDh/XLL7+YQyNUqlRJt99+u0vbc/QTdlc7f9zMUGGO9p/WvHnz1LJly0yn68NGZJWjY0x7HMnJyXbLHA1xkJHMXpPsdLC5PgyeIzl9zciumz3um+l4dON7KaPXKzNWeT2RNRRtASAbHH1JHj9+3Hw8e/Zs3X333br99tvt5ueVGjVqqH79+pJSe9csXrxYUuq4fWl78BqGodtuu02vv/66fvrpJ0VHR5sF3pSUFC1btuymc6lWrZr5+MCBAzJSb4JpN10f89YwDFWsWFFTp07Vxo0bde7cOf3yyy/m+mnHm0v7xzINCwAo3Lp162Y+njFjhjmmalpnzpwxeyml/W7asmWL+fj6GOs3xuS0tPuMiYnR3r17zeeVK1e2azu0a9dOAwcOVOvWrc1fymQkJ9oo2f2Ovf6P4ZSUFD355JNmoSHtP5Nv5tiuu96rWUrt+Zm2CLV58+Z08Wn3OXjwYD3yyCNq0aKF4uPjM92Xs/dHRESEbDabDMNQqVKlNHbsWK1du1YnT57UgQMHFBAQIElOx8q9rkKFCipatKik1LGObyaXrMan/SfHjfutWbNmhjm0adPGYVsuK1NGvWyvS9u54Pnnnzd7cvbo0cM8xpIlS5qdGwICAnTp0qV0+0pOTnbYA9bR65T29di6dav5ODk52e75dWmvDePHj3d4rHFxcel6VeYVR8eY9hcJp06dMh9v2rQp0+Lytm3b7K4DGb2XnO3fkbTnMT4+Xt99953DuJy4Ztx4zXdWqHb23XDjc0ffD5kdt6PlabfTu3dvp++ldu3aZfn1kjK+hufE64m8w/AIAPK99evX6+zZs+aA8dddLz6WKlXKrlCZWypWrGj+xGXcuHFq166dPvzwQ7fd0OGxxx7Tr7/+qlOnTpmNs7QNXin1JmBRUVG69957zT8avvnmG3N5QkLCTefRs2dP86Yg9913n5599lmVL19eJ0+e1J49e/Tll1/q//7v/9SnTx998sknmjt3rjp37qxKlSopODhY33//vcN80vZEfvfdd9WxY0cVKVLEpZ+PAQAKhqeffloff/yxjhw5oujoaDVp0kRPP/20ateurUuXLikqKkrz589XVFSUihcvrh49em
jXrl2SUot4ly5dks1m0+jRo81tdu/ePdfy/eSTT1SjRg3Vq1dPb775plk0qVevnsLDw+1+ivz999/rk08+kaenZ7ZvbuRqGyW737Ht27dXiRIldP78ea1Zs0aS/Q2mrudys8dWv35982f0J06cUK9evdSzZ09988036YZGuHGf8+bNU+XKlbV//3699NJLme5rzJgx8vLyUtGiRTVmzBhz/gMPPCBJ+vHHHzVs2DB16dJFVatWVcmSJbVr1y5duXJFUuZtKS8vLzVu3Fjr1q0zh8jIbi43evLJJzVlyhTFx8ebNyByFJ92v82bN88wh9x2/YZ1u3btMt9Dkn3h38PDQ927d9dbb72ly5cv65577tGwYcNUsmRJHTt2TL///ru++OILzZs3T23atMl0n3fffbf8/PwUHx+vLVu2aPjw4WrXrp0+/fRTh8MXdO3aVaNHj1ZCQoKmTp0qm82mZs2a6cqVKzp48KDWrVunq1ev2uXvyIQJEzRhwoQsvzY3IyQkxPxs7t+/XwMGDFD16tX12muvZbru4cOH1bt3b/Xo0UNr1641f83g6+ur9u3bZzsnDw8P9ejRQ3PmzJGU+nfK2LFjVaNGDR04cEDLly/XypUrc+SaERkZqVq1aun3339XTEyM7rrrLj377LMqXry4tm3bposXL2r69Om65557zNdp69atGjJkiO69916tXLnSLOCXLFnS7lcKN+Puu+9WqVKldPbsWf33v/9V8eLFdffddys5OVmHDh3SDz/8oJ07d+rPP//M8usl2V/DN23apFWrVikwMFDVqlXLkdcTeSiXxsoFgGxLezMNZzcdSyvtDRocTTcOtp+RrN6ILO2NLa5bvHhxun37+fnZDVp//QYGWbkRWdp9ZDaYvCMnTpxId6ORXbt22cW8+OKLTl83Dw8PY9OmTVnal2E4vxFZQkKCcdddd2V4jq7Hf/jhhxnGffLJJ+Z2Z8+enW75jecMAFB4/PHHH0blypUz/B7Zvn27YRiGER8fb7Rs2dJpXKtWrYyEhARz245u1GMYhsPvn6x8x6e9Edr1ycvLy2wnGIZh3Hvvveli0t4AKO0+c7KNYhgZf8dmdEMjwzCMgQMH2q330EMPpYtx5dicWbJkiWGz2dJtp3bt2ulei9jYWKNMmTIZ7jNt+yptm8bRuSpTpox5I6ONGzdm+J6bMmVKpseS9vXeunWr3TJXcjEM+/eqo/hatWoZV69edXg+wsLC7G445S6vvPKKXc7169dPF3Px4kW7c+1oyqzdndbUqVPTre/h4WF3TUn7GXn33XcNDw8Pp/t25e+PnJCVvxXGjBnj8P0TEhKS7vqWdnuVK1d2eKwvvfSSGZ/2hlzjx49Pt29n5yA6Otrh+/TG60BOXDO2bdtmd6xpp7Sv2bJlywxvb2+Hcd7e3saXX35508ed1ooVKxzeHNHRsWX19UpKSjLCwsLSxVy/JubE64m8wfAIAJBDunbtqnfeeUdVq1aVn5+fGjVqpNWrV6tWrVpuyadMmTK68847zed16tRR7dq17WI6duyoJ598UrVq1VKxYsXk6emp4sWL65577tE333yTI70tfHx8tHr1ar3xxhtq3LixAgMD5efnp0qVKunee+/V+++/rwcffFBS6s1DnnrqKdWvX18lS5aUp6engoOD1bJlSy1atEiPPvqoud0nn3xSo0aNUoUKFbJ8gw8AQMFVs2ZN7dq1SzNmzFCLFi1UvHhx+fj4KDw8XO3atdMHH3xg/vTb19dXa9as0dSpU1WnTh0VKVJEfn5+ql27tqZMmaJvv/1WPj4+uZbriBEj9Oabb6pKlSry8fFRvXr19PXXX9v1Cvzwww/Vu3dvlSxZUiEhIXr88cf11VdfZWt/rrZRbuY7Nm2PSEfPpZw5toceekifffaZatasKR8fH916661auHCh7rrrLjPG399fUup4u2vWrNGdd96pgIAAlStXTpMmTdKkSZMy3c8nn3yiYcOGqVSpUipSpI
g6dOigDRs2mDcyqlatmkaNGqWmTZsqNDRUXl5eCggIUKNGjTRnzhyNGjUq03306NHD/GlyRsMpZJbLjdauXavHH39cwcHBCgwM1KOPPqrvvvtOfn5+ZsylS5fMn1k/9thjdr283aVHjx527ztH76GQkBBt3rxZL774oiIjI1WkSBH5+/uratWq6tq1qz755BM1bdo0y/scNWqUXn/9dUVERMjX11d169bVl19+qZYtWzqMf+KJJ7RhwwY99NBD5nkPDQ1V48aNNXbsWL311luuH3guGzdunP79738rJCRERYsW1QMPPKAffvgh3c0cb9SyZUstX75c9erVk6+vrypWrKjp06fb9d7OruDgYIfn8dZbb1WvXr3MuJy4ZtSvX187d+7UwIEDVblyZfn4+CgkJERNmzZVhw4dzLgHHnhAmzdvVteuXVW6dGl5eXmpVKlSeuihh/Tjjz+aY3fnlI4dO2rr1q16/PHHVb58eXl7e6tkyZKqW7euRo4caQ5zJ2X99fLy8tLy5cvVokULBQYGpttnTn6/IHfZDMOFW0UCAAAAAFw2YcIETZw4UVLqHef79Onj3oTyOcMwHI4R2bRpU3PMzV9//VX16tVzedtt2rTR+vXrJUkHDx5URETETeWaFQMHDtTcuXNVrlw5HTx4UN7e3tnKJSIiQocPH5akLN0I76233tLgwYPl6+urv/76y7xhHADA/eiaBAAAAADIVzZu3Kju3bvrm2++0eHDh7Vz504NHjzYLNhWr15dkZGRbs4y655//nn5+vrq+PHj+uyzz/Jkn4Zh6PXXX5eU2nOUgi0AWAs3IgMAAAAA5CspKSn69NNP9emnn6ZbFhgYqAULFuSr4ZPKly+v+Pj4PN2nzWbT3r1783SfAICsyz/fYgAAAAAASKpcubIee+wxValSRf7+/vL19dUtt9yigQMHaufOnS6NZwoAgBW5PKbthg0b9Oqrr2rbtm06efKkli5dqs6dO2e4TlRUlEaOHKk//vhD4eHheuGFFxjDCQAAAAAAAAAccLmnbVxcnCIjIzVnzpwsxR88eFD33nuv7rjjDu3YsUPDhw/XE088oW+++cblZAEAAAAAAACgoHO5p63dyjZbpj1tR40apRUrVuj333835z366KOKjo7W6tWrs7trAAAAAAAAACiQcn1M282bN6tt27Z289q1a6fNmzc7XSchIUGxsbHmFBMTo7Nnz+om6ssAAABAnjMMQ7GxsbRjAQAA4JJcL9qeOnVKoaGhdvNCQ0MVGxurq1evOlxnypQpCg4ONqeQkBCVLl1aly5dyu10AQAAUEhMmTJFjRo1UmBgoEqXLq3OnTunu5N6mzZtZLPZ7KYBAwZkeR+XLl1ScHAw7VgAAAC4JNeLttkxZswYxcTEmNPRo0fdnRIAAAAKmPXr12vw4MH66aeftGbNGiUlJemee+5RXFycXVz//v118uRJc5o2bZqbMgYAAEBh4ZXbOwgLC9Pp06ft5p0+fVpBQUEqUqSIw3V8fX3l6+ub26kBAACgELvx/goLFixQ6dKltW3bNrVq1cqc7+/vr7CwsLxODwAAAIVYrve0bdasmdauXWs3b82aNWrWrFlu7xoAAADIspiYGElS8eLF7eZ//PHHKlmypGrVqqUxY8boypUrTrdx470ZYmNjczVnAAAAFEwu97S9fPmy9u/fbz4/ePCgduzYoeLFi6tChQoaM2aMjh8/rv/+97+SpAEDBujNN9/Us88+q3/961/6/vvv9dlnn2nFihU5dxQAAADATUhJSdHw4cPVvHlz1apVy5zfo0cPVaxYUWXLltWuXbs0atQo7d27V1988YXD7UyZMkUTJ07Mq7QBAABQQNkMF29lGxUVpTvuuCPd/N69e2vBggXq06ePDh06pKioKLt1RowYoT///FPly5fX2LFj1adPnyzvMzY2VsHBwYqJiVFQUJAr6QIAAACZGjhwoFatWqVNmzapfPnyTuO+//573XXXXdq/f7
+qVKmSbnlCQoISEhLM57GxsQoPD6cdCwAAAJe4XLR1h6wUbVNSUpSYmJjHmRUc3t7e8vT0dHcaAAAAeW7IkCH68ssvtWHDBlWqVCnD2Li4OAUEBGj16tVq165dptvOrB1rGIauXbum5OTkbOeP/I12OAAAcCTXb0SWFxITE3Xw4EGlpKS4O5V8LSQkRGFhYbLZbO5OBQAAINcZhqGhQ4dq6dKlioqKyrRgK0k7duyQJJUpU+am95+YmKiTJ09mOEYuCj6bzaby5csrICDA3akAAAALyfdFW8MwdPLkSXl6eio8PFweHrl+b7UCxzAMXblyRWfOnJGUM3+EAAAAWN3gwYO1cOFCffnllwoMDNSpU6ckScHBwSpSpIj+/vtvLVy4UB07dlSJEiW0a9cujRgxQq1atVKdOnVuat8pKSk6ePCgPD09VbZsWfn4+PCP80LIMAydPXtWx44dU9WqVelxCwAATPm+aHvt2jVduXJFZcuWlb+/v7vTybeKFCkiSTpz5oxKly5NgxEAABR4b7/9tiSpTZs2dvPnz5+vPn36yMfHR999951mzZqluLg4hYeHq0uXLnrhhRduet+JiYlKSUlReHg4bdhCrlSpUjp06JCSkpJogwMAAFO+L9peH//Lx8fHzZnkf9f/YKDBCAAACoPMbu0QHh6u9evX52oO/EoM9LAGAACOFJhWIo2dm8drCAAAAAAAALhfgSnaAgAAAMg7KSkpatOmjebOnevyuhs3blT58uVzISsAAICCgaJtARIREaFZs2a5Ow0AAAAUEG3atHHavnz11VdVrVo1DRgwwOXttmzZUseOHcvSfrIqIiJCRYoUUUBAgAICAhQSEnJT2wMAAHCnfD+mrTPD/r0hT/f3xn9aZTk2s2EIxo8frwkTJricwy+//KKiRYu6vB4AAADgiqSkJHl5eenNN990dyp2PvnkE3Xu3NndaQAAANw0etq6wcmTJ81p1qxZCgoKspv39NNPm7GGYejatWtZ2m6pUqW4+zAAAEB+FhfnfIqPz3rs1avOY3OAt7e3Zs+erZUrV0qSFixYoLp162rcuHEqWbKkwsLCtGjRIv3www+qVauWgoOD1a9fP6WkpEiSoqKizJ6w//d//6eNGzdq1KhRCggIUIcOHSRJly9f1pAhQ1ShQgWVLl1avXr1UkxMTLbyPXTokGw2m959911FRESoRIkSGjRokBITE+3ymT17tsqUKaOwsDCNHz8+05vVAQAA5BaKtm4QFhZmTsHBwbLZbObzPXv2KDAwUKtWrVKDBg3k6+urTZs26e+//9YDDzyg0NBQBQQEqFGjRvruu+/stnvj8Ag2m03vvfeeHnzwQfn7+6tq1apavnx5Hh8tAAAAsiwgwPnUpYt9bOnSzmP/V/g0RUT8syyX/P777ypZsqROnTqll19+Wf/+97/1+uuva/369dq9e7e+/vprLVu2LN1606dPV8uWLfXKK6/o8uXLWrVqlSTpX//6ly5cuKBdu3bp4MGDSkpK0pAhQzLM4cknn1TJkiXVrFkzs6Cc1tKlS7Vjxw799ttv+vHHHzVlyhRz2aVLl/Trr7/q77//VlRUlObNm6f//ve/N/eiAAAAZBNFW4saPXq0pk6dqt27d6tOnTq6fPmyOnbsqLVr12r79u1q3769OnXqpCNHjmS4nYkTJ+qRRx7Rrl271LFjR/Xs2VMXLlzIo6MAAABAYVGqVCkNGzZMXl5e6t69u2JjY9WvXz+VKFFCZcuWVevWrfXrr79maVtnz57VkiVLNGfOHIWEhKho0aKaNGmSFi1apOTkZIfrfPjhhzp48KCOHz+uoUOHqkuXLvrll1/sYiZMmKCQkBCVLVtWY8aM0YcffmguS0lJ0SuvvCJ/f3/VqFFDQ4YMsVsOAACQlwrsmLb53aRJk3T33Xebz4sXL67IyEjz+YsvvqilS5dq+fLlGfY46NOnj7p37y5Jmjx5st544w1t2bJF7du3z73kAQAAkD2XLz
tf5ulp//zMGeexHjf0zTh0KNspZVVoaKj5+PqQXTfOu5zR8aVx6NAhpaSkqFKlSnbzPTw8dOrUKZUrVy7dOi1btjQf9+jRQ8uWLdOSJUvUqFEjc37FihXtHh8/ftx87ufnp9KlSztdDgAAkJco2lpUw4YN7Z5fvnxZEyZM0IoVK3Ty5Eldu3ZNV69ezbSnbZ06dczHRYsWVVBQkM5k1MAHAACA+7hyU9ncinUDjxuKzOHh4fLw8NCJEyeyfc+GG7cpSYcPHzYLyUeOHLEr/sbHx+vMmTNm4fbG5QAAAHmJ4REsqugNDeunn35aS5cu1eTJk7Vx40bt2LFDtWvXNm+e4Iy3t7fdc5vNZt4AAgAAAMjMtWvXFB8fb04JCQk5vo/Q0FD9/fff5vOwsDB17txZQ4YM0blz5yRJp06d0tKlSx2uf+TIEW3YsEEJCQlKSkrSZ599pi+//FKdO3e2i5s0aZKio6N14sQJTZkyRT179jSXeXh4aMyYMbp69ar27t2rOXPm2C0HAADISxRt84kffvhBffr00YMPPqjatWsrLCxMh/LgZ24AAAAo3J555hkVKVLEnKpXr57j+xg+fLi+++47hYSE6L777pMkLViwQCEhIWrUqJGCgoLUsmVLbdu2zeH6ly9f1rBhw1SiRAmVKlVKr732mj777DM1bdrULu6BBx5Q3bp1VatWLTVp0kTPPfecuSwwMFB169ZV5cqV1apVK/Xq1Uu9e/fO8WMFAADICoZHyCeqVq2qL774Qp06dZLNZtPYsWPpMQsAAIBcFRUV5XRZ2g4Effr0UZ8+feyWG4Zh93zBggXm4zZt2ig6Otp83qRJE+3evdsuPjAwUDNmzNCMGTMyzbNmzZrasWNHpnEPP/yw+vfv73T50KFDNXTo0Ey3AwAAkNvoaZtPzJgxQ8WKFdPtt9+uTp06qV27dqpfv7670wIAAAAAAACQwwpsT9s3/tPK3SlkyY29Etq0aZOuV4IkRURE6Pvvv7ebN3jwYLvnNw6X4Gg7aXs0AAAAAAAAALCeAlu0BQAAAAAptQOEow4N1904XAMAAFZy8dVR7k7BJcWeecXdKRQIDI8AAAAAAAAAABZC0RYAAAAAAAAALISiLQAAAAAAAABYCEVbAAAAAAAAALAQirYAAAAAAAAAYCEUbQEAAADkqiNHjiggIEAxMTHuTgUAACBf8HJ3AgAAAABSXXx1VK7vo9gzr2QpLiAgwHx89epVeXl5ydvbW5LUsmVLrVq1Ksv7rFChgi5fvuxaopImT56syZMnS5KSk5MVHx+vokWLmstXrVqlli1burTNCRMmaMeOHVq2bJnL+QAAAOQVetoCAAAASOfy5cvm1LJlS73yyivm87QF22vXrskwjFzJ4bnnnrPbZ3BwcLq8AAAACiKKtgAAAABcYrPZ9Oabb6pWrVoqWrSoLl++rBkzZqhq1aoKDAxUlSpV9Oabb5rxhw4dks1mU3R0tCSpT58+6t+/vx599FEFBgaqevXqioqKcimHpKQkjRs3TlWqVFGJEiV0//3368SJE5IkwzA0atQohYWFKSgoSNWqVdPXX3+tZcuWafLkyfr6668VEBBg15s4rYSEBA0YMEDFixdXpUqV9P7778tms+nQoUOSpG+//VYNGzZUcHCwypQpo0GDBunq1avm+hEREXr55ZdVv359BQUFqV27dmZuAAAAWVFgh0dYvPBcnu7v4R4lsxxrs9kyXD5+/HhNmDAhW3nYbDYtXbpUnTt3ztb6AAAAQFYsXLhQ3377rUqUKCFvb29VrFhR33//vcqXL6+oqCh17NhR9erVU/PmzR2uv2jRIi1fvlwff/yxpkyZoj59+phF0ax4/vnntW3bNm3atEklSpTQc889p0cffVQbNmzQmjVrtHDhQv36668qW7asjhw5ovj4eFWrVk3PPfdcpsMjvPTSS9q6dav++OMP+fv7q2fPnnbLixQponfffVd16tTR4cOHde+992rGjBl6/v
nnzZj33ntPq1atUoUKFTRw4EA99thj+v7777N8fAAAoHCjp60bnDx50pxmzZqloKAgu3lPP/20u1MEAAAAMvTss8+qbNmy8vX1lYeHh7p06aLw8HDZbDbdcccdateuXYa9Zzt27Kg2bdrI09NTffv21eHDh3X+/Pks7dswDL311luaMWOGypQpIx8fH7300kv64YcfdPToUXl7eys+Pl5//PGHkpKSVKFCBVWrVi3Lx7Zw4UKNHj1aZcqUUXBwsMaPH2+3vGXLlqpXr548PT1VuXJlPfnkk+mOdeDAgapRo4b8/f01bdo0rVu3TseOHctyDgAAoHArsD1trSwsLMx8HBwcLJvNZjfvvffe0/Tp03Xw4EFFRERo2LBhGjRokCQpMTFRI0eO1JIlS3Tx4kWFhoZqwIABGjNmjCIiIiRJDz74oCSpYsWKLvVWAACgIFvUo427U8iybguj3J0CkKkKFSrYPf/44481ffp0HTp0SCkpKbpy5YoqVarkdP207d/rNxe7dOmS/vzzT3Xo0MFc5ugGZufOnVNcXJxatWpl9ys2Hx8fHT16VHfccYcmTpyosWPHavfu3Wrbtq1ee+21DPNJ68SJEwoPD3d6rL/88ovGjBmj3377TVevXtW1a9dUvXp1u5iKFSuaj0NDQ+Xr66vjx4+rfPnyWcoBAAAUbvS0tZiPP/5Y48aN08svv6zdu3dr8uTJGjt2rD744ANJ0htvvKHly5frs88+0969e/Xxxx+bxdpffvlFkjR//nydPHnSfA4AAADkNA+Pf/6UOHLkiHr37q1p06bpzJkzio6OVseOHbN1g7KWLVva3WzMkRIlSsjf318///yzoqOjzenq1au6/fbbJUmDBg3STz/9pCNHjsjX11fDhg1Ll7czZcuW1dGjR+2OL63u3bvrjjvu0IEDBxQbG6vJkyenO9bDhw+bj8+cOaOEhASVK1cuay8CAAAo9CjaWsz48eM1ffp0PfTQQ6pUqZIeeughjRgxQu+8846k1AZj1apV1aJFC1WsWFEtWrRQ9+7dJUmlSpWSJIWEhCgsLMx8DgAAAOSmy5cvyzAMlS5dWh4eHlq5cqW+/fbbXNufh4eHBgwYoP/7v/8zi6vnz5/XokWLJKV2Zvjxxx+VmJioIkWKqGjRovLySv2RYWhoqA4fPqxr16453X737t01bdo0nTp1SjExMXrxxRftlsfGxiokJERFixbV7t279fbbb6fbxjvvvKO9e/fq6tWrGjVqlFq1akUvWwAAkGUUbS0kLi5Of//9t/r162fezTYgIEAvvfSS/v77b0mpd9rdsWOHqlevrmHDhuVqYxgAAADIipo1a+r555/XnXfeqRIlSmjRokW6//77c3WfU6ZMUbNmzXTnnXcqMDBQDRo0MNvGsbGxGjRokEqUKKGwsDCdOHFCr7/+uiTp4YcfVlBQkEqVKqWQkBCH237hhRcUGRmpmjVrqm7duurYsaMkydfXV1JqQfa1115TQECABgwYoEcffTTdNv71r3+pe/fuCg0N1fHjx/Xxxx/nwqsAAAAKKpuRnd8s5bHY2FgFBwcrJiZGQUFBdsvi4+N18OBBVapUSX5+fub8xQvP5WmOD/coma31FixYoOHDhys6OlqnT59WWFiYPvroIzVp0sQuztPT0xyDKzY2VqtWrdJ3332nxYsXq23btvr8888lSTabTUuXLlXnzp1dzsXZawkAQEHAmLZwB2ftWNpd+cvmzZvVpk0bxcfH242h60xERIRmzZqVpTY57wUAQGYuvjrK3Sm4pNgzr7g7hQKBG5FZSGhoqMqWLasDBw6oZ8+eTuOCgoLUrVs3devWTV27dlX79u114cIFFS9eXN7e3kpOTs7DrAEAAICC5cyZM/rjjz/UqlUrnT59WmPGjFGXLl2yVLAFAADICRRtLWbixIkaNmyYgoOD1b59eyUkJGjr1q26ePGiRo4cqRkzZqhMmTKqV6+ePDw8tHjxYoWFhZk/7YqIiN
DatWvVvHlz+fr6qlixYu49IAAAACCfSU5O1ogRI7R//375+/vr7rvv1htvvOHutAAAQCFC0dZinnjiCfn7++vVV1/VM888o6JFi6p27doaPny4JCkwMFDTpk3TX3/9JU9PTzVq1EgrV64074I7ffp0jRw5Uu+++67KlSunQ4cOue9gAAAAgHyoTJky2rFjR7bXpw0OAABuVoEd0xau47UEABRkjGkLd2BMW2SG9wIAIDOMaVs4ebg7AQAAAAAAAADAPyjaAgAAAAAAAICFULQFAAAAAAAAAAuhaAsAAAAAAAAAFkLRFgAAAAAAAAAsxMvdCeQUwzDcnUK+l5KS4u4UAAAACrUZ+6JyfR8jq7VxKX7Tpk16+eWX9dNPP8kwDFWsWFE9e/bUgAEDFBERof/85z/q2rWr3Tpr1qzRgw8+qJMnTyowMFBHjhxRzZo1zeVXrlyRr6+vPD09JUmPPfaY5s6d61Jehw4dUqVKlXTx4kWFhIS4tC4AAIDV5fuirbe3t2w2m86ePatSpUrJZrO5O6V8xzAMJSYm6uzZs/Lw8JCPj4+7UwIAAIAFfP311+revbtefPFFffjhhypZsqT27NmjqVOn6uLFi+rZs6fmzZuXrmg7b948PfLIIwoMDJQkVahQQZcvXzaXR0REaNasWercuXNeHg4AAEC+ke+Ltp6enipfvryOHTumQ4cOuTudfM3f318VKlSQhwejZgAAABR2hmFo2LBhGjVqlIYPH27Or1GjhhYsWCBJ6tevnxo2bKgTJ06obNmykqTo6GgtW7ZM3333XZb39d133+m5557Tvn37VK5cOU2ZMkX333+/pNReu//3f/+ngwcPyt/fXw899JDefvttNW7cWJJUvnx5SdI777yjnj17ptv2559/rtGjR+vs2bN65JFHdPLkSTVs2FATJkzQ5cuX1bNnT23evFkJCQmKjIzU7NmzFRkZKUmaMGGCtm7dqtDQUC1evFihoaGaNm2aHnzwQZdfTwAAAFfk+6KtJAUEBKhq1apKSkpydyr5lqenp7y8vOipDAAAAEnSX3/9pYMHD6p79+5OY+rWravIyEh98MEHGjNmjCRp4cKFqlixopo3b56l/ezatUsPP/ywlixZojZt2ujHH3/Uvffeqy1btqh69erq3bu3XnnlFT3++OOKi4vTzp07JUlbtmxRpUqVdOzYMafDI+zbt0+PP/64li5dqrZt22r+/PkaNGiQGjZsKCl1eLAePXpo4cKF8vT01KhRo/TII49oz549Zrt49erVmjNnjt555x2tWrVKDz/8sP744w9VqVIlqy8lAACAywpE0VZKLTpeHxMLAAAAwM05e/asJKlcuXIZxvXr10+zZs0yi7bz5s1Tv379sryfd955R3369NGdd94pSWrRooXuu+8+ffbZZxo7dqy8vb21f/9+czi022+/PcvbXrRoke666y61b99ektS/f3/NmjXLXB4UFKRu3bqZzydOnKg33nhDJ06cMI+7WrVqevLJJyVJnTp10h133KFPPvlEL7zwQpbzAAAAcBW/gwcAAACQTsmSJSVJx48fzzCuR48eOnr0qDZt2qTffvtNO3fuVK9evSRJt912mwICAhQQEKCPP/7Y4fqHDh3S3LlzFRISYk5ffvmlTpw4IUlaunSpfv/9d1WvXl316tXTZ599luVjOHHihMLDw+3mVahQwXx89epVDRo0SBEREQoKClJERIQk6dy5c2ZMxYoV7davWLFipq8JAADAzSowPW0BAAAA5Jxq1aopIiJCn376qZ5//nmncSEhIerSpYvmzZun4OBg3XfffQoNDZUk/fHHH5nuJzw8XE899ZSmTp3qcHn9+vW1ZMkSpaSkaNmyZXrkkUfUunXrLN2HoWzZsvr555/t5h05ckRNmjSRJE2fPl3btm3Tpk2bVL58eUVHR6tYsWIyDMOMP3z4cLr1XentCwAAkB30tAUAAACQjs1m0+zZszV16lTNnj1b58+fl5Q6Tm
y/fv3sipn9+vXT4sWL9dFHH7k0NIIkPfnkk5o/f77WrVun5ORkJSQkaPPmzdq9e7cSExP14Ycf6uLFi/Lw8DDHrvXy8lKpUqXk4eGhv//+2+m2H3nkEX333Xf69ttvde3aNc2bN0/79u0zl8fGxsrPz0/FihXT5cuX9dxzz6Xbxr59+/Tuu+/q2rVrWrFihb7//nu7IRUAAAByA0VbAAAAAA7dd999WrVqlVasWKEqVaooJCREXbt2VY0aNVSmTBkzrk2bNgoNDZWPj486dOjg0j7q1atnjhFbqlQplStXTmPHjlVCQoKk1Bub3XLLLQoMDNTQoUO1cOFClShRQkWKFNH48ePVoUMHhYSEaOHChem2Xb16dX3wwQcaOHCgSpQooc2bN+vOO++Ur6+vJGnkyJHy9PRUaGioatWqpWbNmqXbRvv27fXTTz+pePHieuqpp/TRRx+patWqLh0jAACAq2xG2t/+WFRsbKyCg4MVExOjoKAgd6cDAADyoUU92rg7hSzrtjDK3Skghzhrx8bHx+vgwYOqVKmS/Pz83Jhh4VO9enWNGzdOPXv2zDR2woQJ2rFjh5YtW5Zr+fBeAABk5uKro9ydgkuKPfOKu1MoEOhpCwAAAKDA+uqrr3Tp0iUlJCRo+vTpOnnypNq3b+/utAAAADLEjcgAAAAAFFjffPONevfuraSkJFWvXl3Lly9XiRIl3J0WAABAhijaAgAAACiw3nzzTb355pvZWnfChAk5mwwAAEAWMTwCAAAAAAAAAFgIRVsAAAAAAAAAsBCKtgAAAICbpKSkuDsFuJlhGO5OAQAAWBBj2gIAAAB5zMfHRx4eHjpx4oRKlSolHx8f2Ww2d6eFPGYYhs6ePSubzSZvb293pwMAACyEoi0AAACQxzw8PFSpUiWdPHlSJ06ccHc6cCObzaby5cvL09PT3akAAAALoWgLAAAAuIGPj48qVKiga9euKTk52d3pwE28vb0p2AIAgHQo2gIAAABucv1n8fw0HgAAAGlxIzIAAAAAAAAAsBCKtgAAAAAAAABgIRRtAQAAAAAAAMBCKNoCAAAAAAAAgIVQtAUAAAAAAAAAC6FoCwAAAAAAAAAWQtEWAAAAAAAAACyEoi0AAAAAAAAAWAhFWwAAAAAAAACwEIq2AAAAAAAAAGAhFG0BAAAAAAAAwEIo2gIAAAAAAACAhVC0BQAAAAAAAAALoWgLAAAAAAAAABZC0RYAAAAAAAAALISiLQAAAAAAAABYCEVbAAAAAAAAALAQirYAAAAAAAAAYCEUbQEAAAAAAADAQijaAgAAAAAAAICFULQFAAAAAAAAAAuhaAsAAAAAAAAAFkLRFgAAAAAAAAAshKItAAAAAAAAAFgIRVsAAAAAAAAAsBCKtgAAAAAAAABgIRRtAQAAAAAAAMBCKNoCAAAAAAAAgIVkq2g7Z84cRUREyM/PT02aNNGWLVucxiYlJWnSpEmqUqWK/Pz8FBkZqdWrV2c7YQAAAAAAAAAoyFwu2i5atEgjR47U+PHj9euvvyoyMlLt2rXTmTNnHMa/8MILeueddzR79mz9+eefGjBggB588EFt3779ppMHAAAAAAAAgILG5aLtjBkz1L9/f/Xt21c1a9bU3Llz5e/vr3nz5jmM//DDD/Xcc8+pY8eOqly5sgYOHKiOHTtq+vTpN508AAAAAAAAABQ0LhVtExMTtW3bNrVt2/afDXh4qG3bttq8ebPDdRISEuTn52c3r0iRItq0aVM20gUAAAByxpQpU9SoUSMFBgaqdOnS6ty5s/bu3WsXEx8fr8GDB6tEiRIKCAhQly5ddPr0aTdlDAAAgMLCpaLtuXPnlJycrNDQULv5oaGhOnXqlMN12rVrpxkzZuivv/5SSkqK1qxZoy+++EInT550up+EhATFxsbaTQAAAEBOWr9+vQYPHqyffvpJa9asUVJSku655x7FxcWZMSNGjNBXX32lxYsXa/369Tpx4oQeeughN2YNAACAws
Art3fw+uuvq3///qpRo4ZsNpuqVKmivn37Oh1OQUrt9TBx4sTcTg0AAACF2I03x12wYIFKly6tbdu2qVWrVoqJidH777+vhQsX6s4775QkzZ8/X7feeqt++uknNW3a1B1pAwAAoBBwqWhbsmRJeXp6pvtJ2OnTpxUWFuZwnVKlSmnZsmWKj4/X+fPnVbZsWY0ePVqVK1d2up8xY8Zo5MiR5vPY2FiFh4e7kiqQbbNfO+ruFLJs6NN8LgAAyCkxMTGSpOLFi0uStm3bpqSkJLuhwWrUqKEKFSpo8+bNFG0BAACQa1waHsHHx0cNGjTQ2rVrzXkpKSlau3atmjVrluG6fn5+KleunK5du6YlS5bogQcecBrr6+uroKAguwkAAADILSkpKRo+fLiaN2+uWrVqSZJOnTolHx8fhYSE2MVmNDQYw3wBAAAgJ7hUtJWkkSNH6t1339UHH3yg3bt3a+DAgYqLi1Pfvn0lSb169dKYMWPM+J9//llffPGFDhw4oI0bN6p9+/ZKSUnRs88+m3NHAQAAANyEwYMH6/fff9enn356U9uZMmWKgoODzYlfiwEAACA7XB7Ttlu3bjp79qzGjRunU6dOqW7dulq9erV5c7IjR47Iw+OfWnB8fLxeeOEFHThwQAEBAerYsaM+/PDDdD0WAAAAAHcYMmSIvv76a23YsEHly5c354eFhSkxMVHR0dF2bdeMhgZjmC8AAADkhGzdiGzIkCEaMmSIw2VRUVF2z1u3bq0///wzO7sBAAAAco1hGBo6dKiWLl2qqKgoVapUyW55gwYN5O3trbVr16pLly6SpL179+rIkSNOhwbz9fWVr69vrucOAACAgi1bRVsAAAAgvxs8eLAWLlyoL7/8UoGBgeY4tcHBwSpSpIiCg4PVr18/jRw5UsWLF1dQUJCGDh2qZs2acRMyAAAA5CqKtgAAACiU3n77bUlSmzZt7ObPnz9fffr0kSTNnDlTHh4e6tKlixISEtSuXTu99dZbeZwpAAAAChuKtgAAACiUDMPINMbPz09z5szRnDlz8iAjAAAAIJVH5iEAAAAAAAAAgLxC0RYAAAAAAAAALISiLQAAAAAAAABYCEVbAAAAAAAAALAQirYAAAAAAAAAYCEUbQEAAAAAAADAQijaAgAAAAAAAICFULQFAAAAAAAAAAuhaAsAAAAAAAAAFkLRFgAAAAAAAAAshKItAAAAAAAAAFgIRVsAAAAAAAAAsBCKtgAAAAAAAABgIRRtAQAAAAAAAMBCKNoCAAAAAAAAgIVQtAUAAAAAAAAAC6FoCwAAAAAAAAAWQtEWAAAAAAAAACyEoi0AAAAAAAAAWAhFWwAAAAAAAACwEIq2AAAAAAAAAGAhFG0BAAAAAAAAwEIo2gIAAAAAAACAhVC0BQAAAAAAAAALoWgLAAAAAAAAABZC0RYAAAAAAAAALISiLQAAAAAAAABYCEVbAAAAAAAAALAQirYAAAAAAAAAYCEUbQEAAAAAAADAQrzcnYCVDfv3Bnen4JI3/tPK3SkAAAAAAAAAuEn0tAUAAAAAAAAAC6FoCwAAAAAAAAAWQtEWAAAAAAAAACyEoi0AAAAAAAAAWAhFWwAAAAAAAACwEIq2AAAAAAAAAGAhFG0BAAAAAAAAwEIo2gIAAAAAAACAhVC0BQAAAAAAAAALoWgLAAAAAAAAABZC0RYAAAAAAAAALISiLQAAAAAAAABYCEVbAAAAAAAAALAQirYAAAAAAAAAYCEUbQEAAAAAAADAQijaAgAAAAAAAICFULQFAAAAAAAAAAuhaAsAAAAAAAAAFkLRFgAAAAAAAAAshKItAAAAAAAAAFgIRVsAAAAAAAAAsBCKtgAAAAAAAABgIRRtAQAAAAAAAMBCKNoCAAAAAAAAgIVQtAUAAAAAAAAAC6FoCwAAAAAAAAAWQtEWAAAAAAAAACyEoi0AAAAAAAAAWAhFWwAAAAAAAACwEIq2AAAAAAAAAGAhFG0BAAAAAAAAwE
Io2gIAAAAAAACAhVC0BQAAAAAAAAALoWgLAAAAAAAAABZC0RYAAAAAAAAALISiLQAAAAAAAABYCEVbAAAAAAAAALAQirYAAAAAAAAAYCEUbQEAAAAAAADAQijaAgAAAAAAAICFULQFAAAAAAAAAAuhaAsAAAAAAAAAFkLRFgAAAAAAAAAsxMvdCQCAuzV8ebi7U3DJ1udnuTsFAAAAAACQi+hpCwAAAAAAAAAWQtEWAAAAAAAAACyEoi0AAAAAAAAAWAhFWwAAAAAAAACwEIq2AAAAAAAAAGAhXu5OAACAvPbsb1+5OwWXTKvdyd0pAAAAAADyED1tAQAAAAAAAMBCKNoCAAAAAAAAgIVQtAUAAAAAAAAAC6FoCwAAAAAAAAAWQtEWAAAAAAAAACyEoi0AAAAAAAAAWAhFWwAAAAAAAACwEIq2AAAAAAAAAGAh2SrazpkzRxEREfLz81OTJk20ZcuWDONnzZql6tWrq0iRIgoPD9eIESMUHx+frYQBAAAAAAAAoCBzuWi7aNEijRw5UuPHj9evv/6qyMhItWvXTmfOnHEYv3DhQo0ePVrjx4/X7t279f7772vRokV67rnnbjp5AAAAAAAAAChoXC7azpgxQ/3791ffvn1Vs2ZNzZ07V/7+/po3b57D+B9//FHNmzdXjx49FBERoXvuuUfdu3fPtHcuAAAAAAAAABRGLhVtExMTtW3bNrVt2/afDXh4qG3bttq8ebPDdW6//XZt27bNLNIeOHBAK1euVMeOHW8ibQAAAODmbNiwQZ06dVLZsmVls9m0bNkyu+V9+vSRzWazm9q3b++eZAEAAFCoeLkSfO7cOSUnJys0NNRufmhoqPbs2eNwnR49eujcuXNq0aKFDMPQtWvXNGDAgAyHR0hISFBCQoL5PDY21pU0AQAAgEzFxcUpMjJS//rXv/TQQw85jGnfvr3mz59vPvf19c2r9AAAAFCIuVS0zY6oqChNnjxZb731lpo0aaL9+/frqaee0osvvqixY8c6XGfKlCmaOHFibqcGAACAQqxDhw7q0KFDhjG+vr4KCwvLo4wAAACAVC4Nj1CyZEl5enrq9OnTdvNPnz7ttDE7duxYPf7443riiSdUu3ZtPfjgg5o8ebKmTJmilJQUh+uMGTNGMTEx5nT06FFX0gQAAAByRFRUlEqXLq3q1atr4MCBOn/+fIbxCQkJio2NtZsAAAAAV7lUtPXx8VGDBg20du1ac15KSorWrl2rZs2aOVznypUr8vCw342np6ckyTAMh+v4+voqKCjIbgIAAADyUvv27fXf//5Xa9eu1SuvvKL169erQ4cOSk5OdrrOlClTFBwcbE7h4eF5mDEAAAAKCpeHRxg5cqR69+6thg0bqnHjxpo1a5bi4uLUt29fSVKvXr1Urlw5TZkyRZLUqVMnzZgxQ/Xq1TOHRxg7dqw6depkFm8BAAAAq3n00UfNx7Vr11adOnVUpUoVRUVF6a677nK4zpgxYzRy5EjzeWxsLIVbAAAAuMzlom23bt109uxZjRs3TqdOnVLdunW1evVq8+ZkR44csetZ+8ILL8hms+mFF17Q8ePHVapUKXXq1Ekvv/xyzh0FAAAAkMsqV66skiVLav/+/U6Ltr6+vtysDAAAADctWzciGzJkiIYMGeJwWVRUlP0OvLw0fvx4jR8/Pju7AgAAACzh2LFjOn/+vMqUKePuVAAAAFDAZatoCwAAAOR3ly9f1v79+83nBw8e1I4dO1S8eHEVL15cEydOVJcuXRQWFqa///5bzz77rG655Ra1a9fOjVkDAACgMKBoCwAAgEJp69atuuOOO8zn18ei7d27t95++23t2rVLH3zwgaKjo1W2bFndc889evHFFxn+AAAAALmOoi0AAAAKpTZt2sgwDKfLv/nmmzzMBgAAAPiHR+YhAAAAAAAAAIC8QtEWAAAAAAAAACyEoi0AAAAAAAAAWAhFWwAAAAAAAACwEIq2AAAAAAAAAGAhFG0BAA
AAAAAAwEIo2gIAAAAAAACAhVC0BQAAAAAAAAALoWgLAAAAAAAAABZC0RYAAAAAAAAALISiLQAAAAAAAABYCEVbAAAAAAAAALAQL3cnAAAAAAC4eRdfHeXuFFxS7JlX3J0CAACWRU9bAAAAAAAAALAQirYAAAAAAAAAYCEUbQEAAAAAAADAQijaAgAAAAAAAICFULQFAAAAAAAAAAuhaAsAAAAAAAAAFkLRFgAAAAAAAAAshKItAAAAAAAAAFgIRVsAAAAAAAAAsBCKtgAAAAAAAABgIRRtAQAAAAAAAMBCKNoCAAAAAAAAgIVQtAUAAAAAAAAAC6FoCwAAAAAAAAAWQtEWAAAAAAAAACyEoi0AAAAAAAAAWAhFWwAAAAAAAACwEIq2AAAAAAAAAGAhFG0BAAAAAAAAwEIo2gIAAAAAAACAhVC0BQAAAAAAAAAL8XJ3AgCyb2PUJXen4JKWbQLdnQIAAAAAAIDl0dMWAAAAAAAAACyEoi0AAAAAAAAAWAhFWwAAAAAAAACwEIq2AAAAAAAAAGAh3IisAFm88Jy7U3DJwz1KujsFAAAAAAAAwHLoaQsAAAAAAAAAFpKvetqmpKQoJSXF4TIPDw+7uIxkPTZF9nXtjLfr7ljDSJHN5mH3PCPujk1JSZHNZpPNZvtfrCHDMDLYbl7FZnR89rGS8+3mVayRQazNErH274mc+3zmXKzthvQNm/NlN3JH7I35u+M1y6vY3PrcyzCk/8XKMDL7yLk91jAMC14rc+hcKO/ea4ZsTiLtlziPS2VLc7JyKzY/fD6tFgsAAAAUJPmqaLtz504FBASkmx8cHKxbbrnFLs5ZIz8gIEDVq1c3n//222+6du2aw1gfv/NKjC/1z/MiZ2WzJTuMNVK8lBhfOs2652TzcLxdw/BU4tVQu/3YPJKcxHoo8WqY+dzb74I8PBIdxkbHeqhYcE3zedyVo0q6dtlhrCQVC77tn9irx5WUFOs0NiToVrPwduXqSSUmRTuNDQ6sLg+P1LfW1fhTSki86DBu+/YA1apVS76+vpKk48eP6/Tp0063W7NmTRUpUkSSdPLkSZ08edJpbI0aNVS0aFFJ0unTp3X8+HGnsdWqVVNgYKAk6ezZs0q4ts9prLdneXnaUt+DKUaskpKd5+DtWVaetqD/xV5SUvKJDGLLyNMW/L/YOCUlH3Ma6+UZKi9bMUlSYmKczp0/4DQ2KKiMAgNS38NJSVd19tx+p7GBgaEKCkx9X167lqAzZ52/DgEBJRUcVFaSlJycpNNn9jiNLVq0hEKCy/1vu9e0c+dOp7ElSpRQRESEpNQ/1Ldv3+40tlixYqpcubL5PKPYzK4R4YaP+ThBhk7b/vk8ljN8nP4kIVGGTqWJLWt4y9NJUSZJhk6miQ0zvOXtJDZZho6niQ01vOWTJjbtsXp5eSkyMtJ8/tdff+nyZcefew8PD9WrV898fuDAAcXExDg5OqlBgwbm40OHDuniRcefZUmqV6+eWTw5cuSIzp8/7zQ2MjJSXl6p14hjx47p7NmzTmNz6xoh45rk6y1J8oi5Ko+LV5yGJocFySiS+h6xXYqX5/k457GhQTL8/xd7OUGe55xfg5NLB8oomnpstiuJ8jxzyWnshfIXVKJECUlSbGys9u93/lkODw9X6dKp30eXL1/Wvn3OP8vlypVTWFjqd8yVK1e0Z4/zz3KZMmVUtmzq5z4+Pl5//vmn09jQ0FCVL19ekpSYmKjff//daWypUqVUoUIFSbl/jYgJDnMY65WUoIArF8znMUGh/xTUb+B5LVGBcf+8v2ODSsuwOb5KeCYnKfDyP0MXxQaWkuHh6TDWI/magi7/81nYvXu34uPjHcb6+Piodu3a5vO9e/fqyhXH7+GCfo1ImwMAAABQkNA9AQAAAAAAAAAsxGZk9ptFC4iNjVVwcLAuXryooKAghzG58bO74QM2yN1DHrgS26J1TbcPeeBKbNfuJS3x09wbY2e/dsRprNWGR4hsEJ
yvhkdo2SbQEj+hvTG2yeSRds+tPjzCz8/NsHtutZ8o54fhEUb//rXbhzxwJfaVOp0sd63Mj8MjfPbYXU4irTc8wsMffZ9hrBU+n1aLtarr7diYmBin7Vggp1x8dZS7U3BJsWdecXcKAJAvcH0vnPLV8AgeHh5Zapy70oDPOPbGZa78YZD3sbYbfp554/OMuCP2xtc+7R/5mW83N2Ozdnyp28z6dnMr1paPYqWc/HzmXKyRQfoZLXNXbEbHasXXN6dic/Rzn3aZzZbVj5zbYtMei3WulTkfK+Xue82WYRX9H1mNy81YK3zm8lssAAAAUJDQEgYAAAAAAAAAC6FoCwAAAAAAAAAWQtEWAAAAAAAAACyEoi0AAAAAAAAAWAhFWwAAAAAAAACwEIq2AAAAAAAAAGAhFG0BAAAAAAAAwEK83J0AAAAAAAAAMnfx1VHuTsElxZ55xd0pAPkWPW0BAAAAAAAAwEIo2gIAAAAAAACAhVC0BQAAAAAAAAALoWgLAAAAAAAAABZC0RYAAAAAAAAALISiLQAAAAAAAABYCEVbAAAAAAAAALAQirYAAAAAAAAAYCEUbQEAAAAAAADAQijaAgAAAAAAAICFULQFAAAAAAAAAAuhaAsAAAAAAAAAFkLRFgAAAAAAAAAshKItAAAAAAAAAFiIl7sTAAAAgL1z/9fD3Sm4pOT0he5OAQAAAChQ6GkLAAAAAAAAABZC0RYAAAAAAAAALISiLQAAAAAAAABYCGPaAgAAAACQD118dZS7U3BJsWdecXcKAJBv0NMWAAAAAAAAACyEoi0AAAAAAAAAWAhFWwAAAAAAAACwEIq2AAAAAAAAAGAh3IgMAAAAKGC4OREAAED+Rk9bAAAAAAAAALAQirYAAAAAAAAAYCEUbQEAAAAAAADAQijaAgAAAAAAAICFULQFAAAAAAAAAAuhaAsAAAAAAAAAFkLRFgAAAAAAAAAshKItAAAAAAAAAFgIRVsAAAAAAAAAsBAvdycAAHDNs7995e4UXDKtdid3pwAAAAAAQL5CT1sAAAAAAAAAsBCKtgAAAAAAAABgIdkq2s6ZM0cRERHy8/NTkyZNtGXLFqexbdq0kc1mSzfde++92U4aAAAAAAAAAAoql4u2ixYt0siRIzV+/Hj9+uuvioyMVLt27XTmzBmH8V988YVOnjxpTr///rs8PT318MMP33TyAAAAQHZt2LBBnTp1UtmyZWWz2bRs2TK75YZhaNy4cSpTpoyKFCmitm3b6q+//nJPsgAAAChUXC7azpgxQ/3791ffvn1Vs2ZNzZ07V/7+/po3b57D+OLFiyssLMyc1qxZI39/f4q2AAAAcKu4uDhFRkZqzpw5DpdPmzZNb7zxhubOnauff/5ZRYsWVbt27RQfH5/HmQIAAKCw8XIlODExUdu2bdOYMWPMeR4eHmrbtq02b96cpW28//77evTRR1W0aFGnMQkJCUpISDCfx8bGupImAAAAkKkOHTqoQ4cODpcZhqFZs2bphRde0AMPPCBJ+u9//6vQ0FAtW7ZMjz76aF6mCgAAgELGpZ62586dU3JyskJDQ+3mh4aG6tSpU5muv2XLFv3+++964oknMoybMmWKgoODzSk8PNyVNAEAAICbcvDgQZ06dUpt27Y15wUHB6tJkyZZ7qwAAAAAZFe2bkSWXe+//75q166txo0bZxg3ZswYxcTEmNPRo0fzKEMAAABAZocEVzsrJCQkKDY21m4CAAAAXOVS0bZkyZLy9PTU6dOn7eafPn1aYWFhGa4bFxenTz/9VP369ct0P76+vgoKCrKbAAAAAKvjF2MAAADICS4VbX18fNSgQQOtXbvWnJeSkqK1a9eqWbNmGa67ePFiJSQk6LHHHstepgAAAEAeud4hwdXOCvxiDAAAADnB5eERRo4cqXfffVcffPCBdu/erYEDByouLk59+/aVJPXq1cvuRmXXvf/+++rcubNKlChx81kDAAAAuahSpUoKCwuz66wQGx
urn3/+OcPOCvxiDAAAADnBy9UVunXrprNnz2rcuHE6deqU6tatq9WrV5vjfR05ckQeHva14L1792rTpk369ttvcyZrAAAA4CZdvnxZ+/fvN58fPHhQO3bsUPHixVWhQgUNHz5cL730kqpWrapKlSpp7NixKlu2rDp37uy+pAEAAFAouFy0laQhQ4ZoyJAhDpdFRUWlm1e9enUZhpGdXQEAAAC5YuvWrbrjjjvM5yNHjpQk9e7dWwsWLNCzzz6ruLg4/fvf/1Z0dLRatGih1atXy8/Pz10pAwAAoJDIVtEWAAAAyO/atGmTYccCm82mSZMmadKkSXmYFQAAAEDRFgAAy/vixG/uTsElD5Wt7e4UAAAAACBfc/lGZAAAAAAAAACA3EPRFgAAAAAAAAAshKItAAAAAAAAAFgIRVsAAAAAAAAAsBCKtgAAAAAAAABgIRRtAQAAAAAAAMBCKNoCAAAAAAAAgIVQtAUAAAAAAAAAC6FoCwAAAAAAAAAWQtEWAAAAAAAAACyEoi0AAAAAAAAAWAhFWwAAAAAAAACwEIq2AAAAAAAAAGAhFG0BAAAAAAAAwEIo2gIAAAAAAACAhVC0BQAAAAAAAAALoWgLAAAAAAAAABZC0RYAAAAAAAAALISiLQAAAAAAAABYCEVbAAAAAAAAALAQirYAAAAAAAAAYCEUbQEAAAAAAADAQijaAgAAAAAAAICFULQFAAAAAAAAAAuhaAsAAAAAAAAAFkLRFgAAAAAAAAAshKItAAAAAAAAAFgIRVsAAAAAAAAAsBCKtgAAAAAAAABgIRRtAQAAAAAAAMBCKNoCAAAAAAAAgIVQtAUAAAAAAAAAC6FoCwAAAAAAAAAWQtEWAAAAAAAAACyEoi0AAAAAAAAAWAhFWwAAAAAAAACwEIq2AAAAAAAAAGAhFG0BAAAAAAAAwEIo2gIAAAAAAACAhVC0BQAAAAAAAAALoWgLAAAAAAAAABZC0RYAAAAAAAAALISiLQAAAAAAAABYCEVbAAAAAAAAALAQirYAAAAAAAAAYCEUbQEAAAAAAADAQijaAgAAAAAAAICFULQFAAAAAAAAAAuhaAsAAAAAAAAAFkLRFgAAAAAAAAAshKItAAAAAAAAAFgIRVsAAAAAAAAAsBCKtgAAAAAAAABgIRRtAQAAAAAAAMBCKNoCAAAAAAAAgIVQtAUAAAAAAAAAC6FoCwAAAAAAAAAWQtEWAAAAAAAAACyEoi0AAAAAAAAAWAhFWwAAAAAAAACwEIq2AAAAAAAAAGAhFG0BAAAAAAAAwEIo2gIAAAAAAACAhVC0BQAAAAAAAAALoWgLAAAAAAAAABZC0RYAAAAAAAAALISiLQAAAAAAAABYCEVbAAAAAAAAALAQirYAAAAAAAAAYCEUbQEAAAAAAADAQijaAgAAAAAAAICFULQFAAAAAAAAAAuhaAsAAAAAAAAAFkLRFgAAAAAAAAAshKItAAAAAAAAAFgIRVsAAAAAAAAAsBCKtgAAAAAAAABgIRRtAQAAAAAAAMBCKNoCAAAAAAAAgIVQtAUAAAAAAAAAC6FoCwAAAAAAAAAWQtEWAAAAAAAAACyEoi0AAAAAAAAAWAhFWwAAAAAAAACwEIq2AAAAAAAAAGAhFG0BAAAAAAAAwEIo2gIAAAAAAACAhVC0BQAAAAAAAAALoWgLAAAAAAAAABZC0RYAAAAAAAAALCRbRds5c+YoIiJCfn5+atKkibZs2ZJhfHR0tAYPHqwyZcrI19dX1apV08qVK7OVMAAAAAAAAAAUZC4XbRctWqSRI0dq/Pjx+vXXXxUZGal27drpzJkzDuMTExN1991369ChQ/r888+1d+9evfvuuypXrtxNJw8AAADkpgkTJshms9lNNWrUcHdaAAAAKOC8XF1hxowZ6t+/v/r27StJmjt3rlasWKF58+Zp9OjR6eLnzZunCxcu6Mcff5S3t7ckKSIi4uayBgAAAPLIbbfdpu+++8587uXlchMamZ
ixL8rdKbhkZLU27k4BAAAUcC61OBMTE7Vt2zaNGTPGnOfh4aG2bdtq8+bNDtdZvny5mjVrpsGDB+vLL79UqVKl1KNHD40aNUqenp4O10lISFBCQoL5PDY21pU0AQAAgBzj5eWlsLCwm9tIXJzkqO3r6Sn5+dnHOePhIRUpkrXYpCTpfx0mJEmJSZIMJ8E2ySebsUlJkuEsVpKPT5Ziva5c1TX/f47NMyFRtuRkp5t1KbaIn2SzpcYmJsp2LQdi4+JSz4XH/364mJiYenzOuBLr5/fPe8WV2KSk1HhnPL0kz//lkJycOmUpNkVKvpZBrOc/ObgSm5KS8XvY2/uf909yshQfn7XYlBTp6tWcifXyknx9Ux8bhnTlSs7EuvK5zyw27Tm32W743GfwfkgXm0fXiBvzL1r0n8dXr6aeE2fSxsbHZ/wediXW39/83CshQbqWwXvYldjcukakpPwTey1ZSsng2Ly8/ol16XOfg9eItN9H166lvm7O+Pj8E+vK574wXCOcfZ5tHpJ3mtKeS5/7XLxGOLuu2Wypn6PrXPncF7RrRNocnHCpaHvu3DklJycrNDTUbn5oaKj27NnjcJ0DBw7o+++/V8+ePbVy5Urt379fgwYNUlJSksaPH+9wnSlTpmjixImupAYAAADkir/++ktly5aVn5+fmjVrpilTpqhChQoOY512Pihb1vHGO3aUVqz453np0s7/kGvdWoqK+ud5RIR07pzD0MDyYbo0rJf5PGj6+/K86LgjRHJoCcX+X79/Ymf/V56nzzuOLRak2DED/tnP25/I69gph7EpRYsoZvxQ83nA+5/L+8BRh7EDX56r2TtWmc87DR2nyut/dhgrSTP2rjMfd3hmsqp9s95p7BvbV5pF3rbjZui2pd84jX1781JdLR4iSWo95S3VXfil01gdPJh6DiTp+eel115zHvv779Jtt6U+njxZyuhvnS1bpEaNUh+//rr07LPOY9etk9q0SX38n/+o2AuznIZe6ttF126tIkny2f6nin62ymns5cfuV1Kd1GFAvP/Yp4CPljuNjXukgxIb1pYkee07qMD5S5zGXuncVgm310+NPXhMCghwGqtp06Rnnkl9/OuvUuPGzmPHj5cmTEh9vHu3VKuW89inn5ZefTX18ZEjUqVKzmMHDZLmzEl9fO5c6ufTmd69pQULUh9fuZLxsXXtKi1e/M/zjGIzuUYUSxOaVDlclwd0N58HT3lHHnGOC07X3HWNSPseLVlSOnv2n+cdOkjrnXyW/f3ti0BdukgZ3ScnbaH48celzz93Hnv58j/FkyeflD74wHnsmTNSqVKpj0eOlN56y3lsLl0jPIc+ruTwMpIk301b5b/S+fXv0pOP6lqV1O8r3593yn/Zd85jc+kaoXKRUp8+qY+/+Ua67z7nsW++KQ0enPp440bpjjucxxaya0QxJ6FJNSrr8r+6ms9DJs6RzUnRPy+vEXphpuOEK1aUDh3653mrVtLWrY5jC/o1IqN/ev9Ptm5E5oqUlBSVLl1a//nPf9SgQQN169ZNzz//vObOnet0nTFjxigmJsacjh513LgDAAAAclOTJk20YMECrV69Wm+//bYOHjyoli1b6tKlSw7jp0yZouDgYHMKDw/P44wBAABQENgMIwul3f9JTEyUv7+/Pv/8c3Xu3Nmc37t3b0VHR+vLL9P/J7p169by9va2Gwds1apV6tixoxISEuST9idTTsTGxio4OFgxMTEKCgrKaro3bdi/N+TZvnJCyzY13Z2CSx7uUdLdKTg0+7X880+Cug1D3J2CS1q2CXR3Cg41fHm4u1NwyZ333+XuFFwyrXYnd6eQzrO/feXuFFzStESEu1NwyUNla7s7BYcW9Wjj7hSy7K4yTnplWlTJ6QvdnUKeiY6OVsWKFTVjxgz169cv3XJHPW3Dw8MVc+KE43ZsLg2PcPH1sflqeIQP770nXw2PMKxqK0sOj3BxWga9ci04PEKxIROcxxa2nz5nM/
birBf+eZIPhkcoNvwl+/CC/NPnXLpGXHxzQr4aHqHYs9MYHsHVWAefe7vPeloWHR6h2PAXnW+X4RHS5+CES8Mj+Pj4qEGDBlq7dq1ZtE1JSdHatWs1ZMgQh+s0b95cCxcuVEpKijz+d7HYt2+fypQpk6WCLQAAAGAVISEhqlatmvbv3+9wua+vr3yv/1GWVtGiWWqcZykmK7Fp/9CS7P+YyowrsTfuJ5uxaYuwkpTsm/W/E1yK9fGRshieYeyNr72Pj32BOiO5FZu20JCZtMXTTGM9JM+sbteFWA+PrL/fPT2zHuvKdl2JtdlyJ1a6udiMzrkrf2/n1TUio2MtUsT5shulLWjlZKyv7z+FtZyMzcnPvUeaH0x7eUrK6mfZlc99Dl4j0r4HvLxSp6zmwDUiVdGirr1/sio3rxFZPT5XPvcF+RrhhMvDI4wcOVLvvvuuPvjgA+3evVsDBw5UXFyc+vbtK0nq1auX3Y3KBg4cqAsXLuipp57Svn37tGLFCk2ePFmDr49TAgAAAOQTly9f1t9//60yZcq4OxUAAAAUYC71tJWkbt266ezZsxo3bpxOnTqlunXravXq1ebNyY4cOWL2qJWk8PBwffPNNxoxYoTq1KmjcuXK6amnntKoUaNy7igAAACAXPD000+rU6dOqlixok6cOKHx48fL09NT3bt3z3xlAAAAIJtcLtpK0pAhQ5wOhxCV9o62/9OsWTP99NNP2dkVAAAA4DbHjh1T9+7ddf78eZUqVUotWrTQTz/9pFLX7wwMINtm7ItydwouGVmtjbtTAAAUItkq2gIAAACFwaeffuruFAAAAFAIuTymLQAAAAAAAAAg91C0BQAAAAAAAAALoWgLAAAAAAAAABZC0RYAAAAAAAAALISiLQAAAAAAAABYCEVbAAAAAAAAALAQirYAAAAAAAAAYCEUbQEAAAAAAADAQijaAgAAAAAAAICFULQFAAAAAAAAAAuhaAsAAAAAAAAAFkLRFgAAAAAAAAAshKItAAAAAAAAAFiIl7sTAAAAAAAAAFAwzNgX5e4UXDKyWht3p+AQPW0BAAAAAAAAwEIo2gIAAAAAAACAhVC0BQAAAAAAAAALoWgLAAAAAAAAABZC0RYAAAAAAAAALISiLQAAAAAAAABYCEVbAAAAAAAAALAQirYAAAAAAAAAYCEUbQEAAAAAAADAQijaAgAAAAAAAICFULQFAAAAAAAAAAuhaAsAAAAAAAAAFkLRFgAAAAAAAAAshKItAAAAAAAAAFgIRVsAAAAAAAAAsBCKtgAAAAAAAABgIRRtAQAAAAAAAMBCKNoCAAAAAAAAgIVQtAUAAAAAAAAAC6FoCwAAAAAAAAAWQtEWAAAAAAAAACyEoi0AAAAAAAAAWAhFWwAAAAAAAACwEIq2AAAAAAAAAGAhFG0BAAAAAAAAwEIo2gIAAAAAAACAhVC0BQAAAAAAAAALoWgLAAAAAAAAABZC0RYAAAAAAAAALISiLQAAAAAAAABYCEVbAAAAAAAAALAQirYAAAAAAAAAYCEUbQEAAAAAAADAQijaAgAAAAAAAICFULQFAAAAAAAAAAuhaAsAAAAAAAAAFkLRFgAAAAAAAAAshKItAAAAAAAAAFiIl7sTAAAAAAAAQMEzY1+Uu1NwychqbdydAmCipy0AAAAAAAAAWAhFWwAAAAAAAACwEIq2AAAAAAAAAGAhFG0BAAAAAAAAwEIo2gIAAAAAAACAhVC0BQAAAAAAAAALoWgLAAAAAAAAABZC0RYAAAAAAAAALISiLQAAAAAAAABYCEVbAAAAAAAAALAQirYAAAAAAAAAYCEUbQEAAAAAAADAQijaAgAAAAAAAICFULQFAAAAAAAAAAuhaAsAAAAAAAAAFkLRFgAAAAAAAAAshKItAAAAAAAAAFgIRVsAAAAAAAAAsBCKtgAAAAAAAABgIRRtAQAAAAAAAMBCKNoCAAAAAAAAgIVQtAUAAAAAAAAAC6FoCwAAAAAAAA
AWQtEWAAAAAAAAACyEoi0AAAAAAAAAWAhFWwAAAAAAAACwEIq2AAAAAAAAAGAhFG0BAAAAAAAAwEIo2gIAAAAAAACAhVC0BQAAAAAAAAALoWgLAAAAAAAAABZC0RYAAAAAAAAALISiLQAAAAAAAABYCEVbAAAAAAAAALAQirYAAAAAAAAAYCEUbQEAAAAAAADAQijaAgAAAAAAAICFULQFAAAAAAAAAAvJVtF2zpw5ioiIkJ+fn5o0aaItW7Y4jV2wYIFsNpvd5Ofnl+2EAQAAgLzkStsXAAAAyAkuF20XLVqkkSNHavz48fr1118VGRmpdu3a6cyZM07XCQoK0smTJ83p8OHDN5U0AAAAkBey0/YFAAAAbpaXqyvMmDFD/fv3V9++fSVJc+fO1YoVKzRv3jyNHj3a4To2m01hYWE3lykAAACQx7LT9gUAODZjX5S7U3DJyGpt3J0CgELMpZ62iYmJ2rZtm9q2bfvPBjw81LZtW23evNnpepcvX1bFihUVHh6uBx54QH/88Uf2MwYAAADyQHbbvgAAAMDNcqmn7blz55ScnKzQ0FC7+aGhodqzZ4/DdapXr6558+apTp06iomJ0Wuvvabbb79df/zxh8qXL+9wnYSEBCUkJJjPY2JiJEmxsbGupHvTEhPj8nR/N+vKlUvuTsElsbE+7k7Boavx+ed1jIvLX/cSjI013J2CQ8nxCZkHWUjC5SvuTsEleX3tzor89hpe8bns7hRcYsVzLklXkq65O4Usu5SQ5O4UXOLjxnMeGBgom82WK9vOTtvXKu3Y2Hz23RZ/OX+1u616neO85y4rnnfOee7inN88znnO4LznLned90zbsYYLjh8/bkgyfvzxR7v5zzzzjNG4ceMsbSMxMdGoUqWK8cILLziNGT9+vCGJiYmJiYmJiYmJKcMpJibGleasS7LT9qUdy8TExMTExMTElJUps3asSz1tS5YsKU9PT50+fdpu/unTp7M8Zq23t7fq1aun/fv3O40ZM2aMRo4caT5PSUnRhQsXVKJEiVzrSZHfxcbGKjw8XEePHlVQUJC700Ee4JwXTpz3wodzXvhwzl0TGBiYa9vOTtuXdqzreM8XTpz3wodzXvhwzgsnznvWZdaOdalo6+PjowYNGmjt2rXq3LmzpNSG6Nq1azVkyJAsbSM5OVm//fabOnbs6DTG19dXvr6+dvNCQkJcSbXQCgoK4kNRyHDOCyfOe+HDOS98OOful522L+3Y7OM9Xzhx3gsfznnhwzkvnDjvN8+loq0kjRw5Ur1791bDhg3VuHFjzZo1S3FxceYddXv16qVy5cppypQpkqRJkyapadOmuuWWWxQdHa1XX31Vhw8f1hNPPJGzRwIAAADksMzavgAAAEBucLlo261bN509e1bjxo3TqVOnVLduXa1evdq8QcORI0fk4fHPzZEuXryo/v3769SpUypWrJgaNGigH3/8UTVr1sy5owAAAAByQWZtXwAAACA3uFy0laQhQ4Y4/UlYVFSU3fOZM2dq5syZ2dkNXODr66vx48en+zkeCi7OeeHEeS98OOeFD+fcejJq++Lm8Z4vnDjvhQ/nvPDhnBdOnPecYzMMw3B3EgAAAAAAAACAVB6ZhwAAAAAAAAAA8gpFWwAAAAAAAACwEIq2+VBERIR27NhhN69Pnz4qV66c6tatqxo1aujxxx/XlStX3JMgchznvHCKiIhQ9erVVbduXVWvXl1Tp06VJB06dEienp6qW7euOc2dO9fN2SIncM6tzdG1+IknntC6devyNI+5c+eqTp065vW/Z8+ekqSOHTvqzTffTBcfGRmpL774QpL0999/q2vXrqpUqZIaNGigxo0b67333svT/FG40aYpnDjvhQ9tmsKHc25ttGPzKQP5TsWKFY3t27fbzevdu7cxc+ZMwzAMIz4+3rj99tuNV155Je+TQ67gnBdOac/7sWPHjKCgIOPnn382Dh48aAQHB7s1N+QOzrm1OboW56WkpC
Tjl19+MSpVqmScP3/eMAzDSElJMbZt22YYhmF8/vnnRv369e3W+eWXX4xSpUoZiYmJxsmTJ42wsDDjP//5j7n8woULxttvv513B4FCjzZN4cR5L3xo0xQ+nHNrox2bP9HTtgDy9fVVixYtdPjwYXengjzCOS/4ypUrpxo1anCOCxHOef7Qpk0bLVu2TFJqr7Enn3xSd911l6pVq6aHHnpIiYmJkqSkpCSNHj1ajRs3Vt26dfXII4/o4sWLkqSFCxeqSZMmqlevniIjI/XVV1/ZbX/YsGFq1qyZ7rnnHh07dkyBgYEKDAyUJNlsNtWvX1+SdP/99+vo0aPatWuXuf68efPUq1cveXt7a86cOWrZsqX69+9vLi9WrJgGDBiQq68R4AraNIUT571go01T+HDO8wfasdZH0bYAiomJUVRUlLp06eLuVJBHOOcF3549e3T+/Hm1adNGknTp0iW7nxgdPXrUvQkix3HO86cdO3boq6++0u7du3X69GktWbJEkvTqq6+qaNGi2rJli3bs2KHatWvrhRdekCS1a9dOP/30k7Zv364vv/xS/fv3V0JCgrnNffv2acOGDfr+++91zz33KDAwUBUqVFC3bt305ptvmo1mb29vPf7445o3b54kKT4+Xp988on69esnSdq2bZuaNWuWly8H4DLaNIUT571go01T+HDO8yfasdbj5e4EkHNeffVVzZs3T/v27dO9996rO+64w90pIZdxzgu+bt26ycPDQ3v37tXMmTNVqlQpxcXFKTAwMN2YRCgYOOf524MPPih/f39JUuPGjfX3339LkpYtW6aYmBiz8ZuYmKiIiAhJ0sGDB9WzZ08dO3ZMXl5eunDhgg4ePKgaNWpIkh577DF5e3tLkvz9/bVx40bt2LFDGzdu1BdffKFXXnlFO3fuVPHixdWvXz+1bt1a06ZN0xdffKFbb71Vt956ax6/CoDraNMUTpz3go02TeHDOc/faMdaDz1tC5BnnnlGu3bt0r59+7R161YG9y4EOOcF36JFi7R79259++23Gj16tH777Td3p4RcxjnP3/z8/MzHnp6eunbtmiTJMAzNnj1bO3bs0I4dO/Tnn39q5cqVkqRHH31UTzzxhH7//Xft2LFDAQEBio+PN7cTEBBgtw+bzaZ69epp2LBhWrt2rQICAhQVFSVJqlmzpm655RZ99dVXmjdvntk7QZIaNGigzZs359ahAzeFNk3hxHkv2GjTFD6c8/yNdqz1ULQtgCpUqKDZs2dr0qRJunr1qrvTQR7gnBd8bdu21cCBA82foaDg45wXLJ07d9bMmTPNO6NfuXJFf/zxhyTp4sWLqlSpkiTpo48+Mn8m5siePXvsxvo6evSozp49q8qVK5vz+vXrp8mTJ2vLli3q1q2bOX/QoEFav3695s+fb86Ljo7WO++8kzMHCeQA2jSFE+e9YKNNU/hwzgsW2rHuQ9E2n2rXrp3Kly9vTseOHbNbfv/996tGjRp666233JQhchrnHGPHjtWmTZt0/vx5d6eCPMI5t57MrsXOjBo1So0aNVKTJk1Up04dNW3a1PyZ4Ouvv66uXbuqXr162r59uypUqOB0O1euXNHQoUNVvXp11a1bV506ddLUqVNVt25dM6Zbt27au3evHn74YbveDWXKlNGmTZv09ddfq1KlSqpTp47uuusu8ydrQF6hTVM4cd4LN9o0hQ/n3Hpox+Y/NsMwDHcnAQAAAAAAAABIRU9bAAAAAAAAALAQirYAAAAAAAAAYCEUbQEAAAAAAADAQijauoHNZlN0dHSu72fChAkaPny4JGn58uUaMWJEru8TrpswYYLi4+PzbH9vvvmm+vTpk2f7K6z4nONGfNYBFAR8v+FGfL8VTHzWkRafc8A9KNoWEvfff79mzpzp7jTgwMSJE/P0CxAFF59za+OzDgDZw/ebtfH9hpzCZ926+JwD7kHR1k1ee+011atXT9WqVdPHH39szu/Zs6caNmyoOnXq6N5779
WpU6ckSWfPntU999yj2rVrq06dOurbt6/dtho3bqz69eurffv2Onz4cLr9LViwQJ07d5YkRUVFqVatWho0aJAiIyN12223aevWrWbsN998oxYtWqhBgwZq3Lix1q1bl0uvAgYMGCBJatmyperWrauOHTtq1qxZ5vKnn35aEyZMkJT6381u3bqpU6dOqlmzpu68805duHDBjHX2Prh06ZK6deum6tWrq0WLFvrtt9/y7PgKOz7nuI7POoCChO83XMf3W8HGZx0Sn3PAnSjauonNZtP27du1evVqDR06VIcOHZIkzZo1S1u3btWuXbvUsmVL8+L30UcfqVKlSvrtt9+0a9cuTZ8+XZK0cOFC7d27V5s3b9avv/6qnj17atCgQZnuf8+ePerdu7d27typoUOH6vnnn5ek/2/n/l7Z7eM4jr+GIk0Oxk6cLE04XA5MsVJkh46kOZP4Axw5IDklK07miKLLmaQsaU5MCWnLgSS/Ug4lQjNb1/dAW+66c/su7Lrn+Tja1tV1fa7r+rz2rve1fXRxcaHx8XGFw2EdHh7KMAwFAgG9vLx8y3X47UKhkCQpGo0qHo/L6XR+uP3e3p4WFhZ0fHwsp9Opubk5SR/Pg4mJCZWWlurk5ETr6+va3t7+3pNCFjlHBlkHUEiob8igvhU2sg6JnAP5VJLvAfxWAwMDkqTa2lr5fD5tb2/L5XLJMAwtLi4qkUgokUioqqpKkuT1ehUMBjU8PCyfzye/3y9JWl1d1cHBgZqamiRJ6XT6U8d3u91qbm6WJLW0tGhqakqStLGxobOzM/l8vuy2RUVFur6+Vl1d3decPHLm9/vlcDgkvd23zBPIj+bB1taWgsGgbDabKisrFQgEdH5+/vOD/4XIOXJF1gFYGfUNuaK+/b+QdeSCnANfh6atRdhsNu3s7GhmZka7u7tyOp1aW1vT2NiYpLcvu3g8rkgkopWVFY2OjioWi8k0TY2MjGhwcPCvjldWVpZ9XVxcrFQqJUkyTVOdnZ0yDOPrTg6fVlJS8o/ilUgkZLfbs+8/um+fnQc2m+0LR4y/Qc6RQdYBFBLqGzKob4WNrEMi58BPYnmEPJmfn5ckXV1dKRqNqq2tTXd3d6qoqJDD4VAymcz+jUCSLi8vZbfb1dPTo9nZWZ2enurx8VHd3d0KhULZdWJeX18Vi8VyHldXV5cikYiOjo6yn+3v7+e8P/y3iooK3d/fS3p7mpy53re3twqHw5/ax0fzoKOjQ/Pz8zJNUw8PD1peXv6Gs8C/Ied4j6wDKBTUN7xHfStcZB0Z5BzID35pmyfpdFoej0dPT0+amZmRy+VSTU2NlpaWVF9fL4fDoY6ODt3c3Eh6W4h9eno6+6RqcnJSlZWV6uvr0+3trdrb2yVJqVRK/f398ng8OY3L7XbLMAwNDQ3p+flZyWRSHo+Hp5jfaHh4WJ2dnSovL9fm5qZ6e3vV2Nio2tpaeb3eT+3jo3kwOjqqgYEBNTQ0qLq6Wq2traz39EPIOd4j6wAKBfUN71HfChdZRwY5B/LDZpqmme9BAAAAAAAAAADesDwCAAAAAAAAAFgITVsAAAAAAAAAsBCatgAAAAAAAABgITRtAQAAAAAAAMBCaNoCAAAAAAAAgIXQtAUAAAAAAAAAC6FpCwAAAAAAAAAWQtMWAAAAAAAAACyEpi0AAAAAAAAAWAhNWwAAAAAAAACwEJq2AAAAAAAAAGAhfwDwJS7ygB6rVAAAAABJRU5ErkJggg==",
|
| 699 |
+
"text/plain": [
|
| 700 |
+
"<Figure size 1400x600 with 2 Axes>"
|
| 701 |
+
]
|
| 702 |
+
},
|
| 703 |
+
"metadata": {},
|
| 704 |
+
"output_type": "display_data"
|
| 705 |
+
}
|
| 706 |
+
],
|
| 707 |
+
"source": [
|
| 708 |
+
"fig, axes = plt.subplots(1, 2, figsize=(14, 6))\n",
|
| 709 |
+
"\n",
|
| 710 |
+
"models_list = list(comp_df.index)\n",
|
| 711 |
+
"x = np.arange(len(models_list))\n",
|
| 712 |
+
"w = 0.35\n",
|
| 713 |
+
"c_tr = ['#534AB7','#7F77DD','#0F6E56','#5DCAA5','#993C1D']\n",
|
| 714 |
+
"c_te = ['#9B96E8','#B8B4F0','#5DCAA5','#9FE1CB','#E8593C']\n",
|
| 715 |
+
"\n",
|
| 716 |
+
"axes[0].bar(x-w/2, comp_df['f1_train'], w, label='Train', color=c_tr, alpha=0.85)\n",
|
| 717 |
+
"axes[0].bar(x+w/2, comp_df['f1_test'], w, label='Test', color=c_te, alpha=0.85)\n",
|
| 718 |
+
"axes[0].axhline(0.75, color='gray', linestyle='--', alpha=0.4)\n",
|
| 719 |
+
"axes[0].set_title('F1 Train vs Test', fontweight='bold')\n",
|
| 720 |
+
"axes[0].set_xticks(x)\n",
|
| 721 |
+
"axes[0].set_xticklabels([m.replace(' ','\\n') for m in models_list], fontsize=8)\n",
|
| 722 |
+
"axes[0].set_ylim(0.5, 1.0)\n",
|
| 723 |
+
"axes[0].legend()\n",
|
| 724 |
+
"\n",
|
| 725 |
+
"# GAP comparativo\n",
|
| 726 |
+
"cv_gaps = [r['cv_test_gap_pp'] if r['cv_test_gap_pp'] else 0\n",
|
| 727 |
+
" for _, r in comp_df.iterrows()]\n",
|
| 728 |
+
"tr_gaps = comp_df['train_test_gap_pp'].tolist()\n",
|
| 729 |
+
"x2 = np.arange(len(models_list))\n",
|
| 730 |
+
"axes[1].bar(x2-w/2, tr_gaps, w, label='Train-Test gap', color='#E8593C', alpha=0.7)\n",
|
| 731 |
+
"axes[1].bar(x2+w/2, cv_gaps, w, label='CV-Test gap', color='#5DCAA5', alpha=0.7)\n",
|
| 732 |
+
"axes[1].axhline(5, color='red', linestyle='--', lw=1.5, label='Límite 5pp')\n",
|
| 733 |
+
"axes[1].set_title('Comparativa gaps (pp) — verde = rubrica correcta', fontweight='bold')\n",
|
| 734 |
+
"axes[1].set_xticks(x2)\n",
|
| 735 |
+
"axes[1].set_xticklabels([m.replace(' ','\\n') for m in models_list], fontsize=8)\n",
|
| 736 |
+
"axes[1].legend(fontsize=9)\n",
|
| 737 |
+
"\n",
|
| 738 |
+
"plt.tight_layout()\n",
|
| 739 |
+
"plt.savefig(PROJECT_ROOT / 'reports' / 'v2' / '14_optuna_comparativa.png',\n",
|
| 740 |
+
" dpi=150, bbox_inches='tight')\n",
|
| 741 |
+
"plt.show()"
|
| 742 |
+
]
|
| 743 |
+
},
|
| 744 |
+
{
|
| 745 |
+
"cell_type": "markdown",
|
| 746 |
+
"metadata": {
|
| 747 |
+
"id": "adb85f73"
|
| 748 |
+
},
|
| 749 |
+
"source": [
|
| 750 |
+
"## 8. Guardar ganador y best_params.yaml"
|
| 751 |
+
]
|
| 752 |
+
},
|
| 753 |
+
{
|
| 754 |
+
"cell_type": "code",
|
| 755 |
+
"execution_count": 16,
|
| 756 |
+
"metadata": {
|
| 757 |
+
"id": "0d178b01"
|
| 758 |
+
},
|
| 759 |
+
"outputs": [
|
| 760 |
+
{
|
| 761 |
+
"name": "stdout",
|
| 762 |
+
"output_type": "stream",
|
| 763 |
+
"text": [
|
| 764 |
+
"Modelo guardado: /mnt/c/Users/under/Documents/F5/3_Projects/Project_9_Equipo3/Project_YT/models/final_model.joblib\n",
|
| 765 |
+
"best_params.yaml guardado\n",
|
| 766 |
+
"winner: LR tuned\n",
|
| 767 |
+
"hyperparameters:\n",
|
| 768 |
+
" ngram_range: '1_2'\n",
|
| 769 |
+
" max_features: 4045\n",
|
| 770 |
+
" min_df: 2\n",
|
| 771 |
+
" sublinear_tf: false\n",
|
| 772 |
+
" C: 0.3235215031170205\n",
|
| 773 |
+
"results:\n",
|
| 774 |
+
" f1_test: 0.7579\n",
|
| 775 |
+
" f1_train: 0.8987\n",
|
| 776 |
+
" train_test_gap_pp: 14.07\n",
|
| 777 |
+
" cv_test_gap_pp: 4.76\n",
|
| 778 |
+
" roc_auc: 0.81\n",
|
| 779 |
+
" fp: 18\n",
|
| 780 |
+
" fn: 30\n",
|
| 781 |
+
"\n"
|
| 782 |
+
]
|
| 783 |
+
}
|
| 784 |
+
],
|
| 785 |
+
"source": [
|
| 786 |
+
"pipeline_map = {\n",
|
| 787 |
+
" 'LR baseline': lr_baseline_pipe,\n",
|
| 788 |
+
" 'LR tuned' : lr_tuned_pipe,\n",
|
| 789 |
+
" 'RF baseline': rf_baseline_pipe,\n",
|
| 790 |
+
" 'RF tuned' : rf_tuned_pipe,\n",
|
| 791 |
+
" 'LinearSVC' : svc_pipeline,\n",
|
| 792 |
+
"}\n",
|
| 793 |
+
"best_pipeline = pipeline_map[best_name]\n",
|
| 794 |
+
"\n",
|
| 795 |
+
"MODELS_DIR = PROJECT_ROOT / 'models'\n",
|
| 796 |
+
"MODELS_DIR.mkdir(exist_ok=True)\n",
|
| 797 |
+
"model_path = MODELS_DIR / 'final_model.joblib'\n",
|
| 798 |
+
"joblib.dump(best_pipeline, model_path)\n",
|
| 799 |
+
"print(f'Modelo guardado: {model_path}')\n",
|
| 800 |
+
"\n",
|
| 801 |
+
"best_row = comp_df.loc[best_name]\n",
|
| 802 |
+
"best_trial_params = {}\n",
|
| 803 |
+
"if 'LR tuned' == best_name:\n",
|
| 804 |
+
" best_trial_params = study_lr.best_trial.params\n",
|
| 805 |
+
"elif 'RF tuned' == best_name:\n",
|
| 806 |
+
" best_trial_params = study_rf.best_trial.params\n",
|
| 807 |
+
"\n",
|
| 808 |
+
"best_out = {\n",
|
| 809 |
+
" 'winner' : best_name,\n",
|
| 810 |
+
" 'hyperparameters' : best_trial_params,\n",
|
| 811 |
+
" 'results': {\n",
|
| 812 |
+
" 'f1_test' : float(best_row['f1_test']),\n",
|
| 813 |
+
" 'f1_train' : float(best_row['f1_train']),\n",
|
| 814 |
+
" 'train_test_gap_pp': float(best_row['train_test_gap_pp']),\n",
|
| 815 |
+
" 'cv_test_gap_pp' : float(best_row['cv_test_gap_pp'])\n",
|
| 816 |
+
" if best_row['cv_test_gap_pp'] is not None else None,\n",
|
| 817 |
+
" 'roc_auc' : float(best_row['roc_auc']),\n",
|
| 818 |
+
" 'fp' : int(best_row['fp']),\n",
|
| 819 |
+
" 'fn' : int(best_row['fn']),\n",
|
| 820 |
+
" }\n",
|
| 821 |
+
"}\n",
|
| 822 |
+
"\n",
|
| 823 |
+
"import yaml\n",
|
| 824 |
+
"best_path = PROJECT_ROOT / 'configs' / 'best_params.yaml'\n",
|
| 825 |
+
"with open(best_path, 'w') as f:\n",
|
| 826 |
+
" yaml.dump(best_out, f, default_flow_style=False, sort_keys=False)\n",
|
| 827 |
+
"print(f'best_params.yaml guardado')\n",
|
| 828 |
+
"with open(best_path) as f: print(f.read())"
|
| 829 |
+
]
|
| 830 |
+
},
|
| 831 |
+
{
|
| 832 |
+
"cell_type": "code",
|
| 833 |
+
"execution_count": 17,
|
| 834 |
+
"metadata": {
|
| 835 |
+
"id": "8e205676"
|
| 836 |
+
},
|
| 837 |
+
"outputs": [
|
| 838 |
+
{
|
| 839 |
+
"name": "stdout",
|
| 840 |
+
"output_type": "stream",
|
| 841 |
+
"text": [
|
| 842 |
+
"Verificacion final_model.joblib:\n",
|
| 843 |
+
" ✅ TOXICO 0.70] you are a stupid thug get out\n",
|
| 844 |
+
" ✅ NO TOXICO 0.45] I think the police should be more transparent\n",
|
| 845 |
+
" ✅ TOXICO 0.56] black people are criminal thugs\n",
|
| 846 |
+
" ✅ NO TOXICO 0.31] thank you for sharing this video\n"
|
| 847 |
+
]
|
| 848 |
+
}
|
| 849 |
+
],
|
| 850 |
+
"source": [
|
| 851 |
+
"# Verificacion\n",
|
| 852 |
+
"loaded = joblib.load(model_path)\n",
|
| 853 |
+
"tests = [\n",
|
| 854 |
+
" ('you are a stupid thug get out', True),\n",
|
| 855 |
+
" ('I think the police should be more transparent', False),\n",
|
| 856 |
+
" ('black people are criminal thugs', True),\n",
|
| 857 |
+
" ('thank you for sharing this video', False),\n",
|
| 858 |
+
"]\n",
|
| 859 |
+
"print('Verificacion final_model.joblib:')\n",
|
| 860 |
+
"for text, expected in tests:\n",
|
| 861 |
+
" pred = loaded.predict([text])[0]\n",
|
| 862 |
+
" prob = loaded.predict_proba([text])[0][1]\n",
|
| 863 |
+
" ok = '✅' if pred == expected else '❌'\n",
|
| 864 |
+
"    print(f'  {ok} {\"TOXICO\" if pred else \"NO TOXICO\"} [{prob:.2f}] {text[:55]}')"
|
| 865 |
+
]
|
| 866 |
+
},
|
| 867 |
+
{
|
| 868 |
+
"cell_type": "markdown",
|
| 869 |
+
"metadata": {
|
| 870 |
+
"id": "82980c46"
|
| 871 |
+
},
|
| 872 |
+
"source": [
|
| 873 |
+
"## 9. Registro en MLflow"
|
| 874 |
+
]
|
| 875 |
+
},
|
| 876 |
+
{
|
| 877 |
+
"cell_type": "code",
|
| 878 |
+
"execution_count": 18,
|
| 879 |
+
"metadata": {
|
| 880 |
+
"id": "6dc9f27a"
|
| 881 |
+
},
|
| 882 |
+
"outputs": [
|
| 883 |
+
{
|
| 884 |
+
"name": "stderr",
|
| 885 |
+
"output_type": "stream",
|
| 886 |
+
"text": [
|
| 887 |
+
"2026/05/14 14:19:35 WARNING mlflow.models.model: `artifact_path` is deprecated. Please use `name` instead.\n",
|
| 888 |
+
"2026/05/14 14:19:37 WARNING mlflow.sklearn: Saving scikit-learn models in the pickle or cloudpickle format requires exercising caution because these formats rely on Python's object serialization mechanism, which can execute arbitrary code during deserialization. The recommended safe alternative is the 'skops' format. For more information, see: https://scikit-learn.org/stable/model_persistence.html\n"
|
| 889 |
+
]
|
| 890 |
+
},
|
| 891 |
+
{
|
| 892 |
+
"name": "stdout",
|
| 893 |
+
"output_type": "stream",
|
| 894 |
+
"text": [
|
| 895 |
+
" ✅ lr_tuned_optuna\n",
|
| 896 |
+
" ✅ rf_tuned_optuna\n",
|
| 897 |
+
" ✅ linear_svc\n"
|
| 898 |
+
]
|
| 899 |
+
}
|
| 900 |
+
],
|
| 901 |
+
"source": [
|
| 902 |
+
"MLFLOW_DIR = PROJECT_ROOT / 'mlruns'\n",
|
| 903 |
+
"mlflow.set_tracking_uri(f'file://{MLFLOW_DIR}')\n",
|
| 904 |
+
"mlflow.set_experiment('Youtube_project_experiment')\n",
|
| 905 |
+
"\n",
|
| 906 |
+
"runs_info = [\n",
|
| 907 |
+
" ('lr_tuned_optuna', study_lr, metrics_lr_tuned, lr_tuned_pipe,\n",
|
| 908 |
+
" {'model':'LR','optuna_trials':N_TRIALS}),\n",
|
| 909 |
+
" ('rf_tuned_optuna', study_rf, metrics_rf_tuned, rf_tuned_pipe,\n",
|
| 910 |
+
" {'model':'RF','optuna_trials':N_TRIALS}),\n",
|
| 911 |
+
" ('linear_svc', None, metrics_svc, svc_pipeline,\n",
|
| 912 |
+
" {'model':'LinearSVC','C':1.0}),\n",
|
| 913 |
+
"]\n",
|
| 914 |
+
"\n",
|
| 915 |
+
"for run_name, study, mets, pipe, extra in runs_info:\n",
|
| 916 |
+
" with mlflow.start_run(run_name=run_name):\n",
|
| 917 |
+
" for k,v in extra.items(): mlflow.log_param(k, v)\n",
|
| 918 |
+
" if study:\n",
|
| 919 |
+
" mlflow.log_param('best_trial', study.best_trial.number)\n",
|
| 920 |
+
" for k,v in study.best_trial.params.items(): mlflow.log_param(k, v)\n",
|
| 921 |
+
" mlflow.log_metric('test_f1', mets['f1_test'])\n",
|
| 922 |
+
" mlflow.log_metric('train_f1', mets['f1_train'])\n",
|
| 923 |
+
" mlflow.log_metric('train_test_gap_pp', mets['train_test_gap_pp'])\n",
|
| 924 |
+
" if mets['cv_test_gap_pp'] is not None:\n",
|
| 925 |
+
" mlflow.log_metric('cv_mean', mets['cv_mean'])\n",
|
| 926 |
+
" mlflow.log_metric('cv_test_gap_pp', mets['cv_test_gap_pp'])\n",
|
| 927 |
+
" mlflow.log_metric('roc_auc', mets['roc_auc'])\n",
|
| 928 |
+
" if run_name.split('_')[0].upper() in best_name.upper() and 'tuned' in best_name.lower():\n",
|
| 929 |
+
" mlflow.sklearn.log_model(pipe, 'final_model')\n",
|
| 930 |
+
" print(f' ✅ {run_name}')\n",
|
| 931 |
+
"with mlflow.start_run(run_name='optuna_comparativa'):\n",
"    mlflow.log_artifact(str(PROJECT_ROOT / 'reports' / 'v2' / '14_optuna_comparativa.png'))"
|
| 932 |
+
]
|
| 933 |
+
},
|
| 934 |
+
{
|
| 935 |
+
"cell_type": "markdown",
|
| 936 |
+
"metadata": {
|
| 937 |
+
"id": "96714037"
|
| 938 |
+
},
|
| 939 |
+
"source": [
|
| 940 |
+
"## 10. Conclusiones"
|
| 941 |
+
]
|
| 942 |
+
},
|
| 943 |
+
{
|
| 944 |
+
"cell_type": "code",
|
| 945 |
+
"execution_count": 19,
|
| 946 |
+
"metadata": {
|
| 947 |
+
"id": "b77fe7cc"
|
| 948 |
+
},
|
| 949 |
+
"outputs": [
|
| 950 |
+
{
|
| 951 |
+
"name": "stdout",
|
| 952 |
+
"output_type": "stream",
|
| 953 |
+
"text": [
|
| 954 |
+
"\n",
|
| 955 |
+
"CONCLUSIONES — OPTIMIZACION + LinearSVC\n",
|
| 956 |
+
"=======================================================\n",
|
| 957 |
+
"5 modelos evaluados bajo las mismas condiciones.\n",
|
| 958 |
+
"\n",
|
| 959 |
+
"Mejora Optuna:\n",
|
| 960 |
+
" LR: +0.48pp F1 test\n",
|
| 961 |
+
" RF: -1.49pp F1 test\n",
|
| 962 |
+
"\n",
|
| 963 |
+
"Ganador: LR tuned\n",
|
| 964 |
+
" F1 test : 0.7579\n",
|
| 965 |
+
" train-test gap : 14.07pp\n",
|
| 966 |
+
" cv-test gap : 4.76pp\n",
|
| 967 |
+
"\n",
|
| 968 |
+
"Nota metodologica:\n",
|
| 969 |
+
" El train-test gap esta inflado por ser in-sample vs OOS.\n",
|
| 970 |
+
" El cv-test gap compara OOS vs OOS — es la metrica correcta\n",
|
| 971 |
+
" para la rubrica. Ver informe metodologico adjunto.\n",
|
| 972 |
+
"\n",
|
| 973 |
+
"Siguiente: 07_data_augmentation.ipynb\n",
|
| 974 |
+
" Explorar si augmentation mejora los FN sin aumentar FP.\n",
|
| 975 |
+
"\n"
|
| 976 |
+
]
|
| 977 |
+
}
|
| 978 |
+
],
|
| 979 |
+
"source": [
|
| 980 |
+
"lr_delta = metrics_lr_tuned['f1_test'] - metrics_lr_base['f1_test']\n",
|
| 981 |
+
"rf_delta = metrics_rf_tuned['f1_test'] - metrics_rf_base['f1_test']\n",
|
| 982 |
+
"\n",
|
| 983 |
+
"print(f\"\"\"\n",
|
| 984 |
+
"CONCLUSIONES — OPTIMIZACION + LinearSVC\n",
|
| 985 |
+
"{'='*55}\n",
|
| 986 |
+
"5 modelos evaluados bajo las mismas condiciones.\n",
|
| 987 |
+
"\n",
|
| 988 |
+
"Mejora Optuna:\n",
|
| 989 |
+
" LR: {lr_delta*100:+.2f}pp F1 test\n",
|
| 990 |
+
" RF: {rf_delta*100:+.2f}pp F1 test\n",
|
| 991 |
+
"\n",
|
| 992 |
+
"Ganador: {best_name}\n",
|
| 993 |
+
" F1 test : {comp_df.loc[best_name, 'f1_test']:.4f}\n",
|
| 994 |
+
" train-test gap : {comp_df.loc[best_name, 'train_test_gap_pp']:.2f}pp\n",
|
| 995 |
+
" cv-test gap : {comp_df.loc[best_name, 'cv_test_gap_pp']:.2f}pp\n",
|
| 996 |
+
"\n",
|
| 997 |
+
"Nota metodologica:\n",
|
| 998 |
+
" El train-test gap esta inflado por ser in-sample vs OOS.\n",
|
| 999 |
+
" El cv-test gap compara OOS vs OOS — es la metrica correcta\n",
|
| 1000 |
+
" para la rubrica. Ver informe metodologico adjunto.\n",
|
| 1001 |
+
"\n",
|
| 1002 |
+
"Siguiente: 07_data_augmentation.ipynb\n",
|
| 1003 |
+
" Explorar si augmentation mejora los FN sin aumentar FP.\n",
|
| 1004 |
+
"\"\"\")"
|
| 1005 |
+
]
|
| 1006 |
+
}
|
| 1007 |
+
],
|
| 1008 |
+
"metadata": {
|
| 1009 |
+
"colab": {
|
| 1010 |
+
"provenance": []
|
| 1011 |
+
},
|
| 1012 |
+
"kernelspec": {
|
| 1013 |
+
"display_name": "py310",
|
| 1014 |
+
"language": "python",
|
| 1015 |
+
"name": "python3"
|
| 1016 |
+
},
|
| 1017 |
+
"language_info": {
|
| 1018 |
+
"codemirror_mode": {
|
| 1019 |
+
"name": "ipython",
|
| 1020 |
+
"version": 3
|
| 1021 |
+
},
|
| 1022 |
+
"file_extension": ".py",
|
| 1023 |
+
"mimetype": "text/x-python",
|
| 1024 |
+
"name": "python",
|
| 1025 |
+
"nbconvert_exporter": "python",
|
| 1026 |
+
"pygments_lexer": "ipython3",
|
| 1027 |
+
"version": "3.10.20"
|
| 1028 |
+
}
|
| 1029 |
+
},
|
| 1030 |
+
"nbformat": 4,
|
| 1031 |
+
"nbformat_minor": 0
|
| 1032 |
+
}
|
reports/v2/12_ensemble_comparativa.png
ADDED
|
Git LFS Details
|
reports/v2/13_best_model_test.png
ADDED
|
Git LFS Details
|
reports/v2/14_optuna_comparativa.png
ADDED
|
Git LFS Details
|
reports/v2/14_optuna_convergencia.png
ADDED
|
Git LFS Details
|
reports/v2/15_optuna_convergencia.png
ADDED
|
Git LFS Details
|