JonnyBP committed on
Commit ·
6cef02d
1
Parent(s): 93a1356
fix: change versions. #2
Browse files
- data/processed/{v1 → v2}/comments_with_stats.csv +0 -0
- notebooks/01_eda_v2.ipynb +16 -16
- reports/{v1 → v2}/01_distribucion_target.png +0 -0
- reports/{v1 → v2}/02_longitud_texto.png +0 -0
- reports/{v1 → v2}/03_wordclouds.png +2 -2
- reports/{v1 → v2}/04_correlacion_sublabels.png +0 -0
- reports/{v1 → v2}/05_multilabel_overlap.png +0 -0
- reports/{v1 → v2}/06_toxicidad_por_video.png +0 -0
data/processed/{v1 → v2}/comments_with_stats.csv
RENAMED
|
File without changes
|
notebooks/01_eda_v2.ipynb
CHANGED
|
@@ -372,7 +372,7 @@
|
|
| 372 |
},
|
| 373 |
{
|
| 374 |
"cell_type": "code",
|
| 375 |
-
"execution_count":
|
| 376 |
"id": "76f135bb",
|
| 377 |
"metadata": {},
|
| 378 |
"outputs": [
|
|
@@ -418,7 +418,7 @@
|
|
| 418 |
" axes[1].text(v + 2, i, str(v), va='center')\n",
|
| 419 |
"\n",
|
| 420 |
"plt.tight_layout()\n",
|
| 421 |
-
"plt.savefig('../reports/
|
| 422 |
"plt.show()\n",
|
| 423 |
"print(\"✅ Guardado en reports/01_distribucion_target.png\")\n"
|
| 424 |
]
|
|
@@ -517,7 +517,7 @@
|
|
| 517 |
},
|
| 518 |
{
|
| 519 |
"cell_type": "code",
|
| 520 |
-
"execution_count":
|
| 521 |
"id": "8d6b4108",
|
| 522 |
"metadata": {},
|
| 523 |
"outputs": [
|
|
@@ -575,7 +575,7 @@
|
|
| 575 |
"axes[1,1].set_ylabel('Nº palabras')\n",
|
| 576 |
"\n",
|
| 577 |
"plt.tight_layout()\n",
|
| 578 |
-
"plt.savefig('../reports/
|
| 579 |
"plt.show()\n",
|
| 580 |
"print(\"✅ Guardado en reports/02_longitud_texto.png\")\n"
|
| 581 |
]
|
|
@@ -659,7 +659,7 @@
|
|
| 659 |
},
|
| 660 |
{
|
| 661 |
"cell_type": "code",
|
| 662 |
-
"execution_count":
|
| 663 |
"id": "82c3951c",
|
| 664 |
"metadata": {},
|
| 665 |
"outputs": [
|
|
@@ -719,9 +719,9 @@
|
|
| 719 |
"axes[1].axis('off')\n",
|
| 720 |
"\n",
|
| 721 |
"plt.tight_layout()\n",
|
| 722 |
-
"plt.savefig('../reports/
|
| 723 |
"plt.show()\n",
|
| 724 |
-
"print(\"✅ Guardado en reports/
|
| 725 |
]
|
| 726 |
},
|
| 727 |
{
|
|
@@ -810,7 +810,7 @@
|
|
| 810 |
},
|
| 811 |
{
|
| 812 |
"cell_type": "code",
|
| 813 |
-
"execution_count":
|
| 814 |
"id": "f667be4d",
|
| 815 |
"metadata": {},
|
| 816 |
"outputs": [
|
|
@@ -847,14 +847,14 @@
|
|
| 847 |
" annot_kws={'size': 11})\n",
|
| 848 |
"ax.set_title('Correlación entre labels de toxicidad', fontweight='bold', pad=15)\n",
|
| 849 |
"plt.tight_layout()\n",
|
| 850 |
-
"plt.savefig('../reports/
|
| 851 |
"plt.show()\n",
|
| 852 |
-
"print(\"✅ Guardado en reports/
|
| 853 |
]
|
| 854 |
},
|
| 855 |
{
|
| 856 |
"cell_type": "code",
|
| 857 |
-
"execution_count":
|
| 858 |
"id": "be01eb96",
|
| 859 |
"metadata": {},
|
| 860 |
"outputs": [
|
|
@@ -882,7 +882,7 @@
|
|
| 882 |
"for x, y in zip(label_dist.index, label_dist.values):\n",
|
| 883 |
" ax.text(x, y + 1, str(y), ha='center', fontweight='bold')\n",
|
| 884 |
"plt.tight_layout()\n",
|
| 885 |
-
"plt.savefig('../reports/
|
| 886 |
"plt.show()\n"
|
| 887 |
]
|
| 888 |
},
|
|
@@ -981,7 +981,7 @@
|
|
| 981 |
},
|
| 982 |
{
|
| 983 |
"cell_type": "code",
|
| 984 |
-
"execution_count":
|
| 985 |
"id": "d9d5856f",
|
| 986 |
"metadata": {},
|
| 987 |
"outputs": [
|
|
@@ -1007,7 +1007,7 @@
|
|
| 1007 |
"ax.axhline(50, color='gray', linestyle='--', alpha=0.5, label='50%')\n",
|
| 1008 |
"ax.legend()\n",
|
| 1009 |
"plt.tight_layout()\n",
|
| 1010 |
-
"plt.savefig('../reports/
|
| 1011 |
"plt.show()\n"
|
| 1012 |
]
|
| 1013 |
},
|
|
@@ -1207,7 +1207,7 @@
|
|
| 1207 |
},
|
| 1208 |
{
|
| 1209 |
"cell_type": "code",
|
| 1210 |
-
"execution_count":
|
| 1211 |
"id": "2cb72391",
|
| 1212 |
"metadata": {},
|
| 1213 |
"outputs": [
|
|
@@ -1223,7 +1223,7 @@
|
|
| 1223 |
"# Guardar dataset con columnas auxiliares para usar en las siguientes fases\n",
|
| 1224 |
"df_clean = df.copy()\n",
|
| 1225 |
"# Guardamos char_length y word_count por si son útiles como features adicionales\n",
|
| 1226 |
-
"df_clean.to_csv('../data/processed/
|
| 1227 |
"print(\"✅ Dataset con estadísticas guardado en data/processed/comments_with_stats.csv\")\n"
|
| 1228 |
]
|
| 1229 |
}
|
|
|
|
| 372 |
},
|
| 373 |
{
|
| 374 |
"cell_type": "code",
|
| 375 |
+
"execution_count": null,
|
| 376 |
"id": "76f135bb",
|
| 377 |
"metadata": {},
|
| 378 |
"outputs": [
|
|
|
|
| 418 |
" axes[1].text(v + 2, i, str(v), va='center')\n",
|
| 419 |
"\n",
|
| 420 |
"plt.tight_layout()\n",
|
| 421 |
+
"plt.savefig('../reports/v2/01_distribucion_target.png', dpi=150, bbox_inches='tight')\n",
|
| 422 |
"plt.show()\n",
|
| 423 |
"print(\"✅ Guardado en reports/01_distribucion_target.png\")\n"
|
| 424 |
]
|
|
|
|
| 517 |
},
|
| 518 |
{
|
| 519 |
"cell_type": "code",
|
| 520 |
+
"execution_count": null,
|
| 521 |
"id": "8d6b4108",
|
| 522 |
"metadata": {},
|
| 523 |
"outputs": [
|
|
|
|
| 575 |
"axes[1,1].set_ylabel('Nº palabras')\n",
|
| 576 |
"\n",
|
| 577 |
"plt.tight_layout()\n",
|
| 578 |
+
"plt.savefig('../reports/v2/02_longitud_texto.png', dpi=150, bbox_inches='tight')\n",
|
| 579 |
"plt.show()\n",
|
| 580 |
"print(\"✅ Guardado en reports/02_longitud_texto.png\")\n"
|
| 581 |
]
|
|
|
|
| 659 |
},
|
| 660 |
{
|
| 661 |
"cell_type": "code",
|
| 662 |
+
"execution_count": null,
|
| 663 |
"id": "82c3951c",
|
| 664 |
"metadata": {},
|
| 665 |
"outputs": [
|
|
|
|
| 719 |
"axes[1].axis('off')\n",
|
| 720 |
"\n",
|
| 721 |
"plt.tight_layout()\n",
|
| 722 |
+
"plt.savefig('../reports/v2/03_wordclouds.png', dpi=150, bbox_inches='tight')\n",
|
| 723 |
"plt.show()\n",
|
| 724 |
+
"print(\"✅ Guardado en reports/v2/03_wordclouds.png\")\n"
|
| 725 |
]
|
| 726 |
},
|
| 727 |
{
|
|
|
|
| 810 |
},
|
| 811 |
{
|
| 812 |
"cell_type": "code",
|
| 813 |
+
"execution_count": null,
|
| 814 |
"id": "f667be4d",
|
| 815 |
"metadata": {},
|
| 816 |
"outputs": [
|
|
|
|
| 847 |
" annot_kws={'size': 11})\n",
|
| 848 |
"ax.set_title('Correlación entre labels de toxicidad', fontweight='bold', pad=15)\n",
|
| 849 |
"plt.tight_layout()\n",
|
| 850 |
+
"plt.savefig('../reports/v2/04_correlacion_sublabels.png', dpi=150, bbox_inches='tight')\n",
|
| 851 |
"plt.show()\n",
|
| 852 |
+
"print(\"✅ Guardado en reports/v2/04_correlacion_sublabels.png\")\n"
|
| 853 |
]
|
| 854 |
},
|
| 855 |
{
|
| 856 |
"cell_type": "code",
|
| 857 |
+
"execution_count": null,
|
| 858 |
"id": "be01eb96",
|
| 859 |
"metadata": {},
|
| 860 |
"outputs": [
|
|
|
|
| 882 |
"for x, y in zip(label_dist.index, label_dist.values):\n",
|
| 883 |
" ax.text(x, y + 1, str(y), ha='center', fontweight='bold')\n",
|
| 884 |
"plt.tight_layout()\n",
|
| 885 |
+
"plt.savefig('../reports/v2/05_multilabel_overlap.png', dpi=150, bbox_inches='tight')\n",
|
| 886 |
"plt.show()\n"
|
| 887 |
]
|
| 888 |
},
|
|
|
|
| 981 |
},
|
| 982 |
{
|
| 983 |
"cell_type": "code",
|
| 984 |
+
"execution_count": null,
|
| 985 |
"id": "d9d5856f",
|
| 986 |
"metadata": {},
|
| 987 |
"outputs": [
|
|
|
|
| 1007 |
"ax.axhline(50, color='gray', linestyle='--', alpha=0.5, label='50%')\n",
|
| 1008 |
"ax.legend()\n",
|
| 1009 |
"plt.tight_layout()\n",
|
| 1010 |
+
"plt.savefig('../reports/v2/06_toxicidad_por_video.png', dpi=150, bbox_inches='tight')\n",
|
| 1011 |
"plt.show()\n"
|
| 1012 |
]
|
| 1013 |
},
|
|
|
|
| 1207 |
},
|
| 1208 |
{
|
| 1209 |
"cell_type": "code",
|
| 1210 |
+
"execution_count": null,
|
| 1211 |
"id": "2cb72391",
|
| 1212 |
"metadata": {},
|
| 1213 |
"outputs": [
|
|
|
|
| 1223 |
"# Guardar dataset con columnas auxiliares para usar en las siguientes fases\n",
|
| 1224 |
"df_clean = df.copy()\n",
|
| 1225 |
"# Guardamos char_length y word_count por si son útiles como features adicionales\n",
|
| 1226 |
+
"df_clean.to_csv('../data/processed/v2/comments_with_stats.csv', index=False)\n",
|
| 1227 |
"print(\"✅ Dataset con estadísticas guardado en data/processed/comments_with_stats.csv\")\n"
|
| 1228 |
]
|
| 1229 |
}
|
reports/{v1 → v2}/01_distribucion_target.png
RENAMED
|
File without changes
|
reports/{v1 → v2}/02_longitud_texto.png
RENAMED
|
File without changes
|
reports/{v1 → v2}/03_wordclouds.png
RENAMED
|
File without changes
|
reports/{v1 → v2}/04_correlacion_sublabels.png
RENAMED
|
File without changes
|
reports/{v1 → v2}/05_multilabel_overlap.png
RENAMED
|
File without changes
|
reports/{v1 → v2}/06_toxicidad_por_video.png
RENAMED
|
File without changes
|