JonnyBP committed on
Commit
6cef02d
·
1 Parent(s): 93a1356

fix: change versions. #2

Browse files
data/processed/{v1 → v2}/comments_with_stats.csv RENAMED
File without changes
notebooks/01_eda_v2.ipynb CHANGED
@@ -372,7 +372,7 @@
372
  },
373
  {
374
  "cell_type": "code",
375
- "execution_count": 6,
376
  "id": "76f135bb",
377
  "metadata": {},
378
  "outputs": [
@@ -418,7 +418,7 @@
418
  " axes[1].text(v + 2, i, str(v), va='center')\n",
419
  "\n",
420
  "plt.tight_layout()\n",
421
- "plt.savefig('../reports/v1/01_distribucion_target.png', dpi=150, bbox_inches='tight')\n",
422
  "plt.show()\n",
423
  "print(\"✅ Guardado en reports/01_distribucion_target.png\")\n"
424
  ]
@@ -517,7 +517,7 @@
517
  },
518
  {
519
  "cell_type": "code",
520
- "execution_count": 9,
521
  "id": "8d6b4108",
522
  "metadata": {},
523
  "outputs": [
@@ -575,7 +575,7 @@
575
  "axes[1,1].set_ylabel('Nº palabras')\n",
576
  "\n",
577
  "plt.tight_layout()\n",
578
- "plt.savefig('../reports/v1/02_longitud_texto.png', dpi=150, bbox_inches='tight')\n",
579
  "plt.show()\n",
580
  "print(\"✅ Guardado en reports/02_longitud_texto.png\")\n"
581
  ]
@@ -659,7 +659,7 @@
659
  },
660
  {
661
  "cell_type": "code",
662
- "execution_count": 12,
663
  "id": "82c3951c",
664
  "metadata": {},
665
  "outputs": [
@@ -719,9 +719,9 @@
719
  "axes[1].axis('off')\n",
720
  "\n",
721
  "plt.tight_layout()\n",
722
- "plt.savefig('../reports/v1/03_wordclouds.png', dpi=150, bbox_inches='tight')\n",
723
  "plt.show()\n",
724
- "print(\"✅ Guardado en reports/v1/03_wordclouds.png\")\n"
725
  ]
726
  },
727
  {
@@ -810,7 +810,7 @@
810
  },
811
  {
812
  "cell_type": "code",
813
- "execution_count": 14,
814
  "id": "f667be4d",
815
  "metadata": {},
816
  "outputs": [
@@ -847,14 +847,14 @@
847
  " annot_kws={'size': 11})\n",
848
  "ax.set_title('Correlación entre labels de toxicidad', fontweight='bold', pad=15)\n",
849
  "plt.tight_layout()\n",
850
- "plt.savefig('../reports/v1/04_correlacion_sublabels.png', dpi=150, bbox_inches='tight')\n",
851
  "plt.show()\n",
852
- "print(\"✅ Guardado en reports/v1/04_correlacion_sublabels.png\")\n"
853
  ]
854
  },
855
  {
856
  "cell_type": "code",
857
- "execution_count": 15,
858
  "id": "be01eb96",
859
  "metadata": {},
860
  "outputs": [
@@ -882,7 +882,7 @@
882
  "for x, y in zip(label_dist.index, label_dist.values):\n",
883
  " ax.text(x, y + 1, str(y), ha='center', fontweight='bold')\n",
884
  "plt.tight_layout()\n",
885
- "plt.savefig('../reports/v1/05_multilabel_overlap.png', dpi=150, bbox_inches='tight')\n",
886
  "plt.show()\n"
887
  ]
888
  },
@@ -981,7 +981,7 @@
981
  },
982
  {
983
  "cell_type": "code",
984
- "execution_count": 18,
985
  "id": "d9d5856f",
986
  "metadata": {},
987
  "outputs": [
@@ -1007,7 +1007,7 @@
1007
  "ax.axhline(50, color='gray', linestyle='--', alpha=0.5, label='50%')\n",
1008
  "ax.legend()\n",
1009
  "plt.tight_layout()\n",
1010
- "plt.savefig('../reports/v1/06_toxicidad_por_video.png', dpi=150, bbox_inches='tight')\n",
1011
  "plt.show()\n"
1012
  ]
1013
  },
@@ -1207,7 +1207,7 @@
1207
  },
1208
  {
1209
  "cell_type": "code",
1210
- "execution_count": 23,
1211
  "id": "2cb72391",
1212
  "metadata": {},
1213
  "outputs": [
@@ -1223,7 +1223,7 @@
1223
  "# Guardar dataset con columnas auxiliares para usar en las siguientes fases\n",
1224
  "df_clean = df.copy()\n",
1225
  "# Guardamos char_length y word_count por si son útiles como features adicionales\n",
1226
- "df_clean.to_csv('../data/processed/v1/comments_with_stats.csv', index=False)\n",
1227
  "print(\"✅ Dataset con estadísticas guardado en data/processed/comments_with_stats.csv\")\n"
1228
  ]
1229
  }
 
372
  },
373
  {
374
  "cell_type": "code",
375
+ "execution_count": null,
376
  "id": "76f135bb",
377
  "metadata": {},
378
  "outputs": [
 
418
  " axes[1].text(v + 2, i, str(v), va='center')\n",
419
  "\n",
420
  "plt.tight_layout()\n",
421
+ "plt.savefig('../reports/v2/01_distribucion_target.png', dpi=150, bbox_inches='tight')\n",
422
  "plt.show()\n",
423
  "print(\"✅ Guardado en reports/01_distribucion_target.png\")\n"
424
  ]
 
517
  },
518
  {
519
  "cell_type": "code",
520
+ "execution_count": null,
521
  "id": "8d6b4108",
522
  "metadata": {},
523
  "outputs": [
 
575
  "axes[1,1].set_ylabel('Nº palabras')\n",
576
  "\n",
577
  "plt.tight_layout()\n",
578
+ "plt.savefig('../reports/v2/02_longitud_texto.png', dpi=150, bbox_inches='tight')\n",
579
  "plt.show()\n",
580
  "print(\"✅ Guardado en reports/02_longitud_texto.png\")\n"
581
  ]
 
659
  },
660
  {
661
  "cell_type": "code",
662
+ "execution_count": null,
663
  "id": "82c3951c",
664
  "metadata": {},
665
  "outputs": [
 
719
  "axes[1].axis('off')\n",
720
  "\n",
721
  "plt.tight_layout()\n",
722
+ "plt.savefig('../reports/v2/03_wordclouds.png', dpi=150, bbox_inches='tight')\n",
723
  "plt.show()\n",
724
+ "print(\"✅ Guardado en reports/v2/03_wordclouds.png\")\n"
725
  ]
726
  },
727
  {
 
810
  },
811
  {
812
  "cell_type": "code",
813
+ "execution_count": null,
814
  "id": "f667be4d",
815
  "metadata": {},
816
  "outputs": [
 
847
  " annot_kws={'size': 11})\n",
848
  "ax.set_title('Correlación entre labels de toxicidad', fontweight='bold', pad=15)\n",
849
  "plt.tight_layout()\n",
850
+ "plt.savefig('../reports/v2/04_correlacion_sublabels.png', dpi=150, bbox_inches='tight')\n",
851
  "plt.show()\n",
852
+ "print(\"✅ Guardado en reports/v2/04_correlacion_sublabels.png\")\n"
853
  ]
854
  },
855
  {
856
  "cell_type": "code",
857
+ "execution_count": null,
858
  "id": "be01eb96",
859
  "metadata": {},
860
  "outputs": [
 
882
  "for x, y in zip(label_dist.index, label_dist.values):\n",
883
  " ax.text(x, y + 1, str(y), ha='center', fontweight='bold')\n",
884
  "plt.tight_layout()\n",
885
+ "plt.savefig('../reports/v2/05_multilabel_overlap.png', dpi=150, bbox_inches='tight')\n",
886
  "plt.show()\n"
887
  ]
888
  },
 
981
  },
982
  {
983
  "cell_type": "code",
984
+ "execution_count": null,
985
  "id": "d9d5856f",
986
  "metadata": {},
987
  "outputs": [
 
1007
  "ax.axhline(50, color='gray', linestyle='--', alpha=0.5, label='50%')\n",
1008
  "ax.legend()\n",
1009
  "plt.tight_layout()\n",
1010
+ "plt.savefig('../reports/v2/06_toxicidad_por_video.png', dpi=150, bbox_inches='tight')\n",
1011
  "plt.show()\n"
1012
  ]
1013
  },
 
1207
  },
1208
  {
1209
  "cell_type": "code",
1210
+ "execution_count": null,
1211
  "id": "2cb72391",
1212
  "metadata": {},
1213
  "outputs": [
 
1223
  "# Guardar dataset con columnas auxiliares para usar en las siguientes fases\n",
1224
  "df_clean = df.copy()\n",
1225
  "# Guardamos char_length y word_count por si son útiles como features adicionales\n",
1226
+ "df_clean.to_csv('../data/processed/v2/comments_with_stats.csv', index=False)\n",
1227
  "print(\"✅ Dataset con estadísticas guardado en data/processed/comments_with_stats.csv\")\n"
1228
  ]
1229
  }
reports/{v1 → v2}/01_distribucion_target.png RENAMED
File without changes
reports/{v1 → v2}/02_longitud_texto.png RENAMED
File without changes
reports/{v1 → v2}/03_wordclouds.png RENAMED
File without changes
reports/{v1 → v2}/04_correlacion_sublabels.png RENAMED
File without changes
reports/{v1 → v2}/05_multilabel_overlap.png RENAMED
File without changes
reports/{v1 → v2}/06_toxicidad_por_video.png RENAMED
File without changes