Spaces:

devrup404
/

SignalMod

Running

JonnyBP committed 21 days ago

Commit

79ecfdc

1 Parent(s): df4616d

feat: add vectorization notebook. #4

Files changed (4) hide show

notebooks/01_eda_v2.ipynb CHANGED Viewed

@@ -515,6 +515,40 @@
     "print(df['word_count'].describe().round(1))\n"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 32,
@@ -932,7 +966,7 @@
    "id": "b90fab71",
    "metadata": {},
    "source": [
-    "## 7. Análisis por VideoId\n",
     "\n",
     "¿Algunos vídeos tienen más comentarios de odio? ¿El origen del vídeo sesga el dataset?\n"
    ]

     "print(df['word_count'].describe().round(1))\n"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "c239ed0f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "   -- CARACTERES --\n",
+      "La media > mediana = distribución sesgada\n",
+      "Hay comentarios MUY largos (hasta 4421 caracteres)\n",
+      "Pero no representan el comportamiento típico\n",
+      "      \n",
+      "    -- PALABRAS --\n",
+      "Igual patrón: muchos textos cortos, pocos muy largos\n",
+      "815 palabras es un outlier fuerte\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"\"\"   -- CARACTERES --\n",
+    "La media > mediana = distribución sesgada\n",
+    "Hay comentarios MUY largos (hasta 4421 caracteres)\n",
+    "Pero no representan el comportamiento típico\n",
+    "      \n",
+    "    -- PALABRAS --\n",
+    "Igual patrón: muchos textos cortos, pocos muy largos\n",
+    "815 palabras es un outlier fuerte\n",
+    "\"\"\")"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 32,
    "id": "b90fab71",
    "metadata": {},
    "source": [
+    "## 7. Análisis por VideoId (Por ahora no tiene relevancia)\n",
     "\n",
     "¿Algunos vídeos tienen más comentarios de odio? ¿El origen del vídeo sesga el dataset?\n"
    ]

notebooks/02_preprocessing_v2.ipynb CHANGED Viewed

@@ -656,7 +656,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
@@ -680,7 +680,7 @@
    "source": [
     "# ── Configuración MLflow ──\n",
     "MLFLOW_DIR      = PROJECT_ROOT / 'mlruns'\n",
-    "EXPERIMENT_NAME = 'Youtube_project_experiment'\n",
     "\n",
     "mlflow.set_tracking_uri(f\"file:{MLFLOW_DIR}\")\n",
     "mlflow.set_experiment(EXPERIMENT_NAME)\n",
@@ -718,7 +718,7 @@
     "    run_id = mlflow.active_run().info.run_id\n",
     "    print(f'MLflow run registrado')\n",
     "    print(f'  Run ID     : {run_id}')\n",
-    "    print(f'  Experimento: Youtube_project_experiments')\n",
     "    print(f'  Ver UI     : mlflow ui --backend-store-uri file://{MLFLOW_DIR}')"
    ]
   },

   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
    "source": [
     "# ── Configuración MLflow ──\n",
     "MLFLOW_DIR      = PROJECT_ROOT / 'mlruns'\n",
+    "EXPERIMENT_NAME = 'Youtube_project_data'\n",
     "\n",
     "mlflow.set_tracking_uri(f\"file:{MLFLOW_DIR}\")\n",
     "mlflow.set_experiment(EXPERIMENT_NAME)\n",
     "    run_id = mlflow.active_run().info.run_id\n",
     "    print(f'MLflow run registrado')\n",
     "    print(f'  Run ID     : {run_id}')\n",
+    "    print(f'  Experimento: Youtube_project_data')\n",
     "    print(f'  Ver UI     : mlflow ui --backend-store-uri file://{MLFLOW_DIR}')"
    ]
   },

notebooks/03_vectorization_v2.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

reports/v2/08_tfidf_top_features.png ADDED Viewed