The... positive\n",
"2 I thought this was a wonderful way to spend ti... positive\n",
"3 Basically there's a family where a little boy ... negative\n",
"4 Petter Mattei's \"Love in the Time of Money\" is... positive\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"nltk.download(\"punkt_tab\")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "DCj_cP5xQ_lA",
"outputId": "493920ed-26b6-4dec-a3e0-7d8084698a7c"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"[nltk_data] Downloading package punkt_tab to /root/nltk_data...\n",
"[nltk_data] Package punkt_tab is already up-to-date!\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {},
"execution_count": 5
}
]
},
{
"cell_type": "code",
"source": [
"\n",
"\n",
"stop_words = set(stopwords.words('english'))\n",
"lemmatizer = WordNetLemmatizer()\n",
"\n",
"def preprocess_text(text):\n",
" # 1. Lowercase\n",
" text = text.lower()\n",
"\n",
" # 2. Remove HTML tags\n",
" text = re.sub(r'<.*?>', '', text)\n",
"\n",
" # 3. Remove punctuation & numbers\n",
" text = re.sub(r'[^a-z\\s]', '', text)\n",
"\n",
" # 4. Tokenize\n",
" tokens = nltk.word_tokenize(text)\n",
"\n",
" # 5. Remove stopwords\n",
" tokens = [word for word in tokens if word not in stop_words]\n",
"\n",
" # 6. Lemmatization\n",
" tokens = [lemmatizer.lemmatize(word) for word in tokens]\n",
"\n",
" return \" \".join(tokens)\n",
"\n"
],
"metadata": {
"id": "zID5UONlP1ok"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"df[\"cleaned_review\"] = df[\"review\"].apply(preprocess_text)\n",
"\n",
"\n",
"print(df[[\"review\", \"cleaned_review\", \"sentiment\"]].tail())"
],
"metadata": {
"id": "HDsRgcxuP1rS",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "a30f30f1-6640-451b-8a46-247d488c9a68"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" review \\\n",
"49995 I thought this movie did a down right good job... \n",
"49996 Bad plot, bad dialogue, bad acting, idiotic di... \n",
"49997 I am a Catholic taught in parochial elementary... \n",
"49998 I'm going to have to disagree with the previou... \n",
"49999 No one expects the Star Trek movies to be high... \n",
"\n",
" cleaned_review sentiment \n",
"49995 thought movie right good job wasnt creative or... positive \n",
"49996 bad plot bad dialogue bad acting idiotic direc... negative \n",
"49997 catholic taught parochial elementary school nu... negative \n",
"49998 im going disagree previous comment side maltin... negative \n",
"49999 one expects star trek movie high art fan expec... negative \n"
]
}
]
},
{
"cell_type": "code",
"source": [
"!pip install transformers torch sentence-transformers\n"
],
"metadata": {
"id": "DUf6-RvRP1uL",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "c4b4362f-8aad-43d8-ac0e-56f7d20c4140"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: transformers in /usr/local/lib/python3.12/dist-packages (4.56.2)\n",
"Requirement already satisfied: torch in /usr/local/lib/python3.12/dist-packages (2.8.0+cu126)\n",
"Requirement already satisfied: sentence-transformers in /usr/local/lib/python3.12/dist-packages (5.1.1)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from transformers) (3.19.1)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.34.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.35.0)\n",
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.12/dist-packages (from transformers) (1.26.0)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (25.0)\n",
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from transformers) (6.0.2)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.12/dist-packages (from transformers) (2024.11.6)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from transformers) (2.32.4)\n",
"Requirement already satisfied: tokenizers<=0.23.0,>=0.22.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.22.0)\n",
"Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.6.2)\n",
"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.12/dist-packages (from transformers) (4.67.1)\n",
"Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.12/dist-packages (from torch) (4.15.0)\n",
"Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from torch) (75.2.0)\n",
"Requirement already satisfied: sympy>=1.13.3 in /usr/local/lib/python3.12/dist-packages (from torch) (1.13.3)\n",
"Requirement already satisfied: networkx in /usr/local/lib/python3.12/dist-packages (from torch) (3.5)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.12/dist-packages (from torch) (3.1.6)\n",
"Requirement already satisfied: fsspec in /usr/local/lib/python3.12/dist-packages (from torch) (2025.3.0)\n",
"Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.77)\n",
"Requirement already satisfied: nvidia-cuda-runtime-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.77)\n",
"Requirement already satisfied: nvidia-cuda-cupti-cu12==12.6.80 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.80)\n",
"Requirement already satisfied: nvidia-cudnn-cu12==9.10.2.21 in /usr/local/lib/python3.12/dist-packages (from torch) (9.10.2.21)\n",
"Requirement already satisfied: nvidia-cublas-cu12==12.6.4.1 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.4.1)\n",
"Requirement already satisfied: nvidia-cufft-cu12==11.3.0.4 in /usr/local/lib/python3.12/dist-packages (from torch) (11.3.0.4)\n",
"Requirement already satisfied: nvidia-curand-cu12==10.3.7.77 in /usr/local/lib/python3.12/dist-packages (from torch) (10.3.7.77)\n",
"Requirement already satisfied: nvidia-cusolver-cu12==11.7.1.2 in /usr/local/lib/python3.12/dist-packages (from torch) (11.7.1.2)\n",
"Requirement already satisfied: nvidia-cusparse-cu12==12.5.4.2 in /usr/local/lib/python3.12/dist-packages (from torch) (12.5.4.2)\n",
"Requirement already satisfied: nvidia-cusparselt-cu12==0.7.1 in /usr/local/lib/python3.12/dist-packages (from torch) (0.7.1)\n",
"Requirement already satisfied: nvidia-nccl-cu12==2.27.3 in /usr/local/lib/python3.12/dist-packages (from torch) (2.27.3)\n",
"Requirement already satisfied: nvidia-nvtx-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.77)\n",
"Requirement already satisfied: nvidia-nvjitlink-cu12==12.6.85 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.85)\n",
"Requirement already satisfied: nvidia-cufile-cu12==1.11.1.6 in /usr/local/lib/python3.12/dist-packages (from torch) (1.11.1.6)\n",
"Requirement already satisfied: triton==3.4.0 in /usr/local/lib/python3.12/dist-packages (from torch) (3.4.0)\n",
"Requirement already satisfied: scikit-learn in /usr/local/lib/python3.12/dist-packages (from sentence-transformers) (1.6.1)\n",
"Requirement already satisfied: scipy in /usr/local/lib/python3.12/dist-packages (from sentence-transformers) (1.16.2)\n",
"Requirement already satisfied: Pillow in /usr/local/lib/python3.12/dist-packages (from sentence-transformers) (11.3.0)\n",
"Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.34.0->transformers) (1.1.10)\n",
"Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from sympy>=1.13.3->torch) (1.3.0)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.12/dist-packages (from jinja2->torch) (3.0.2)\n",
"Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (3.4.3)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (3.10)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (2.5.0)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (2025.8.3)\n",
"Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn->sentence-transformers) (1.5.2)\n",
"Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn->sentence-transformers) (3.6.0)\n"
]
}
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "so6p265dWAfb"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "j7qu7HlyWAkb"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"!pip install -U torch torchvision torchaudio\n",
"!pip install -U transformers sentence-transformers\n"
],
"metadata": {
"id": "kQkAlTaKP1w0",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "77568f04-e205-4524-c838-c46042a317a2"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: torch in /usr/local/lib/python3.12/dist-packages (2.8.0+cu126)\n",
"Requirement already satisfied: torchvision in /usr/local/lib/python3.12/dist-packages (0.23.0+cu126)\n",
"Requirement already satisfied: torchaudio in /usr/local/lib/python3.12/dist-packages (2.8.0+cu126)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from torch) (3.19.1)\n",
"Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.12/dist-packages (from torch) (4.15.0)\n",
"Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from torch) (75.2.0)\n",
"Requirement already satisfied: sympy>=1.13.3 in /usr/local/lib/python3.12/dist-packages (from torch) (1.13.3)\n",
"Requirement already satisfied: networkx in /usr/local/lib/python3.12/dist-packages (from torch) (3.5)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.12/dist-packages (from torch) (3.1.6)\n",
"Requirement already satisfied: fsspec in /usr/local/lib/python3.12/dist-packages (from torch) (2025.3.0)\n",
"Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.77)\n",
"Requirement already satisfied: nvidia-cuda-runtime-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.77)\n",
"Requirement already satisfied: nvidia-cuda-cupti-cu12==12.6.80 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.80)\n",
"Requirement already satisfied: nvidia-cudnn-cu12==9.10.2.21 in /usr/local/lib/python3.12/dist-packages (from torch) (9.10.2.21)\n",
"Requirement already satisfied: nvidia-cublas-cu12==12.6.4.1 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.4.1)\n",
"Requirement already satisfied: nvidia-cufft-cu12==11.3.0.4 in /usr/local/lib/python3.12/dist-packages (from torch) (11.3.0.4)\n",
"Requirement already satisfied: nvidia-curand-cu12==10.3.7.77 in /usr/local/lib/python3.12/dist-packages (from torch) (10.3.7.77)\n",
"Requirement already satisfied: nvidia-cusolver-cu12==11.7.1.2 in /usr/local/lib/python3.12/dist-packages (from torch) (11.7.1.2)\n",
"Requirement already satisfied: nvidia-cusparse-cu12==12.5.4.2 in /usr/local/lib/python3.12/dist-packages (from torch) (12.5.4.2)\n",
"Requirement already satisfied: nvidia-cusparselt-cu12==0.7.1 in /usr/local/lib/python3.12/dist-packages (from torch) (0.7.1)\n",
"Requirement already satisfied: nvidia-nccl-cu12==2.27.3 in /usr/local/lib/python3.12/dist-packages (from torch) (2.27.3)\n",
"Requirement already satisfied: nvidia-nvtx-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.77)\n",
"Requirement already satisfied: nvidia-nvjitlink-cu12==12.6.85 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.85)\n",
"Requirement already satisfied: nvidia-cufile-cu12==1.11.1.6 in /usr/local/lib/python3.12/dist-packages (from torch) (1.11.1.6)\n",
"Requirement already satisfied: triton==3.4.0 in /usr/local/lib/python3.12/dist-packages (from torch) (3.4.0)\n",
"Requirement already satisfied: numpy in /usr/local/lib/python3.12/dist-packages (from torchvision) (1.26.0)\n",
"Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.12/dist-packages (from torchvision) (11.3.0)\n",
"Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from sympy>=1.13.3->torch) (1.3.0)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.12/dist-packages (from jinja2->torch) (3.0.2)\n",
"Requirement already satisfied: transformers in /usr/local/lib/python3.12/dist-packages (4.56.2)\n",
"Requirement already satisfied: sentence-transformers in /usr/local/lib/python3.12/dist-packages (5.1.1)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from transformers) (3.19.1)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.34.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.35.0)\n",
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.12/dist-packages (from transformers) (1.26.0)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (25.0)\n",
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from transformers) (6.0.2)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.12/dist-packages (from transformers) (2024.11.6)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from transformers) (2.32.4)\n",
"Requirement already satisfied: tokenizers<=0.23.0,>=0.22.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.22.0)\n",
"Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.6.2)\n",
"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.12/dist-packages (from transformers) (4.67.1)\n",
"Requirement already satisfied: torch>=1.11.0 in /usr/local/lib/python3.12/dist-packages (from sentence-transformers) (2.8.0+cu126)\n",
"Requirement already satisfied: scikit-learn in /usr/local/lib/python3.12/dist-packages (from sentence-transformers) (1.6.1)\n",
"Requirement already satisfied: scipy in /usr/local/lib/python3.12/dist-packages (from sentence-transformers) (1.16.2)\n",
"Requirement already satisfied: Pillow in /usr/local/lib/python3.12/dist-packages (from sentence-transformers) (11.3.0)\n",
"Requirement already satisfied: typing_extensions>=4.5.0 in /usr/local/lib/python3.12/dist-packages (from sentence-transformers) (4.15.0)\n",
"Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.34.0->transformers) (2025.3.0)\n",
"Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.34.0->transformers) (1.1.10)\n",
"Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from torch>=1.11.0->sentence-transformers) (75.2.0)\n",
"Requirement already satisfied: sympy>=1.13.3 in /usr/local/lib/python3.12/dist-packages (from torch>=1.11.0->sentence-transformers) (1.13.3)\n",
"Requirement already satisfied: networkx in /usr/local/lib/python3.12/dist-packages (from torch>=1.11.0->sentence-transformers) (3.5)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.12/dist-packages (from torch>=1.11.0->sentence-transformers) (3.1.6)\n",
"Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch>=1.11.0->sentence-transformers) (12.6.77)\n",
"Requirement already satisfied: nvidia-cuda-runtime-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch>=1.11.0->sentence-transformers) (12.6.77)\n",
"Requirement already satisfied: nvidia-cuda-cupti-cu12==12.6.80 in /usr/local/lib/python3.12/dist-packages (from torch>=1.11.0->sentence-transformers) (12.6.80)\n",
"Requirement already satisfied: nvidia-cudnn-cu12==9.10.2.21 in /usr/local/lib/python3.12/dist-packages (from torch>=1.11.0->sentence-transformers) (9.10.2.21)\n",
"Requirement already satisfied: nvidia-cublas-cu12==12.6.4.1 in /usr/local/lib/python3.12/dist-packages (from torch>=1.11.0->sentence-transformers) (12.6.4.1)\n",
"Requirement already satisfied: nvidia-cufft-cu12==11.3.0.4 in /usr/local/lib/python3.12/dist-packages (from torch>=1.11.0->sentence-transformers) (11.3.0.4)\n",
"Requirement already satisfied: nvidia-curand-cu12==10.3.7.77 in /usr/local/lib/python3.12/dist-packages (from torch>=1.11.0->sentence-transformers) (10.3.7.77)\n",
"Requirement already satisfied: nvidia-cusolver-cu12==11.7.1.2 in /usr/local/lib/python3.12/dist-packages (from torch>=1.11.0->sentence-transformers) (11.7.1.2)\n",
"Requirement already satisfied: nvidia-cusparse-cu12==12.5.4.2 in /usr/local/lib/python3.12/dist-packages (from torch>=1.11.0->sentence-transformers) (12.5.4.2)\n",
"Requirement already satisfied: nvidia-cusparselt-cu12==0.7.1 in /usr/local/lib/python3.12/dist-packages (from torch>=1.11.0->sentence-transformers) (0.7.1)\n",
"Requirement already satisfied: nvidia-nccl-cu12==2.27.3 in /usr/local/lib/python3.12/dist-packages (from torch>=1.11.0->sentence-transformers) (2.27.3)\n",
"Requirement already satisfied: nvidia-nvtx-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch>=1.11.0->sentence-transformers) (12.6.77)\n",
"Requirement already satisfied: nvidia-nvjitlink-cu12==12.6.85 in /usr/local/lib/python3.12/dist-packages (from torch>=1.11.0->sentence-transformers) (12.6.85)\n",
"Requirement already satisfied: nvidia-cufile-cu12==1.11.1.6 in /usr/local/lib/python3.12/dist-packages (from torch>=1.11.0->sentence-transformers) (1.11.1.6)\n",
"Requirement already satisfied: triton==3.4.0 in /usr/local/lib/python3.12/dist-packages (from torch>=1.11.0->sentence-transformers) (3.4.0)\n",
"Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (3.4.3)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (3.10)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (2.5.0)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (2025.8.3)\n",
"Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn->sentence-transformers) (1.5.2)\n",
"Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn->sentence-transformers) (3.6.0)\n",
"Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from sympy>=1.13.3->torch>=1.11.0->sentence-transformers) (1.3.0)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.12/dist-packages (from jinja2->torch>=1.11.0->sentence-transformers) (3.0.2)\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"\n",
"from sklearn.feature_extraction.text import TfidfVectorizer"
],
"metadata": {
"id": "Ai71-ZR9jXUy"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"\n",
"tfidf = TfidfVectorizer(max_features=5000)\n",
"X = tfidf.fit_transform(df[\"cleaned_review\"])\n",
"\n",
"y = df[\"sentiment\"].map({\"positive\": 1, \"negative\": 0}).values\n",
"print(X)"
],
"metadata": {
"id": "qrre8w1hP1zn",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "7e36be98-44f0-4f69-efcb-9d38c6d7a5a8"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\n",
" Coords\tValues\n",
" (0, 3086)\t0.022306340769509683\n",
" (0, 3687)\t0.0716098594890523\n",
" (0, 2778)\t0.06997013472849029\n",
" (0, 4832)\t0.08220463762202762\n",
" (0, 3137)\t0.47260726598173247\n",
" (0, 1467)\t0.11279543497089747\n",
" (0, 4990)\t0.05792759900745007\n",
" (0, 2119)\t0.0884283468115394\n",
" (0, 3707)\t0.09072957775856476\n",
" (0, 1513)\t0.06118924586115793\n",
" (0, 1993)\t0.06126011378849219\n",
" (0, 1697)\t0.06827831217826244\n",
" (0, 4468)\t0.034484905907198556\n",
" (0, 4258)\t0.1771814513806625\n",
" (0, 543)\t0.09851073786514736\n",
" (0, 3831)\t0.03333634078441469\n",
" (0, 4763)\t0.25095990028835147\n",
" (0, 3914)\t0.04650048507467132\n",
" (0, 4938)\t0.05348789066286241\n",
" (0, 1883)\t0.07135314796729297\n",
" (0, 4610)\t0.07763035600809258\n",
" (0, 3975)\t0.15304406167950377\n",
" (0, 3451)\t0.06917492387135953\n",
" (0, 3456)\t0.08224792976382196\n",
" (0, 3583)\t0.08260003424873928\n",
" :\t:\n",
" (49999, 4949)\t0.09967306284736142\n",
" (49999, 1608)\t0.18895470226431363\n",
" (49999, 2146)\t0.0938134095848708\n",
" (49999, 4608)\t0.10421390262146894\n",
" (49999, 680)\t0.1210849951944911\n",
" (49999, 3818)\t0.1207533622254777\n",
" (49999, 2470)\t0.12550521294127479\n",
" (49999, 4670)\t0.11972754064045821\n",
" (49999, 2540)\t0.10566290485449034\n",
" (49999, 1538)\t0.12096018782971787\n",
" (49999, 303)\t0.13528867065174088\n",
" (49999, 2002)\t0.14117928521526385\n",
" (49999, 2225)\t0.12437816971606588\n",
" (49999, 2205)\t0.1883414548411176\n",
" (49999, 241)\t0.12037119185480823\n",
" (49999, 4588)\t0.1774967991895605\n",
" (49999, 2457)\t0.2041086873930473\n",
" (49999, 4154)\t0.2230098076678039\n",
" (49999, 3632)\t0.17275135778399867\n",
" (49999, 581)\t0.16361717948998034\n",
" (49999, 2995)\t0.1828212276262584\n",
" (49999, 684)\t0.14413194406724505\n",
" (49999, 1897)\t0.17552265388590801\n",
" (49999, 3630)\t0.16768695545427548\n",
" (49999, 2903)\t0.19720455104959872\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"\n",
"from sklearn.feature_extraction.text import TfidfVectorizer\n"
],
"metadata": {
"id": "y0voh7wgYX5z"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"**MODEL**"
],
"metadata": {
"id": "9ahaUAiOjaih"
}
},
{
"cell_type": "code",
"source": [
"from sklearn.model_selection import train_test_split\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.metrics import accuracy_score, classification_report, confusion_matrix\n",
"\n",
"\n",
"\n"
],
"metadata": {
"id": "fDNAJ33YP12O"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
" X, y, test_size=0.2, random_state=42\n",
")\n"
],
"metadata": {
"id": "a_u6DHxwP15A"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"\n",
"clf = LogisticRegression(max_iter=200)\n",
"\n",
"clf.fit(X_train, y_train)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 80
},
"id": "rebafV7PZQa8",
"outputId": "1237907c-d30f-4e97-914f-b16a544a98bf"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"LogisticRegression(max_iter=200)"
],
"text/html": [
"
LogisticRegression(max_iter=200)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.