{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "N3shQZoZPScM", "outputId": "63642e05-bd32-4fd9-f029-8f50148a1e8a" }, "outputs": [], "source": [ "!pip install -U sentence_transformers --q" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "rcBH0FzwVOk6", "outputId": "f5b4b762-9b30-4474-d1d0-7ba3ab68a2ef" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.0.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip3 install --upgrade pip\u001b[0m\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "pip install datasets --q" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "y-pDMu97XyVd", "outputId": "737160a3-2c34-4293-a129-bb053cd91117" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting sentence-transformers\n", " Using cached sentence_transformers-3.4.1-py3-none-any.whl.metadata (10 kB)\n", "Requirement already satisfied: scikit-learn in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (1.4.1.post1)\n", "Requirement already satisfied: pandas in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (2.2.1)\n", "Collecting torch\n", " Downloading torch-2.6.0-cp312-none-macosx_11_0_arm64.whl.metadata (28 kB)\n", "Collecting transformers<5.0.0,>=4.41.0 (from sentence-transformers)\n", " Downloading transformers-4.48.3-py3-none-any.whl.metadata (44 kB)\n", "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.4/44.4 kB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: tqdm in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from sentence-transformers) (4.67.1)\n", "Requirement already satisfied: scipy in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from sentence-transformers) (1.12.0)\n", "Requirement already satisfied: huggingface-hub>=0.20.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from sentence-transformers) (0.28.1)\n", "Requirement already satisfied: Pillow in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from sentence-transformers) (10.2.0)\n", "Requirement already satisfied: numpy<2.0,>=1.19.5 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from scikit-learn) (1.26.4)\n", "Requirement already satisfied: joblib>=1.2.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from scikit-learn) (1.3.2)\n", "Requirement already satisfied: threadpoolctl>=2.0.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from scikit-learn) (3.3.0)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/markushenriksson/Library/Python/3.12/lib/python/site-packages (from pandas) (2.9.0.post0)\n", "Requirement already satisfied: pytz>=2020.1 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from pandas) (2024.1)\n", "Requirement already satisfied: tzdata>=2022.7 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from pandas) (2024.1)\n", "Requirement already satisfied: filelock in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from torch) (3.16.0)\n", "Requirement already satisfied: 
typing-extensions>=4.10.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from torch) (4.10.0)\n", "Collecting networkx (from torch)\n", " Using cached networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)\n", "Requirement already satisfied: jinja2 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from torch) (3.1.3)\n", "Requirement already satisfied: fsspec in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from torch) (2024.2.0)\n", "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from torch) (69.1.1)\n", "Collecting sympy==1.13.1 (from torch)\n", " Using cached sympy-1.13.1-py3-none-any.whl.metadata (12 kB)\n", "Collecting mpmath<1.4,>=1.1.0 (from sympy==1.13.1->torch)\n", " Using cached mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)\n", "Requirement already satisfied: packaging>=20.9 in /Users/markushenriksson/Library/Python/3.12/lib/python/site-packages (from huggingface-hub>=0.20.0->sentence-transformers) (24.0)\n", "Requirement already satisfied: pyyaml>=5.1 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from huggingface-hub>=0.20.0->sentence-transformers) (6.0.1)\n", "Requirement already satisfied: requests in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from huggingface-hub>=0.20.0->sentence-transformers) (2.32.3)\n", "Requirement already satisfied: six>=1.5 in /Users/markushenriksson/Library/Python/3.12/lib/python/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n", "Collecting regex!=2019.12.17 (from transformers<5.0.0,>=4.41.0->sentence-transformers)\n", " Using cached regex-2024.11.6-cp312-cp312-macosx_11_0_arm64.whl.metadata (40 kB)\n", "Collecting tokenizers<0.22,>=0.21 (from transformers<5.0.0,>=4.41.0->sentence-transformers)\n", " Downloading 
tokenizers-0.21.0-cp39-abi3-macosx_11_0_arm64.whl.metadata (6.7 kB)\n", "Collecting safetensors>=0.4.1 (from transformers<5.0.0,>=4.41.0->sentence-transformers)\n", " Downloading safetensors-0.5.2-cp38-abi3-macosx_11_0_arm64.whl.metadata (3.8 kB)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from jinja2->torch) (2.1.5)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from requests->huggingface-hub>=0.20.0->sentence-transformers) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from requests->huggingface-hub>=0.20.0->sentence-transformers) (3.6)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from requests->huggingface-hub>=0.20.0->sentence-transformers) (2.0.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from requests->huggingface-hub>=0.20.0->sentence-transformers) (2024.2.2)\n", "Using cached sentence_transformers-3.4.1-py3-none-any.whl (275 kB)\n", "Downloading torch-2.6.0-cp312-none-macosx_11_0_arm64.whl (66.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.5/66.5 MB\u001b[0m \u001b[31m24.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hUsing cached sympy-1.13.1-py3-none-any.whl (6.2 MB)\n", "Downloading transformers-4.48.3-py3-none-any.whl (9.7 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.7/9.7 MB\u001b[0m \u001b[31m35.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hUsing cached networkx-3.4.2-py3-none-any.whl (1.7 MB)\n", "Using 
cached mpmath-1.3.0-py3-none-any.whl (536 kB)\n", "Using cached regex-2024.11.6-cp312-cp312-macosx_11_0_arm64.whl (284 kB)\n", "Downloading safetensors-0.5.2-cp38-abi3-macosx_11_0_arm64.whl (408 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m408.9/408.9 kB\u001b[0m \u001b[31m27.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading tokenizers-0.21.0-cp39-abi3-macosx_11_0_arm64.whl (2.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.6/2.6 MB\u001b[0m \u001b[31m36.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hInstalling collected packages: mpmath, sympy, safetensors, regex, networkx, torch, tokenizers, transformers, sentence-transformers\n", "Successfully installed mpmath-1.3.0 networkx-3.4.2 regex-2024.11.6 safetensors-0.5.2 sentence-transformers-3.4.1 sympy-1.13.1 tokenizers-0.21.0 torch-2.6.0 transformers-4.48.3\n", "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.0.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip3 install --upgrade pip\u001b[0m\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "pip install sentence-transformers scikit-learn pandas torch\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "id": "m-tmgXuldd3C" }, "outputs": [], "source": [ "import seaborn as sns" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "id": "1Z0mgYZEgjC4" }, "outputs": [], "source": [ "from sklearn.ensemble import RandomForestClassifier" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "id": "aBmXLbZ4cc1U" }, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n" 
] }, { "cell_type": "code", "execution_count": 7, "metadata": { "id": "LXkXdIWgUcWI" }, "outputs": [], "source": [ "from datasets import load_dataset, Dataset\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "id": "AFkI23ySgtkV" }, "outputs": [], "source": [ "from sklearn.metrics import accuracy_score" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ehvh1BJZWa_1", "outputId": "212a5f82-885d-4e61-a73f-94dcf12a3a39" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ab0344420d964f64a16c911f17aae057", "version_major": 2, "version_minor": 0 }, "text/plain": [ "README.md: 0%| | 0.00/515 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
answersystem_promptuser_prompttask_type
0neutralYou are a financial sentiment analysis expert....According to Gran , the company has no plans t...sentiment_analysis
1neutralYou are a financial sentiment analysis expert....Technopolis plans to develop in stages an area...sentiment_analysis
2negativeYou are a financial sentiment analysis expert....The international electronic industry company ...sentiment_analysis
3positiveYou are a financial sentiment analysis expert....With the new production plant the company woul...sentiment_analysis
4positiveYou are a financial sentiment analysis expert....According to the company 's updated strategy f...sentiment_analysis
\n", "" ], "text/plain": [ " answer system_prompt \\\n", "0 neutral You are a financial sentiment analysis expert.... \n", "1 neutral You are a financial sentiment analysis expert.... \n", "2 negative You are a financial sentiment analysis expert.... \n", "3 positive You are a financial sentiment analysis expert.... \n", "4 positive You are a financial sentiment analysis expert.... \n", "\n", " user_prompt task_type \n", "0 According to Gran , the company has no plans t... sentiment_analysis \n", "1 Technopolis plans to develop in stages an area... sentiment_analysis \n", "2 The international electronic industry company ... sentiment_analysis \n", "3 With the new production plant the company woul... sentiment_analysis \n", "4 According to the company 's updated strategy f... sentiment_analysis " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "id": "cQ3tjGFTW5kE" }, "outputs": [], "source": [ "df.drop(['system_prompt', 'task_type'], axis=1, inplace=True)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 423 }, "id": "Va5867ATXAxD", "outputId": "c258f546-d8af-4ba5-8228-3a07f2283baf" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
answeruser_prompt
0neutralAccording to Gran , the company has no plans t...
1neutralTechnopolis plans to develop in stages an area...
2negativeThe international electronic industry company ...
3positiveWith the new production plant the company woul...
4positiveAccording to the company 's updated strategy f...
.........
61194Treasuries | Corporate DebtKfW credit line for Uniper could be raised to ...
61195Treasuries | Corporate DebtKfW credit line for Uniper could be raised to ...
61196Treasuries | Corporate DebtRussian https://t.co/R0iPhyo5p7 sells 1 bln r...
61197Treasuries | Corporate DebtGlobal ESG bond issuance posts H1 dip as supra...
61198Treasuries | Corporate DebtBrazil's Petrobras says it signed a $1.25 bill...
\n", "

61199 rows × 2 columns

\n", "
" ], "text/plain": [ " answer \\\n", "0 neutral \n", "1 neutral \n", "2 negative \n", "3 positive \n", "4 positive \n", "... ... \n", "61194 Treasuries | Corporate Debt \n", "61195 Treasuries | Corporate Debt \n", "61196 Treasuries | Corporate Debt \n", "61197 Treasuries | Corporate Debt \n", "61198 Treasuries | Corporate Debt \n", "\n", " user_prompt \n", "0 According to Gran , the company has no plans t... \n", "1 Technopolis plans to develop in stages an area... \n", "2 The international electronic industry company ... \n", "3 With the new production plant the company woul... \n", "4 According to the company 's updated strategy f... \n", "... ... \n", "61194 KfW credit line for Uniper could be raised to ... \n", "61195 KfW credit line for Uniper could be raised to ... \n", "61196 Russian https://t.co/R0iPhyo5p7 sells 1 bln r... \n", "61197 Global ESG bond issuance posts H1 dip as supra... \n", "61198 Brazil's Petrobras says it signed a $1.25 bill... \n", "\n", "[61199 rows x 2 columns]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "a2PtcHIfeM5t", "outputId": "2214b201-c68d-4112-d224-855bd7103213" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(39641, 2)\n" ] } ], "source": [ "# only want to keep rows where 'answer' is 'neutral', 'positive', or 'negative'\n", "df_filtered = df[df[\"answer\"].isin([\"neutral\", \"positive\", \"negative\"])]\n", "\n", "# Showing the shape of the new DataFrame\n", "print(df_filtered.shape)\n" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "OzmsDZ-tZuGA", "outputId": "cb743649-521b-45da-8c9a-182fff7584bd" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(5946, 2)\n" ] } ], "source": [ "df_sampled = df_filtered.sample(frac=0.15, 
random_state=42) # 15% sample\n", "print(df_sampled.shape) # Checking new size\n" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 466 }, "id": "kkaxAfyQdcgZ", "outputId": "1fa73eb8-c4ea-4b0b-8288-7dcb3c517798" }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAGwCAYAAABIC3rIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/H5lhTAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAyrUlEQVR4nO3de1hVdd7//9cGBVHc4IGDJJ4yFUwtzXRXKqkJSt46Y40ahRXabTdqSh6Ga0odq2GyrByncqopdG6dbGY6KaUSCp7wRIPnGPPGwbkUMBUQD6iwf3/0Y33baaaI7K2f5+O61nWx1ue913p/uJbwcq21Nzan0+kUAACAwbzc3QAAAIC7EYgAAIDxCEQAAMB4BCIAAGA8AhEAADAegQgAABiPQAQAAIxXz90N3Aiqqqp0+PBhNW7cWDabzd3tAACAK+B0OnXy5EmFhYXJy+vy14AIRFfg8OHDCg8Pd3cbAACgBg4dOqSWLVtetoZAdAUaN24s6ftvqN1ud3M3AADgSpSVlSk8PNz6PX45BKIrUH2bzG63E4gAALjBXMnjLjxUDQAAjEcgAgAAxiMQAQAA4xGIAACA8QhEAADAeAQiAABgPAIRAAAwHoEIAAAYj0AEAACMRyACAADGIxABAADjEYgAAIDxCEQAAMB4BCIAAGA8AhEAADBePXc3YJIe0xa7uwV4kJxX4t3dAgDg/8cVIgAAYDwCEQAAMB6BCAAAGI9ABAAAjEcgAgAAxiMQAQAA4xGIAACA8QhEAADAeAQiAABgPAIRAAAwHoEIAAAYj0AEAACMRyACAADGIxABAADjEYgAAIDxCEQAAMB4BCIAAGA8twait99+W127dpXdbpfdbpfD4dCXX35pjZ89e1aJiYlq1qyZ/P39NWLECBUVFbnso6CgQLGxsWrYsKGCg4M1bdo0XbhwwaUmMzNT3bt3l6+vr9q3b6/U1NS6mB4AALhBuDUQtWzZUr///e+Vk5Oj7du3q3///ho2bJj27NkjSZoyZYqWL1+uv/3tb8rKytLhw4f1y1/+0np9ZWWlYmNjde7cOW3atEmLFi1SamqqZs6cadXk5+crNjZW999/v3JzczV58mSNHTtWq1atqvP5AgAAz2RzOp1OdzfxQ02bNtUrr7yihx56SEFBQVq6dKkeeughSdI333yjiIgIZWdnq3fv3vryyy/14IMP6vDhwwoJCZEkLVy4UDNmzNDRo0fl4+OjGTNmKC0tTbt377aOMWrUKJWUlGjlypWX7KGiokIVFRXWellZmcLDw1VaWiq73V7jufWYtrjGr8XNJ+eVeHe3AAA3tbKyMgUEBFzR72+PeYaosrJSH374oU6dOiWHw6GcnBydP39eAwcOtGo6deqkVq1aKTs7W5KUnZ2tLl26WGFIkqKjo1VWVmZdZcrOznbZR3VN9T4uJSUlRQEBAdYSHh5em1MFAAAexu2BaNeuXfL395evr6/Gjx+vTz75RJGRkSosLJSPj48CAwNd6kNCQlRYWChJKiwsdAlD1ePVY5erKSsr05kzZy7ZU3JyskpLS63l0KFDtTFVA
ADgoeq5u4GOHTsqNzdXpaWl+vvf/64xY8YoKyvLrT35+vrK19fXrT0AAIC64/ZA5OPjo/bt20uSevTooW3btmn+/PkaOXKkzp07p5KSEperREVFRQoNDZUkhYaGauvWrS77q34X2g9rfvzOtKKiItntdvn5+V2vaQEAgBuI22+Z/VhVVZUqKirUo0cP1a9fXxkZGdZYXl6eCgoK5HA4JEkOh0O7du1ScXGxVZOeni673a7IyEir5of7qK6p3gcAAIBbrxAlJydr8ODBatWqlU6ePKmlS5cqMzNTq1atUkBAgBISEpSUlKSmTZvKbrdr4sSJcjgc6t27tyRp0KBBioyM1GOPPaa5c+eqsLBQzz33nBITE61bXuPHj9cf//hHTZ8+XU8++aTWrFmjjz76SGlpae6cOgAA8CBuDUTFxcWKj4/XkSNHFBAQoK5du2rVqlV64IEHJEmvv/66vLy8NGLECFVUVCg6OlpvvfWW9Xpvb2+tWLFCTz/9tBwOhxo1aqQxY8Zozpw5Vk3btm2VlpamKVOmaP78+WrZsqXee+89RUdH1/l8AQCAZ/K4zyHyRFfzOQaXw+cQ4Yf4HCIAuL5uyM8hAgAAcBcCEQAAMB6BCAAAGI9ABAAAjEcgAgAAxiMQAQAA4xGIAACA8QhEAADAeAQiAABgPAIRAAAwHoEIAAAYj0AEAACMRyACAADGIxABAADjEYgAAIDxCEQAAMB4BCIAAGA8AhEAADAegQgAABiPQAQAAIxHIAIAAMYjEAEAAOMRiAAAgPEIRAAAwHgEIgAAYDwCEQAAMB6BCAAAGI9ABAAAjEcgAgAAxiMQAQAA4xGIAACA8QhEAADAeAQiAABgPAIRAAAwHoEIAAAYj0AEAACMRyACAADGIxABAADjEYgAAIDxCEQAAMB4BCIAAGA8AhEAADAegQgAABiPQAQAAIzn1kCUkpKinj17qnHjxgoODtbw4cOVl5fnUhMVFSWbzeayjB8/3qWmoKBAsbGxatiwoYKDgzVt2jRduHDBpSYzM1Pdu3eXr6+v2rdvr9TU1Os9PQAAcINwayDKyspSYmKiNm/erPT0dJ0/f16DBg3SqVOnXOrGjRunI0eOWMvcuXOtscrKSsXGxurcuXPatGmTFi1apNTUVM2cOdOqyc/PV2xsrO6//37l5uZq8uTJGjt2rFatWlVncwUAAJ6rnjsPvnLlSpf11NRUBQcHKycnR3379rW2N2zYUKGhoZfcx+rVq7V371599dVXCgkJ0R133KEXXnhBM2bM0OzZs+Xj46OFCxeqbdu2mjdvniQpIiJCGzZs0Ouvv67o6OiL9llRUaGKigprvaysrDamCwAAPJRHPUNUWloqSWratKnL9iVLlqh58+a6/fbblZycrNOnT1tj2dnZ6tKli0JCQqxt0dHRKisr0549e6yagQMHuuwzOjpa2dnZl+wjJSVFAQEB1hIeHl4r8wMAAJ7JrVeIfqiqqkqTJ0/Wvffeq9tvv93a/sgjj6h169YKCwvTzp07NWPGDOXl5enjjz+WJBUWFrqEIUnWemFh4WVrysrKdObMGfn5+bmMJScnKykpyVovKysjFAEAcBPzmECUmJio3bt3a8OGDS7bn3rqKevrLl26qEWLFhowYIAOHDigW2+99br04uvrK19f3+uybwAA4Hk84pbZhAkTtGLFCq1du1YtW7a8bG2vXr0kSd9++60kKTQ0VEVFRS411evVzx39VI3dbr/o6hAAADCPWwOR0+nUhAkT9Mknn2jNmjVq27btz74mNzdXktSiRQtJksPh0K5du1RcXGzVpKeny263KzIy0qrJyMhw2U96erocDkctzQQAANzI3BqIEhMT9b//+79aunSpGjdurMLCQhUWFurMmTOSpAMHDuiFF15QTk6ODh48qM8//1zx8fHq27evunbtKkkaNGiQIiMj9dhjj2nHjh1atWqVnnvuOSUmJlq3vcaPH6//+7//0/Tp0/XNN9/orbfe0
kcffaQpU6a4be4AAMBzuDUQvf322yotLVVUVJRatGhhLcuWLZMk+fj46KuvvtKgQYPUqVMnPfvssxoxYoSWL19u7cPb21srVqyQt7e3HA6HHn30UcXHx2vOnDlWTdu2bZWWlqb09HR169ZN8+bN03vvvXfJt9wDAADz2JxOp9PdTXi6srIyBQQEqLS0VHa7vcb76TFtcS12hRtdzivx7m4BAG5qV/P72yMeqgYAAHAnAhEAADAegQgAABiPQAQAAIxHIAIAAMYjEAEAAOMRiAAAgPEIRAAAwHgEIgAAYDwCEQAAMB6BCAAAGI9ABAAAjEcgAgAAxiMQAQAA4xGIAACA8QhEAADAeAQiAABgPAIRAAAwHoEIAAAYj0AEAACMRyACAADGIxABAADjEYgAAIDxCEQAAMB4BCIAAGA8AhEAADAegQgAABiPQAQAAIxHIAIAAMYjEAEAAOMRiAAAgPEIRAAAwHgEIgAAYDwCEQAAMB6BCAAAGI9ABAAAjEcgAgAAxiMQAQAA4xGIAACA8QhEAADAeAQiAABgPAIRAAAwHoEIAAAYj0AEAACM59ZAlJKSop49e6px48YKDg7W8OHDlZeX51Jz9uxZJSYmqlmzZvL399eIESNUVFTkUlNQUKDY2Fg1bNhQwcHBmjZtmi5cuOBSk5mZqe7du8vX11ft27dXamrq9Z4eAAC4Qbg1EGVlZSkxMVGbN29Wenq6zp8/r0GDBunUqVNWzZQpU7R8+XL97W9/U1ZWlg4fPqxf/vKX1nhlZaViY2N17tw5bdq0SYsWLVJqaqpmzpxp1eTn5ys2Nlb333+/cnNzNXnyZI0dO1arVq2q0/kCAADPZHM6nU53N1Ht6NGjCg4OVlZWlvr27avS0lIFBQVp6dKleuihhyRJ33zzjSIiIpSdna3evXvryy+/1IMPPqjDhw8rJCREkrRw4ULNmDFDR48elY+Pj2bMmKG0tDTt3r3bOtaoUaNUUlKilStXXtRHRUWFKioqrPWysjKFh4ertLRUdru9xvPrMW1xjV+Lm0/OK/HubgEAbmplZWUKCAi4ot/fHvUMUWlpqSSpadOmkqScnBydP39eAwcOtGo6deqkVq1aKTs7W5KUnZ2tLl26WGFIkqKjo1VWVqY9e/ZYNT/cR3VN9T5+LCUlRQEBAdYSHh5ee5MEAAAex2MCUVVVlSZPnqx7771Xt99+uySpsLBQPj4+CgwMdKkNCQlRYWGhVfPDMFQ9Xj12uZqysjKdOXPmol6Sk5NVWlpqLYcOHaqVOQIAAM9Uz90NVEtMTNTu3bu1YcMGd7ciX19f+fr6ursNAABQRzziCtGECRO0YsUKrV27Vi1btrS2h4aG6ty5cyopKXGpLyoqUmhoqFXz43edVa//XI3dbpefn19tTwcAANxg3BqInE6nJkyYoE8++URr1qxR27ZtXcZ79Oih+vXrKyMjw9qWl5engoICORwOSZLD4dCuXbtUXFxs1aSnp8tutysyMtKq+eE+qmuq9wEAAMzm1ltmiYmJWrp0qT777DM1btzYeuYnICBAfn5+CggIUEJCgpKSktS0aVPZ7XZNnDhRDodDvXv3liQNGjRIkZGReuyxxzR37lwVFhbqueeeU2JionXba/z48frjH/+o6dOn68knn9SaNWv00UcfKS0tzW1zBwAAnsOtV4jefvttlZaWKioqSi1atLCWZcuWWTWvv/66HnzwQY0YMUJ9+/ZVaGioPv74Y2vc29tbK1askLe3txwOhx599FHFx8drzpw5Vk3btm2Vlpam9PR0devWTfPmzdN7772n6OjoOp0vAADwTB71OUSe6mo+x+By+Bwi/BCfQwQA19cN+zlEAAAA7kAgAgAAxiMQAQAA4xGIAACA8QhEAADAeAQiAABgPAIRAAAwHoEIAAAYj0AEAACMRyACAADGIxABAADj1SgQ9e/fXyUlJRdtLysrU//+/a+1JwAAgDpVo0CUmZmpc+fOXbT97NmzWr9+/
TU3BQAAUJfqXU3xzp07ra/37t2rwsJCa72yslIrV67ULbfcUnvdAQAA1IGrCkR33HGHbDabbDbbJW+N+fn5acGCBbXWHAAAQF24qkCUn58vp9Opdu3aaevWrQoKCrLGfHx8FBwcLG9v71pvEgAA4Hq6qkDUunVrSVJVVdV1aQYAAMAdrioQ/dD+/fu1du1aFRcXXxSQZs6cec2NAQAA1JUaBaJ3331XTz/9tJo3b67Q0FDZbDZrzGazEYgAAMANpUaB6MUXX9RLL72kGTNm1HY/AAAAda5Gn0N04sQJPfzww7XdCwAAgFvUKBA9/PDDWr16dW33AgAA4BY1umXWvn17Pf/889q8ebO6dOmi+vXru4xPmjSpVpoDAACoCzUKRO+88478/f2VlZWlrKwslzGbzUYgAgAAN5QaBaL8/Pza7gMAAMBtavQMEQAAwM2kRleInnzyycuOv//++zVqBgAAwB1qFIhOnDjhsn7+/Hnt3r1bJSUll/yjrwAAAJ6sRoHok08+uWhbVVWVnn76ad16663X3BQAAEBdqrVniLy8vJSUlKTXX3+9tnYJAABQJ2r1oeoDBw7owoULtblLAACA665Gt8ySkpJc1p1Op44cOaK0tDSNGTOmVhoDAACoKzUKRP/85z9d1r28vBQUFKR58+b97DvQAAAAPE2NAtHatWtruw8AAAC3qVEgqnb06FHl5eVJkjp27KigoKBaaQoAAKAu1eih6lOnTunJJ59UixYt1LdvX/Xt21dhYWFKSEjQ6dOna7tHAACA66pGgSgpKUlZWVlavny5SkpKVFJSos8++0xZWVl69tlna7tHAACA66pGt8z+8Y9/6O9//7uioqKsbUOGDJGfn59+9atf6e23366t/gAAAK67Gl0hOn36tEJCQi7aHhwczC0zAABww6lRIHI4HJo1a5bOnj1rbTtz5ox++9vfyuFw1FpzAAAAdaFGt8zeeOMNxcTEqGXLlurWrZskaceOHfL19dXq1atrtUEAAIDrrUaBqEuXLtq/f7+WLFmib775RpI0evRoxcXFyc/Pr1YbBAAAuN5qFIhSUlIUEhKicePGuWx///33dfToUc2YMaNWmgMAAKgLNXqG6E9/+pM6dep00fbOnTtr4cKF19wUAABAXapRICosLFSLFi0u2h4UFKQjR45c8X7WrVunoUOHKiwsTDabTZ9++qnL+OOPPy6bzeayxMTEuNQcP35ccXFxstvtCgwMVEJCgsrLy11qdu7cqT59+qhBgwYKDw/X3Llzr3yyAADgplejQBQeHq6NGzdetH3jxo0KCwu74v2cOnVK3bp105tvvvmTNTExMTpy5Ii1/PWvf3UZj4uL0549e5Senq4VK1Zo3bp1euqpp6zxsrIyDRo0SK1bt1ZOTo5eeeUVzZ49W++8884V9wkAAG5uNXqGaNy4cZo8ebLOnz+v/v37S5IyMjI0ffr0q/qk6sGDB2vw4MGXrfH19VVoaOglx/bt26eVK1dq27ZtuuuuuyRJCxYs0JAhQ/Tqq68qLCxMS5Ys0blz5/T+++/Lx8dHnTt3Vm5url577TWX4PRDFRUVqqiosNbLysqueE4AgGvTY9pid7cAD5LzSnydHKdGV4imTZumhIQE/c///I/atWundu3aaeLEiZo0aZKSk5NrtcHMzEwFBwerY8eOevrpp3Xs2DFrLDs7W4GBgVYYkqSBAwfKy8tLW7ZssWr69u0rHx8fqyY6Olp5eXk6ceLEJY+ZkpKigIAAawkPD6/VOQEAAM9So0Bks9n08ssv6+jRo9q8ebN27Nih48ePa+bMmbXaXExMjBYvXqyMjAy9/PLLysrK0uDBg1VZWSnp+2eZgoODXV5Tr149NW3aVIWFhVbNjz9Vu3q9uubHkpOTVVpaai2HDh2q1XkBAADPUqNbZtX8/f3Vs2fP2urlIqNGjbK+7tKli7p27apbb71VmZmZGjBgwHU7rq+vr3x9fa/b/gEAgGep0RUid2nXr
p2aN2+ub7/9VpIUGhqq4uJil5oLFy7o+PHj1nNHoaGhKioqcqmpXv+pZ5MAAIBZbqhA9J///EfHjh2z3vLvcDhUUlKinJwcq2bNmjWqqqpSr169rJp169bp/PnzVk16ero6duyoJk2a1O0EAACAR3JrICovL1dubq5yc3MlSfn5+crNzVVBQYHKy8s1bdo0bd68WQcPHlRGRoaGDRum9u3bKzo6WpIUERGhmJgYjRs3Tlu3btXGjRs1YcIEjRo1ynr7/yOPPCIfHx8lJCRoz549WrZsmebPn6+kpCR3TRsAAHgYtwai7du3684779Sdd94pSUpKStKdd96pmTNnytvbWzt37tR//dd/qUOHDkpISFCPHj20fv16l+d7lixZok6dOmnAgAEaMmSI7rvvPpfPGAoICNDq1auVn5+vHj166Nlnn9XMmTN/8i33AADAPNf0UPW1ioqKktPp/MnxVatW/ew+mjZtqqVLl162pmvXrlq/fv1V9wcAAMxwQz1DBAAAcD0QiAAAgPEIRAAAwHgEIgAAYDwCEQAAMB6BCAAAGI9ABAAAjEcgAgAAxiMQAQAA4xGIAACA8QhEAADAeAQiAABgPAIRAAAwHoEIAAAYj0AEAACMRyACAADGIxABAADjEYgAAIDxCEQAAMB4BCIAAGA8AhEAADAegQgAABivnrsbAOA+PaYtdncL8DA5r8S7uwXALbhCBAAAjEcgAgAAxiMQAQAA4xGIAACA8QhEAADAeAQiAABgPAIRAAAwHoEIAAAYj0AEAACMRyACAADGIxABAADjEYgAAIDxCEQAAMB4BCIAAGA8AhEAADAegQgAABiPQAQAAIxHIAIAAMYjEAEAAOMRiAAAgPEIRAAAwHhuDUTr1q3T0KFDFRYWJpvNpk8//dRl3Ol0aubMmWrRooX8/Pw0cOBA7d+/36Xm+PHjiouLk91uV2BgoBISElReXu5Ss3PnTvXp00cNGjRQeHi45s6de72nBgAAbiBuDUSnTp1St27d9Oabb15yfO7cufrDH/6ghQsXasuWLWrUqJGio6N19uxZqyYuLk579uxRenq6VqxYoXXr1umpp56yxsvKyjRo0CC1bt1aOTk5euWVVzR79my98847131+AADgxlDPnQcfPHiwBg8efMkxp9OpN954Q88995yGDRsmSVq8eLFCQkL06aefatSoUdq3b59Wrlypbdu26a677pIkLViwQEOGDNGrr76qsLAwLVmyROfOndP7778vHx8fde7cWbm5uXrttddcghMAADCXxz5DlJ+fr8LCQg0cONDaFhAQoF69eik7O1uSlJ2drcDAQCsMSdLAgQPl5eWlLVu2WDV9+/aVj4+PVRMdHa28vDydOHHikseuqKhQWVmZywIAAG5eHhuICgsLJUkhISEu20NCQqyxwsJCBQcHu4zXq1dPTZs2dam51D5+eIwfS0lJUUBAgLWEh4df+4QAAIDH8thA5E7JyckqLS21lkOHDrm7JQAAcB15bCAKDQ2VJBUVFblsLyoqssZCQ0NVXFzsMn7hwgUdP37cpeZS+/jhMX7M19dXdrvdZQEAADcvjw1Ebdu2VWhoqDIyMqxtZWVl2rJlixwOhyTJ4XCopKREOTk5Vs2aNWtUVVWlXr16WTXr1q3T+fPnrZr09HR17NhRTZo0qaPZAAAAT+bWQFReXq7c3Fzl5uZK+v5B6tzcXBUUFMhms2ny5Ml68cUX9fnnn2vXrl2Kj49XWFiYhg8fLkmKiIhQTEyMxo0bp61bt2rjxo2aMGGCRo0apbCwMEnSI488Ih8fHyUkJGjPnj1atmyZ5s+fr6SkJDfNGgAAeBq3vu1++/btuv/++6316pAyZswYpaamavr06Tp16pSeeuoplZSU6L777tPKlSvVoEED6zVLlizRhAkTNGDAAHl5eWnEiBH6wx/+YI0HBARo9erVSkxMVI8ePdS8eXPNnDmTt9wDAACLWwNRVFSUnE7nT47bbDbNmTNHc
+bM+cmapk2baunSpZc9TteuXbV+/foa9wkAAG5uHvsMEQAAQF0hEAEAAOMRiAAAgPEIRAAAwHgEIgAAYDwCEQAAMB6BCAAAGI9ABAAAjEcgAgAAxiMQAQAA4xGIAACA8QhEAADAeAQiAABgPAIRAAAwHoEIAAAYj0AEAACMRyACAADGIxABAADjEYgAAIDxCEQAAMB4BCIAAGA8AhEAADAegQgAABiPQAQAAIxHIAIAAMYjEAEAAOMRiAAAgPEIRAAAwHgEIgAAYDwCEQAAMB6BCAAAGI9ABAAAjEcgAgAAxiMQAQAA4xGIAACA8QhEAADAeAQiAABgPAIRAAAwHoEIAAAYj0AEAACMRyACAADGIxABAADjEYgAAIDxCEQAAMB4Hh2IZs+eLZvN5rJ06tTJGj979qwSExPVrFkz+fv7a8SIESoqKnLZR0FBgWJjY9WwYUMFBwdr2rRpunDhQl1PBQAAeLB67m7g53Tu3FlfffWVtV6v3v9recqUKUpLS9Pf/vY3BQQEaMKECfrlL3+pjRs3SpIqKysVGxur0NBQbdq0SUeOHFF8fLzq16+v3/3ud3U+FwAA4Jk8PhDVq1dPoaGhF20vLS3Vn//8Zy1dulT9+/eXJH3wwQeKiIjQ5s2b1bt3b61evVp79+7VV199pZCQEN1xxx164YUXNGPGDM2ePVs+Pj51PR0AAOCBPPqWmSTt379fYWFhateuneLi4lRQUCBJysnJ0fnz5zVw4ECrtlOnTmrVqpWys7MlSdnZ2erSpYtCQkKsmujoaJWVlWnPnj0/ecyKigqVlZW5LAAA4Obl0YGoV69eSk1N1cqVK/X2228rPz9fffr00cmTJ1VYWCgfHx8FBga6vCYkJESFhYWSpMLCQpcwVD1ePfZTUlJSFBAQYC3h4eG1OzEAAOBRPPqW2eDBg62vu3btql69eql169b66KOP5Ofnd92Om5ycrKSkJGu9rKyMUAQAwE3Mo68Q/VhgYKA6dOigb7/9VqGhoTp37pxKSkpcaoqKiqxnjkJDQy9611n1+qWeS6rm6+sru93usgAAgJvXDRWIysvLdeDAAbVo0UI9evRQ/fr1lZGRYY3n5eWpoKBADodDkuRwOLRr1y4VFxdbNenp6bLb7YqMjKzz/gEAgGfy6FtmU6dO1dChQ9W6dWsdPnxYs2bNkre3t0aPHq2AgAAlJCQoKSlJTZs2ld1u18SJE+VwONS7d29J0qBBgxQZGanHHntMc+fOVWFhoZ577jklJibK19fXzbMDAACewqMD0X/+8x+NHj1ax44dU1BQkO677z5t3rxZQUFBkqTXX39dXl5eGjFihCoqKhQdHa233nrLer23t7dWrFihp59+Wg6HQ40aNdKYMWM0Z84cd00JAAB4II8ORB9++OFlxxs0aKA333xTb7755k/WtG7dWl988UVttwYAAG4iN9QzRAAAANcDgQgAABiPQAQAAIxHIAIAAMYjEAEAAOMRiAAAgPEIRAAAwHgEIgAAYDwCEQAAMB6BCAAAGI9ABAAAjEcgAgAAxiMQAQAA4xGIAACA8QhEAADAeAQiAABgPAIRAAAwHoEIAAAYj0AEAACMRyACAADGIxABAADjEYgAAIDxCEQAAMB4BCIAAGA8AhEAADAegQgAABiPQAQAAIxHIAIAAMYjEAEAAOMRiAAAgPEIRAAAwHgEIgAAYDwCEQAAMB6BCAAAGI9ABAAAjEcgAgAAxiMQAQAA4xGIAACA8QhEAADAeAQiAABgPAIRAAAwHoEIAAAYj0AEAACMRyACAADGMyoQvfnmm2rTpo0aNGigXr16aevWre5uCQAAeABjAtGyZcuUlJSkWbNm6euvv1a3bt0UHR2t4uJid7cGAADczJhA9Nprr2ncuHF64oknFBkZqYULF6phw4Z6//333d0aAABws3rubqAunDt3Tjk5OUpOTra2eXl5aeDAgcrOzr6ovqKiQhUVFdZ6aWmpJKmsrOya+qisOHNNr
8fN5VrPp9rAOYkf47yEp7mWc7L6tU6n82drjQhE3333nSorKxUSEuKyPSQkRN98881F9SkpKfrtb3970fbw8PDr1iPME7BgvLtbAC7CeQlPUxvn5MmTJxUQEHDZGiMC0dVKTk5WUlKStV5VVaXjx4+rWbNmstlsbuzsxldWVqbw8HAdOnRIdrvd3e0AnJPwSJyXtcPpdOrkyZMKCwv72VojAlHz5s3l7e2toqIil+1FRUUKDQ29qN7X11e+vr4u2wIDA69ni8ax2+38I4dH4ZyEJ+K8vHY/d2WomhEPVfv4+KhHjx7KyMiwtlVVVSkjI0MOh8ONnQEAAE9gxBUiSUpKStKYMWN011136e6779Ybb7yhU6dO6YknnnB3awAAwM2MCUQjR47U0aNHNXPmTBUWFuqOO+7QypUrL3rQGteXr6+vZs2addEtScBdOCfhiTgv657NeSXvRQMAALiJGfEMEQAAwOUQiAAAgPEIRAAAwHgEItwU2rRpozfeeMPdbeAmN3v2bN1xxx3ubgM3sczMTNlsNpWUlFy2jp95tY9ABLeIiorS5MmT3d0G8JNsNps+/fRTl21Tp051+TwzoLbdc889OnLkiPVhgqmpqZf8YOBt27bpqaeequPubm7GvO0eNx6n06nKykrVq8dpCs/g7+8vf39/d7eBm5iPj88l/4LCjwUFBdVBN2bhChEuEhUVpUmTJmn69Olq2rSpQkNDNXv2bGu8pKREY8eOVVBQkOx2u/r3768dO3ZY448//riGDx/uss/JkycrKirKGs/KytL8+fNls9lks9l08OBB61Lxl19+qR49esjX11cbNmzQgQMHNGzYMIWEhMjf3189e/bUV199VQffCbjDtZ5/kvTiiy8qODhYjRs31tixY/XrX//a5VbXtm3b9MADD6h58+YKCAhQv3799PXXX1vjbdq0kST94he/kM1ms9Z/eMts9erVatCgwUW3Np555hn179/fWt+wYYP69OkjPz8/hYeHa9KkSTp16tQ1f5/gPlFRUZowYYImTJiggIAANW/eXM8//7z1F9VPnDih+Ph4NWnSRA0bNtTgwYO1f/9+6/X//ve/NXToUDVp0kSNGjVS586d9cUXX0hyvWWWmZmpJ554QqWlpdbPyup/Cz+8ZfbII49o5MiRLj2eP39ezZs31+LFiyV9/9cZUlJS1LZtW/n5+albt276+9//fp2/UzcWAhEuadGiRWrUqJG2bNmiuXPnas6cOUpPT5ckPfzwwyouLtaXX36pnJwcde/eXQMGDNDx48evaN/z58+Xw+HQuHHjdOTIER05ckTh4eHW+K9//Wv9/ve/1759+9S1a1eVl5dryJAhysjI0D//+U/FxMRo6NChKigouC5zh/tdy/m3ZMkSvfTSS3r55ZeVk5OjVq1a6e2333bZ/8mTJzVmzBht2LBBmzdv1m233aYhQ4bo5MmTkr4PTJL0wQcf6MiRI9b6Dw0YMECBgYH6xz/+YW2rrKzUsmXLFBcXJ0k6cOCAYmJiNGLECO3cuVPLli3Thg0bNGHChNr/pqFOLVq0SPXq1dPWrVs1f/58vfbaa3rvvfckff+fvu3bt+vzzz9Xdna2nE6nhgwZovPnz0uSEhMTVVFRoXXr1mnXrl16+eWXL3nl8Z577tEbb7whu91u/aycOnXqRXVxcXFavny5ysvLrW2rVq3S6dOn9Ytf/EKSlJKSosWLF2vhwoXas2ePpkyZokcffVRZWVnX49tzY3ICP9KvXz/nfffd57KtZ8+ezhkzZjjXr1/vtNvtzrNnz7qM33rrrc4//elPTqfT6RwzZoxz2LBhLuPPPPOMs1+/fi7HeOaZZ1xq1q5d65Tk/PTTT3+2x86dOzsXLFhgrbdu3dr5+uuv//zk4PGu9fzr1auXMzEx0WX83nvvdXbr1u0nj1lZWels3Lixc/ny5dY2Sc5PPvnEpW7WrFku+3nmmWec/fv3t9ZXrVrl9PX1dZ44ccLpdDqdC
QkJzqeeesplH+vXr3d6eXk5z5w585P9wLP169fPGRER4ayqqrK2zZgxwxkREeH817/+5ZTk3LhxozX23XffOf38/JwfffSR0+l0Ort06eKcPXv2Jfdd/XOw+hz64IMPnAEBARfV/fBn3vnz553Nmzd3Ll682BofPXq0c+TIkU6n0+k8e/ass2HDhs5Nmza57CMhIcE5evToq57/zYorRLikrl27uqy3aNFCxcXF2rFjh8rLy9WsWTPreQp/f3/l5+frwIEDtXLsu+66y2W9vLxcU6dOVUREhAIDA+Xv7699+/Zxhegmdi3nX15enu6++26X1/94vaioSOPGjdNtt92mgIAA2e12lZeXX/U5FRcXp8zMTB0+fFjS91enYmNjrYdgd+zYodTUVJdeo6OjVVVVpfz8/Ks6FjxL7969ZbPZrHWHw6H9+/dr7969qlevnnr16mWNNWvWTB07dtS+ffskSZMmTdKLL76oe++9V7NmzdLOnTuvqZd69erpV7/6lZYsWSJJOnXqlD777DPrSuW3336r06dP64EHHnA5FxcvXlxrP7dvBjytikuqX7++y7rNZlNVVZXKy8vVokULZWZmXvSa6l8CXl5e1r30atWXiq9Eo0aNXNanTp2q9PR0vfrqq2rfvr38/Pz00EMP6dy5c1e8T9xYruX8uxJjxozRsWPHNH/+fLVu3Vq+vr5yOBxXfU717NlTt956qz788EM9/fTT+uSTT5SammqNl5eX67//+781adKki17bqlWrqzoWbh5jx45VdHS00tLStHr1aqWkpGjevHmaOHFijfcZFxenfv36qbi4WOnp6fLz81NMTIwkWbfS0tLSdMstt7i8jr+V9v8QiHBVunfvrsLCQtWrV8960PTHgoKCtHv3bpdtubm5Lr/kfHx8VFlZeUXH3Lhxox5//HHrXnh5ebkOHjxYo/5xY7uS869jx47atm2b4uPjrW0/fgZo48aNeuuttzRkyBBJ0qFDh/Tdd9+51NSvX/+KztG4uDgtWbJELVu2lJeXl2JjY1363bt3r9q3b3+lU8QNYsuWLS7r1c+iRUZG6sKFC9qyZYvuueceSdKxY8eUl5enyMhIqz48PFzjx4/X+PHjlZycrHffffeSgehKf1bec889Cg8P17Jly/Tll1/q4Ycftn7mRkZGytfXVwUFBerXr9+1TPumxi0zXJWBAwfK4XBo+PDhWr16tQ4ePKhNmzbpN7/5jbZv3y5J6t+/v7Zv367Fixdr//79mjVr1kUBqU2bNtqyZYsOHjyo7777TlVVVT95zNtuu00ff/yxcnNztWPHDj3yyCOXrcfN60rOv4kTJ+rPf/6zFi1apP379+vFF1/Uzp07XW5v3HbbbfrLX/6iffv2acuWLYqLi5Ofn5/Lsdq0aaOMjAwVFhbqxIkTP9lTXFycvv76a7300kt66KGHXP7HPWPGDG3atEkTJkxQbm6u9u/fr88++4yHqm8CBQUFSkpKUl5env76179qwYIFeuaZZ3Tbbbdp2LBhGjdunDZs2KAdO3bo0Ucf1S233KJhw4ZJ+v5dt6tWrVJ+fr6+/vprrV27VhEREZc8Tps2bVReXq6MjAx99913On369E/29Mgjj2jhwoVKT0+3bpdJUuPGjTV16lRNmTJFixYt0oEDB/T1119rwYIFWrRoUe1+Y25gBCJcFZvNpi+++EJ9+/bVE088oQ4dOmjUqFH697//rZCQEElSdHS0nn/+eU2fPl09e/bUyZMnXf63Ln1/G8zb21uRkZEKCgq67LMbr732mpo0aaJ77rlHQ4cOVXR0tLp3735d5wnPdCXnX1xcnJKTkzV16lR1795d+fn5evzxx9WgQQNrP3/+85914sQJde/eXY899pgmTZqk4OBgl2PNmzdP6enpCg8P15133vmTPbVv31533323du7c6fJLSPr+WaisrCz961//Up8+fXTnnXdq5syZCgsLq8XvCtwhPj5eZ86c0d13363ExEQ98
8wz1gclfvDBB+rRo4cefPBBORwOOZ1OffHFF9YVm8rKSiUmJioiIkIxMTHq0KGD3nrrrUse55577tH48eM1cuRIBQUFae7cuT/ZU1xcnPbu3atbbrlF9957r8vYCy+8oOeff14pKSnWcdPS0tS2bdta+o7c+GzOHz/sAQA3mQceeEChoaH6y1/+4u5WcBOIiorSHXfcwZ/OuMnwDBGAm8rp06e1cOFCRUdHy9vbW3/961/11VdfWZ9jBACXQiACcFOpvq320ksv6ezZs+rYsaP+8Y9/aODAge5uDYAH45YZAAAwHg9VAwAA4xGIAACA8QhEAADAeAQiAABgPAIRAAAwHoEIAAAYj0AEAACMRyACgOussrKSP0gMeDgCEYAbwsqVK3XfffcpMDBQzZo104MPPqgDBw5Ikg4ePCibzaaPP/5Y999/vxo2bKhu3bopOzvbev2///1vDR06VE2aNFGjRo3UuXNnffHFF5Kku+66S6+++qpVO3z4cNWvX1/l5eWSpP/85z+y2Wz69ttvJUkVFRWaOnWqbrnlFjVq1Ei9evVSZmam9frU1FQFBgbq888/V2RkpHx9fS/7B4wBuB+BCMAN4dSpU0pKStL27duVkZEhLy8v/eIXv3C58vKb3/xGU6dOVW5urjp06KDRo0frwoULkqTExERVVFRo3bp12rVrl15++WX5+/tLkvr162cFGqfTqfXr1yswMFAbNmyQJGVlZemWW25R+/btJUkTJkxQdna2PvzwQ+3cuVMPP/ywYmJitH//fquX06dP6+WXX9Z7772nPXv2KDg4uC6+TQBqiD/dAeCG9N133ykoKEi7du2Sv7+/2rZtq/fee08JCQmSpL1796pz587at2+fOnXqpK5du2rEiBGaNWvWRftavny5HnvsMR07dky7d+9WTEyMRo4cqQYNGuj3v/+9xo0bp9OnT2vJkiUqKChQu3btVFBQoLCwMGsfAwcO1N13363f/e53Sk1N1RNPPKHc3Fx169atzr4nAGqOK0QAbgj79+/X6NGj1a5dO9ntdrVp00aSXG5Fde3a1fq6RYsWkqTi4mJJ0qRJk/Tiiy/q3nvv1axZs7Rz506rtk+fPjp58qT++c9/KisrS/369VNUVJR11SgrK0tRUVGSpF27dqmyslIdOnSQv7+/tWRlZVm38CTJx8fHpR8Ano2/dg/ghjB06FC1bt1a7777rsLCwlRVVaXbb79d586ds2rq169vfW2z2STJuqU2duxYRUdHKy0tTatXr1ZKSormzZuniRMnKjAwUN26dVNmZqays7P1wAMPqG/fvho5cqT+9a9/af/+/erXr58kqby8XN7e3srJyZG3t7dLj9W34CTJz8/P6gGA5+MKEQCPd+zYMeXl5em5557TgAEDFBERoRMnTlz1fsLDwzV+/Hh9/PHHevbZZ/Xuu+9aY/369dPatWu1bt06RUVFqWnTpoqIiNBLL72kFi1aqEOHDpKkO++8U5WVlSouLlb79u1dltDQ0FqbM4C6RSAC4PGaNGmiZs2a6Z133tG3336rNWvWKCkp6ar2MXnyZK1atUr5+fn6+uuvtXbtWkVERFjjUVFRWrVqlerVq6dOnTpZ25YsWWJdHZKkDh06KC4uTvHx8fr444+Vn5+vrVu3KiUlRWlpabUzYQB1jkAEwON5eXnpww8/VE5Ojm6//XZNmTJFr7zyylXto7KyUomJiYqIiFBMTIw6dOigt956yxrv06ePqqqqXMJPVFSUKisrreeHqn3wwQeKj4/Xs88+q44dO2r48OHatm2bWrVqdU3zBOA+vMsMAAAYjytEAADAeAQiAABgPAIRAAAwHoEIAAAYj0AEAACMRyACAADGIxABAADjEYgAAIDxCEQAAMB4BCIAAGA8AhEAADDe/weCiimbenEpXAAAAABJRU5ErkJggg==", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sns.countplot(x=df_sampled[\"answer\"])" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "e2DP6ekqfNbe", "outputId": "3aacbc50-8554-40eb-9cdb-c949c30d634e" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "answer\n", "negative 1236\n", "neutral 1236\n", "positive 1236\n", "Name: count, dtype: int64\n", "(3708, 2)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/xc/v1l81vkx6fjc9wpqc0tsnl400000gn/T/ipykernel_11468/1830774783.py:5: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n", " df_balanced = df_sampled.groupby(\"answer\").apply(lambda x: x.sample(min_class_count, random_state=42)).reset_index(drop=True)\n" ] } ], "source": [ "# Undersampling each class to match the class with the smallest number of samples\n", "min_class_count = df_sampled[\"answer\"].value_counts().min()\n", "\n", "# Sampling an equal number of rows from each class\n", "df_balanced = df_sampled.groupby(\"answer\").apply(lambda x: x.sample(min_class_count, random_state=42)).reset_index(drop=True)\n", "\n", "# Showing the new class distribution\n", "print(df_balanced[\"answer\"].value_counts())\n", "print(df_balanced.shape)\n" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "id": "dJosNJACYDCc" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "593c0e8a5f6b4b9495ff422cc2382975", "version_major": 2, "version_minor": 0 }, "text/plain": [ "modules.json: 0%| | 0.00/349 [00:00#sk-container-id-1 {\n", " /* Definition of color scheme common for light and dark mode */\n", " 
--sklearn-color-text: black;\n", " --sklearn-color-line: gray;\n", " /* Definition of color scheme for unfitted estimators */\n", " --sklearn-color-unfitted-level-0: #fff5e6;\n", " --sklearn-color-unfitted-level-1: #f6e4d2;\n", " --sklearn-color-unfitted-level-2: #ffe0b3;\n", " --sklearn-color-unfitted-level-3: chocolate;\n", " /* Definition of color scheme for fitted estimators */\n", " --sklearn-color-fitted-level-0: #f0f8ff;\n", " --sklearn-color-fitted-level-1: #d4ebff;\n", " --sklearn-color-fitted-level-2: #b3dbfd;\n", " --sklearn-color-fitted-level-3: cornflowerblue;\n", "\n", " /* Specific color for light theme */\n", " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n", " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n", " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n", " --sklearn-color-icon: #696969;\n", "\n", " @media (prefers-color-scheme: dark) {\n", " /* Redefinition of color scheme for dark theme */\n", " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n", " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n", " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n", " --sklearn-color-icon: #878787;\n", " }\n", "}\n", "\n", "#sk-container-id-1 {\n", " color: var(--sklearn-color-text);\n", "}\n", "\n", "#sk-container-id-1 pre {\n", " padding: 0;\n", "}\n", "\n", "#sk-container-id-1 input.sk-hidden--visually {\n", " border: 0;\n", " clip: rect(1px 1px 1px 1px);\n", " clip: rect(1px, 1px, 1px, 1px);\n", " height: 1px;\n", " margin: -1px;\n", " overflow: hidden;\n", " padding: 0;\n", " position: absolute;\n", " width: 
1px;\n", "}\n", "\n", "#sk-container-id-1 div.sk-dashed-wrapped {\n", " border: 1px dashed var(--sklearn-color-line);\n", " margin: 0 0.4em 0.5em 0.4em;\n", " box-sizing: border-box;\n", " padding-bottom: 0.4em;\n", " background-color: var(--sklearn-color-background);\n", "}\n", "\n", "#sk-container-id-1 div.sk-container {\n", " /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n", " but bootstrap.min.css set `[hidden] { display: none !important; }`\n", " so we also need the `!important` here to be able to override the\n", " default hidden behavior on the sphinx rendered scikit-learn.org.\n", " See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n", " display: inline-block !important;\n", " position: relative;\n", "}\n", "\n", "#sk-container-id-1 div.sk-text-repr-fallback {\n", " display: none;\n", "}\n", "\n", "div.sk-parallel-item,\n", "div.sk-serial,\n", "div.sk-item {\n", " /* draw centered vertical line to link estimators */\n", " background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n", " background-size: 2px 100%;\n", " background-repeat: no-repeat;\n", " background-position: center center;\n", "}\n", "\n", "/* Parallel-specific style estimator block */\n", "\n", "#sk-container-id-1 div.sk-parallel-item::after {\n", " content: \"\";\n", " width: 100%;\n", " border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n", " flex-grow: 1;\n", "}\n", "\n", "#sk-container-id-1 div.sk-parallel {\n", " display: flex;\n", " align-items: stretch;\n", " justify-content: center;\n", " background-color: var(--sklearn-color-background);\n", " position: relative;\n", "}\n", "\n", "#sk-container-id-1 div.sk-parallel-item {\n", " display: flex;\n", " flex-direction: column;\n", "}\n", "\n", "#sk-container-id-1 div.sk-parallel-item:first-child::after {\n", " align-self: flex-end;\n", " width: 50%;\n", "}\n", "\n", "#sk-container-id-1 
div.sk-parallel-item:last-child::after {\n", " align-self: flex-start;\n", " width: 50%;\n", "}\n", "\n", "#sk-container-id-1 div.sk-parallel-item:only-child::after {\n", " width: 0;\n", "}\n", "\n", "/* Serial-specific style estimator block */\n", "\n", "#sk-container-id-1 div.sk-serial {\n", " display: flex;\n", " flex-direction: column;\n", " align-items: center;\n", " background-color: var(--sklearn-color-background);\n", " padding-right: 1em;\n", " padding-left: 1em;\n", "}\n", "\n", "\n", "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n", "clickable and can be expanded/collapsed.\n", "- Pipeline and ColumnTransformer use this feature and define the default style\n", "- Estimators will overwrite some part of the style using the `sk-estimator` class\n", "*/\n", "\n", "/* Pipeline and ColumnTransformer style (default) */\n", "\n", "#sk-container-id-1 div.sk-toggleable {\n", " /* Default theme specific background. It is overwritten whether we have a\n", " specific estimator or a Pipeline/ColumnTransformer */\n", " background-color: var(--sklearn-color-background);\n", "}\n", "\n", "/* Toggleable label */\n", "#sk-container-id-1 label.sk-toggleable__label {\n", " cursor: pointer;\n", " display: block;\n", " width: 100%;\n", " margin-bottom: 0;\n", " padding: 0.5em;\n", " box-sizing: border-box;\n", " text-align: center;\n", "}\n", "\n", "#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n", " /* Arrow on the left of the label */\n", " content: \"▸\";\n", " float: left;\n", " margin-right: 0.25em;\n", " color: var(--sklearn-color-icon);\n", "}\n", "\n", "#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n", " color: var(--sklearn-color-text);\n", "}\n", "\n", "/* Toggleable content - dropdown */\n", "\n", "#sk-container-id-1 div.sk-toggleable__content {\n", " max-height: 0;\n", " max-width: 0;\n", " overflow: hidden;\n", " text-align: left;\n", " /* unfitted */\n", " background-color: 
var(--sklearn-color-unfitted-level-0);\n", "}\n", "\n", "#sk-container-id-1 div.sk-toggleable__content.fitted {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-0);\n", "}\n", "\n", "#sk-container-id-1 div.sk-toggleable__content pre {\n", " margin: 0.2em;\n", " border-radius: 0.25em;\n", " color: var(--sklearn-color-text);\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-0);\n", "}\n", "\n", "#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n", " /* unfitted */\n", " background-color: var(--sklearn-color-fitted-level-0);\n", "}\n", "\n", "#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n", " /* Expand drop-down */\n", " max-height: 200px;\n", " max-width: 100%;\n", " overflow: auto;\n", "}\n", "\n", "#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n", " content: \"▾\";\n", "}\n", "\n", "/* Pipeline/ColumnTransformer-specific style */\n", "\n", "#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", " color: var(--sklearn-color-text);\n", " background-color: var(--sklearn-color-unfitted-level-2);\n", "}\n", "\n", "#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", " background-color: var(--sklearn-color-fitted-level-2);\n", "}\n", "\n", "/* Estimator-specific style */\n", "\n", "/* Colorize estimator box */\n", "#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-2);\n", "}\n", "\n", "#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-2);\n", "}\n", "\n", "#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n", "#sk-container-id-1 div.sk-label 
label {\n", " /* The background is the default theme color */\n", " color: var(--sklearn-color-text-on-default-background);\n", "}\n", "\n", "/* On hover, darken the color of the background */\n", "#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n", " color: var(--sklearn-color-text);\n", " background-color: var(--sklearn-color-unfitted-level-2);\n", "}\n", "\n", "/* Label box, darken color on hover, fitted */\n", "#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n", " color: var(--sklearn-color-text);\n", " background-color: var(--sklearn-color-fitted-level-2);\n", "}\n", "\n", "/* Estimator label */\n", "\n", "#sk-container-id-1 div.sk-label label {\n", " font-family: monospace;\n", " font-weight: bold;\n", " display: inline-block;\n", " line-height: 1.2em;\n", "}\n", "\n", "#sk-container-id-1 div.sk-label-container {\n", " text-align: center;\n", "}\n", "\n", "/* Estimator-specific */\n", "#sk-container-id-1 div.sk-estimator {\n", " font-family: monospace;\n", " border: 1px dotted var(--sklearn-color-border-box);\n", " border-radius: 0.25em;\n", " box-sizing: border-box;\n", " margin-bottom: 0.5em;\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-0);\n", "}\n", "\n", "#sk-container-id-1 div.sk-estimator.fitted {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-0);\n", "}\n", "\n", "/* on hover */\n", "#sk-container-id-1 div.sk-estimator:hover {\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-2);\n", "}\n", "\n", "#sk-container-id-1 div.sk-estimator.fitted:hover {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-2);\n", "}\n", "\n", "/* Specification for estimator info (e.g. 
\"i\" and \"?\") */\n", "\n", "/* Common style for \"i\" and \"?\" */\n", "\n", ".sk-estimator-doc-link,\n", "a:link.sk-estimator-doc-link,\n", "a:visited.sk-estimator-doc-link {\n", " float: right;\n", " font-size: smaller;\n", " line-height: 1em;\n", " font-family: monospace;\n", " background-color: var(--sklearn-color-background);\n", " border-radius: 1em;\n", " height: 1em;\n", " width: 1em;\n", " text-decoration: none !important;\n", " margin-left: 1ex;\n", " /* unfitted */\n", " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n", " color: var(--sklearn-color-unfitted-level-1);\n", "}\n", "\n", ".sk-estimator-doc-link.fitted,\n", "a:link.sk-estimator-doc-link.fitted,\n", "a:visited.sk-estimator-doc-link.fitted {\n", " /* fitted */\n", " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n", " color: var(--sklearn-color-fitted-level-1);\n", "}\n", "\n", "/* On hover */\n", "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n", ".sk-estimator-doc-link:hover,\n", "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n", ".sk-estimator-doc-link:hover {\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-3);\n", " color: var(--sklearn-color-background);\n", " text-decoration: none;\n", "}\n", "\n", "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n", ".sk-estimator-doc-link.fitted:hover,\n", "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n", ".sk-estimator-doc-link.fitted:hover {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", " color: var(--sklearn-color-background);\n", " text-decoration: none;\n", "}\n", "\n", "/* Span, style for the box shown on hovering the info icon */\n", ".sk-estimator-doc-link span {\n", " display: none;\n", " z-index: 9999;\n", " position: relative;\n", " font-weight: normal;\n", " right: .2ex;\n", " padding: .5ex;\n", " margin: .5ex;\n", " width: min-content;\n", " min-width: 20ex;\n", " max-width: 50ex;\n", " 
color: var(--sklearn-color-text);\n", " box-shadow: 2pt 2pt 4pt #999;\n", " /* unfitted */\n", " background: var(--sklearn-color-unfitted-level-0);\n", " border: .5pt solid var(--sklearn-color-unfitted-level-3);\n", "}\n", "\n", ".sk-estimator-doc-link.fitted span {\n", " /* fitted */\n", " background: var(--sklearn-color-fitted-level-0);\n", " border: var(--sklearn-color-fitted-level-3);\n", "}\n", "\n", ".sk-estimator-doc-link:hover span {\n", " display: block;\n", "}\n", "\n", "/* \"?\"-specific style due to the `` HTML tag */\n", "\n", "#sk-container-id-1 a.estimator_doc_link {\n", " float: right;\n", " font-size: 1rem;\n", " line-height: 1em;\n", " font-family: monospace;\n", " background-color: var(--sklearn-color-background);\n", " border-radius: 1rem;\n", " height: 1rem;\n", " width: 1rem;\n", " text-decoration: none;\n", " /* unfitted */\n", " color: var(--sklearn-color-unfitted-level-1);\n", " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n", "}\n", "\n", "#sk-container-id-1 a.estimator_doc_link.fitted {\n", " /* fitted */\n", " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n", " color: var(--sklearn-color-fitted-level-1);\n", "}\n", "\n", "/* On hover */\n", "#sk-container-id-1 a.estimator_doc_link:hover {\n", " /* unfitted */\n", " background-color: var(--sklearn-color-unfitted-level-3);\n", " color: var(--sklearn-color-background);\n", " text-decoration: none;\n", "}\n", "\n", "#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n", " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", "" ], "text/plain": [ "RandomForestClassifier(random_state=42)" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "clf = RandomForestClassifier(n_estimators=100, random_state=42)\n", "clf.fit(X_train, y_train)\n" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " precision recall f1-score 
support\n", "\n", " 0 0.66 0.52 0.58 277\n", " 1 0.62 0.80 0.70 237\n", " 2 0.55 0.52 0.54 228\n", "\n", " accuracy 0.61 742\n", " macro avg 0.61 0.61 0.61 742\n", "weighted avg 0.61 0.61 0.60 742\n", "\n" ] } ], "source": [ "from sentence_transformers import SentenceTransformer\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import LabelEncoder\n", "from sklearn.metrics import classification_report\n", "\n", "# Load model (already done)\n", "model = SentenceTransformer(\"all-MiniLM-L6-v2\")\n", "\n", "# Converting text to embeddings\n", "X = model.encode(df_balanced[\"user_prompt\"].tolist(), convert_to_numpy=True)\n", "\n", "# Encode labels (already done)\n", "label_encoder = LabelEncoder()\n", "y = label_encoder.fit_transform(df_balanced[\"answer\"])\n", "\n", "# Train-test split (already done)\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", "\n", "# Initialize and train RandomForestClassifier\n", "clf = RandomForestClassifier(n_estimators=100, random_state=42)\n", "clf.fit(X_train, y_train)\n", "\n", "# Make predictions on the test set\n", "y_pred = clf.predict(X_test)\n", "\n", "# Print classification report to evaluate performance\n", "print(classification_report(y_test, y_pred))\n" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Best Parameters: {'max_depth': 20, 'min_samples_split': 5, 'n_estimators': 200}\n" ] } ], "source": [ "from sklearn.model_selection import GridSearchCV\n", "\n", "param_grid = {\n", " 'n_estimators': [50, 100, 200],\n", " 'max_depth': [10, 20, 30],\n", " 'min_samples_split': [2, 5, 10]\n", "}\n", "\n", "grid_search = GridSearchCV(estimator=clf, param_grid=param_grid, cv=3, n_jobs=-1)\n", "grid_search.fit(X_train, y_train)\n", "print(\"Best Parameters:\", grid_search.best_params_)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 339 }, "id": "QasSqfQhnsqs", "outputId": "ca0b33bf-d2b2-46a5-9e4f-9a68ff77abeb" }, "outputs": [ { "ename": "ValueError", "evalue": "No columns in the dataset match the model's forward method signature. The following columns have been ignored: [user_prompt, answer]. 
Please check the dataset and model. You may need to set `remove_unused_columns=False` in `TrainingArguments`.", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 32\u001b[0m )\n\u001b[1;32m 33\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 34\u001b[0;31m \u001b[0mtrainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 2169\u001b[0m \u001b[0mhf_hub_utils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menable_progress_bars\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2170\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2171\u001b[0;31m return inner_training_loop(\n\u001b[0m\u001b[1;32m 2172\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2173\u001b[0m \u001b[0mresume_from_checkpoint\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mresume_from_checkpoint\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36m_inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 2198\u001b[0m 
\u001b[0mlogger\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdebug\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Currently training with a batch size of: {self._train_batch_size}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2199\u001b[0m \u001b[0;31m# Data loader and number of training steps\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2200\u001b[0;31m \u001b[0mtrain_dataloader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_train_dataloader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2201\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_fsdp_xla_v2_enabled\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2202\u001b[0m \u001b[0mtrain_dataloader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtpu_spmd_dataloader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_dataloader\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36mget_train_dataloader\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 998\u001b[0m \u001b[0mdata_collator\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata_collator\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 999\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_datasets_available\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_dataset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdatasets\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1000\u001b[0;31m \u001b[0mtrain_dataset\u001b[0m 
\u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_remove_unused_columns\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_dataset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdescription\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"training\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1001\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1002\u001b[0m \u001b[0mdata_collator\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_collator_with_removed_columns\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_collator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdescription\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"training\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36m_remove_unused_columns\u001b[0;34m(self, dataset, description)\u001b[0m\n\u001b[1;32m 924\u001b[0m \u001b[0mcolumns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mk\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m \u001b[0;32min\u001b[0m \u001b[0msignature_columns\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mk\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdataset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumn_names\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 925\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 926\u001b[0;31m raise ValueError(\n\u001b[0m\u001b[1;32m 927\u001b[0m \u001b[0;34m\"No columns in the dataset match the model's forward method signature. 
\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 928\u001b[0m \u001b[0;34mf\"The following columns have been ignored: [{', '.join(ignored_columns)}]. \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mValueError\u001b[0m: No columns in the dataset match the model's forward method signature. The following columns have been ignored: [user_prompt, answer]. Please check the dataset and model. You may need to set `remove_unused_columns=False` in `TrainingArguments`." ] } ], "source": [ "from transformers import BertForSequenceClassification, Trainer, TrainingArguments\n", "from datasets import Dataset\n", "\n", "\n", "dataset = Dataset.from_pandas(df_balanced)\n", "\n", "\n", "#dataset = dataset.filter(lambda e: e['answer'] is not None and len(e['answer']) > 0)\n", "\n", "\n", "#dataset = dataset.map(lambda e: {'labels': label_encoder.transform([e['answer']])[0]}, batched=False) # Transform expects a list\n", "\n", "\n", "#model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=len(label_encoder.classes_))\n", "\n", "\n", "# NOTE(review): this fine-tuning experiment failed with the ValueError recorded in this cell's\n", "# output (the raw text columns never got tokenized to match BERT's forward signature). The\n", "# remainder is fully commented out -- previously only the first line was, leaving dangling\n", "# arguments that made the cell a SyntaxError on a fresh run. To revive it, tokenize\n", "# 'user_prompt' into input_ids/attention_mask and map 'answer' to a 'labels' column first.\n", "#training_args = TrainingArguments(\n", "#    output_dir='./results',\n", "#    num_train_epochs=3,\n", "#    per_device_train_batch_size=8,\n", "#    per_device_eval_batch_size=16,\n", "#    warmup_steps=500,\n", "#    weight_decay=0.01,\n", "#    logging_dir='./logs',\n", "#)\n", "\n", "#trainer = Trainer(\n", "#    model=model,\n", "#    args=training_args,\n", "#    train_dataset=dataset,\n", "#    eval_dataset=dataset,\n", "#)\n", "\n", "#trainer.train()" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "v8DE8aAzg4jQ", "outputId": "5ce78149-c53b-45f3-994f-5f6c7d21b819" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Predicted Label: neutral\n" ] } ], "source": [ "new_texts = [\"The company is doing OK\"]\n", "new_embeddings = model.encode(new_texts, convert_to_numpy=True)\n", "predicted_label = 
clf.predict(new_embeddings)\n", "\n", "# Convert back to original label names\n", "decoded_label = label_encoder.inverse_transform(predicted_label)\n", "print(\"Predicted Label:\", decoded_label[0])\n" ] } ], "metadata": { "colab": { "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" } }, "nbformat": 4, "nbformat_minor": 0 }