Spaces:

alxvlsv
/

emotions

Running

App Files Files Community

alxvlsv committed on Apr 6, 2025

Commit

82c5f87

1 Parent(s): 5210d19

training notebook

Browse files

Files changed (1) hide show

notebooks/emotions_training.ipynb +976 -0

notebooks/emotions_training.ipynb ADDED Viewed

	@@ -0,0 +1,976 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "8d5c6c94-3c83-4252-a1d5-690104ac69d9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "# Restrict the notebook to a single GPU. This cell runs before torch is\n",
+    "# imported in the next code cell. GPU \"2\" stays the fallback, but a value\n",
+    "# already exported by the caller (e.g. CUDA_VISIBLE_DEVICES=0) is respected,\n",
+    "# so the notebook also works on machines with fewer GPUs.\n",
+    "if \"CUDA_VISIBLE_DEVICES\" not in os.environ:\n",
+    "    os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"2\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9a42461a-8ea6-4760-822d-48b7f055182e",
+   "metadata": {},
+   "source": [
+    "## Imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "0374c250-ffae-4ca4-81de-fc1bdce0c98d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from datasets import load_dataset\n",
+    "import datasets\n",
+    "from transformers import pipeline\n",
+    "import torch\n",
+    "from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification\n",
+    "from torch.utils.data import DataLoader\n",
+    "from transformers import Trainer, TrainingArguments\n",
+    "\n",
+    "import numpy as np\n",
+    "from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a9414e7d-1b89-4182-b6e6-e483a475f5e2",
+   "metadata": {},
+   "source": [
+    "## Dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "6c542e4a-61e1-4598-ac08-8a36024e07fd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ds = load_dataset(\"seara/ru_go_emotions\", \"simplified\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "2bc3197f-513a-4267-8db2-d42d71185314",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "DatasetDict({\n",
+       "    train: Dataset({\n",
+       "        features: ['ru_text', 'text', 'labels', 'id'],\n",
+       "        num_rows: 43410\n",
+       "    })\n",
+       "    validation: Dataset({\n",
+       "        features: ['ru_text', 'text', 'labels', 'id'],\n",
+       "        num_rows: 5426\n",
+       "    })\n",
+       "    test: Dataset({\n",
+       "        features: ['ru_text', 'text', 'labels', 'id'],\n",
+       "        num_rows: 5427\n",
+       "    })\n",
+       "})"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "ds"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "b0bc35d4-492d-4fa9-9e5d-54495df25429",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'ru_text': Value(dtype='string', id=None),\n",
+       " 'text': Value(dtype='string', id=None),\n",
+       " 'labels': Sequence(feature=ClassLabel(names=['admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral'], id=None), length=-1, id=None),\n",
+       " 'id': Value(dtype='string', id=None)}"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "ds['train'].features"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "000a51e0-df27-4a55-94d3-a31e0b0749f7",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'ru_text': 'Моя любимая еда — это все, что мне не приходилось готовить самому.',\n",
+       " 'text': \"My favourite food is anything I didn't have to cook myself.\",\n",
+       " 'labels': [27],\n",
+       " 'id': 'eebbqej'}"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "ds['train'][0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "47601982-246d-4815-b8fd-f4dd9ea3b736",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['admiration',\n",
+       " 'amusement',\n",
+       " 'anger',\n",
+       " 'annoyance',\n",
+       " 'approval',\n",
+       " 'caring',\n",
+       " 'confusion',\n",
+       " 'curiosity',\n",
+       " 'desire',\n",
+       " 'disappointment',\n",
+       " 'disapproval',\n",
+       " 'disgust',\n",
+       " 'embarrassment',\n",
+       " 'excitement',\n",
+       " 'fear',\n",
+       " 'gratitude',\n",
+       " 'grief',\n",
+       " 'joy',\n",
+       " 'love',\n",
+       " 'nervousness',\n",
+       " 'optimism',\n",
+       " 'pride',\n",
+       " 'realization',\n",
+       " 'relief',\n",
+       " 'remorse',\n",
+       " 'sadness',\n",
+       " 'surprise',\n",
+       " 'neutral']"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "ds['train'].features['labels'].feature.names"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "2ab5e33e-12b2-4739-a7cb-17b494bd1c1c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_classes = len(ds['train'].features['labels'].feature.names)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "938b4f50-7f39-43dc-9c15-9153838dd575",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "28"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "num_classes"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "32b7f3a5-265d-4674-8a12-63c4c88220b3",
+   "metadata": {},
+   "source": [
+    "## Model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "a01edf3f-5af5-495a-b1df-9f0fff586a48",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at DeepPavlov/rubert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
+      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Other checkpoints previously listed here as alternatives:\n",
+    "# 'cointegrated/rubert-tiny2', 'distilbert-base-cased'.\n",
+    "model_name = 'DeepPavlov/rubert-base-cased'\n",
+    "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
+    "\n",
+    "# Store the emotion names in the model config so that the saved / pushed\n",
+    "# model reports e.g. 'admiration' instead of a generic 'LABEL_0'.\n",
+    "label_names = ds['train'].features['labels'].feature.names\n",
+    "id2label = dict(enumerate(label_names))\n",
+    "label2id = {name: idx for idx, name in id2label.items()}\n",
+    "\n",
+    "# One text may carry several emotions, hence the multi-label problem type.\n",
+    "model = AutoModelForSequenceClassification.from_pretrained(\n",
+    "    model_name,\n",
+    "    num_labels=num_classes,\n",
+    "    id2label=id2label,\n",
+    "    label2id=label2id,\n",
+    "    problem_type=\"multi_label_classification\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "939cdacf-8c9a-418c-882e-d494f40bc5c6",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "BertForSequenceClassification(\n",
+       "  (bert): BertModel(\n",
+       "    (embeddings): BertEmbeddings(\n",
+       "      (word_embeddings): Embedding(119547, 768, padding_idx=0)\n",
+       "      (position_embeddings): Embedding(512, 768)\n",
+       "      (token_type_embeddings): Embedding(2, 768)\n",
+       "      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+       "      (dropout): Dropout(p=0.1, inplace=False)\n",
+       "    )\n",
+       "    (encoder): BertEncoder(\n",
+       "      (layer): ModuleList(\n",
+       "        (0-11): 12 x BertLayer(\n",
+       "          (attention): BertAttention(\n",
+       "            (self): BertSdpaSelfAttention(\n",
+       "              (query): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (key): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (value): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "            (output): BertSelfOutput(\n",
+       "              (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+       "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+       "              (dropout): Dropout(p=0.1, inplace=False)\n",
+       "            )\n",
+       "          )\n",
+       "          (intermediate): BertIntermediate(\n",
+       "            (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
+       "            (intermediate_act_fn): GELUActivation()\n",
+       "          )\n",
+       "          (output): BertOutput(\n",
+       "            (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
+       "            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
+       "            (dropout): Dropout(p=0.1, inplace=False)\n",
+       "          )\n",
+       "        )\n",
+       "      )\n",
+       "    )\n",
+       "    (pooler): BertPooler(\n",
+       "      (dense): Linear(in_features=768, out_features=768, bias=True)\n",
+       "      (activation): Tanh()\n",
+       "    )\n",
+       "  )\n",
+       "  (dropout): Dropout(p=0.1, inplace=False)\n",
+       "  (classifier): Linear(in_features=768, out_features=28, bias=True)\n",
+       ")"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "5828f4c4-f3cc-4bb7-99f1-3f9c001adfeb",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "SequenceClassifierOutput(loss=None, logits=tensor([[ 0.1746,  0.0823, -0.0107,  0.0438,  0.1315, -0.0874,  0.0370,  0.0327,\n",
+      "          0.3731, -0.0010,  0.0453,  0.0532, -0.0753, -0.1153, -0.2895,  0.0379,\n",
+      "         -0.1960,  0.0733, -0.0482,  0.0208, -0.1297,  0.0133, -0.0212, -0.0974,\n",
+      "          0.1149,  0.0732,  0.0702, -0.2103],\n",
+      "        [ 0.1693, -0.0349,  0.0288, -0.1285, -0.0371, -0.0007,  0.1751,  0.0494,\n",
+      "          0.2685, -0.1137,  0.0994,  0.0226,  0.0758, -0.0487, -0.0107, -0.0709,\n",
+      "          0.0073, -0.0396,  0.0166,  0.0358,  0.0964, -0.1060,  0.0394,  0.0961,\n",
+      "          0.0808, -0.0306,  0.2214, -0.0157]]), hidden_states=None, attentions=None)\n"
+     ]
+    }
+   ],
+   "source": [
+    "lines = [\n",
+    "    \"Крутая тачка.\",\n",
+    "    \"Моя любимая еда — это все, что мне не приходилось готовить самому.\",\n",
+    "]\n",
+    "\n",
+    "# The tokenizer of this checkpoint has no predefined maximum length, so\n",
+    "# truncation=True alone truncates nothing (and warns). Bound the input by\n",
+    "# the number of position embeddings the model actually has.\n",
+    "tokens_info = tokenizer(\n",
+    "    lines,\n",
+    "    padding=True,\n",
+    "    truncation=True,\n",
+    "    max_length=model.config.max_position_embeddings,\n",
+    "    return_tensors=\"pt\",\n",
+    ")\n",
+    "\n",
+    "# forward pass through the model (no gradients needed for a sanity check)\n",
+    "with torch.no_grad():\n",
+    "    outputs = model(**tokens_info)\n",
+    "\n",
+    "print(outputs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8a9f349e-847e-47d5-a6c6-8ea4800f86be",
+   "metadata": {},
+   "source": [
+    "## Tokenize"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "id": "96aa6f5f-5c11-4db4-8288-f2a7bcc571b8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def tokenize_function(examples, max_length=512):\n",
+    "    \"\"\"Tokenize the Russian text of one example (or a batch of examples).\n",
+    "\n",
+    "    Args:\n",
+    "        examples: mapping with a \"ru_text\" entry, as passed by `map`.\n",
+    "        max_length: maximum number of tokens kept per text. The default of\n",
+    "            512 matches the position-embedding size of the BERT model printed\n",
+    "            above; longer inputs would not fit into the model.\n",
+    "\n",
+    "    Returns:\n",
+    "        The tokenizer output for the given text(s).\n",
+    "    \"\"\"\n",
+    "    # Without an explicit max_length this tokenizer has nothing to truncate\n",
+    "    # to and silently falls back to \"no truncation\".\n",
+    "    return tokenizer(\n",
+    "        examples[\"ru_text\"],\n",
+    "        padding='longest',\n",
+    "        truncation=True,\n",
+    "        max_length=max_length,\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "id": "2ea8b2bf-1a8a-4167-a1a0-1f8ce7b20e94",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def one_hot_labels(example):\n",
+    "    \"\"\"Replace the label ids of `example` with a multi-hot float vector.\n",
+    "\n",
+    "    The vector has `num_classes` entries: positions listed in\n",
+    "    example[\"labels\"] are set to 1.0, all others stay 0.0. The example is\n",
+    "    modified in place and returned.\n",
+    "    \"\"\"\n",
+    "    multi_hot = [0.0 for _ in range(num_classes)]\n",
+    "    for class_id in example[\"labels\"]:\n",
+    "        multi_hot[class_id] = 1.0\n",
+    "    example[\"labels\"] = multi_hot\n",
+    "    return example"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "id": "4b408bb0-97f0-42f4-b13b-35b0b3b01506",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "21"
+      ]
+     },
+     "execution_count": 41,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(tokenize_function(ds[\"train\"][2])['input_ids'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "0bfdfddd-6d73-45a1-89f5-221a75f28745",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e8f6a19a7e9449eabd1d62a382d0db96",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Map:   0%|          | 0/43410 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "a3d27e8f742642c99e27787850ba4bf9",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Map:   0%|          | 0/5426 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "95ac2872c042459c992a935a82549f8d",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Map:   0%|          | 0/5427 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "f9f45cce90b140d09ac6f580d1e4e00e",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Map:   0%|          | 0/43410 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "740df2a579bb471493de1582891e6e51",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Map:   0%|          | 0/5426 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "4f1b63546df94e2e9635fbdcde980b5a",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Map:   0%|          | 0/5427 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Two passes over every split: tokenize the text, then multi-hot encode the labels.\n",
+    "tokenized_datasets = ds.map(function=tokenize_function)\n",
+    "converted_datasets = tokenized_datasets.map(function=one_hot_labels)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "id": "84d53da8-5364-4079-b9ee-110e7facef96",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "a3cbea591a984389b0d147a53d20b65f",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Casting the dataset:   0%|          | 0/43410 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "d3df1c1449ff4e08a56f18e664c20195",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Casting the dataset:   0%|          | 0/5426 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "43fa8619d62443da92be37fb8c8aada1",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Casting the dataset:   0%|          | 0/5427 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Return only the columns the model consumes, as torch tensors.\n",
+    "model_columns = [\"input_ids\", \"attention_mask\", \"labels\"]\n",
+    "converted_datasets.set_format(type=\"torch\", columns=model_columns)\n",
+    "\n",
+    "# Store the multi-hot labels as float32 (the dtype is checked two cells below).\n",
+    "float_vector = datasets.features.Sequence(datasets.Value(\"float32\"))\n",
+    "converted_datasets = converted_datasets.cast_column(\"labels\", float_vector)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "id": "64326646-2b8b-40f4-99be-9422b53018e4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Shuffle every split with the same fixed seed.\n",
+    "train_dataset, val_dataset, test_dataset = (\n",
+    "    converted_datasets[split_name].shuffle(seed=42)\n",
+    "    for split_name in (\"train\", \"validation\", \"test\")\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "eeb7f1cd-6586-40a1-a629-1f2b760fe7d4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "torch.float32"
+      ]
+     },
+     "execution_count": 33,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "train_dataset['labels'][0].dtype"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "id": "1e28ccc5-3fe6-41b3-b515-f92969660249",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'labels': tensor([0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
+       "         0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),\n",
+       " 'input_ids': tensor([   101,  11601,   7363,    128,   1761,  18934,    842,  15991,  47993,\n",
+       "            860,   1703,  38969,  70261,    128,  63935,    128,   8542,   4725,\n",
+       "         106183,  40831,  28231,    845,  10843, 100820,   4346,  89470,    132,\n",
+       "            102]),\n",
+       " 'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+       "         1, 1, 1, 1])}"
+      ]
+     },
+     "execution_count": 40,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "train_dataset[4]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "36f67b80-61bc-4147-b817-ce81296d2ecf",
+   "metadata": {},
+   "source": [
+    "## Training"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "id": "b3e4c392-b725-4647-81bf-bc9f14f5c814",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def compute_metrics(eval_pred):\n",
+    "    \"\"\"Compute multi-label classification metrics for the evaluation loop.\n",
+    "\n",
+    "    Args:\n",
+    "        eval_pred: pair `(logits, labels)` of arrays with one column per class\n",
+    "            (assumed shape (n_samples, n_classes) - TODO confirm).\n",
+    "\n",
+    "    Returns:\n",
+    "        dict with micro/macro F1, micro precision, micro recall and accuracy.\n",
+    "    \"\"\"\n",
+    "    logits, labels = eval_pred\n",
+    "    # sigmoid(x) > 0.5 is the same decision as x > 0; thresholding the logits\n",
+    "    # directly avoids np.exp overflow warnings for large-magnitude logits.\n",
+    "    preds = (logits > 0).astype(int)  # convert to 0/1\n",
+    "\n",
+    "    # zero_division=0 keeps the previous value (0) for classes that are never\n",
+    "    # predicted, but without emitting an undefined-metric warning per call.\n",
+    "    return {\n",
+    "        \"f1_micro\": f1_score(labels, preds, average=\"micro\", zero_division=0),\n",
+    "        \"f1_macro\": f1_score(labels, preds, average=\"macro\", zero_division=0),\n",
+    "        \"precision\": precision_score(labels, preds, average=\"micro\", zero_division=0),\n",
+    "        \"recall\": recall_score(labels, preds, average=\"micro\", zero_division=0),\n",
+    "        \"accuracy\": accuracy_score(labels, preds)  # share of samples whose full label set matches\n",
+    "    }"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "id": "9ff1a4e7-a3df-4c4f-bba8-8961cfa1a144",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "training_args = TrainingArguments(\n",
+    "    output_dir=\"./rubert\",\n",
+    "    overwrite_output_dir=True,\n",
+    "    num_train_epochs=10,\n",
+    "    learning_rate=1e-5,\n",
+    "    lr_scheduler_type=\"cosine\",\n",
+    "    # lr_scheduler_kwargs={},\n",
+    "    warmup_ratio=0.05,\n",
+    "    # warmup_steps=10,\n",
+    "    per_device_train_batch_size=16,\n",
+    "    gradient_accumulation_steps=1,\n",
+    "    log_level=\"error\",\n",
+    "    # logging_dir=\"output_dir/runs/CURRENT_DATETIME_HOSTNAME\"  # tensorboard logs (default)\n",
+    "    logging_strategy=\"steps\",\n",
+    "    # Report the training loss averaged over 50 steps. With logging_steps=1\n",
+    "    # the per-epoch \"Training Loss\" column is the loss of one single batch\n",
+    "    # (very noisy) and every one of the ~27k steps produces a log entry.\n",
+    "    logging_steps=50,\n",
+    "    save_strategy=\"epoch\",\n",
+    "    # save_steps=1,\n",
+    "    save_total_limit=2,\n",
+    "    save_safetensors=True,  # safetensors instead of torch.save / torch.load\n",
+    "    save_only_model=False,  # also save optimizer, scheduler, rng, ...\n",
+    "    use_cpu=False,\n",
+    "    seed=42,\n",
+    "    # bf16=True,  # use bf16 instead of fp32\n",
+    "    eval_strategy=\"epoch\",\n",
+    "    # eval_steps=32,\n",
+    "    disable_tqdm=False,\n",
+    "    load_best_model_at_end=False,\n",
+    "    # label_smoothing_factor=0.,\n",
+    "    optim=\"adamw_torch\",\n",
+    "    # optim_args=...,\n",
+    "    # resume_from_checkpoint=...,\n",
+    "    # auto_find_batch_size=...,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "id": "7c33f1b2-cdfa-4295-814f-261f066633c8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import gc\n",
+    "\n",
+    "gc.collect()\n",
+    "torch.cuda.empty_cache()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "id": "a2ac0381-be84-4dd4-94e7-53cfd3bddc63",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='27140' max='27140' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [27140/27140 25:37, Epoch 10/10]\n",
+       "    </div>\n",
+       "    <table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       " <tr style=\"text-align: left;\">\n",
+       "      <th>Epoch</th>\n",
+       "      <th>Training Loss</th>\n",
+       "      <th>Validation Loss</th>\n",
+       "      <th>F1 Micro</th>\n",
+       "      <th>F1 Macro</th>\n",
+       "      <th>Precision</th>\n",
+       "      <th>Recall</th>\n",
+       "      <th>Accuracy</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <td>1</td>\n",
+       "      <td>0.167400</td>\n",
+       "      <td>0.117092</td>\n",
+       "      <td>0.365053</td>\n",
+       "      <td>0.112839</td>\n",
+       "      <td>0.775458</td>\n",
+       "      <td>0.238715</td>\n",
+       "      <td>0.239219</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>2</td>\n",
+       "      <td>0.124500</td>\n",
+       "      <td>0.098455</td>\n",
+       "      <td>0.487169</td>\n",
+       "      <td>0.199681</td>\n",
+       "      <td>0.705830</td>\n",
+       "      <td>0.371944</td>\n",
+       "      <td>0.365647</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>3</td>\n",
+       "      <td>0.069300</td>\n",
+       "      <td>0.094669</td>\n",
+       "      <td>0.524119</td>\n",
+       "      <td>0.308011</td>\n",
+       "      <td>0.688249</td>\n",
+       "      <td>0.423197</td>\n",
+       "      <td>0.404165</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>4</td>\n",
+       "      <td>0.101400</td>\n",
+       "      <td>0.094210</td>\n",
+       "      <td>0.524894</td>\n",
+       "      <td>0.329945</td>\n",
+       "      <td>0.682731</td>\n",
+       "      <td>0.426332</td>\n",
+       "      <td>0.405824</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>5</td>\n",
+       "      <td>0.115100</td>\n",
+       "      <td>0.097984</td>\n",
+       "      <td>0.534122</td>\n",
+       "      <td>0.351584</td>\n",
+       "      <td>0.636659</td>\n",
+       "      <td>0.460031</td>\n",
+       "      <td>0.429414</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>6</td>\n",
+       "      <td>0.030200</td>\n",
+       "      <td>0.101337</td>\n",
+       "      <td>0.527109</td>\n",
+       "      <td>0.364458</td>\n",
+       "      <td>0.626647</td>\n",
+       "      <td>0.454859</td>\n",
+       "      <td>0.423701</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>7</td>\n",
+       "      <td>0.052100</td>\n",
+       "      <td>0.103811</td>\n",
+       "      <td>0.527860</td>\n",
+       "      <td>0.365408</td>\n",
+       "      <td>0.614664</td>\n",
+       "      <td>0.462539</td>\n",
+       "      <td>0.427571</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>8</td>\n",
+       "      <td>0.009300</td>\n",
+       "      <td>0.105722</td>\n",
+       "      <td>0.530681</td>\n",
+       "      <td>0.371352</td>\n",
+       "      <td>0.608722</td>\n",
+       "      <td>0.470376</td>\n",
+       "      <td>0.431810</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>9</td>\n",
+       "      <td>0.008400</td>\n",
+       "      <td>0.107027</td>\n",
+       "      <td>0.531044</td>\n",
+       "      <td>0.374502</td>\n",
+       "      <td>0.606030</td>\n",
+       "      <td>0.472571</td>\n",
+       "      <td>0.432731</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>10</td>\n",
+       "      <td>0.040200</td>\n",
+       "      <td>0.107173</td>\n",
+       "      <td>0.530246</td>\n",
+       "      <td>0.375274</td>\n",
+       "      <td>0.604983</td>\n",
+       "      <td>0.471944</td>\n",
+       "      <td>0.432916</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table><p>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": [
+       "TrainOutput(global_step=27140, training_loss=0.08366055827060757, metrics={'train_runtime': 1538.1289, 'train_samples_per_second': 282.226, 'train_steps_per_second': 17.645, 'total_flos': 8421320854320816.0, 'train_loss': 0.08366055827060757, 'epoch': 10.0})"
+      ]
+     },
+     "execution_count": 42,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from transformers import DataCollatorWithPadding\n",
+    "\n",
+    "# Collator that pads the examples of each batch when the batch is assembled.\n",
+    "data_collator = DataCollatorWithPadding(tokenizer=tokenizer)\n",
+    "\n",
+    "# Train on the shuffled train split; evaluate on the validation split with\n",
+    "# the multi-label metrics defined above.\n",
+    "trainer_kwargs = dict(\n",
+    "    model=model,\n",
+    "    args=training_args,\n",
+    "    train_dataset=train_dataset,\n",
+    "    eval_dataset=val_dataset,\n",
+    "    data_collator=data_collator,\n",
+    "    compute_metrics=compute_metrics,\n",
+    ")\n",
+    "trainer = Trainer(**trainer_kwargs)\n",
+    "\n",
+    "trainer.train()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ff46bdbd-e945-4abb-a39d-dca292b9856b",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "id": "eaceeaef-7286-48f3-9f79-b35d7d41da23",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# NOTE(review): no visible cell writes 'emotions/my_model' - presumably a\n",
+    "# checkpoint from ./rubert was copied there by hand; confirm before re-running.\n",
+    "saved_model_dir = 'emotions/my_model'\n",
+    "tokenizer = AutoTokenizer.from_pretrained(saved_model_dir)\n",
+    "model = AutoModelForSequenceClassification.from_pretrained(\n",
+    "    saved_model_dir,\n",
+    "    num_labels=num_classes,\n",
+    "    problem_type=\"multi_label_classification\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "id": "c3c93f39-b481-4e4a-b1da-dd50f1e94742",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "0a8fc03ea3144dc5b2093dcbc5953a57",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model.safetensors:   0%|          | 0.00/712M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "b4f26250408b4b4ea332bee596aa14de",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": [
+       "CommitInfo(commit_url='https://huggingface.co/alxvlsv/rubert-emotions/commit/d958f2338ac01e6fe177f0186124322d6d18114a', commit_message='Upload tokenizer', commit_description='', oid='d958f2338ac01e6fe177f0186124322d6d18114a', pr_url=None, repo_url=RepoUrl('https://huggingface.co/alxvlsv/rubert-emotions', endpoint='https://huggingface.co', repo_type='model', repo_id='alxvlsv/rubert-emotions'), pr_revision=None, pr_num=None)"
+      ]
+     },
+     "execution_count": 50,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Publish the model weights and the tokenizer to the same Hub repository.\n",
+    "hub_repo_id = \"alxvlsv/rubert-emotions\"\n",
+    "model.push_to_hub(hub_repo_id)\n",
+    "tokenizer.push_to_hub(hub_repo_id)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ebb82e42-50c5-4338-ac57-bbffa85c25b1",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python (shad)",
+   "language": "python",
+   "name": "shad"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}