{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "BO7MEGbb6mtB" }, "source": [ "# Finetune\n", "Fine-tuning a RuGPT model with the Hugging Face `transformers` library.\n", "\n", "## Install env" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true, "id": "Xyhc5yrzR75j" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Cloning into 'transformers'...\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Defaulting to user installation because normal site-packages is not writeable\n", "Processing /home/kamil/Documents/SHAD/ML/Part 2/Seminar 7/transformers\n", " Installing build dependencies: started\n", " Installing build dependencies: finished with status 'done'\n", " Getting requirements to build wheel: started\n", " Getting requirements to build wheel: finished with status 'done'\n", " Preparing metadata (pyproject.toml): started\n", " Preparing metadata (pyproject.toml): finished with status 'done'\n", "Requirement already satisfied: tqdm>=4.27 in /home/kamil/.local/lib/python3.10/site-packages (from transformers==4.29.0.dev0) (4.65.0)\n", "Requirement already satisfied: regex!=2019.12.17 in /home/kamil/.local/lib/python3.10/site-packages (from transformers==4.29.0.dev0) (2023.3.23)\n", "Requirement already satisfied: requests in /home/kamil/.local/lib/python3.10/site-packages (from transformers==4.29.0.dev0) (2.28.2)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/lib/python3/dist-packages (from transformers==4.29.0.dev0) (5.4.1)\n", "Requirement already satisfied: filelock in /home/kamil/.local/lib/python3.10/site-packages (from transformers==4.29.0.dev0) (3.10.6)\n", "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /home/kamil/.local/lib/python3.10/site-packages (from transformers==4.29.0.dev0) (0.13.3)\n", "Requirement already satisfied: packaging>=20.0 in /usr/lib/python3/dist-packages (from transformers==4.29.0.dev0) (21.3)\n", "Requirement already satisfied: numpy>=1.17 in 
/usr/lib/python3/dist-packages (from transformers==4.29.0.dev0) (1.21.5)\n", "Requirement already satisfied: huggingface-hub<1.0,>=0.11.0 in /home/kamil/.local/lib/python3.10/site-packages (from transformers==4.29.0.dev0) (0.13.4)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/kamil/.local/lib/python3.10/site-packages (from huggingface-hub<1.0,>=0.11.0->transformers==4.29.0.dev0) (4.5.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests->transformers==4.29.0.dev0) (2020.6.20)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /home/kamil/.local/lib/python3.10/site-packages (from requests->transformers==4.29.0.dev0) (3.0.1)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/lib/python3/dist-packages (from requests->transformers==4.29.0.dev0) (1.26.5)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests->transformers==4.29.0.dev0) (3.3)\n", "Building wheels for collected packages: transformers\n", " Building wheel for transformers (pyproject.toml): started\n", " Building wheel for transformers (pyproject.toml): finished with status 'done'\n", " Created wheel for transformers: filename=transformers-4.29.0.dev0-py3-none-any.whl size=6929166 sha256=280057264eb46bc68355d5c5a1a4d2caff1da9951d55bacbaa62463cbf73296c\n", " Stored in directory: /tmp/pip-ephem-wheel-cache-xt8a8mve/wheels/a5/d3/d1/e281e4412399bfd2f44bb86274ac4204a7d53b596a501f2ad1\n", "Successfully built transformers\n", "Installing collected packages: transformers\n", " Attempting uninstall: transformers\n", " Found existing installation: transformers 4.27.4\n", " Uninstalling transformers-4.27.4:\n", " Successfully uninstalled transformers-4.27.4\n", "Successfully installed transformers-4.29.0.dev0\n" ] } ], "source": [ "%%bash\n", "git clone https://github.com/huggingface/transformers\n", "cd transformers\n", "pip install ." 
] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true, "id": "Os4vOL5LTOmk" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Defaulting to user installation because normal site-packages is not writeable\n", "Collecting datasets\n", " Downloading datasets-2.11.0-py3-none-any.whl (468 kB)\n", "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m468.7/468.7 kB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m[36m0:00:01\u001b[0mm eta \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/lib/python3/dist-packages (from datasets) (1.21.5)\n", "Requirement already satisfied: pyarrow>=8.0.0 in /home/kamil/.local/lib/python3.10/site-packages (from datasets) (11.0.0)\n", "Requirement already satisfied: tqdm>=4.62.1 in /home/kamil/.local/lib/python3.10/site-packages (from datasets) (4.65.0)\n", "Requirement already satisfied: packaging in /usr/lib/python3/dist-packages (from datasets) (21.3)\n", "Collecting dill<0.3.7,>=0.3.0\n", " Downloading dill-0.3.6-py3-none-any.whl (110 kB)\n", "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m110.5/110.5 kB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hRequirement already satisfied: pandas in /home/kamil/.local/lib/python3.10/site-packages (from datasets) (1.5.3)\n", "Requirement already satisfied: huggingface-hub<1.0.0,>=0.11.0 in /home/kamil/.local/lib/python3.10/site-packages (from datasets) (0.13.4)\n", "Collecting aiohttp\n", " Downloading aiohttp-3.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)\n", "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m0m eta \u001b[36m0:00:01\u001b[0m0:01\u001b[0m\n", 
"\u001b[?25hRequirement already satisfied: requests>=2.19.0 in /home/kamil/.local/lib/python3.10/site-packages (from datasets) (2.28.2)\n", "Collecting fsspec[http]>=2021.11.1\n", " Downloading fsspec-2023.4.0-py3-none-any.whl (153 kB)\n", "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m154.0/154.0 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hRequirement already satisfied: pyyaml>=5.1 in /usr/lib/python3/dist-packages (from datasets) (5.4.1)\n", "Collecting xxhash\n", " Downloading xxhash-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)\n", "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.5/212.5 kB\u001b[0m \u001b[31m4.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m31m10.9 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hCollecting responses<0.19\n", " Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n", "Collecting multiprocess\n", " Downloading multiprocess-0.70.14-py310-none-any.whl (134 kB)\n", "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.3/134.3 kB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hRequirement already satisfied: charset-normalizer<4.0,>=2.0 in /home/kamil/.local/lib/python3.10/site-packages (from aiohttp->datasets) (3.0.1)\n", "Collecting multidict<7.0,>=4.5\n", " Downloading multidict-6.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (114 kB)\n", "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m114.5/114.5 kB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m[36m0:00:01\u001b[0m\n", "\u001b[?25hRequirement already satisfied: attrs>=17.3.0 in /usr/lib/python3/dist-packages (from aiohttp->datasets) (21.2.0)\n", "Collecting 
async-timeout<5.0,>=4.0.0a3\n", " Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)\n", "Collecting frozenlist>=1.1.1\n", " Downloading frozenlist-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (149 kB)\n", "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m149.6/149.6 kB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hCollecting yarl<2.0,>=1.0\n", " Downloading yarl-1.8.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (264 kB)\n", "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m264.0/264.0 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m31m15.4 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hCollecting aiosignal>=1.1.2\n", " Downloading aiosignal-1.3.1-py3-none-any.whl (7.6 kB)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/kamil/.local/lib/python3.10/site-packages (from huggingface-hub<1.0.0,>=0.11.0->datasets) (4.5.0)\n", "Requirement already satisfied: filelock in /home/kamil/.local/lib/python3.10/site-packages (from huggingface-hub<1.0.0,>=0.11.0->datasets) (3.10.6)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/lib/python3/dist-packages (from requests>=2.19.0->datasets) (1.26.5)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests>=2.19.0->datasets) (3.3)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests>=2.19.0->datasets) (2020.6.20)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/lib/python3/dist-packages (from pandas->datasets) (2022.1)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in /home/kamil/.local/lib/python3.10/site-packages (from pandas->datasets) (2.8.2)\n", "Requirement already satisfied: six>=1.5 in 
/usr/lib/python3/dist-packages (from python-dateutil>=2.8.1->pandas->datasets) (1.16.0)\n", "Installing collected packages: xxhash, multidict, fsspec, frozenlist, dill, async-timeout, yarl, responses, multiprocess, aiosignal, aiohttp, datasets\n", "Successfully installed aiohttp-3.8.4 aiosignal-1.3.1 async-timeout-4.0.2 datasets-2.11.0 dill-0.3.6 frozenlist-1.3.3 fsspec-2023.4.0 multidict-6.0.4 multiprocess-0.70.14 responses-0.18.0 xxhash-3.2.0 yarl-1.8.2\n" ] } ], "source": [ "!pip install datasets" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true, "id": "m1P6WSIeTdV5" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Defaulting to user installation because normal site-packages is not writeable\n", "Collecting evaluate\n", " Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)\n", "\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m81.4/81.4 kB\u001b[0m \u001b[31m1.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hRequirement already satisfied: xxhash in /home/kamil/.local/lib/python3.10/site-packages (from evaluate) (3.2.0)\n", "Requirement already satisfied: fsspec[http]>=2021.05.0 in /home/kamil/.local/lib/python3.10/site-packages (from evaluate) (2023.4.0)\n", "Requirement already satisfied: responses<0.19 in /home/kamil/.local/lib/python3.10/site-packages (from evaluate) (0.18.0)\n", "Requirement already satisfied: packaging in /usr/lib/python3/dist-packages (from evaluate) (21.3)\n", "Requirement already satisfied: tqdm>=4.62.1 in /home/kamil/.local/lib/python3.10/site-packages (from evaluate) (4.65.0)\n", "Requirement already satisfied: huggingface-hub>=0.7.0 in /home/kamil/.local/lib/python3.10/site-packages (from evaluate) (0.13.4)\n", "Requirement already satisfied: numpy>=1.17 in /usr/lib/python3/dist-packages (from evaluate) (1.21.5)\n", "Requirement already satisfied: dill in 
/home/kamil/.local/lib/python3.10/site-packages (from evaluate) (0.3.6)\n", "Requirement already satisfied: requests>=2.19.0 in /home/kamil/.local/lib/python3.10/site-packages (from evaluate) (2.28.2)\n", "Requirement already satisfied: datasets>=2.0.0 in /home/kamil/.local/lib/python3.10/site-packages (from evaluate) (2.11.0)\n", "Requirement already satisfied: multiprocess in /home/kamil/.local/lib/python3.10/site-packages (from evaluate) (0.70.14)\n", "Requirement already satisfied: pandas in /home/kamil/.local/lib/python3.10/site-packages (from evaluate) (1.5.3)\n", "Requirement already satisfied: aiohttp in /home/kamil/.local/lib/python3.10/site-packages (from datasets>=2.0.0->evaluate) (3.8.4)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/lib/python3/dist-packages (from datasets>=2.0.0->evaluate) (5.4.1)\n", "Requirement already satisfied: pyarrow>=8.0.0 in /home/kamil/.local/lib/python3.10/site-packages (from datasets>=2.0.0->evaluate) (11.0.0)\n", "Requirement already satisfied: filelock in /home/kamil/.local/lib/python3.10/site-packages (from huggingface-hub>=0.7.0->evaluate) (3.10.6)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/kamil/.local/lib/python3.10/site-packages (from huggingface-hub>=0.7.0->evaluate) (4.5.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /home/kamil/.local/lib/python3.10/site-packages (from requests>=2.19.0->evaluate) (3.0.1)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/lib/python3/dist-packages (from requests>=2.19.0->evaluate) (1.26.5)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests>=2.19.0->evaluate) (2020.6.20)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests>=2.19.0->evaluate) (3.3)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in /home/kamil/.local/lib/python3.10/site-packages (from pandas->evaluate) (2.8.2)\n", "Requirement already 
satisfied: pytz>=2020.1 in /usr/lib/python3/dist-packages (from pandas->evaluate) (2022.1)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /home/kamil/.local/lib/python3.10/site-packages (from aiohttp->datasets>=2.0.0->evaluate) (1.3.1)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /home/kamil/.local/lib/python3.10/site-packages (from aiohttp->datasets>=2.0.0->evaluate) (1.3.3)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /home/kamil/.local/lib/python3.10/site-packages (from aiohttp->datasets>=2.0.0->evaluate) (6.0.4)\n", "Requirement already satisfied: attrs>=17.3.0 in /usr/lib/python3/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (21.2.0)\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /home/kamil/.local/lib/python3.10/site-packages (from aiohttp->datasets>=2.0.0->evaluate) (1.8.2)\n", "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /home/kamil/.local/lib/python3.10/site-packages (from aiohttp->datasets>=2.0.0->evaluate) (4.0.2)\n", "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.1->pandas->evaluate) (1.16.0)\n", "Installing collected packages: evaluate\n", "Successfully installed evaluate-0.4.0\n" ] } ], "source": [ "!pip install evaluate" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "id": "WJZtWu8u6nwL" }, "outputs": [], "source": [ "!mkdir models/" ] }, { "cell_type": "markdown", "metadata": { "id": "WqwZiumW8WbZ" }, "source": [ "## Download files" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": true, "id": "j51bKtQW6nyY" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--2023-04-16 19:47:12-- https://www.dropbox.com/s/oa3v9c7g9bp40xw/train.txt?dl=0\n", "Resolving www.dropbox.com (www.dropbox.com)... 162.125.70.18, 2620:100:6027:18::a27d:4812\n", "Connecting to www.dropbox.com (www.dropbox.com)|162.125.70.18|:443... connected.\n", "HTTP request sent, awaiting response... 
302 Found\n", "Location: /s/raw/oa3v9c7g9bp40xw/train.txt [following]\n", "--2023-04-16 19:47:13-- https://www.dropbox.com/s/raw/oa3v9c7g9bp40xw/train.txt\n", "Reusing existing connection to www.dropbox.com:443.\n", "HTTP request sent, awaiting response... 302 Found\n", "Location: https://uc5788429f15c026c306ed6aa7c0.dl.dropboxusercontent.com/cd/0/inline/B6QRy9JQtzcR-y7uMF3TBS26D_9WsPQhmzXoWmGuHLgFMVq5YeUy4XIvymTf-coW8njd463mquV6DZB7LKdlznygflsCZHNIJ0A8Hf_yyRl2y5rb63wSIyvyBbANSc5DBKvhD4HSmZ-G8GDlRmEf3CXz-PP4jpoQFXwvDZCbIGlStw/file# [following]\n", "--2023-04-16 19:47:13-- https://uc5788429f15c026c306ed6aa7c0.dl.dropboxusercontent.com/cd/0/inline/B6QRy9JQtzcR-y7uMF3TBS26D_9WsPQhmzXoWmGuHLgFMVq5YeUy4XIvymTf-coW8njd463mquV6DZB7LKdlznygflsCZHNIJ0A8Hf_yyRl2y5rb63wSIyvyBbANSc5DBKvhD4HSmZ-G8GDlRmEf3CXz-PP4jpoQFXwvDZCbIGlStw/file\n", "Resolving uc5788429f15c026c306ed6aa7c0.dl.dropboxusercontent.com (uc5788429f15c026c306ed6aa7c0.dl.dropboxusercontent.com)... 162.125.70.15, 2620:100:6028:15::a27d:470f\n", "Connecting to uc5788429f15c026c306ed6aa7c0.dl.dropboxusercontent.com (uc5788429f15c026c306ed6aa7c0.dl.dropboxusercontent.com)|162.125.70.15|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 1654900 (1,6M) [text/plain]\n", "Saving to: ‘train.txt’\n", "\n", "train.txt 100%[===================>] 1,58M 8,43MB/s in 0,2s \n", "\n", "2023-04-16 19:47:14 (8,43 MB/s) - ‘train.txt’ saved [1654900/1654900]\n", "\n", "--2023-04-16 19:47:14-- https://www.dropbox.com/s/mworl3ld6r3bg62/valid.txt?dl=0\n", "Resolving www.dropbox.com (www.dropbox.com)... 162.125.70.18, 2620:100:6027:18::a27d:4812\n", "Connecting to www.dropbox.com (www.dropbox.com)|162.125.70.18|:443... connected.\n", "HTTP request sent, awaiting response... 
302 Found\n", "Location: /s/raw/mworl3ld6r3bg62/valid.txt [following]\n", "--2023-04-16 19:47:14-- https://www.dropbox.com/s/raw/mworl3ld6r3bg62/valid.txt\n", "Reusing existing connection to www.dropbox.com:443.\n", "HTTP request sent, awaiting response... 302 Found\n", "Location: https://uc5ee48fa1d36195fd1fe094947e.dl.dropboxusercontent.com/cd/0/inline/B6QZm3htPxEoOiKlbNIGQz27I0gnkhm3CfT9DoU9qR3VUmFjo8_GWcsquYc01t4LT6WYRj4t70Sw9Z9DhdBPq4ZFpgiGfN4TyCf4Hav48iIButfo1Aaa31uqnVavn3dRVXKM2CZ5ewiMDDEGDexFnB-ZPHZyomgPCjDRtkdkMvfP7g/file# [following]\n", "--2023-04-16 19:47:15-- https://uc5ee48fa1d36195fd1fe094947e.dl.dropboxusercontent.com/cd/0/inline/B6QZm3htPxEoOiKlbNIGQz27I0gnkhm3CfT9DoU9qR3VUmFjo8_GWcsquYc01t4LT6WYRj4t70Sw9Z9DhdBPq4ZFpgiGfN4TyCf4Hav48iIButfo1Aaa31uqnVavn3dRVXKM2CZ5ewiMDDEGDexFnB-ZPHZyomgPCjDRtkdkMvfP7g/file\n", "Resolving uc5ee48fa1d36195fd1fe094947e.dl.dropboxusercontent.com (uc5ee48fa1d36195fd1fe094947e.dl.dropboxusercontent.com)... 162.125.70.15, 2620:100:6026:15::a27d:460f\n", "Connecting to uc5ee48fa1d36195fd1fe094947e.dl.dropboxusercontent.com (uc5ee48fa1d36195fd1fe094947e.dl.dropboxusercontent.com)|162.125.70.15|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 167021 (163K) [text/plain]\n", "Saving to: ‘valid.txt’\n", "\n", "valid.txt 100%[===================>] 163,11K --.-KB/s in 0,08s \n", "\n", "2023-04-16 19:47:15 (2,02 MB/s) - ‘valid.txt’ saved [167021/167021]\n", "\n" ] } ], "source": [ "!wget -O train.txt https://www.dropbox.com/s/oa3v9c7g9bp40xw/train.txt?dl=0\n", "!wget -O valid.txt https://www.dropbox.com/s/mworl3ld6r3bg62/valid.txt?dl=0" ] }, { "cell_type": "markdown", "metadata": { "id": "zoyX62qN_38l" }, "source": [ "## Train\n", "The following code downloads the model and tokenizer from the Hugging Face Hub and fine-tunes the model for generating essays." 
] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": true, "id": "OCIERP8AS1Dl" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "04/16/2023 19:47:40 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", "04/16/2023 19:47:40 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", "_n_gpu=1,\n", "adafactor=False,\n", "adam_beta1=0.9,\n", "adam_beta2=0.999,\n", "adam_epsilon=1e-08,\n", "auto_find_batch_size=False,\n", "bf16=False,\n", "bf16_full_eval=False,\n", "data_seed=None,\n", "dataloader_drop_last=False,\n", "dataloader_num_workers=0,\n", "dataloader_pin_memory=True,\n", "ddp_bucket_cap_mb=None,\n", "ddp_find_unused_parameters=None,\n", "ddp_timeout=1800,\n", "debug=[],\n", "deepspeed=None,\n", "disable_tqdm=False,\n", "do_eval=True,\n", "do_predict=False,\n", "do_train=True,\n", "eval_accumulation_steps=None,\n", "eval_delay=0,\n", "eval_steps=None,\n", "evaluation_strategy=no,\n", "fp16=False,\n", "fp16_backend=auto,\n", "fp16_full_eval=False,\n", "fp16_opt_level=O1,\n", "fsdp=[],\n", "fsdp_config={'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False},\n", "fsdp_min_num_params=0,\n", "fsdp_transformer_layer_cls_to_wrap=None,\n", "full_determinism=False,\n", "gradient_accumulation_steps=1,\n", "gradient_checkpointing=False,\n", "greater_is_better=None,\n", "group_by_length=False,\n", "half_precision_backend=auto,\n", "hub_model_id=None,\n", "hub_private_repo=False,\n", "hub_strategy=every_save,\n", "hub_token=,\n", "ignore_data_skip=False,\n", "include_inputs_for_metrics=False,\n", "jit_mode_eval=False,\n", "label_names=None,\n", "label_smoothing_factor=0.0,\n", "learning_rate=5e-05,\n", "length_column_name=length,\n", "load_best_model_at_end=False,\n", "local_rank=-1,\n", "log_level=passive,\n", "log_level_replica=warning,\n", "log_on_each_node=True,\n", 
"logging_dir=models/essays2/runs/Apr16_19-47-40_kamil-desktop,\n", "logging_first_step=False,\n", "logging_nan_inf_filter=True,\n", "logging_steps=500,\n", "logging_strategy=steps,\n", "lr_scheduler_type=linear,\n", "max_grad_norm=1.0,\n", "max_steps=-1,\n", "metric_for_best_model=None,\n", "mp_parameters=,\n", "no_cuda=False,\n", "num_train_epochs=3.0,\n", "optim=adamw_hf,\n", "optim_args=None,\n", "output_dir=models/essays2,\n", "overwrite_output_dir=False,\n", "past_index=-1,\n", "per_device_eval_batch_size=1,\n", "per_device_train_batch_size=1,\n", "prediction_loss_only=False,\n", "push_to_hub=False,\n", "push_to_hub_model_id=None,\n", "push_to_hub_organization=None,\n", "push_to_hub_token=,\n", "ray_scope=last,\n", "remove_unused_columns=True,\n", "report_to=[],\n", "resume_from_checkpoint=None,\n", "run_name=models/essays2,\n", "save_on_each_node=False,\n", "save_safetensors=False,\n", "save_steps=500,\n", "save_strategy=steps,\n", "save_total_limit=None,\n", "seed=42,\n", "sharded_ddp=[],\n", "skip_memory_metrics=True,\n", "tf32=None,\n", "torch_compile=False,\n", "torch_compile_backend=None,\n", "torch_compile_mode=None,\n", "torchdynamo=None,\n", "tpu_metrics_debug=False,\n", "tpu_num_cores=None,\n", "use_ipex=False,\n", "use_legacy_prediction_loop=False,\n", "use_mps_device=False,\n", "warmup_ratio=0.0,\n", "warmup_steps=0,\n", "weight_decay=0.0,\n", "xpu_backend=None,\n", ")\n", "04/16/2023 19:47:40 - INFO - datasets.builder - Using custom data configuration default-94a5e2bc6bcfdc2e\n", "04/16/2023 19:47:40 - INFO - datasets.info - Loading Dataset Infos from /home/kamil/.local/lib/python3.10/site-packages/datasets/packaged_modules/text\n", "04/16/2023 19:47:40 - INFO - datasets.builder - Generating dataset text (/home/kamil/.cache/huggingface/datasets/text/default-94a5e2bc6bcfdc2e/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2)\n", "Downloading and preparing dataset text/default to 
/home/kamil/.cache/huggingface/datasets/text/default-94a5e2bc6bcfdc2e/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2...\n", "Downloading data files: 100%|██████████████████| 2/2 [00:00<00:00, 18517.90it/s]\n", "04/16/2023 19:47:40 - INFO - datasets.download.download_manager - Downloading took 0.0 min\n", "04/16/2023 19:47:40 - INFO - datasets.download.download_manager - Checksum Computation took 0.0 min\n", "Extracting data files: 100%|█████████████████████| 2/2 [00:00<00:00, 228.71it/s]\n", "04/16/2023 19:47:40 - INFO - datasets.builder - Generating train split\n", "04/16/2023 19:47:40 - INFO - datasets.builder - Generating validation split\n", "04/16/2023 19:47:40 - INFO - datasets.utils.info_utils - Unable to verify splits sizes.\n", "Dataset text downloaded and prepared to /home/kamil/.cache/huggingface/datasets/text/default-94a5e2bc6bcfdc2e/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2. Subsequent calls will reuse this data.\n", "100%|███████████████████████████████████████████| 2/2 [00:00<00:00, 1228.20it/s]\n", "Downloading (…)lve/main/config.json: 100%|██████| 608/608 [00:00<00:00, 832kB/s]\n", "[INFO|configuration_utils.py:668] 2023-04-16 19:47:41,750 >> loading configuration file config.json from cache at /home/kamil/.cache/huggingface/hub/models--sberbank-ai--rugpt3small_based_on_gpt2/snapshots/d64244b316057f71e745cc92be1dcfe7853d9d18/config.json\n", "[INFO|configuration_utils.py:720] 2023-04-16 19:47:41,751 >> Model config GPT2Config {\n", " \"_name_or_path\": \"sberbank-ai/rugpt3small_based_on_gpt2\",\n", " \"activation_function\": \"gelu_new\",\n", " \"architectures\": [\n", " \"GPT2LMHeadModel\"\n", " ],\n", " \"attn_pdrop\": 0.1,\n", " \"bos_token_id\": 50256,\n", " \"embd_pdrop\": 0.1,\n", " \"eos_token_id\": 50256,\n", " \"gradient_checkpointing\": false,\n", " \"initializer_range\": 0.02,\n", " \"layer_norm_epsilon\": 1e-05,\n", " \"model_type\": \"gpt2\",\n", " \"n_ctx\": 2048,\n", " \"n_embd\": 
768,\n", " \"n_head\": 12,\n", " \"n_inner\": null,\n", " \"n_layer\": 12,\n", " \"n_positions\": 2048,\n", " \"reorder_and_upcast_attn\": false,\n", " \"resid_pdrop\": 0.1,\n", " \"scale_attn_by_inverse_layer_idx\": false,\n", " \"scale_attn_weights\": true,\n", " \"summary_activation\": null,\n", " \"summary_first_dropout\": 0.1,\n", " \"summary_proj_to_labels\": true,\n", " \"summary_type\": \"cls_index\",\n", " \"summary_use_proj\": true,\n", " \"transformers_version\": \"4.29.0.dev0\",\n", " \"use_cache\": true,\n", " \"vocab_size\": 50264\n", "}\n", "\n", "[INFO|tokenization_auto.py:502] 2023-04-16 19:47:42,302 >> Could not locate the tokenizer configuration file, will try to use the model config instead.\n", "[INFO|configuration_utils.py:668] 2023-04-16 19:47:42,851 >> loading configuration file config.json from cache at /home/kamil/.cache/huggingface/hub/models--sberbank-ai--rugpt3small_based_on_gpt2/snapshots/d64244b316057f71e745cc92be1dcfe7853d9d18/config.json\n", "[INFO|configuration_utils.py:720] 2023-04-16 19:47:42,852 >> Model config GPT2Config {\n", " \"_name_or_path\": \"sberbank-ai/rugpt3small_based_on_gpt2\",\n", " \"activation_function\": \"gelu_new\",\n", " \"architectures\": [\n", " \"GPT2LMHeadModel\"\n", " ],\n", " \"attn_pdrop\": 0.1,\n", " \"bos_token_id\": 50256,\n", " \"embd_pdrop\": 0.1,\n", " \"eos_token_id\": 50256,\n", " \"gradient_checkpointing\": false,\n", " \"initializer_range\": 0.02,\n", " \"layer_norm_epsilon\": 1e-05,\n", " \"model_type\": \"gpt2\",\n", " \"n_ctx\": 2048,\n", " \"n_embd\": 768,\n", " \"n_head\": 12,\n", " \"n_inner\": null,\n", " \"n_layer\": 12,\n", " \"n_positions\": 2048,\n", " \"reorder_and_upcast_attn\": false,\n", " \"resid_pdrop\": 0.1,\n", " \"scale_attn_by_inverse_layer_idx\": false,\n", " \"scale_attn_weights\": true,\n", " \"summary_activation\": null,\n", " \"summary_first_dropout\": 0.1,\n", " \"summary_proj_to_labels\": true,\n", " \"summary_type\": \"cls_index\",\n", " \"summary_use_proj\": 
true,\n", " \"transformers_version\": \"4.29.0.dev0\",\n", " \"use_cache\": true,\n", " \"vocab_size\": 50264\n", "}\n", "\n", "Downloading (…)olve/main/vocab.json: 100%|█| 1.71M/1.71M [00:00<00:00, 3.73MB/s]\n", "Downloading (…)olve/main/merges.txt: 100%|█| 1.27M/1.27M [00:00<00:00, 5.74MB/s]\n", "[INFO|tokenization_utils_base.py:1809] 2023-04-16 19:47:47,652 >> loading file vocab.json from cache at /home/kamil/.cache/huggingface/hub/models--sberbank-ai--rugpt3small_based_on_gpt2/snapshots/d64244b316057f71e745cc92be1dcfe7853d9d18/vocab.json\n", "[INFO|tokenization_utils_base.py:1809] 2023-04-16 19:47:47,652 >> loading file merges.txt from cache at /home/kamil/.cache/huggingface/hub/models--sberbank-ai--rugpt3small_based_on_gpt2/snapshots/d64244b316057f71e745cc92be1dcfe7853d9d18/merges.txt\n", "[INFO|tokenization_utils_base.py:1809] 2023-04-16 19:47:47,652 >> loading file tokenizer.json from cache at None\n", "[INFO|tokenization_utils_base.py:1809] 2023-04-16 19:47:47,652 >> loading file added_tokens.json from cache at None\n", "[INFO|tokenization_utils_base.py:1809] 2023-04-16 19:47:47,652 >> loading file special_tokens_map.json from cache at None\n", "[INFO|tokenization_utils_base.py:1809] 2023-04-16 19:47:47,652 >> loading file tokenizer_config.json from cache at None\n", "[INFO|configuration_utils.py:668] 2023-04-16 19:47:47,652 >> loading configuration file config.json from cache at /home/kamil/.cache/huggingface/hub/models--sberbank-ai--rugpt3small_based_on_gpt2/snapshots/d64244b316057f71e745cc92be1dcfe7853d9d18/config.json\n", "[INFO|configuration_utils.py:720] 2023-04-16 19:47:47,653 >> Model config GPT2Config {\n", " \"_name_or_path\": \"sberbank-ai/rugpt3small_based_on_gpt2\",\n", " \"activation_function\": \"gelu_new\",\n", " \"architectures\": [\n", " \"GPT2LMHeadModel\"\n", " ],\n", " \"attn_pdrop\": 0.1,\n", " \"bos_token_id\": 50256,\n", " \"embd_pdrop\": 0.1,\n", " \"eos_token_id\": 50256,\n", " \"gradient_checkpointing\": false,\n", " 
\"initializer_range\": 0.02,\n", " \"layer_norm_epsilon\": 1e-05,\n", " \"model_type\": \"gpt2\",\n", " \"n_ctx\": 2048,\n", " \"n_embd\": 768,\n", " \"n_head\": 12,\n", " \"n_inner\": null,\n", " \"n_layer\": 12,\n", " \"n_positions\": 2048,\n", " \"reorder_and_upcast_attn\": false,\n", " \"resid_pdrop\": 0.1,\n", " \"scale_attn_by_inverse_layer_idx\": false,\n", " \"scale_attn_weights\": true,\n", " \"summary_activation\": null,\n", " \"summary_first_dropout\": 0.1,\n", " \"summary_proj_to_labels\": true,\n", " \"summary_type\": \"cls_index\",\n", " \"summary_use_proj\": true,\n", " \"transformers_version\": \"4.29.0.dev0\",\n", " \"use_cache\": true,\n", " \"vocab_size\": 50264\n", "}\n", "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[INFO|configuration_utils.py:668] 2023-04-16 19:47:47,725 >> loading configuration file config.json from cache at /home/kamil/.cache/huggingface/hub/models--sberbank-ai--rugpt3small_based_on_gpt2/snapshots/d64244b316057f71e745cc92be1dcfe7853d9d18/config.json\n", "[INFO|configuration_utils.py:720] 2023-04-16 19:47:47,725 >> Model config GPT2Config {\n", " \"_name_or_path\": \"sberbank-ai/rugpt3small_based_on_gpt2\",\n", " \"activation_function\": \"gelu_new\",\n", " \"architectures\": [\n", " \"GPT2LMHeadModel\"\n", " ],\n", " \"attn_pdrop\": 0.1,\n", " \"bos_token_id\": 50256,\n", " \"embd_pdrop\": 0.1,\n", " \"eos_token_id\": 50256,\n", " \"gradient_checkpointing\": false,\n", " \"initializer_range\": 0.02,\n", " \"layer_norm_epsilon\": 1e-05,\n", " \"model_type\": \"gpt2\",\n", " \"n_ctx\": 2048,\n", " \"n_embd\": 768,\n", " \"n_head\": 12,\n", " \"n_inner\": null,\n", " \"n_layer\": 12,\n", " \"n_positions\": 2048,\n", " \"reorder_and_upcast_attn\": false,\n", " \"resid_pdrop\": 0.1,\n", " \"scale_attn_by_inverse_layer_idx\": false,\n", " \"scale_attn_weights\": true,\n", " \"summary_activation\": null,\n", " \"summary_first_dropout\": 0.1,\n", " \"summary_proj_to_labels\": true,\n", " \"summary_type\": 
\"cls_index\",\n", " \"summary_use_proj\": true,\n", " \"transformers_version\": \"4.29.0.dev0\",\n", " \"use_cache\": true,\n", " \"vocab_size\": 50264\n", "}\n", "\n", "[WARNING|logging.py:280] 2023-04-16 19:47:47,765 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", "Downloading pytorch_model.bin: 100%|█████████| 551M/551M [00:36<00:00, 15.2MB/s]\n", "[INFO|modeling_utils.py:2534] 2023-04-16 19:48:24,907 >> loading weights file pytorch_model.bin from cache at /home/kamil/.cache/huggingface/hub/models--sberbank-ai--rugpt3small_based_on_gpt2/snapshots/d64244b316057f71e745cc92be1dcfe7853d9d18/pytorch_model.bin\n", "[INFO|configuration_utils.py:575] 2023-04-16 19:48:25,102 >> Generate config GenerationConfig {\n", " \"_from_model_config\": true,\n", " \"bos_token_id\": 50256,\n", " \"eos_token_id\": 50256,\n", " \"transformers_version\": \"4.29.0.dev0\"\n", "}\n", "\n", "[INFO|modeling_utils.py:3190] 2023-04-16 19:48:26,046 >> All model checkpoint weights were used when initializing GPT2LMHeadModel.\n", "\n", "[INFO|modeling_utils.py:3198] 2023-04-16 19:48:26,046 >> All the weights of GPT2LMHeadModel were initialized from the model checkpoint at sberbank-ai/rugpt3small_based_on_gpt2.\n", "If your task is similar to the task the model of the checkpoint was trained on, you can already use GPT2LMHeadModel for predictions without further training.\n", "[INFO|modeling_utils.py:2839] 2023-04-16 19:48:26,570 >> Generation config file not found, using a generation config created from the model config.\n", "Running tokenizer on dataset: 0%| | 0/720 [00:00> ***** Running training *****\n", "[INFO|trainer.py:1770] 2023-04-16 19:48:28,570 >> Num examples = 92\n", "[INFO|trainer.py:1771] 2023-04-16 19:48:28,570 >> Num Epochs = 3\n", "[INFO|trainer.py:1772] 2023-04-16 19:48:28,570 >> Instantaneous batch size per device = 1\n", "[INFO|trainer.py:1773] 2023-04-16 19:48:28,570 >> Total train batch size (w. 
parallel, distributed & accumulation) = 1\n", "[INFO|trainer.py:1774] 2023-04-16 19:48:28,570 >> Gradient Accumulation steps = 1\n", "[INFO|trainer.py:1775] 2023-04-16 19:48:28,570 >> Total optimization steps = 276\n", "[INFO|trainer.py:1776] 2023-04-16 19:48:28,570 >> Number of trainable parameters = 125,231,616\n", " 0%| | 0/276 [00:00\n", " main()\n", " File \"/home/kamil/Documents/SHAD/ML/Part 2/Seminar 7/run_clm.py\", line 583, in main\n", " train_result = trainer.train(resume_from_checkpoint=checkpoint)\n", " File \"/home/kamil/.local/lib/python3.10/site-packages/transformers/trainer.py\", line 1662, in train\n", " return inner_training_loop(\n", " File \"/home/kamil/.local/lib/python3.10/site-packages/transformers/trainer.py\", line 1929, in _inner_training_loop\n", " tr_loss_step = self.training_step(model, inputs)\n", " File \"/home/kamil/.local/lib/python3.10/site-packages/transformers/trainer.py\", line 2699, in training_step\n", " loss = self.compute_loss(model, inputs)\n", " File \"/home/kamil/.local/lib/python3.10/site-packages/transformers/trainer.py\", line 2731, in compute_loss\n", " outputs = model(**inputs)\n", " File \"/home/kamil/.local/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1501, in _call_impl\n", " return forward_call(*args, **kwargs)\n", " File \"/home/kamil/.local/lib/python3.10/site-packages/transformers/models/gpt2/modeling_gpt2.py\", line 1075, in forward\n", " transformer_outputs = self.transformer(\n", " File \"/home/kamil/.local/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1501, in _call_impl\n", " return forward_call(*args, **kwargs)\n", " File \"/home/kamil/.local/lib/python3.10/site-packages/transformers/models/gpt2/modeling_gpt2.py\", line 899, in forward\n", " outputs = block(\n", " File \"/home/kamil/.local/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1501, in _call_impl\n", " return forward_call(*args, **kwargs)\n", " File 
\"/home/kamil/.local/lib/python3.10/site-packages/transformers/models/gpt2/modeling_gpt2.py\", line 389, in forward\n", " attn_outputs = self.attn(\n", " File \"/home/kamil/.local/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1501, in _call_impl\n", " return forward_call(*args, **kwargs)\n", " File \"/home/kamil/.local/lib/python3.10/site-packages/transformers/models/gpt2/modeling_gpt2.py\", line 330, in forward\n", " attn_output, attn_weights = self._attn(query, key, value, attention_mask, head_mask)\n", " File \"/home/kamil/.local/lib/python3.10/site-packages/transformers/models/gpt2/modeling_gpt2.py\", line 185, in _attn\n", " attn_weights = attn_weights / torch.full(\n", "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 192.00 MiB (GPU 0; 7.79 GiB total capacity; 6.07 GiB already allocated; 171.81 MiB free; 6.09 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", " 0%| | 0/276 [00:01" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.random.seed(42)\n", "torch.manual_seed(42)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "id": "AkUrzKsy_16F" }, "outputs": [], "source": [ "from transformers import GPT2LMHeadModel, GPT2Tokenizer" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "id": "x_EMbgO0BTvb" }, "outputs": [], "source": [ "tok = GPT2Tokenizer.from_pretrained(\"models/essays\")" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "id": "Fjy0GAuQBYpA" }, "outputs": [], "source": [ "model = GPT2LMHeadModel.from_pretrained(\"models/essays\")" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": true, "id": "irh4H-HDBb6V" }, "outputs": [ { "data": { "text/plain": [ "GPT2LMHeadModel(\n", " (transformer): GPT2Model(\n", " (wte): Embedding(50264, 768)\n", " (wpe): 
Embedding(2048, 768)\n", " (drop): Dropout(p=0.1, inplace=False)\n", " (h): ModuleList(\n", " (0-11): 12 x GPT2Block(\n", " (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", " (attn): GPT2Attention(\n", " (c_attn): Conv1D()\n", " (c_proj): Conv1D()\n", " (attn_dropout): Dropout(p=0.1, inplace=False)\n", " (resid_dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", " (mlp): GPT2MLP(\n", " (c_fc): Conv1D()\n", " (c_proj): Conv1D()\n", " (act): NewGELUActivation()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " )\n", " )\n", " (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", " )\n", " (lm_head): Linear(in_features=768, out_features=50264, bias=False)\n", ")" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.cuda()" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "id": "hQY6A5q7Bd4O" }, "outputs": [], "source": [ "text = \"Тема: «В чем смысл жизни?»\\nСочинение: \"\n", "inpt = tok.encode(text, return_tensors=\"pt\")" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "id": "1gfJFmeOBj_t" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. 
Please pass your input's `attention_mask` to obtain reliable results.\n", "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n" ] } ], "source": [ "out = model.generate(inpt.cuda(), max_length=200, repetition_penalty=5.0, do_sample=True, top_k=5, top_p=0.95, temperature=1)" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 123 }, "id": "gWZ9SUCxB2Ki", "outputId": "31d8e1a3-376f-4f27-bd11-ba59a44983eb" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Тема: «В чем смысл жизни?»\n", "Сочинение: 📹Как часто в наше время мы слышим фразу \"жить надо так, чтобы было хорошо всем\". Однако не все могут себе позволить жить по-другому. В современном мире многие люди хотят изменить свою жизнь к лучшему и сделать ее комфортной для всех без исключения граждан нашей страны.