{ "cells": [ { "cell_type": "code", "execution_count": 3, "id": "464f59f6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Defaulting to user installation because normal site-packages is not writeable\n", "Collecting accelerate\n", " Downloading accelerate-0.29.3-py3-none-any.whl (297 kB)\n", "\u001b[K |████████████████████████████████| 297 kB 3.7 MB/s eta 0:00:01\n", "\u001b[?25hRequirement already satisfied: pyyaml in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/pyyaml/5.4.1/lib/python3.9/site-packages (from accelerate) (5.4.1)\n", "Requirement already satisfied: numpy>=1.17 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/MPI/gcc/11.2.0/openmpi/4.1.1/scipy-bundle/2021.10/lib/python3.9/site-packages (from accelerate) (1.21.3)\n", "Requirement already satisfied: psutil in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from accelerate) (5.8.0)\n", "Requirement already satisfied: huggingface-hub in ./.local/lib/python3.9/site-packages (from accelerate) (0.22.2)\n", "Requirement already satisfied: torch>=1.10.0 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/MPI/gcc/11.2.0/openmpi/4.1.1/pytorch/1.13.1-CUDA-11.8.0/lib/python3.9/site-packages (from accelerate) (1.13.1)\n", "Requirement already satisfied: safetensors>=0.3.1 in ./.local/lib/python3.9/site-packages (from accelerate) (0.4.3)\n", "Requirement already satisfied: packaging>=20.0 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from accelerate) (20.9)\n", "Requirement already satisfied: pyparsing>=2.0.2 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from packaging>=20.0->accelerate) (2.4.7)\n", 
"Requirement already satisfied: typing_extensions in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/typing-extensions/4.3.0/lib/python3.9/site-packages (from torch>=1.10.0->accelerate) (4.3.0)\n", "Requirement already satisfied: tqdm>=4.42.1 in ./.local/lib/python3.9/site-packages (from huggingface-hub->accelerate) (4.66.2)\n", "Requirement already satisfied: filelock in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from huggingface-hub->accelerate) (3.0.12)\n", "Requirement already satisfied: fsspec>=2023.5.0 in ./.local/lib/python3.9/site-packages (from huggingface-hub->accelerate) (2024.3.1)\n", "Requirement already satisfied: requests in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from huggingface-hub->accelerate) (2.26.0)\n", "Requirement already satisfied: idna<4,>=2.5 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from requests->huggingface-hub->accelerate) (3.2)\n", "Requirement already satisfied: charset-normalizer~=2.0.0 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from requests->huggingface-hub->accelerate) (2.0.4)\n", "Requirement already satisfied: certifi>=2017.4.17 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from requests->huggingface-hub->accelerate) (2021.5.30)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from requests->huggingface-hub->accelerate) (1.26.6)\n", "Installing collected packages: 
accelerate\n", "\u001b[33m WARNING: The scripts accelerate, accelerate-config, accelerate-estimate-memory and accelerate-launch are installed in '/user/bhanucha/.local/bin' which is not on PATH.\n", " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\n", "Successfully installed accelerate-0.29.3\n", "\u001b[33mWARNING: You are using pip version 21.2.2; however, version 24.0 is available.\n", "You should consider upgrading via the '/cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/bin/python -m pip install --upgrade pip' command.\u001b[0m\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "pip install accelerate" ] }, { "cell_type": "code", "execution_count": 4, "id": "008ef190", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Defaulting to user installation because normal site-packages is not writeable\n", "Requirement already satisfied: transformers in ./.local/lib/python3.9/site-packages (4.40.0)\n", "Requirement already satisfied: pyyaml>=5.1 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/pyyaml/5.4.1/lib/python3.9/site-packages (from transformers) (5.4.1)\n", "Requirement already satisfied: regex!=2019.12.17 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from transformers) (2021.8.3)\n", "Requirement already satisfied: requests in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from transformers) (2.26.0)\n", "Requirement already satisfied: safetensors>=0.4.1 in ./.local/lib/python3.9/site-packages (from transformers) (0.4.3)\n", "Requirement already satisfied: numpy>=1.17 in 
/cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/MPI/gcc/11.2.0/openmpi/4.1.1/scipy-bundle/2021.10/lib/python3.9/site-packages (from transformers) (1.21.3)\n", "Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in ./.local/lib/python3.9/site-packages (from transformers) (0.22.2)\n", "Requirement already satisfied: tqdm>=4.27 in ./.local/lib/python3.9/site-packages (from transformers) (4.66.2)\n", "Requirement already satisfied: tokenizers<0.20,>=0.19 in ./.local/lib/python3.9/site-packages (from transformers) (0.19.1)\n", "Requirement already satisfied: filelock in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from transformers) (3.0.12)\n", "Requirement already satisfied: packaging>=20.0 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from transformers) (20.9)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/typing-extensions/4.3.0/lib/python3.9/site-packages (from huggingface-hub<1.0,>=0.19.3->transformers) (4.3.0)\n", "Requirement already satisfied: fsspec>=2023.5.0 in ./.local/lib/python3.9/site-packages (from huggingface-hub<1.0,>=0.19.3->transformers) (2024.3.1)\n", "Requirement already satisfied: pyparsing>=2.0.2 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from packaging>=20.0->transformers) (2.4.7)\n", "Requirement already satisfied: idna<4,>=2.5 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from requests->transformers) (3.2)\n", "Requirement already satisfied: certifi>=2017.4.17 in 
/cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from requests->transformers) (2021.5.30)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from requests->transformers) (1.26.6)\n", "Requirement already satisfied: charset-normalizer~=2.0.0 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from requests->transformers) (2.0.4)\n", "\u001b[33mWARNING: You are using pip version 21.2.2; however, version 24.0 is available.\n", "You should consider upgrading via the '/cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/bin/python -m pip install --upgrade pip' command.\u001b[0m\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "pip install transformers" ] }, { "cell_type": "code", "execution_count": 2, "id": "1be7c37e", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-04-21 23:00:27.918259: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX512_VNNI AVX512_BF16 AVX_VNNI AMX_TILE AMX_INT8 AMX_BF16\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2024-04-21 23:00:33.682894: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n" ] } ], "source": [ "from transformers import BartTokenizer, BartForConditionalGeneration\n", "import torch\n", "from torch.utils.data import DataLoader, TensorDataset, random_split\n", "from transformers import Trainer, TrainingArguments\n", "from torch.utils.data import Dataset, DataLoader, random_split" ] }, { "cell_type": "code", "execution_count": 3, "id": "976e0258", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Sun Apr 21 23:00:40 2024 \r\n", "+-----------------------------------------------------------------------------+\r\n", "| NVIDIA-SMI 525.89.02 Driver Version: 525.89.02 CUDA Version: 12.0 |\r\n", "|-------------------------------+----------------------+----------------------+\r\n", "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\r\n", "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\r\n", "| | | MIG M. |\r\n", "|===============================+======================+======================|\r\n", "| 0 NVIDIA H100 PCIe On | 00000000:B5:00.0 Off | 0 |\r\n", "| N/A 39C P0 52W / 350W | 0MiB / 81559MiB | 0% Default |\r\n", "| | | Disabled |\r\n", "+-------------------------------+----------------------+----------------------+\r\n", " \r\n", "+-----------------------------------------------------------------------------+\r\n", "| Processes: |\r\n", "| GPU GI CI PID Type Process name GPU Memory |\r\n", "| ID ID Usage |\r\n", "|=============================================================================|\r\n", "| No running processes found |\r\n", "+-----------------------------------------------------------------------------+\r\n" ] } ], "source": [ "!nvidia-smi\n" ] }, { "cell_type": "code", "execution_count": 4, "id": "c1bd2d96", "metadata": {}, "outputs": [], "source": [ "import os\n", "import numpy as np\n", "import gc" ] }, { "cell_type": "code", "execution_count": 5, "id": "4ab4a4db", "metadata": {}, 
"outputs": [ { "data": { "text/plain": [ "['logs', 'logsh', 'results', 'resultsh', 'train_data_v2.npy']" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "os.listdir('/projects/academic/courses/cse676s24/bhanucha')" ] }, { "cell_type": "code", "execution_count": 6, "id": "c23d50ec", "metadata": {}, "outputs": [], "source": [ "train_data_path = '/projects/academic/courses/cse676s24/bhanucha/train_data_v2.npy'\n", "train_data = np.load(train_data_path, mmap_mode='r')" ] }, { "cell_type": "code", "execution_count": 7, "id": "2baa2aeb", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Data type: \n", "Data shape: (1405634, 512)\n", "Contents of the array: [[ 0 48539 35 22 134 740 4 10523 6515 6219 4696 1297\n", " 22 134 73 176 740 4 27805 1070 5803 1297 22 134\n", " 73 176 26141 4 21857 1297 22 134 73 176 740 4\n", " 3187 15092 36 26512 1253 45894 22 176 255 39596 4 9050\n", " 50 31417 27323 1297 22 246 112 73 176 740 4 10970\n", " 1836 30274 7666 31729 113 13497 35 440 12 387 5113 14208\n", " 41200 38490 35 22 1121 10 2016 132 12 45252 8929 12560\n", " 6 3344 6219 4696 6 15092 6 27805 1070 5803 8 9050\n", " 50 31417 27323 45863 22 5320 853 81 4761 2859 454 12652\n", " 26054 70 81 299 45863 22 387 14189 8 14351 195 728\n", " 55 4 4624 160 2859 45863 22 5320 853 11 21857 8\n", " 25629 131 3344 157 45863 22 36949 132 37031 6 1874 8\n", " 3989 88 389 28255 15 19957 2225 45863 22 7939 1413 454\n", " 933 6 59 389 728 72 2 1 1 1 1 1\n", " 1 1 1 1 1 1 1 1 1 1 1 1\n", " 1 1 1 1 1 1 1 1 1 1 1 1\n", " 1 1 1 1 1 1 1 1 1 1 1 1\n", " 1 1 1 1 1 1 1 1 1 1 1 1\n", " 1 1 1 1 1 1 1 1 1 1 1 1\n", " 1 1 1 1 1 1 1 1 1 1 1 1\n", " 1 1 1 1 1 1 1 1 1 1 1 1\n", " 1 1 1 1 1 1 1 1 1 1 1 1\n", " 1 1 1 1 1 1 1 1 1 1 1 1\n", " 1 1 1 1 1 1 1 1 1 1 1 1\n", " 1 1 1 1 1 1 1 1 1 1 1 1\n", " 1 1 1 1 1 1 1 1 1 1 1 1\n", " 1 1 1 1 1 1 1 1 1 1 1 1\n", " 1 1 1 1 1 1 1 1 1 1 1 1\n", " 1 1 1 1 1 1 1 1 1 1 1 1\n", " 1 1 1 1 1 1 1 1 1 
class TokenizedDataset(Dataset):
    """Map-style dataset over a 2-D array of already tokenized, padded sequences.

    Each item is a dict with the three keys a Hugging Face seq2seq model
    consumes: ``input_ids``, ``attention_mask`` and ``labels``.

    Args:
        numpy_data: array-like of token ids, one padded sequence per row.
            It is copied into a ``torch.long`` tensor, so a read-only or
            memory-mapped array can be passed.
        pad_token_id: id used for padding. Defaults to 1, the value the
            original implementation hard-coded.
        ignore_index: value written into ``labels`` at padding positions.
            Defaults to -100, the default ``ignore_index`` of
            ``torch.nn.CrossEntropyLoss``, so padding does not contribute to
            the loss.

    Attributes:
        input_ids: ``(num_sequences, seq_len)`` long tensor of token ids.
        attention_mask: same shape; 1 for real tokens, 0 for padding.
        labels: copy of ``input_ids`` with padding replaced by
            ``ignore_index``.

    NOTE(review): targets are the inputs themselves (no noising/masking), so
    the model is asked to reproduce its input. The training runs recorded in
    this notebook report losses of ~0 - confirm this copy objective is what is
    intended.
    """

    def __init__(self, numpy_data, pad_token_id=1, ignore_index=-100):
        # Single conversion of the source array; torch.tensor always copies.
        self.input_ids = torch.tensor(numpy_data, dtype=torch.long)

        pad_positions = self.input_ids == pad_token_id
        self.attention_mask = (~pad_positions).long()

        # Derive labels from input_ids instead of converting the numpy array a
        # second time, and mask padding so it is excluded from the loss.
        # Previously labels were an unmasked copy, which made the model get
        # scored on (trivially predictable) pad tokens.
        self.labels = self.input_ids.masked_fill(pad_positions, ignore_index)

    def __len__(self):
        """Return the number of sequences (rows)."""
        return len(self.input_ids)

    def __getitem__(self, idx):
        """Return the tensors for row ``idx`` keyed by model argument name."""
        return {
            'input_ids': self.input_ids[idx],
            'attention_mask': self.attention_mask[idx],
            'labels': self.labels[idx],
        }
# %%
# Release the notebook-level reference to the full dataset and trigger a GC
# pass; the collector's return value is the cell's displayed output.
# NOTE(review): train_dataset / val_dataset were created from `dataset` by
# random_split and presumably still reference it, so this may free little -
# confirm.
dataset = None
gc.collect()

# %%
# Select the compute device and report which one is in use.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print("Using GPU:", torch.cuda.get_device_name(0))
else:
    print("GPU not available, using CPU instead.")

# %%
# Start from the pretrained BART base checkpoint.
model_checkpoint = "facebook/bart-base"
model = BartForConditionalGeneration.from_pretrained(model_checkpoint)

# %%
training_args = TrainingArguments(
    # Output locations for checkpoints and logs.
    output_dir='/projects/academic/courses/cse676s24/bhanucha/results',
    logging_dir='/projects/academic/courses/cse676s24/bhanucha/logs',
    # Optimisation schedule.
    num_train_epochs=1,
    warmup_steps=500,
    weight_decay=0.01,
    fp16=True,
    # Batching: 4 samples per device x 2 accumulation steps = 8 samples per
    # optimizer step on each device.
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    per_device_eval_batch_size=8,
    # Logging, evaluation and checkpoint cadence.
    logging_steps=10000,
    evaluation_strategy="epoch",
    save_strategy="steps",
    save_steps=100000,
    save_total_limit=2,
)

# Wire the model, arguments and the train/validation splits together.
trainer = Trainer(
    eval_dataset=val_dataset,
    train_dataset=train_dataset,
    args=training_args,
    model=model,
)
\n", " \n", " \n", " [158134/158134 4:40:32, Epoch 1/1]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EpochTraining LossValidation Loss
10.0000000.000001

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.\n", "Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}\n" ] }, { "data": { "text/plain": [ "TrainOutput(global_step=158134, training_loss=0.005809515866546127, metrics={'train_runtime': 16834.5077, 'train_samples_per_second': 75.147, 'train_steps_per_second': 9.393, 'total_flos': 3.856796506128384e+17, 'train_loss': 0.005809515866546127, 'epoch': 1.0})" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "trainer.train()\n" ] }, { "cell_type": "code", "execution_count": 14, "id": "e5012aa5", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.\n", "Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}\n" ] } ], "source": [ "model.save_pretrained('/projects/academic/courses/cse676s24/bhanucha/saved_model')\n" ] }, { "cell_type": "code", "execution_count": 17, "id": "a1dcb25d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [158134/158134 4:38:50, Epoch 1/1]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EpochTraining LossValidation Loss
10.0000000.000001

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.\n", "Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}\n" ] }, { "data": { "text/plain": [ "TrainOutput(global_step=158134, training_loss=0.00010101553865172577, metrics={'train_runtime': 16730.7583, 'train_samples_per_second': 75.613, 'train_steps_per_second': 9.452, 'total_flos': 3.86710105227264e+17, 'train_loss': 0.00010101553865172577, 'epoch': 1.0})" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "trainer.train()\n" ] }, { "cell_type": "code", "execution_count": 16, "id": "576bdaa6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'eval_loss': 1.690886506366951e-06}\n" ] } ], "source": [ "evaluation_results = trainer.evaluate()\n", "print(evaluation_results)" ] }, { "cell_type": "code", "execution_count": 18, "id": "c82c318f", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. 
This warning will be raised to an exception in v4.41.\n", "Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}\n" ] } ], "source": [ "model.save_pretrained('/projects/academic/courses/cse676s24/bhanucha/saved_model2')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.6" } }, "nbformat": 4, "nbformat_minor": 5 }