{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "464f59f6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Defaulting to user installation because normal site-packages is not writeable\n",
      "Collecting accelerate\n",
      "  Downloading accelerate-0.29.3-py3-none-any.whl (297 kB)\n",
      "\u001b[K     |████████████████████████████████| 297 kB 3.7 MB/s eta 0:00:01\n",
      "\u001b[?25hRequirement already satisfied: pyyaml in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/pyyaml/5.4.1/lib/python3.9/site-packages (from accelerate) (5.4.1)\n",
      "Requirement already satisfied: numpy>=1.17 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/MPI/gcc/11.2.0/openmpi/4.1.1/scipy-bundle/2021.10/lib/python3.9/site-packages (from accelerate) (1.21.3)\n",
      "Requirement already satisfied: psutil in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from accelerate) (5.8.0)\n",
      "Requirement already satisfied: huggingface-hub in ./.local/lib/python3.9/site-packages (from accelerate) (0.22.2)\n",
      "Requirement already satisfied: torch>=1.10.0 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/MPI/gcc/11.2.0/openmpi/4.1.1/pytorch/1.13.1-CUDA-11.8.0/lib/python3.9/site-packages (from accelerate) (1.13.1)\n",
      "Requirement already satisfied: safetensors>=0.3.1 in ./.local/lib/python3.9/site-packages (from accelerate) (0.4.3)\n",
      "Requirement already satisfied: packaging>=20.0 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from accelerate) (20.9)\n",
      "Requirement already satisfied: pyparsing>=2.0.2 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from packaging>=20.0->accelerate) (2.4.7)\n",
      "Requirement already satisfied: typing_extensions in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/typing-extensions/4.3.0/lib/python3.9/site-packages (from torch>=1.10.0->accelerate) (4.3.0)\n",
      "Requirement already satisfied: tqdm>=4.42.1 in ./.local/lib/python3.9/site-packages (from huggingface-hub->accelerate) (4.66.2)\n",
      "Requirement already satisfied: filelock in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from huggingface-hub->accelerate) (3.0.12)\n",
      "Requirement already satisfied: fsspec>=2023.5.0 in ./.local/lib/python3.9/site-packages (from huggingface-hub->accelerate) (2024.3.1)\n",
      "Requirement already satisfied: requests in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from huggingface-hub->accelerate) (2.26.0)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from requests->huggingface-hub->accelerate) (3.2)\n",
      "Requirement already satisfied: charset-normalizer~=2.0.0 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from requests->huggingface-hub->accelerate) (2.0.4)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from requests->huggingface-hub->accelerate) (2021.5.30)\n",
      "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from requests->huggingface-hub->accelerate) (1.26.6)\n",
      "Installing collected packages: accelerate\n",
      "\u001b[33m  WARNING: The scripts accelerate, accelerate-config, accelerate-estimate-memory and accelerate-launch are installed in '/user/bhanucha/.local/bin' which is not on PATH.\n",
      "  Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\n",
      "Successfully installed accelerate-0.29.3\n",
      "\u001b[33mWARNING: You are using pip version 21.2.2; however, version 24.0 is available.\n",
      "You should consider upgrading via the '/cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/bin/python -m pip install --upgrade pip' command.\u001b[0m\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "pip install accelerate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "008ef190",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Defaulting to user installation because normal site-packages is not writeable\n",
      "Requirement already satisfied: transformers in ./.local/lib/python3.9/site-packages (4.40.0)\n",
      "Requirement already satisfied: pyyaml>=5.1 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/pyyaml/5.4.1/lib/python3.9/site-packages (from transformers) (5.4.1)\n",
      "Requirement already satisfied: regex!=2019.12.17 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from transformers) (2021.8.3)\n",
      "Requirement already satisfied: requests in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from transformers) (2.26.0)\n",
      "Requirement already satisfied: safetensors>=0.4.1 in ./.local/lib/python3.9/site-packages (from transformers) (0.4.3)\n",
      "Requirement already satisfied: numpy>=1.17 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/MPI/gcc/11.2.0/openmpi/4.1.1/scipy-bundle/2021.10/lib/python3.9/site-packages (from transformers) (1.21.3)\n",
      "Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in ./.local/lib/python3.9/site-packages (from transformers) (0.22.2)\n",
      "Requirement already satisfied: tqdm>=4.27 in ./.local/lib/python3.9/site-packages (from transformers) (4.66.2)\n",
      "Requirement already satisfied: tokenizers<0.20,>=0.19 in ./.local/lib/python3.9/site-packages (from transformers) (0.19.1)\n",
      "Requirement already satisfied: filelock in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from transformers) (3.0.12)\n",
      "Requirement already satisfied: packaging>=20.0 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from transformers) (20.9)\n",
      "Requirement already satisfied: typing-extensions>=3.7.4.3 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/typing-extensions/4.3.0/lib/python3.9/site-packages (from huggingface-hub<1.0,>=0.19.3->transformers) (4.3.0)\n",
      "Requirement already satisfied: fsspec>=2023.5.0 in ./.local/lib/python3.9/site-packages (from huggingface-hub<1.0,>=0.19.3->transformers) (2024.3.1)\n",
      "Requirement already satisfied: pyparsing>=2.0.2 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from packaging>=20.0->transformers) (2.4.7)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from requests->transformers) (3.2)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from requests->transformers) (2021.5.30)\n",
      "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from requests->transformers) (1.26.6)\n",
      "Requirement already satisfied: charset-normalizer~=2.0.0 in /cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/lib/python3.9/site-packages (from requests->transformers) (2.0.4)\n",
      "\u001b[33mWARNING: You are using pip version 21.2.2; however, version 24.0 is available.\n",
      "You should consider upgrading via the '/cvmfs/soft.ccr.buffalo.edu/versions/2023.01/easybuild/software/avx512/Compiler/gcccore/11.2.0/python/3.9.6/bin/python -m pip install --upgrade pip' command.\u001b[0m\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "pip install transformers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "1be7c37e",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-04-21 23:00:27.918259: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX512_VNNI AVX512_BF16 AVX_VNNI AMX_TILE AMX_INT8 AMX_BF16\n",
      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
      "2024-04-21 23:00:33.682894: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n"
     ]
    }
   ],
   "source": [
    "from transformers import BartTokenizer, BartForConditionalGeneration\n",
    "import torch\n",
    "from torch.utils.data import DataLoader, TensorDataset, random_split\n",
    "from transformers import Trainer, TrainingArguments\n",
    "from torch.utils.data import Dataset, DataLoader, random_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "976e0258",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sun Apr 21 23:00:40 2024       \r\n",
      "+-----------------------------------------------------------------------------+\r\n",
      "| NVIDIA-SMI 525.89.02    Driver Version: 525.89.02    CUDA Version: 12.0     |\r\n",
      "|-------------------------------+----------------------+----------------------+\r\n",
      "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\r\n",
      "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\r\n",
      "|                               |                      |               MIG M. |\r\n",
      "|===============================+======================+======================|\r\n",
      "|   0  NVIDIA H100 PCIe    On   | 00000000:B5:00.0 Off |                    0 |\r\n",
      "| N/A   39C    P0    52W / 350W |      0MiB / 81559MiB |      0%      Default |\r\n",
      "|                               |                      |             Disabled |\r\n",
      "+-------------------------------+----------------------+----------------------+\r\n",
      "                                                                               \r\n",
      "+-----------------------------------------------------------------------------+\r\n",
      "| Processes:                                                                  |\r\n",
      "|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\r\n",
      "|        ID   ID                                                   Usage      |\r\n",
      "|=============================================================================|\r\n",
      "|  No running processes found                                                 |\r\n",
      "+-----------------------------------------------------------------------------+\r\n"
     ]
    }
   ],
   "source": [
    "!nvidia-smi\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "c1bd2d96",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import numpy as np\n",
    "import gc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "4ab4a4db",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['logs', 'logsh', 'results', 'resultsh', 'train_data_v2.npy']"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "os.listdir('/projects/academic/courses/cse676s24/bhanucha')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "c23d50ec",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_data_path = '/projects/academic/courses/cse676s24/bhanucha/train_data_v2.npy'\n",
    "train_data = np.load(train_data_path, mmap_mode='r')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "2baa2aeb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Data type: <class 'numpy.memmap'>\n",
      "Data shape: (1405634, 512)\n",
      "Contents of the array: [[    0 48539    35    22   134   740     4 10523  6515  6219  4696  1297\n",
      "     22   134    73   176   740     4 27805  1070  5803  1297    22   134\n",
      "     73   176 26141     4 21857  1297    22   134    73   176   740     4\n",
      "   3187 15092    36 26512  1253 45894    22   176   255 39596     4  9050\n",
      "     50 31417 27323  1297    22   246   112    73   176   740     4 10970\n",
      "   1836 30274  7666 31729   113 13497    35   440    12   387  5113 14208\n",
      "  41200 38490    35    22  1121    10  2016   132    12 45252  8929 12560\n",
      "      6  3344  6219  4696     6 15092     6 27805  1070  5803     8  9050\n",
      "     50 31417 27323 45863    22  5320   853    81  4761  2859   454 12652\n",
      "  26054    70    81   299 45863    22   387 14189     8 14351   195   728\n",
      "     55     4  4624   160  2859 45863    22  5320   853    11 21857     8\n",
      "  25629   131  3344   157 45863    22 36949   132 37031     6  1874     8\n",
      "   3989    88   389 28255    15 19957  2225 45863    22  7939  1413   454\n",
      "    933     6    59   389   728    72     2     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1     1     1     1     1\n",
      "      1     1     1     1     1     1     1     1]]\n"
     ]
    }
   ],
   "source": [
    "print(\"Data type:\", type(train_data))\n",
    "print(\"Data shape:\", train_data.shape)\n",
    "if isinstance(train_data, np.ndarray) and train_data.dtype.names is not None:\n",
    "    print(\"Data field names:\", train_data.dtype.names)\n",
    "else:\n",
    "    print(\"Contents of the array:\", train_data[:1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "000b31cb",
   "metadata": {},
   "outputs": [],
   "source": [
    "class TokenizedDataset(Dataset):\n",
    "    def __init__(self, numpy_data):\n",
    "        self.input_ids = torch.tensor(numpy_data, dtype=torch.long)\n",
    "        self.attention_mask = (self.input_ids != 1).long()\n",
    "\n",
    "        # Assuming labels are the same as input_ids for an autoencoding task\n",
    "        # If different, you would need to adjust this\n",
    "        self.labels = torch.tensor(numpy_data, dtype=torch.long)\n",
    "\n",
    "    def __len__(self):\n",
    "        return len(self.input_ids)\n",
    "    \n",
    "    def __getitem__(self, idx):\n",
    "        return {\n",
    "            'input_ids': self.input_ids[idx],\n",
    "            'attention_mask': self.attention_mask[idx],\n",
    "            'labels': self.labels[idx]  # This line is critical\n",
    "        }\n",
    "\n",
    "    \n",
    "dataset = TokenizedDataset(train_data)\n",
    "\n",
    "# Split the dataset into training and validation sets\n",
    "train_size = int(0.9 * len(dataset))\n",
    "val_size = len(dataset) - train_size\n",
    "train_dataset, val_dataset = random_split(dataset, [train_size, val_size])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "28a80ef5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "11"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# train_data = None\n",
    "dataset = None\n",
    "gc.collect() "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "127736dc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using GPU: NVIDIA H100 PCIe\n"
     ]
    }
   ],
   "source": [
    "# Check GPU availability\n",
    "if torch.cuda.is_available():\n",
    "    device = torch.device(\"cuda\")\n",
    "    print(\"Using GPU:\", torch.cuda.get_device_name(0))\n",
    "else:\n",
    "    device = torch.device(\"cpu\")\n",
    "    print(\"GPU not available, using CPU instead.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "b794f649",
   "metadata": {},
   "outputs": [],
   "source": [
    "model_checkpoint = \"facebook/bart-base\"\n",
    "model = BartForConditionalGeneration.from_pretrained(model_checkpoint)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "b426b6b2",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "training_args = TrainingArguments(\n",
    "    output_dir='/projects/academic/courses/cse676s24/bhanucha/results',\n",
    "    num_train_epochs=1,\n",
    "#     per_device_train_batch_size=16,\n",
    "    per_device_eval_batch_size=8,\n",
    "    warmup_steps=500,\n",
    "    weight_decay=0.01,\n",
    "    logging_dir='/projects/academic/courses/cse676s24/bhanucha/logs',\n",
    "    logging_steps=10000,\n",
    "    evaluation_strategy=\"epoch\",\n",
    "    save_strategy=\"steps\",  \n",
    "    save_steps=100000,  \n",
    "    save_total_limit=2,  \n",
    "    per_device_train_batch_size=4,  \n",
    "    gradient_accumulation_steps=2,  \n",
    "    fp16=True,\n",
    ")\n",
    "\n",
    "trainer = Trainer(\n",
    "    model=model,\n",
    "    args=training_args,\n",
    "    train_dataset=train_dataset,\n",
    "    eval_dataset=val_dataset\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "22d08365",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='158134' max='158134' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [158134/158134 4:40:32, Epoch 1/1]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Epoch</th>\n",
       "      <th>Training Loss</th>\n",
       "      <th>Validation Loss</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000001</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.\n",
      "Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "TrainOutput(global_step=158134, training_loss=0.005809515866546127, metrics={'train_runtime': 16834.5077, 'train_samples_per_second': 75.147, 'train_steps_per_second': 9.393, 'total_flos': 3.856796506128384e+17, 'train_loss': 0.005809515866546127, 'epoch': 1.0})"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "trainer.train()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "e5012aa5",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.\n",
      "Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}\n"
     ]
    }
   ],
   "source": [
    "model.save_pretrained('/projects/academic/courses/cse676s24/bhanucha/saved_model')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "a1dcb25d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='158134' max='158134' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [158134/158134 4:38:50, Epoch 1/1]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Epoch</th>\n",
       "      <th>Training Loss</th>\n",
       "      <th>Validation Loss</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000001</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.\n",
      "Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "TrainOutput(global_step=158134, training_loss=0.00010101553865172577, metrics={'train_runtime': 16730.7583, 'train_samples_per_second': 75.613, 'train_steps_per_second': 9.452, 'total_flos': 3.86710105227264e+17, 'train_loss': 0.00010101553865172577, 'epoch': 1.0})"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "trainer.train()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "576bdaa6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'eval_loss': 1.690886506366951e-06}\n"
     ]
    }
   ],
   "source": [
    "evaluation_results = trainer.evaluate()\n",
    "print(evaluation_results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "c82c318f",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.\n",
      "Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}\n"
     ]
    }
   ],
   "source": [
    "model.save_pretrained('/projects/academic/courses/cse676s24/bhanucha/saved_model2')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}