Upload flant5-viva.ipynb
Browse files- flant5-viva.ipynb +1 -0
flant5-viva.ipynb
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.13","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"# This Python 3 environment comes with many helpful analytics libraries installed\n# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n# For example, here's several helpful packages to load\n\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n\n# Input data files are available in the read-only \"../input/\" directory\n# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n\nimport os\nfor dirname, _, filenames in os.walk('/kaggle/input'):\n for filename in filenames:\n print(os.path.join(dirname, filename))\n\n# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"!pip install simplet5","metadata":{"execution":{"iopub.status.busy":"2024-02-09T16:56:06.137920Z","iopub.execute_input":"2024-02-09T16:56:06.138271Z","iopub.status.idle":"2024-02-09T16:56:31.709216Z","shell.execute_reply.started":"2024-02-09T16:56:06.138219Z","shell.execute_reply":"2024-02-09T16:56:31.708200Z"},"trusted":true},"execution_count":2,"outputs":[{"name":"stdout","text":"Collecting simplet5\n Downloading simplet5-0.1.4.tar.gz (7.3 kB)\n Preparing metadata (setup.py) ... \u001b[?25ldone\n\u001b[?25hRequirement already satisfied: numpy in /opt/conda/lib/python3.10/site-packages (from simplet5) (1.24.4)\nRequirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (from simplet5) (2.1.4)\nRequirement already satisfied: sentencepiece in /opt/conda/lib/python3.10/site-packages (from simplet5) (0.1.99)\nRequirement already satisfied: torch!=1.8.0,>=1.7.0 in /opt/conda/lib/python3.10/site-packages (from simplet5) (2.1.2)\nCollecting transformers==4.16.2 (from simplet5)\n Downloading transformers-4.16.2-py3-none-any.whl (3.5 MB)\n\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m3.5/3.5 MB\u001b[0m \u001b[31m65.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n\u001b[?25hCollecting pytorch-lightning==1.5.10 (from simplet5)\n Downloading pytorch_lightning-1.5.10-py3-none-any.whl (527 kB)\n\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m527.7/527.7 kB\u001b[0m \u001b[31m30.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hRequirement already satisfied: future>=0.17.1 in /opt/conda/lib/python3.10/site-packages (from pytorch-lightning==1.5.10->simplet5) (0.18.3)\nRequirement already satisfied: tqdm>=4.41.0 in /opt/conda/lib/python3.10/site-packages (from pytorch-lightning==1.5.10->simplet5) (4.66.1)\nRequirement already satisfied: PyYAML>=5.1 in /opt/conda/lib/python3.10/site-packages (from pytorch-lightning==1.5.10->simplet5) (6.0.1)\nRequirement already satisfied: fsspec!=2021.06.0,>=2021.05.0 in /opt/conda/lib/python3.10/site-packages (from fsspec[http]!=2021.06.0,>=2021.05.0->pytorch-lightning==1.5.10->simplet5) (2023.10.0)\nRequirement already satisfied: tensorboard>=2.2.0 in /opt/conda/lib/python3.10/site-packages (from pytorch-lightning==1.5.10->simplet5) (2.15.1)\nRequirement already satisfied: torchmetrics>=0.4.1 in /opt/conda/lib/python3.10/site-packages (from pytorch-lightning==1.5.10->simplet5) (1.3.0.post0)\nCollecting pyDeprecate==0.3.1 (from pytorch-lightning==1.5.10->simplet5)\n Downloading pyDeprecate-0.3.1-py3-none-any.whl (10 kB)\nRequirement already satisfied: packaging>=17.0 in /opt/conda/lib/python3.10/site-packages (from pytorch-lightning==1.5.10->simplet5) (21.3)\nRequirement already satisfied: typing-extensions in /opt/conda/lib/python3.10/site-packages (from pytorch-lightning==1.5.10->simplet5) (4.9.0)\nCollecting setuptools==59.5.0 (from pytorch-lightning==1.5.10->simplet5)\n Downloading setuptools-59.5.0-py3-none-any.whl (952 kB)\n\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m952.4/952.4 kB\u001b[0m \u001b[31m44.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hRequirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from transformers==4.16.2->simplet5) (3.13.1)\nRequirement already satisfied: huggingface-hub<1.0,>=0.1.0 in /opt/conda/lib/python3.10/site-packages (from transformers==4.16.2->simplet5) (0.20.3)\nRequirement already satisfied: regex!=2019.12.17 in /opt/conda/lib/python3.10/site-packages (from transformers==4.16.2->simplet5) (2023.12.25)\nRequirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from transformers==4.16.2->simplet5) (2.31.0)\nCollecting sacremoses (from transformers==4.16.2->simplet5)\n Downloading sacremoses-0.1.1-py3-none-any.whl.metadata (8.3 kB)\nRequirement already satisfied: tokenizers!=0.11.3,>=0.10.1 in /opt/conda/lib/python3.10/site-packages (from transformers==4.16.2->simplet5) (0.15.1)\nRequirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch!=1.8.0,>=1.7.0->simplet5) (1.12)\nRequirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch!=1.8.0,>=1.7.0->simplet5) (3.2.1)\nRequirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch!=1.8.0,>=1.7.0->simplet5) (3.1.2)\nRequirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from pandas->simplet5) (2.8.2)\nRequirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas->simplet5) (2023.3.post1)\nRequirement already satisfied: tzdata>=2022.1 in /opt/conda/lib/python3.10/site-packages (from pandas->simplet5) (2023.4)\nRequirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /opt/conda/lib/python3.10/site-packages (from fsspec[http]!=2021.06.0,>=2021.05.0->pytorch-lightning==1.5.10->simplet5) (3.9.1)\nRequirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.10/site-packages (from packaging>=17.0->pytorch-lightning==1.5.10->simplet5) (3.1.1)\nRequirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->simplet5) (1.16.0)\nRequirement already satisfied: absl-py>=0.4 in /opt/conda/lib/python3.10/site-packages (from tensorboard>=2.2.0->pytorch-lightning==1.5.10->simplet5) (1.4.0)\nRequirement already satisfied: grpcio>=1.48.2 in /opt/conda/lib/python3.10/site-packages (from tensorboard>=2.2.0->pytorch-lightning==1.5.10->simplet5) (1.51.1)\nRequirement already satisfied: google-auth<3,>=1.6.3 in /opt/conda/lib/python3.10/site-packages (from tensorboard>=2.2.0->pytorch-lightning==1.5.10->simplet5) (2.26.1)\nRequirement already satisfied: google-auth-oauthlib<2,>=0.5 in /opt/conda/lib/python3.10/site-packages (from tensorboard>=2.2.0->pytorch-lightning==1.5.10->simplet5) (1.2.0)\nRequirement already satisfied: markdown>=2.6.8 in /opt/conda/lib/python3.10/site-packages (from tensorboard>=2.2.0->pytorch-lightning==1.5.10->simplet5) (3.5.2)\nRequirement already satisfied: protobuf<4.24,>=3.19.6 in /opt/conda/lib/python3.10/site-packages (from tensorboard>=2.2.0->pytorch-lightning==1.5.10->simplet5) (3.20.3)\nRequirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /opt/conda/lib/python3.10/site-packages (from tensorboard>=2.2.0->pytorch-lightning==1.5.10->simplet5) (0.7.2)\nRequirement already satisfied: werkzeug>=1.0.1 in /opt/conda/lib/python3.10/site-packages (from tensorboard>=2.2.0->pytorch-lightning==1.5.10->simplet5) (3.0.1)\nRequirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests->transformers==4.16.2->simplet5) (3.3.2)\nRequirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests->transformers==4.16.2->simplet5) (3.6)\nRequirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests->transformers==4.16.2->simplet5) (1.26.18)\nRequirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests->transformers==4.16.2->simplet5) (2023.11.17)\nRequirement already satisfied: lightning-utilities>=0.8.0 in /opt/conda/lib/python3.10/site-packages (from torchmetrics>=0.4.1->pytorch-lightning==1.5.10->simplet5) (0.10.1)\nRequirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch!=1.8.0,>=1.7.0->simplet5) (2.1.3)\nRequirement already satisfied: click in /opt/conda/lib/python3.10/site-packages (from sacremoses->transformers==4.16.2->simplet5) (8.1.7)\nRequirement already satisfied: joblib in /opt/conda/lib/python3.10/site-packages (from sacremoses->transformers==4.16.2->simplet5) (1.3.2)\nRequirement already satisfied: mpmath>=0.19 in /opt/conda/lib/python3.10/site-packages (from sympy->torch!=1.8.0,>=1.7.0->simplet5) (1.3.0)\nRequirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]!=2021.06.0,>=2021.05.0->pytorch-lightning==1.5.10->simplet5) (23.2.0)\nRequirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.10/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]!=2021.06.0,>=2021.05.0->pytorch-lightning==1.5.10->simplet5) (6.0.4)\nRequirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]!=2021.06.0,>=2021.05.0->pytorch-lightning==1.5.10->simplet5) (1.9.3)\nRequirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]!=2021.06.0,>=2021.05.0->pytorch-lightning==1.5.10->simplet5) (1.4.1)\nRequirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.10/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]!=2021.06.0,>=2021.05.0->pytorch-lightning==1.5.10->simplet5) (1.3.1)\nRequirement already satisfied: async-timeout<5.0,>=4.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]!=2021.06.0,>=2021.05.0->pytorch-lightning==1.5.10->simplet5) (4.0.3)\nRequirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from google-auth<3,>=1.6.3->tensorboard>=2.2.0->pytorch-lightning==1.5.10->simplet5) (4.2.4)\nRequirement already satisfied: pyasn1-modules>=0.2.1 in /opt/conda/lib/python3.10/site-packages (from google-auth<3,>=1.6.3->tensorboard>=2.2.0->pytorch-lightning==1.5.10->simplet5) (0.3.0)\nRequirement already satisfied: rsa<5,>=3.1.4 in /opt/conda/lib/python3.10/site-packages (from google-auth<3,>=1.6.3->tensorboard>=2.2.0->pytorch-lightning==1.5.10->simplet5) (4.9)\nRequirement already satisfied: requests-oauthlib>=0.7.0 in /opt/conda/lib/python3.10/site-packages (from google-auth-oauthlib<2,>=0.5->tensorboard>=2.2.0->pytorch-lightning==1.5.10->simplet5) (1.3.1)\nRequirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /opt/conda/lib/python3.10/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard>=2.2.0->pytorch-lightning==1.5.10->simplet5) (0.5.1)\nRequirement already satisfied: oauthlib>=3.0.0 in /opt/conda/lib/python3.10/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<2,>=0.5->tensorboard>=2.2.0->pytorch-lightning==1.5.10->simplet5) (3.2.2)\nDownloading sacremoses-0.1.1-py3-none-any.whl (897 kB)\n\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m897.5/897.5 kB\u001b[0m \u001b[31m43.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n\u001b[?25hBuilding wheels for collected packages: simplet5\n Building wheel for simplet5 (setup.py) ... \u001b[?25ldone\n\u001b[?25h Created wheel for simplet5: filename=simplet5-0.1.4-py3-none-any.whl size=6853 sha256=3d35bd8e6d8282b0d81ff8c91e4c51b629356be16fa11d94c68b7b6ed9dc025f\n Stored in directory: /root/.cache/pip/wheels/b4/7d/af/743765400878438a7593f13f89fdf4004dcde0f2a8e6cb6684\nSuccessfully built simplet5\n\u001b[33mDEPRECATION: pytorch-lightning 1.5.10 has a non-standard dependency specifier torch>=1.7.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n\u001b[0mInstalling collected packages: setuptools, sacremoses, pyDeprecate, transformers, pytorch-lightning, simplet5\n Attempting uninstall: setuptools\n Found existing installation: setuptools 69.0.3\n Uninstalling setuptools-69.0.3:\n Successfully uninstalled setuptools-69.0.3\n Attempting uninstall: transformers\n Found existing installation: transformers 4.37.0\n Uninstalling transformers-4.37.0:\n Successfully uninstalled transformers-4.37.0\n Attempting uninstall: pytorch-lightning\n Found existing installation: pytorch-lightning 2.1.3\n Uninstalling pytorch-lightning-2.1.3:\n Successfully uninstalled pytorch-lightning-2.1.3\n\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\narviz 0.17.0 requires setuptools>=60.0.0, but you have setuptools 59.5.0 which is incompatible.\nconda 23.7.4 requires setuptools>=60.0.0, but you have setuptools 59.5.0 which is incompatible.\nkaggle-environments 1.14.3 requires transformers>=4.33.1, but you have transformers 4.16.2 which is incompatible.\nlibpysal 4.9.2 requires packaging>=22, but you have packaging 21.3 which is incompatible.\nlibpysal 4.9.2 requires shapely>=2.0.1, but you have shapely 1.8.5.post1 which is incompatible.\nmomepy 0.7.0 requires shapely>=2, but you have shapely 1.8.5.post1 which is incompatible.\nosmnx 1.8.1 requires shapely>=2.0, but you have shapely 1.8.5.post1 which is incompatible.\nspopt 0.6.0 requires shapely>=2.0.1, but you have shapely 1.8.5.post1 which is incompatible.\ntensorflowjs 4.16.0 requires packaging~=23.1, but you have packaging 21.3 which is incompatible.\ntrl 0.7.10 requires transformers>=4.31.0, but you have transformers 4.16.2 which is incompatible.\u001b[0m\u001b[31m\n\u001b[0mSuccessfully installed pyDeprecate-0.3.1 pytorch-lightning-1.5.10 sacremoses-0.1.1 setuptools-59.5.0 simplet5-0.1.4 transformers-4.16.2\n","output_type":"stream"}]},{"cell_type":"code","source":"!pip install -q -U datasets accelerate peft trl bitsandbytes","metadata":{"execution":{"iopub.status.busy":"2024-02-09T16:56:31.715618Z","iopub.execute_input":"2024-02-09T16:56:31.715980Z","iopub.status.idle":"2024-02-09T16:56:53.144584Z","shell.execute_reply.started":"2024-02-09T16:56:31.715930Z","shell.execute_reply":"2024-02-09T16:56:53.143456Z"},"trusted":true},"execution_count":3,"outputs":[{"name":"stdout","text":"\u001b[33mDEPRECATION: pytorch-lightning 1.5.10 has a non-standard dependency specifier torch>=1.7.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n\u001b[0m\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\nsimplet5 0.1.4 requires transformers==4.16.2, but you have transformers 4.37.2 which is incompatible.\u001b[0m\u001b[31m\n\u001b[0m","output_type":"stream"}]},{"cell_type":"code","source":"from datasets import load_dataset, Dataset, DatasetDict\ndataset = load_dataset(\"mayank200456789/Viva\")\ndataset","metadata":{"execution":{"iopub.status.busy":"2024-02-09T16:56:53.145905Z","iopub.execute_input":"2024-02-09T16:56:53.146218Z","iopub.status.idle":"2024-02-09T16:57:00.237013Z","shell.execute_reply.started":"2024-02-09T16:56:53.146191Z","shell.execute_reply":"2024-02-09T16:57:00.236142Z"},"trusted":true},"execution_count":4,"outputs":[{"output_type":"display_data","data":{"text/plain":"Downloading readme: 0%| | 0.00/21.0 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"c88011f5d35d4852a5526e7aa34ae2d7"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Downloading data: 0%| | 0.00/12.0k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"97c66c67981444bb86467956fe0ef805"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Generating train split: 0 examples [00:00, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"cd2e5b9f81744d55b7fa6ab8998bc30d"}},"metadata":{}},{"execution_count":4,"output_type":"execute_result","data":{"text/plain":"DatasetDict({\n train: Dataset({\n features: ['text'],\n num_rows: 381\n })\n})"},"metadata":{}}]},{"cell_type":"code","source":"def remove_blank_strings(dataset):\n return dataset.filter(lambda example: example['text'] != \"\")\n\n# Apply the function to the 'train' split\ndataset[\"train\"] = remove_blank_strings(dataset[\"train\"])\n\n# Print the modified dataset\nprint(DatasetDict(dataset))","metadata":{"execution":{"iopub.status.busy":"2024-02-09T16:59:18.096048Z","iopub.execute_input":"2024-02-09T16:59:18.096432Z","iopub.status.idle":"2024-02-09T16:59:18.125779Z","shell.execute_reply.started":"2024-02-09T16:59:18.096400Z","shell.execute_reply":"2024-02-09T16:59:18.124889Z"},"trusted":true},"execution_count":16,"outputs":[{"output_type":"display_data","data":{"text/plain":"Filter: 0%| | 0/221 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"875f9afee65740e99ea4de0eefdc56ec"}},"metadata":{}},{"name":"stdout","text":"DatasetDict({\n train: Dataset({\n features: ['text'],\n num_rows: 221\n })\n})\n","output_type":"stream"}]},{"cell_type":"code","source":"def remove_blank_strings(dataset):\n return dataset.filter(lambda example: example['text'] != \"\")\n\n# Apply the function to the 'train' split\ndataset[\"train\"] = remove_blank_strings(dataset[\"train\"])\n\n# Print the modified dataset\nprint(DatasetDict(dataset))","metadata":{"execution":{"iopub.status.busy":"2024-02-09T16:59:20.505739Z","iopub.execute_input":"2024-02-09T16:59:20.506522Z","iopub.status.idle":"2024-02-09T16:59:20.538424Z","shell.execute_reply.started":"2024-02-09T16:59:20.506489Z","shell.execute_reply":"2024-02-09T16:59:20.537290Z"},"trusted":true},"execution_count":17,"outputs":[{"output_type":"display_data","data":{"text/plain":"Filter: 0%| | 0/221 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"a6f31012cecb470eb507d2c531d90a74"}},"metadata":{}},{"name":"stdout","text":"DatasetDict({\n train: Dataset({\n features: ['text'],\n num_rows: 221\n })\n})\n","output_type":"stream"}]},{"cell_type":"code","source":"import pandas as pd\n\ndf = pd.DataFrame({\n \"instruction\": dataset[\"train\"][\"instruction\"],\n \"output\": dataset[\"train\"][\"output\"]\n})\n\ndf.to_csv(\"dataset.csv\", index=False)","metadata":{"execution":{"iopub.status.busy":"2024-02-09T16:58:30.583249Z","iopub.execute_input":"2024-02-09T16:58:30.583957Z","iopub.status.idle":"2024-02-09T16:58:30.652402Z","shell.execute_reply.started":"2024-02-09T16:58:30.583922Z","shell.execute_reply":"2024-02-09T16:58:30.651221Z"},"trusted":true},"execution_count":15,"outputs":[{"traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)","Cell \u001b[0;32mIn[15], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpd\u001b[39;00m\n\u001b[1;32m 3\u001b[0m df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame({\n\u001b[0;32m----> 4\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minstruction\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[43mdataset\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtrain\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43minstruction\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m,\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124moutput\u001b[39m\u001b[38;5;124m\"\u001b[39m: dataset[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtrain\u001b[39m\u001b[38;5;124m\"\u001b[39m][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124moutput\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 6\u001b[0m })\n\u001b[1;32m 8\u001b[0m df\u001b[38;5;241m.\u001b[39mto_csv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdataset.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m, index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/datasets/arrow_dataset.py:2810\u001b[0m, in \u001b[0;36mDataset.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 2808\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__getitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, key): \u001b[38;5;66;03m# noqa: F811\u001b[39;00m\n\u001b[1;32m 2809\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Can be used to index columns (by string names) or rows (by integer index or iterable of indices or bools).\"\"\"\u001b[39;00m\n\u001b[0;32m-> 2810\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_getitem\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/datasets/arrow_dataset.py:2794\u001b[0m, in \u001b[0;36mDataset._getitem\u001b[0;34m(self, key, **kwargs)\u001b[0m\n\u001b[1;32m 2792\u001b[0m format_kwargs \u001b[38;5;241m=\u001b[39m format_kwargs \u001b[38;5;28;01mif\u001b[39;00m format_kwargs \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m {}\n\u001b[1;32m 2793\u001b[0m formatter \u001b[38;5;241m=\u001b[39m get_formatter(format_type, features\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_info\u001b[38;5;241m.\u001b[39mfeatures, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mformat_kwargs)\n\u001b[0;32m-> 2794\u001b[0m pa_subtable \u001b[38;5;241m=\u001b[39m \u001b[43mquery_table\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_data\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindices\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_indices\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2795\u001b[0m formatted_output \u001b[38;5;241m=\u001b[39m format_table(\n\u001b[1;32m 2796\u001b[0m pa_subtable, key, formatter\u001b[38;5;241m=\u001b[39mformatter, format_columns\u001b[38;5;241m=\u001b[39mformat_columns, output_all_columns\u001b[38;5;241m=\u001b[39moutput_all_columns\n\u001b[1;32m 2797\u001b[0m )\n\u001b[1;32m 2798\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m formatted_output\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/datasets/formatting/formatting.py:580\u001b[0m, in \u001b[0;36mquery_table\u001b[0;34m(table, key, indices)\u001b[0m\n\u001b[1;32m 578\u001b[0m _raise_bad_key_type(key)\n\u001b[1;32m 579\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(key, \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m--> 580\u001b[0m \u001b[43m_check_valid_column_key\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtable\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumn_names\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 581\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 582\u001b[0m size \u001b[38;5;241m=\u001b[39m indices\u001b[38;5;241m.\u001b[39mnum_rows \u001b[38;5;28;01mif\u001b[39;00m indices \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m table\u001b[38;5;241m.\u001b[39mnum_rows\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/datasets/formatting/formatting.py:520\u001b[0m, in \u001b[0;36m_check_valid_column_key\u001b[0;34m(key, columns)\u001b[0m\n\u001b[1;32m 518\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_check_valid_column_key\u001b[39m(key: \u001b[38;5;28mstr\u001b[39m, columns: List[\u001b[38;5;28mstr\u001b[39m]) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 519\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m columns:\n\u001b[0;32m--> 520\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mColumn \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkey\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m not in the dataset. Current columns in the dataset: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcolumns\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n","\u001b[0;31mKeyError\u001b[0m: \"Column instruction not in the dataset. Current columns in the dataset: ['text']\""],"ename":"KeyError","evalue":"\"Column instruction not in the dataset. Current columns in the dataset: ['text']\"","output_type":"error"}]},{"cell_type":"markdown","source":"","metadata":{}},{"cell_type":"code","source":"import pandas as pd\n\ndf = pd.DataFrame({\n \"instruction\": dataset[\"train\"][\"instruction\"],\n \"output\": dataset[\"train\"][\"output\"]\n})\n\ndf.to_csv(\"dataset.csv\", index=False)","metadata":{"execution":{"iopub.status.busy":"2024-02-09T16:57:40.314602Z","iopub.execute_input":"2024-02-09T16:57:40.315312Z","iopub.status.idle":"2024-02-09T16:57:40.384510Z","shell.execute_reply.started":"2024-02-09T16:57:40.315275Z","shell.execute_reply":"2024-02-09T16:57:40.383123Z"},"trusted":true},"execution_count":14,"outputs":[{"traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)","Cell \u001b[0;32mIn[14], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpd\u001b[39;00m\n\u001b[1;32m 3\u001b[0m df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame({\n\u001b[0;32m----> 4\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minstruction\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[43mdataset\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtrain\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43minstruction\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m,\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124moutput\u001b[39m\u001b[38;5;124m\"\u001b[39m: dataset[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtrain\u001b[39m\u001b[38;5;124m\"\u001b[39m][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124moutput\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 6\u001b[0m })\n\u001b[1;32m 8\u001b[0m df\u001b[38;5;241m.\u001b[39mto_csv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdataset.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m, index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/datasets/arrow_dataset.py:2810\u001b[0m, in \u001b[0;36mDataset.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 2808\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__getitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, key): \u001b[38;5;66;03m# noqa: F811\u001b[39;00m\n\u001b[1;32m 2809\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Can be used to index columns (by string names) or rows (by integer index or iterable of indices or bools).\"\"\"\u001b[39;00m\n\u001b[0;32m-> 2810\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_getitem\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/datasets/arrow_dataset.py:2794\u001b[0m, in \u001b[0;36mDataset._getitem\u001b[0;34m(self, key, **kwargs)\u001b[0m\n\u001b[1;32m 2792\u001b[0m format_kwargs \u001b[38;5;241m=\u001b[39m format_kwargs \u001b[38;5;28;01mif\u001b[39;00m format_kwargs \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m {}\n\u001b[1;32m 2793\u001b[0m formatter \u001b[38;5;241m=\u001b[39m get_formatter(format_type, features\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_info\u001b[38;5;241m.\u001b[39mfeatures, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mformat_kwargs)\n\u001b[0;32m-> 2794\u001b[0m pa_subtable \u001b[38;5;241m=\u001b[39m \u001b[43mquery_table\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_data\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindices\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_indices\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2795\u001b[0m formatted_output \u001b[38;5;241m=\u001b[39m format_table(\n\u001b[1;32m 2796\u001b[0m pa_subtable, key, formatter\u001b[38;5;241m=\u001b[39mformatter, format_columns\u001b[38;5;241m=\u001b[39mformat_columns, output_all_columns\u001b[38;5;241m=\u001b[39moutput_all_columns\n\u001b[1;32m 2797\u001b[0m )\n\u001b[1;32m 2798\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m formatted_output\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/datasets/formatting/formatting.py:580\u001b[0m, in \u001b[0;36mquery_table\u001b[0;34m(table, key, indices)\u001b[0m\n\u001b[1;32m 578\u001b[0m _raise_bad_key_type(key)\n\u001b[1;32m 579\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(key, \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m--> 580\u001b[0m \u001b[43m_check_valid_column_key\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtable\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumn_names\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 581\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 582\u001b[0m size \u001b[38;5;241m=\u001b[39m indices\u001b[38;5;241m.\u001b[39mnum_rows \u001b[38;5;28;01mif\u001b[39;00m indices \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m table\u001b[38;5;241m.\u001b[39mnum_rows\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/datasets/formatting/formatting.py:520\u001b[0m, in \u001b[0;36m_check_valid_column_key\u001b[0;34m(key, columns)\u001b[0m\n\u001b[1;32m 518\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_check_valid_column_key\u001b[39m(key: \u001b[38;5;28mstr\u001b[39m, columns: List[\u001b[38;5;28mstr\u001b[39m]) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 519\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m columns:\n\u001b[0;32m--> 520\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mColumn \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkey\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m not in the dataset. Current columns in the dataset: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcolumns\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n","\u001b[0;31mKeyError\u001b[0m: \"Column instruction not in the dataset. Current columns in the dataset: ['text']\""],"ename":"KeyError","evalue":"\"Column instruction not in the dataset. Current columns in the dataset: ['text']\"","output_type":"error"}]},{"cell_type":"code","source":"df_train=df","metadata":{"execution":{"iopub.status.busy":"2024-02-09T16:57:33.295721Z","iopub.execute_input":"2024-02-09T16:57:33.296084Z","iopub.status.idle":"2024-02-09T16:57:33.322560Z","shell.execute_reply.started":"2024-02-09T16:57:33.296054Z","shell.execute_reply":"2024-02-09T16:57:33.321243Z"},"trusted":true},"execution_count":12,"outputs":[{"traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)","Cell \u001b[0;32mIn[12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m df_train\u001b[38;5;241m=\u001b[39m\u001b[43mdf\u001b[49m\n","\u001b[0;31mNameError\u001b[0m: name 'df' is not defined"],"ename":"NameError","evalue":"name 'df' is not defined","output_type":"error"}]},{"cell_type":"code","source":"df_train = df_train.rename(columns={\"output\":\"target_text\", \"instruction\":\"source_text\"})\ndf_train = df_train[['source_text', 'target_text']]\n \n# T5 model expects a task related prefix: since it is a question answering task, we will add a prefix \"answer: \"\ndf_train['source_text'] = \"answer: \" + df_train['source_text']","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"from simplet5 import SimpleT5\n \nmodel = SimpleT5()\nmodel.from_pretrained(model_type=\"t5\", model_name=\"t5-base\")from simplet5 import SimpleT5\n \nmodel = SimpleT5()\nmodel.from_pretrained(model_type=\"t5\", model_name=\"t5-base\")","metadata":{"execution":{"iopub.status.busy":"2024-02-09T16:57:18.507197Z","iopub.execute_input":"2024-02-09T16:57:18.508303Z","iopub.status.idle":"2024-02-09T16:57:18.514478Z","shell.execute_reply.started":"2024-02-09T16:57:18.508241Z","shell.execute_reply":"2024-02-09T16:57:18.513287Z"},"trusted":true},"execution_count":8,"outputs":[{"traceback":["\u001b[0;36m Cell \u001b[0;32mIn[8], line 4\u001b[0;36m\u001b[0m\n\u001b[0;31m model.from_pretrained(model_type=\"t5\", model_name=\"t5-base\")from simplet5 import SimpleT5\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"],"ename":"SyntaxError","evalue":"invalid syntax (11971172.py, line 4)","output_type":"error"}]},{"cell_type":"code","source":"dataset = {\n \"train\": Dataset.from_dict({\n \"instruction\": dataset[\"train\"][\"text\"][:220:2],\n \"output\": dataset[\"train\"][\"text\"][1::2]\n })\n}\n\n\nprint(DatasetDict(dataset))","metadata":{"execution":{"iopub.status.busy":"2024-02-09T16:59:39.851774Z","iopub.execute_input":"2024-02-09T16:59:39.852523Z","iopub.status.idle":"2024-02-09T16:59:39.868269Z","shell.execute_reply.started":"2024-02-09T16:59:39.852490Z","shell.execute_reply":"2024-02-09T16:59:39.867306Z"},"trusted":true},"execution_count":18,"outputs":[{"name":"stdout","text":"DatasetDict({\n train: Dataset({\n features: ['instruction', 'output'],\n num_rows: 110\n })\n})\n","output_type":"stream"}]},{"cell_type":"code","source":"import pandas as pd\n\ndf = pd.DataFrame({\n \"instruction\": dataset[\"train\"][\"instruction\"],\n \"output\": dataset[\"train\"][\"output\"]\n})\n\ndf.to_csv(\"dataset.csv\", index=False)","metadata":{"execution":{"iopub.status.busy":"2024-02-09T16:59:50.479464Z","iopub.execute_input":"2024-02-09T16:59:50.480102Z","iopub.status.idle":"2024-02-09T16:59:50.491346Z","shell.execute_reply.started":"2024-02-09T16:59:50.480068Z","shell.execute_reply":"2024-02-09T16:59:50.490507Z"},"trusted":true},"execution_count":19,"outputs":[]},{"cell_type":"code","source":"df_train=df\ndf_train = df_train.rename(columns={\"output\":\"target_text\", \"instruction\":\"source_text\"})\ndf_train = df_train[['source_text', 'target_text']]\n \n# T5 model expects a task related prefix: since it is a question answering task, we will add a prefix \"answer: \"\ndf_train['source_text'] = \"answer: \" + df_train['source_text']\n# Display some data to see final data to finetune t5 model\n# df_train.head()","metadata":{"execution":{"iopub.status.busy":"2024-02-09T17:00:05.001173Z","iopub.execute_input":"2024-02-09T17:00:05.001952Z","iopub.status.idle":"2024-02-09T17:00:05.016196Z","shell.execute_reply.started":"2024-02-09T17:00:05.001918Z","shell.execute_reply":"2024-02-09T17:00:05.015090Z"},"trusted":true},"execution_count":21,"outputs":[]},{"cell_type":"code","source":"df_train","metadata":{"execution":{"iopub.status.busy":"2024-02-09T17:00:14.809811Z","iopub.execute_input":"2024-02-09T17:00:14.810503Z","iopub.status.idle":"2024-02-09T17:00:14.825239Z","shell.execute_reply.started":"2024-02-09T17:00:14.810470Z","shell.execute_reply":"2024-02-09T17:00:14.824418Z"},"trusted":true},"execution_count":22,"outputs":[{"execution_count":22,"output_type":"execute_result","data":{"text/plain":" source_text \\\n0 answer: What is Unit cell? \n1 answer: What is Atomic basis? \n2 answer: What is Space lattice? \n3 answer: What is Susceptibility? \n4 answer: What are Diamagnetic materials? \n.. ... \n105 answer: Proximate Analysis \n106 answer: Ultimate Analysis \n107 answer: Moisture content \n108 answer: Volatile matter \n109 answer: Ash content \n\n target_text \n0 Smallest repeating structure in a crystal tha... \n1 Every point in space lattice is associated wit... \n2 Infinite 3d array of points in which every poi... \n3 Magnetic susceptibility is the measure of exte... \n4 Materials which weakly repell magnetic field a... \n.. ... \n105 proximate Analysis gives approximate compositi... \n106 proximate Analysis gives approximate compositi... \n107 it controls the rate of combustion. \n108 it helps in initial combustion. \n109 higher is the Ash content lower is the calorif... \n\n[110 rows x 2 columns]","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>source_text</th>\n <th>target_text</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>answer: What is Unit cell?</td>\n <td>Smallest repeating structure in a crystal tha...</td>\n </tr>\n <tr>\n <th>1</th>\n <td>answer: What is Atomic basis?</td>\n <td>Every point in space lattice is associated wit...</td>\n </tr>\n <tr>\n <th>2</th>\n <td>answer: What is Space lattice?</td>\n <td>Infinite 3d array of points in which every poi...</td>\n </tr>\n <tr>\n <th>3</th>\n <td>answer: What is Susceptibility?</td>\n <td>Magnetic susceptibility is the measure of exte...</td>\n </tr>\n <tr>\n <th>4</th>\n <td>answer: What are Diamagnetic materials?</td>\n <td>Materials which weakly repell magnetic field a...</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>105</th>\n <td>answer: Proximate Analysis</td>\n <td>proximate Analysis gives approximate compositi...</td>\n </tr>\n <tr>\n <th>106</th>\n <td>answer: Ultimate Analysis</td>\n <td>proximate Analysis gives approximate compositi...</td>\n </tr>\n <tr>\n <th>107</th>\n <td>answer: Moisture content</td>\n <td>it controls the rate of combustion.</td>\n </tr>\n <tr>\n <th>108</th>\n <td>answer: Volatile matter</td>\n <td>it helps in initial combustion.</td>\n </tr>\n <tr>\n <th>109</th>\n <td>answer: Ash content</td>\n <td>higher is the Ash content lower is the calorif...</td>\n </tr>\n </tbody>\n</table>\n<p>110 rows Γ 2 columns</p>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"from simplet5 import SimpleT5\n \nmodel = SimpleT5()\nmodel.from_pretrained(model_type=\"t5\", model_name=\"t5-base\")\nmodel.train(train_df=df_train,\n eval_df=df_train[89:91],\n source_max_token_len=128,\n target_max_token_len=50,\n batch_size=16, max_epochs=25, use_gpu=True)","metadata":{"execution":{"iopub.status.busy":"2024-02-09T17:00:46.049143Z","iopub.execute_input":"2024-02-09T17:00:46.050118Z","iopub.status.idle":"2024-02-09T17:06:46.521338Z","shell.execute_reply.started":"2024-02-09T17:00:46.050083Z","shell.execute_reply":"2024-02-09T17:06:46.519601Z"},"trusted":true},"execution_count":23,"outputs":[{"name":"stderr","text":"2024-02-09 17:00:54.614885: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n2024-02-09 17:00:54.615001: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n2024-02-09 17:00:54.740068: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"spiece.model: 0%| | 0.00/792k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"27b4253c38f2410cb993efdf593f9e5f"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer.json: 0%| | 0.00/1.39M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"ff90eea7d6704453b64005888af04a21"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"config.json: 0%| | 0.00/1.21k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"a96b4ed2f8ab40eab6d444b63ca98ecc"}},"metadata":{}},{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/transformers/models/t5/tokenization_t5_fast.py:160: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\nFor now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.\n- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n warnings.warn(\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"model.safetensors: 0%| | 0.00/892M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"5bb2ffb379604d15ab6ca56b0adfcad9"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"generation_config.json: 0%| | 0.00/147 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"55bcaf6279d645a29cb40b47db152610"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Validation sanity check: 0it [00:00, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/pytorch_lightning/trainer/data_loading.py:132: UserWarning: The dataloader, val_dataloader 0, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 4 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.\n rank_zero_warn(\n/opt/conda/lib/python3.10/site-packages/pytorch_lightning/trainer/data_loading.py:132: UserWarning: The dataloader, train_dataloader, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 4 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.\n rank_zero_warn(\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"Training: 0it [00:00, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"d8375b4615c142479d3e24c4c753e37a"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Validating: 0it [00:00, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Validating: 0it [00:00, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Validating: 0it [00:00, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Validating: 0it [00:00, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Validating: 0it [00:00, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Validating: 0it [00:00, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Validating: 0it [00:00, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Validating: 0it [00:00, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Validating: 0it [00:00, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Validating: 0it [00:00, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Validating: 0it [00:00, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Validating: 0it [00:00, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Validating: 0it [00:00, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Validating: 0it [00:00, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Validating: 0it [00:00, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Validating: 0it [00:00, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Validating: 0it [00:00, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Validating: 0it [00:00, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)","Cell \u001b[0;32mIn[23], line 5\u001b[0m\n\u001b[1;32m 3\u001b[0m model \u001b[38;5;241m=\u001b[39m SimpleT5()\n\u001b[1;32m 4\u001b[0m model\u001b[38;5;241m.\u001b[39mfrom_pretrained(model_type\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mt5\u001b[39m\u001b[38;5;124m\"\u001b[39m, model_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mt5-base\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 5\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtrain_df\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdf_train\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43meval_df\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdf_train\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m10\u001b[39;49m\u001b[43m:\u001b[49m\u001b[38;5;241;43m12\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43msource_max_token_len\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m128\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43mtarget_max_token_len\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m50\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m16\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmax_epochs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m25\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43muse_gpu\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/simplet5/simplet5.py:395\u001b[0m, in \u001b[0;36mSimpleT5.train\u001b[0;34m(self, train_df, eval_df, source_max_token_len, target_max_token_len, batch_size, max_epochs, use_gpu, outputdir, early_stopping_patience_epochs, precision, logger, dataloader_num_workers, save_only_last_epoch)\u001b[0m\n\u001b[1;32m 385\u001b[0m trainer \u001b[38;5;241m=\u001b[39m pl\u001b[38;5;241m.\u001b[39mTrainer(\n\u001b[1;32m 386\u001b[0m logger\u001b[38;5;241m=\u001b[39mloggers,\n\u001b[1;32m 387\u001b[0m callbacks\u001b[38;5;241m=\u001b[39mcallbacks,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 391\u001b[0m log_every_n_steps\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m,\n\u001b[1;32m 392\u001b[0m )\n\u001b[1;32m 394\u001b[0m \u001b[38;5;66;03m# fit trainer\u001b[39;00m\n\u001b[0;32m--> 395\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mT5Model\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdata_module\u001b[49m\u001b[43m)\u001b[49m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py:740\u001b[0m, in \u001b[0;36mTrainer.fit\u001b[0;34m(self, model, train_dataloaders, val_dataloaders, datamodule, train_dataloader, ckpt_path)\u001b[0m\n\u001b[1;32m 735\u001b[0m rank_zero_deprecation(\n\u001b[1;32m 736\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m`trainer.fit(train_dataloader)` is deprecated in v1.4 and will be removed in v1.6.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 737\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m Use `trainer.fit(train_dataloaders)` instead. HINT: added \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ms\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 738\u001b[0m )\n\u001b[1;32m 739\u001b[0m train_dataloaders \u001b[38;5;241m=\u001b[39m train_dataloader\n\u001b[0;32m--> 740\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_and_handle_interrupt\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 741\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fit_impl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtrain_dataloaders\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mval_dataloaders\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdatamodule\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mckpt_path\u001b[49m\n\u001b[1;32m 742\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py:685\u001b[0m, in \u001b[0;36mTrainer._call_and_handle_interrupt\u001b[0;34m(self, trainer_fn, *args, **kwargs)\u001b[0m\n\u001b[1;32m 675\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 676\u001b[0m \u001b[38;5;124;03mError handling, intended to be used only for main trainer function entry points (fit, validate, test, predict)\u001b[39;00m\n\u001b[1;32m 677\u001b[0m \u001b[38;5;124;03mas all errors should funnel through them\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 682\u001b[0m \u001b[38;5;124;03m **kwargs: keyword arguments to be passed to `trainer_fn`\u001b[39;00m\n\u001b[1;32m 683\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 684\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 685\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtrainer_fn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 686\u001b[0m \u001b[38;5;66;03m# TODO: treat KeyboardInterrupt as BaseException (delete the code below) in v1.7\u001b[39;00m\n\u001b[1;32m 687\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyboardInterrupt\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exception:\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py:777\u001b[0m, in \u001b[0;36mTrainer._fit_impl\u001b[0;34m(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)\u001b[0m\n\u001b[1;32m 775\u001b[0m \u001b[38;5;66;03m# TODO: ckpt_path only in v1.7\u001b[39;00m\n\u001b[1;32m 776\u001b[0m ckpt_path \u001b[38;5;241m=\u001b[39m ckpt_path \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresume_from_checkpoint\n\u001b[0;32m--> 777\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mckpt_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mckpt_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 779\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mstopped\n\u001b[1;32m 780\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtraining \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py:1199\u001b[0m, in \u001b[0;36mTrainer._run\u001b[0;34m(self, model, ckpt_path)\u001b[0m\n\u001b[1;32m 1196\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcheckpoint_connector\u001b[38;5;241m.\u001b[39mresume_end()\n\u001b[1;32m 1198\u001b[0m \u001b[38;5;66;03m# dispatch `start_training` or `start_evaluating` or `start_predicting`\u001b[39;00m\n\u001b[0;32m-> 1199\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_dispatch\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1201\u001b[0m \u001b[38;5;66;03m# plugin will finalized fitting (e.g. ddp_spawn will load trained model)\u001b[39;00m\n\u001b[1;32m 1202\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_post_dispatch()\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py:1279\u001b[0m, in \u001b[0;36mTrainer._dispatch\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1277\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtraining_type_plugin\u001b[38;5;241m.\u001b[39mstart_predicting(\u001b[38;5;28mself\u001b[39m)\n\u001b[1;32m 1278\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1279\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtraining_type_plugin\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstart_training\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py:202\u001b[0m, in \u001b[0;36mTrainingTypePlugin.start_training\u001b[0;34m(self, trainer)\u001b[0m\n\u001b[1;32m 200\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mstart_training\u001b[39m(\u001b[38;5;28mself\u001b[39m, trainer: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpl.Trainer\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 201\u001b[0m \u001b[38;5;66;03m# double dispatch to initiate the training loop\u001b[39;00m\n\u001b[0;32m--> 202\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_results \u001b[38;5;241m=\u001b[39m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_stage\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py:1289\u001b[0m, in \u001b[0;36mTrainer.run_stage\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1287\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpredicting:\n\u001b[1;32m 1288\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_run_predict()\n\u001b[0;32m-> 1289\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run_train\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py:1319\u001b[0m, in \u001b[0;36mTrainer._run_train\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1317\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfit_loop\u001b[38;5;241m.\u001b[39mtrainer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\n\u001b[1;32m 1318\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mautograd\u001b[38;5;241m.\u001b[39mset_detect_anomaly(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_detect_anomaly):\n\u001b[0;32m-> 1319\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit_loop\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pytorch_lightning/loops/base.py:145\u001b[0m, in \u001b[0;36mLoop.run\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 143\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 144\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mon_advance_start(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m--> 145\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43madvance\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 146\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mon_advance_end()\n\u001b[1;32m 147\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrestarting \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pytorch_lightning/loops/fit_loop.py:234\u001b[0m, in \u001b[0;36mFitLoop.advance\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 231\u001b[0m data_fetcher \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtrainer\u001b[38;5;241m.\u001b[39m_data_connector\u001b[38;5;241m.\u001b[39mget_profiled_dataloader(dataloader)\n\u001b[1;32m 233\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtrainer\u001b[38;5;241m.\u001b[39mprofiler\u001b[38;5;241m.\u001b[39mprofile(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrun_training_epoch\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m--> 234\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mepoch_loop\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata_fetcher\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 236\u001b[0m \u001b[38;5;66;03m# the global step is manually decreased here due to backwards compatibility with existing loggers\u001b[39;00m\n\u001b[1;32m 237\u001b[0m \u001b[38;5;66;03m# as they expect that the same step is used when logging epoch end metrics even when the batch loop has\u001b[39;00m\n\u001b[1;32m 238\u001b[0m \u001b[38;5;66;03m# finished. this means the attribute does not exactly track the number of optimizer steps applied.\u001b[39;00m\n\u001b[1;32m 239\u001b[0m \u001b[38;5;66;03m# TODO(@carmocca): deprecate and rename so users don't get confused\u001b[39;00m\n\u001b[1;32m 240\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mglobal_step \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pytorch_lightning/loops/base.py:151\u001b[0m, in \u001b[0;36mLoop.run\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 148\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m:\n\u001b[1;32m 149\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[0;32m--> 151\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mon_run_end\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 152\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m output\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pytorch_lightning/loops/epoch/training_epoch_loop.py:298\u001b[0m, in \u001b[0;36mTrainingEpochLoop.on_run_end\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 295\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtrainer\u001b[38;5;241m.\u001b[39mfit_loop\u001b[38;5;241m.\u001b[39mepoch_progress\u001b[38;5;241m.\u001b[39mincrement_processed()\n\u001b[1;32m 297\u001b[0m \u001b[38;5;66;03m# call train epoch end hooks\u001b[39;00m\n\u001b[0;32m--> 298\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcall_hook\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mon_train_epoch_end\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 299\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtrainer\u001b[38;5;241m.\u001b[39mcall_hook(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mon_epoch_end\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 300\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtrainer\u001b[38;5;241m.\u001b[39mlogger_connector\u001b[38;5;241m.\u001b[39mon_epoch_end()\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py:1495\u001b[0m, in \u001b[0;36mTrainer.call_hook\u001b[0;34m(self, hook_name, pl_module, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1493\u001b[0m callback_fx \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m, hook_name, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m 1494\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcallable\u001b[39m(callback_fx):\n\u001b[0;32m-> 1495\u001b[0m \u001b[43mcallback_fx\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1497\u001b[0m \u001b[38;5;66;03m# next call hook in lightningModule\u001b[39;00m\n\u001b[1;32m 1498\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pytorch_lightning/trainer/callback_hook.py:93\u001b[0m, in \u001b[0;36mTrainerCallbackHookMixin.on_train_epoch_end\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Called when the epoch ends.\"\"\"\u001b[39;00m\n\u001b[1;32m 92\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m callback \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcallbacks:\n\u001b[0;32m---> 93\u001b[0m \u001b[43mcallback\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mon_train_epoch_end\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlightning_module\u001b[49m\u001b[43m)\u001b[49m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:321\u001b[0m, in \u001b[0;36mModelCheckpoint.on_train_epoch_end\u001b[0;34m(self, trainer, pl_module)\u001b[0m\n\u001b[1;32m 314\u001b[0m trainer\u001b[38;5;241m.\u001b[39mfit_loop\u001b[38;5;241m.\u001b[39mglobal_step \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 315\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 316\u001b[0m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_skip_saving_checkpoint(trainer)\n\u001b[1;32m 317\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_save_on_train_epoch_end\n\u001b[1;32m 318\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_every_n_epochs \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 319\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m (trainer\u001b[38;5;241m.\u001b[39mcurrent_epoch \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m) \u001b[38;5;241m%\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_every_n_epochs \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 320\u001b[0m ):\n\u001b[0;32m--> 321\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave_checkpoint\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtrainer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 322\u001b[0m trainer\u001b[38;5;241m.\u001b[39mfit_loop\u001b[38;5;241m.\u001b[39mglobal_step \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:398\u001b[0m, in \u001b[0;36mModelCheckpoint.save_checkpoint\u001b[0;34m(self, trainer)\u001b[0m\n\u001b[1;32m 396\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_save_top_k_checkpoint(trainer, monitor_candidates)\n\u001b[1;32m 397\u001b[0m \u001b[38;5;66;03m# Mode 2: save monitor=None checkpoints\u001b[39;00m\n\u001b[0;32m--> 398\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_save_none_monitor_checkpoint\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtrainer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmonitor_candidates\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 399\u001b[0m \u001b[38;5;66;03m# Mode 3: save last checkpoints\u001b[39;00m\n\u001b[1;32m 400\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_save_last_checkpoint(trainer, monitor_candidates)\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:696\u001b[0m, in \u001b[0;36mModelCheckpoint._save_none_monitor_checkpoint\u001b[0;34m(self, trainer, monitor_candidates)\u001b[0m\n\u001b[1;32m 694\u001b[0m \u001b[38;5;66;03m# set the best model path before saving because it will be part of the state.\u001b[39;00m\n\u001b[1;32m 695\u001b[0m previous, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbest_model_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbest_model_path, filepath\n\u001b[0;32m--> 696\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave_checkpoint\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave_weights_only\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 697\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msave_top_k \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m previous \u001b[38;5;129;01mand\u001b[39;00m previous \u001b[38;5;241m!=\u001b[39m filepath:\n\u001b[1;32m 698\u001b[0m trainer\u001b[38;5;241m.\u001b[39mtraining_type_plugin\u001b[38;5;241m.\u001b[39mremove_checkpoint(previous)\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py:1913\u001b[0m, in \u001b[0;36mTrainer.save_checkpoint\u001b[0;34m(self, filepath, weights_only)\u001b[0m\n\u001b[1;32m 1912\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msave_checkpoint\u001b[39m(\u001b[38;5;28mself\u001b[39m, filepath: _PATH, weights_only: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1913\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcheckpoint_connector\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave_checkpoint\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mweights_only\u001b[49m\u001b[43m)\u001b[49m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/checkpoint_connector.py:478\u001b[0m, in \u001b[0;36mCheckpointConnector.save_checkpoint\u001b[0;34m(self, filepath, weights_only)\u001b[0m\n\u001b[1;32m 471\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Save model/training states as a checkpoint file through state-dump and file-write.\u001b[39;00m\n\u001b[1;32m 472\u001b[0m \n\u001b[1;32m 473\u001b[0m \u001b[38;5;124;03mArgs:\u001b[39;00m\n\u001b[1;32m 474\u001b[0m \u001b[38;5;124;03m filepath: write-target file's path\u001b[39;00m\n\u001b[1;32m 475\u001b[0m \u001b[38;5;124;03m weights_only: saving model weights only\u001b[39;00m\n\u001b[1;32m 476\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 477\u001b[0m _checkpoint \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdump_checkpoint(weights_only)\n\u001b[0;32m--> 478\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtraining_type_plugin\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave_checkpoint\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_checkpoint\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfilepath\u001b[49m\u001b[43m)\u001b[49m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py:294\u001b[0m, in \u001b[0;36mTrainingTypePlugin.save_checkpoint\u001b[0;34m(self, checkpoint, filepath)\u001b[0m\n\u001b[1;32m 287\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Save model/training states as a checkpoint file through state-dump and file-write.\u001b[39;00m\n\u001b[1;32m 288\u001b[0m \n\u001b[1;32m 289\u001b[0m \u001b[38;5;124;03mArgs:\u001b[39;00m\n\u001b[1;32m 290\u001b[0m \u001b[38;5;124;03m checkpoint: dict containing model and trainer state\u001b[39;00m\n\u001b[1;32m 291\u001b[0m \u001b[38;5;124;03m filepath: write-target file's path\u001b[39;00m\n\u001b[1;32m 292\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 293\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mshould_rank_save_checkpoint:\n\u001b[0;32m--> 294\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcheckpoint_io\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave_checkpoint\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcheckpoint\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfilepath\u001b[49m\u001b[43m)\u001b[49m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pytorch_lightning/plugins/io/torch_plugin.py:37\u001b[0m, in \u001b[0;36mTorchCheckpointIO.save_checkpoint\u001b[0;34m(self, checkpoint, path, storage_options)\u001b[0m\n\u001b[1;32m 34\u001b[0m fs\u001b[38;5;241m.\u001b[39mmakedirs(os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mdirname(path), exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 35\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 36\u001b[0m \u001b[38;5;66;03m# write the checkpoint dictionary on the file\u001b[39;00m\n\u001b[0;32m---> 37\u001b[0m \u001b[43matomic_save\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcheckpoint\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 38\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[1;32m 39\u001b[0m \u001b[38;5;66;03m# todo (sean): is this try catch necessary still?\u001b[39;00m\n\u001b[1;32m 40\u001b[0m \u001b[38;5;66;03m# https://github.com/PyTorchLightning/pytorch-lightning/pull/431\u001b[39;00m\n\u001b[1;32m 41\u001b[0m key \u001b[38;5;241m=\u001b[39m pl\u001b[38;5;241m.\u001b[39mLightningModule\u001b[38;5;241m.\u001b[39mCHECKPOINT_HYPER_PARAMS_KEY\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pytorch_lightning/utilities/cloud_io.py:70\u001b[0m, in \u001b[0;36matomic_save\u001b[0;34m(checkpoint, filepath)\u001b[0m\n\u001b[1;32m 68\u001b[0m torch\u001b[38;5;241m.\u001b[39msave(checkpoint, bytesbuffer)\n\u001b[1;32m 69\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m fsspec\u001b[38;5;241m.\u001b[39mopen(filepath, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwb\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[0;32m---> 70\u001b[0m \u001b[43mf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwrite\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbytesbuffer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetvalue\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/fsspec/implementations/local.py:369\u001b[0m, in \u001b[0;36mLocalFileOpener.write\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 368\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrite\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 369\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwrite\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n","\u001b[0;31mOSError\u001b[0m: [Errno 28] No space left on device"],"ename":"OSError","evalue":"[Errno 28] No space left on device","output_type":"error"}]},{"cell_type":"code","source":"import os\n\ndef remove_folder_contents(folder):\n for the_file in os.listdir(folder):\n file_path = os.path.join(folder, the_file)\n try:\n if os.path.isfile(file_path):\n os.unlink(file_path)\n elif os.path.isdir(file_path):\n remove_folder_contents(file_path)\n os.rmdir(file_path)\n except Exception as e:\n print(e)\n\nfolder_path = '/kaggle/working'\nremove_folder_contents(folder_path)\nos.rmdir(folder_path)","metadata":{"execution":{"iopub.status.busy":"2024-02-09T17:18:08.747776Z","iopub.execute_input":"2024-02-09T17:18:08.748644Z","iopub.status.idle":"2024-02-09T17:18:10.602533Z","shell.execute_reply.started":"2024-02-09T17:18:08.748609Z","shell.execute_reply":"2024-02-09T17:18:10.601223Z"},"trusted":true},"execution_count":28,"outputs":[{"traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)","Cell \u001b[0;32mIn[28], line 17\u001b[0m\n\u001b[1;32m 15\u001b[0m folder_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/kaggle/working\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 16\u001b[0m remove_folder_contents(folder_path)\n\u001b[0;32m---> 17\u001b[0m \u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrmdir\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfolder_path\u001b[49m\u001b[43m)\u001b[49m\n","\u001b[0;31mOSError\u001b[0m: [Errno 16] Device or resource busy: '/kaggle/working'"],"ename":"OSError","evalue":"[Errno 16] Device or resource busy: '/kaggle/working'","output_type":"error"}]},{"cell_type":"code","source":"model.train(train_df=df_train,\n eval_df=df_train[80:82],\n source_max_token_len=128,\n target_max_token_len=50,\n batch_size=16, max_epochs=3, use_gpu=True,outputdir = 'trained_t5')","metadata":{"execution":{"iopub.status.busy":"2024-02-09T17:18:39.897372Z","iopub.execute_input":"2024-02-09T17:18:39.899050Z","iopub.status.idle":"2024-02-09T17:19:30.317362Z","shell.execute_reply.started":"2024-02-09T17:18:39.898973Z","shell.execute_reply":"2024-02-09T17:19:30.316449Z"},"trusted":true},"execution_count":29,"outputs":[{"output_type":"display_data","data":{"text/plain":"Validation sanity check: 0it [00:00, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/pytorch_lightning/trainer/data_loading.py:132: UserWarning: The dataloader, val_dataloader 0, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 4 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.\n rank_zero_warn(\n/opt/conda/lib/python3.10/site-packages/pytorch_lightning/trainer/data_loading.py:132: UserWarning: The dataloader, train_dataloader, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 4 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.\n rank_zero_warn(\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"Training: 0it [00:00, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"4db9c2c773a2403fab0c7840d51c5761"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Validating: 0it [00:00, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Validating: 0it [00:00, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Validating: 0it [00:00, ?it/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":""}},"metadata":{}}]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]}]}
|