diff --git "a/yarngpt/notebooks/train_YarnGPT_local.ipynb" "b/yarngpt/notebooks/train_YarnGPT_local.ipynb" new file mode 100644--- /dev/null +++ "b/yarngpt/notebooks/train_YarnGPT_local.ipynb" @@ -0,0 +1,7126 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Rxa73RyKnhy3", + "outputId": "8ef54b52-69c2-43e2-f2f5-6dd977e4401a" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting outetts\n", + " Downloading outetts-0.2.3-py3-none-any.whl.metadata (10 kB)\n", + "Collecting uroman\n", + " Downloading uroman-1.3.1.1-py3-none-any.whl.metadata (18 kB)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from outetts) (1.13.1)\n", + "Requirement already satisfied: einops in /usr/local/lib/python3.10/dist-packages (from outetts) (0.8.0)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from outetts) (6.0.2)\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from outetts) (0.27.1)\n", + "Collecting encodec (from outetts)\n", + " Downloading encodec-0.1.1.tar.gz (3.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.7/3.7 MB\u001b[0m \u001b[31m80.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from outetts) (3.10.0)\n", + "Requirement already satisfied: transformers>=4.46.1 in /usr/local/lib/python3.10/dist-packages (from outetts) (4.47.1)\n", + "Collecting pytorch-lightning (from outetts)\n", + " Downloading pytorch_lightning-2.5.0.post0-py3-none-any.whl.metadata (21 kB)\n", + "Collecting tensorboardX (from outetts)\n", + " Downloading tensorboardX-2.6.2.2-py2.py3-none-any.whl.metadata (5.8 kB)\n", + "Requirement already satisfied: soundfile in /usr/local/lib/python3.10/dist-packages (from outetts) (0.13.0)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from outetts) (1.26.4)\n", + "Collecting jsonargparse (from outetts)\n", + " Downloading jsonargparse-4.35.0-py3-none-any.whl.metadata (12 kB)\n", + "Collecting torchcrepe (from outetts)\n", + " Downloading torchcrepe-0.0.23-py3-none-any.whl.metadata (7.8 kB)\n", + "Requirement already satisfied: librosa in /usr/local/lib/python3.10/dist-packages (from outetts) (0.10.2.post1)\n", + "Collecting pesq (from outetts)\n", + " Downloading pesq-0.0.4.tar.gz (38 kB)\n", + " Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: inflect in /usr/local/lib/python3.10/dist-packages (from outetts) (7.5.0)\n", + "Collecting loguru (from outetts)\n", + " Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)\n", + "Requirement already satisfied: polars in /usr/local/lib/python3.10/dist-packages (from outetts) (1.9.0)\n", + "Requirement already satisfied: natsort in /usr/local/lib/python3.10/dist-packages (from outetts) (8.4.0)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from outetts) (4.67.1)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from outetts) (2.32.3)\n", + "Collecting sounddevice (from outetts)\n", + " Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)\n", + "Collecting mecab-python3 (from outetts)\n", + " Downloading mecab_python3-1.0.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.2 kB)\n", + "Collecting unidic-lite (from outetts)\n", + " Downloading unidic-lite-1.0.8.tar.gz (47.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m47.4/47.4 MB\u001b[0m \u001b[31m45.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Collecting openai-whisper>=20240930 (from outetts)\n", + " Downloading openai-whisper-20240930.tar.gz (800 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m800.5/800.5 kB\u001b[0m \u001b[31m57.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: regex>=2024.5.15 in /usr/local/lib/python3.10/dist-packages (from uroman) (2024.11.6)\n", + "Requirement already satisfied: numba in /usr/local/lib/python3.10/dist-packages (from openai-whisper>=20240930->outetts) (0.60.0)\n", + "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from openai-whisper>=20240930->outetts) (2.5.1+cu121)\n", + "Requirement already satisfied: more-itertools in /usr/local/lib/python3.10/dist-packages (from openai-whisper>=20240930->outetts) (10.5.0)\n", + "Collecting tiktoken (from openai-whisper>=20240930->outetts)\n", + " Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n", + "Collecting triton>=2.0.0 (from openai-whisper>=20240930->outetts)\n", + " Downloading triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.3 kB)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers>=4.46.1->outetts) (3.16.1)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.46.1->outetts) (24.2)\n", + "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.46.1->outetts) (0.21.0)\n", + "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.46.1->outetts) (0.5.1)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->outetts) (2024.10.0)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->outetts) (4.12.2)\n", + "Requirement already satisfied: torchaudio in /usr/local/lib/python3.10/dist-packages (from encodec->outetts) (2.5.1+cu121)\n", + "Requirement already satisfied: typeguard>=4.0.1 in 
/usr/local/lib/python3.10/dist-packages (from inflect->outetts) (4.4.1)\n", + "Requirement already satisfied: audioread>=2.1.9 in /usr/local/lib/python3.10/dist-packages (from librosa->outetts) (3.0.1)\n", + "Requirement already satisfied: scikit-learn>=0.20.0 in /usr/local/lib/python3.10/dist-packages (from librosa->outetts) (1.6.0)\n", + "Requirement already satisfied: joblib>=0.14 in /usr/local/lib/python3.10/dist-packages (from librosa->outetts) (1.4.2)\n", + "Requirement already satisfied: decorator>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from librosa->outetts) (4.4.2)\n", + "Requirement already satisfied: pooch>=1.1 in /usr/local/lib/python3.10/dist-packages (from librosa->outetts) (1.8.2)\n", + "Requirement already satisfied: soxr>=0.3.2 in /usr/local/lib/python3.10/dist-packages (from librosa->outetts) (0.5.0.post1)\n", + "Requirement already satisfied: lazy-loader>=0.1 in /usr/local/lib/python3.10/dist-packages (from librosa->outetts) (0.4)\n", + "Requirement already satisfied: msgpack>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa->outetts) (1.1.0)\n", + "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.10/dist-packages (from soundfile->outetts) (1.17.1)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->outetts) (1.3.1)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->outetts) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->outetts) (4.55.3)\n", + "Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->outetts) (1.4.8)\n", + "Requirement already satisfied: pillow>=8 in /usr/local/lib/python3.10/dist-packages (from matplotlib->outetts) (11.1.0)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from 
matplotlib->outetts) (3.2.1)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->outetts) (2.8.2)\n", + "Collecting torchmetrics>=0.7.0 (from pytorch-lightning->outetts)\n", + " Downloading torchmetrics-1.6.1-py3-none-any.whl.metadata (21 kB)\n", + "Collecting lightning-utilities>=0.10.0 (from pytorch-lightning->outetts)\n", + " Downloading lightning_utilities-0.11.9-py3-none-any.whl.metadata (5.2 kB)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->outetts) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->outetts) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->outetts) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->outetts) (2024.12.14)\n", + "Requirement already satisfied: protobuf>=3.20 in /usr/local/lib/python3.10/dist-packages (from tensorboardX->outetts) (4.25.5)\n", + "Collecting resampy (from torchcrepe->outetts)\n", + " Downloading resampy-0.4.3-py3-none-any.whl.metadata (3.0 kB)\n", + "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0->soundfile->outetts) (2.22)\n", + "Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /usr/local/lib/python3.10/dist-packages (from fsspec[http]>=2022.5.0->pytorch-lightning->outetts) (3.11.11)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from lightning-utilities>=0.10.0->pytorch-lightning->outetts) (75.1.0)\n", + "Requirement already satisfied: llvmlite<0.44,>=0.43.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba->openai-whisper>=20240930->outetts) (0.43.0)\n", + "Requirement already satisfied: platformdirs>=2.5.0 in 
/usr/local/lib/python3.10/dist-packages (from pooch>=1.1->librosa->outetts) (4.3.6)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib->outetts) (1.17.0)\n", + "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.20.0->librosa->outetts) (3.5.0)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper>=20240930->outetts) (3.4.2)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper>=20240930->outetts) (3.1.5)\n", + "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.10/dist-packages (from torch->openai-whisper>=20240930->outetts) (1.13.1)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy==1.13.1->torch->openai-whisper>=20240930->outetts) (1.3.0)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2022.5.0->pytorch-lightning->outetts) (2.4.4)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2022.5.0->pytorch-lightning->outetts) (1.3.2)\n", + "Requirement already satisfied: async-timeout<6.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2022.5.0->pytorch-lightning->outetts) (4.0.3)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2022.5.0->pytorch-lightning->outetts) (24.3.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2022.5.0->pytorch-lightning->outetts) (1.5.0)\n", + "Requirement 
already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2022.5.0->pytorch-lightning->outetts) (6.1.0)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2022.5.0->pytorch-lightning->outetts) (0.2.1)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2022.5.0->pytorch-lightning->outetts) (1.18.3)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->openai-whisper>=20240930->outetts) (3.0.2)\n", + "Downloading outetts-0.2.3-py3-none-any.whl (125 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m125.1/125.1 kB\u001b[0m \u001b[31m10.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading uroman-1.3.1.1-py3-none-any.whl (930 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m930.7/930.7 kB\u001b[0m \u001b[31m57.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading jsonargparse-4.35.0-py3-none-any.whl (211 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.0/211.0 kB\u001b[0m \u001b[31m21.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading loguru-0.7.3-py3-none-any.whl (61 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.6/61.6 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading mecab_python3-1.0.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (581 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m581.7/581.7 kB\u001b[0m \u001b[31m42.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading 
pytorch_lightning-2.5.0.post0-py3-none-any.whl (819 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m819.3/819.3 kB\u001b[0m \u001b[31m50.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading sounddevice-0.5.1-py3-none-any.whl (32 kB)\n", + "Downloading tensorboardX-2.6.2.2-py2.py3-none-any.whl (101 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m101.7/101.7 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading torchcrepe-0.0.23-py3-none-any.whl (72.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m72.3/72.3 MB\u001b[0m \u001b[31m30.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading lightning_utilities-0.11.9-py3-none-any.whl (28 kB)\n", + "Downloading torchmetrics-1.6.1-py3-none-any.whl (927 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m927.3/927.3 kB\u001b[0m \u001b[31m55.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (209.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m209.5/209.5 MB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading resampy-0.4.3-py3-none-any.whl (3.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m95.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m66.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hBuilding wheels for collected packages: openai-whisper, encodec, pesq, 
unidic-lite\n", + " Building wheel for openai-whisper (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for openai-whisper: filename=openai_whisper-20240930-py3-none-any.whl size=803373 sha256=dd63697d5f2380f1444bc5fe1dc31a3f87a270ec30cb9b937637ec567e330f74\n", + " Stored in directory: /root/.cache/pip/wheels/dd/4a/1f/d1c4bf3b9133c8168fe617ed979cab7b14fe381d059ffb9d83\n", + " Building wheel for encodec (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for encodec: filename=encodec-0.1.1-py3-none-any.whl size=45760 sha256=adf14d590c9e74786104dff1762303f43bc6c752e3f1236446ae6305cb515579\n", + " Stored in directory: /root/.cache/pip/wheels/fc/36/cb/81af8b985a5f5e0815312d5e52b41263237af07b977e6bcbf3\n", + " Building wheel for pesq (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for pesq: filename=pesq-0.0.4-cp310-cp310-linux_x86_64.whl size=262943 sha256=f0d6f54d68b8b9288a2fc6c590cd6bdbeb81726e2e20d5f2b8c0247d4a6f070b\n", + " Stored in directory: /root/.cache/pip/wheels/c5/4e/2c/251524370c0fdd659e99639a0fbd0ca5a782c3aafcd456b28d\n", + " Building wheel for unidic-lite (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for unidic-lite: filename=unidic_lite-1.0.8-py3-none-any.whl size=47658818 sha256=e89007bef2e730232d99961e7a6c9bca61d80047f53366475e01b6c78a72aae1\n", + " Stored in directory: /root/.cache/pip/wheels/89/e8/68/f9ac36b8cc6c8b3c96888cd57434abed96595d444f42243853\n", + "Successfully built openai-whisper encodec pesq unidic-lite\n", + "Installing collected packages: unidic-lite, pesq, mecab-python3, uroman, triton, tensorboardX, loguru, lightning-utilities, jsonargparse, tiktoken, sounddevice, resampy, torchmetrics, openai-whisper, torchcrepe, encodec, pytorch-lightning, outetts\n", + "Successfully installed encodec-0.1.1 jsonargparse-4.35.0 lightning-utilities-0.11.9 loguru-0.7.3 mecab-python3-1.0.10 openai-whisper-20240930 outetts-0.2.3 pesq-0.0.4 pytorch-lightning-2.5.0.post0 resampy-0.4.3 sounddevice-0.5.1 tensorboardX-2.6.2.2 tiktoken-0.8.0 torchcrepe-0.0.23 torchmetrics-1.6.1 triton-3.1.0 unidic-lite-1.0.8 uroman-1.3.1.1\n" + ] + } + ], + "source": [ + "pip install outetts uroman" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "HgJjekSOT8iX", + "outputId": "2f0873ae-60ff-49a3-eb76-f82c36fe2390" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting datasets\n", + " Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)\n", + "Requirement already satisfied: triton in /usr/local/lib/python3.10/dist-packages (3.1.0)\n", + "Collecting snac\n", + " Downloading snac-1.2.1-py3-none-any.whl.metadata (3.5 kB)\n", + "Requirement already satisfied: wandb in /usr/local/lib/python3.10/dist-packages (0.19.1)\n", + "Requirement already satisfied: accelerate in /usr/local/lib/python3.10/dist-packages (1.2.1)\n", + "Collecting torchdata\n", + " Downloading torchdata-0.10.1-py3-none-any.whl.metadata (6.3 kB)\n", + "Requirement already satisfied: filelock in 
/usr/local/lib/python3.10/dist-packages (from datasets) (3.16.1)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (1.26.4)\n", + "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (17.0.0)\n", + "Collecting dill<0.3.9,>=0.3.0 (from datasets)\n", + " Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (2.2.2)\n", + "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.66.3 in /usr/local/lib/python3.10/dist-packages (from datasets) (4.67.1)\n", + "Collecting xxhash (from datasets)\n", + " Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", + "Collecting multiprocess<0.70.17 (from datasets)\n", + " Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)\n", + "Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)\n", + " Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.11.11)\n", + "Requirement already satisfied: huggingface-hub>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.27.1)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (24.2)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0.2)\n", + "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from snac) (2.5.1+cu121)\n", + "Requirement already satisfied: einops in /usr/local/lib/python3.10/dist-packages (from snac) (0.8.0)\n", + "Requirement already satisfied: click!=8.0.0,>=7.1 in 
/usr/local/lib/python3.10/dist-packages (from wandb) (8.1.8)\n", + "Requirement already satisfied: docker-pycreds>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from wandb) (0.4.0)\n", + "Requirement already satisfied: gitpython!=3.1.29,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb) (3.1.44)\n", + "Requirement already satisfied: platformdirs in /usr/local/lib/python3.10/dist-packages (from wandb) (4.3.6)\n", + "Requirement already satisfied: protobuf!=4.21.0,!=5.28.0,<6,>=3.19.0 in /usr/local/lib/python3.10/dist-packages (from wandb) (4.25.5)\n", + "Requirement already satisfied: psutil>=5.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb) (5.9.5)\n", + "Requirement already satisfied: pydantic<3,>=2.6 in /usr/local/lib/python3.10/dist-packages (from wandb) (2.10.4)\n", + "Requirement already satisfied: sentry-sdk>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb) (2.19.2)\n", + "Requirement already satisfied: setproctitle in /usr/local/lib/python3.10/dist-packages (from wandb) (1.3.4)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from wandb) (75.1.0)\n", + "Requirement already satisfied: typing-extensions<5,>=4.4 in /usr/local/lib/python3.10/dist-packages (from wandb) (4.12.2)\n", + "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.10/dist-packages (from accelerate) (0.5.1)\n", + "Requirement already satisfied: urllib3>=1.25 in /usr/local/lib/python3.10/dist-packages (from torchdata) (2.3.0)\n", + "Requirement already satisfied: six>=1.4.0 in /usr/local/lib/python3.10/dist-packages (from docker-pycreds>=0.4.0->wandb) (1.17.0)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (2.4.4)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.2)\n", + "Requirement already satisfied: async-timeout<6.0,>=4.0 
in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (24.3.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.5.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.1.0)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (0.2.1)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.18.3)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.10/dist-packages (from gitpython!=3.1.29,>=1.0.0->wandb) (4.0.12)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=2.6->wandb) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=2.6->wandb) (2.27.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (3.10)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets) (2024.12.14)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->snac) (3.4.2)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->snac) (3.1.5)\n", + "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.10/dist-packages (from torch->snac) (1.13.1)\n", + "Requirement already satisfied: 
mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy==1.13.1->torch->snac) (1.3.0)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.2)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.10/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.29,>=1.0.0->wandb) (5.0.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->snac) (3.0.2)\n", + "Downloading datasets-3.2.0-py3-none-any.whl (480 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m35.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading snac-1.2.1-py3-none-any.whl (8.4 kB)\n", + "Downloading torchdata-0.10.1-py3-none-any.whl (57 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.5/57.5 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m11.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (179 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m17.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m 
\u001b[31m13.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m19.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: xxhash, fsspec, dill, multiprocess, torchdata, snac, datasets\n", + " Attempting uninstall: fsspec\n", + " Found existing installation: fsspec 2024.10.0\n", + " Uninstalling fsspec-2024.10.0:\n", + " Successfully uninstalled fsspec-2024.10.0\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed datasets-3.2.0 dill-0.3.8 fsspec-2024.9.0 multiprocess-0.70.16 snac-1.2.1 torchdata-0.10.1 xxhash-3.5.0\n" + ] + } + ], + "source": [ + "!pip install datasets triton snac wandb accelerate torchdata" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "m4uPM3IpnsEo", + "outputId": "63a19431-04b3-49d3-da29-1119152ed72e" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-01-13 08:34:21.368\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36moutetts.version.v1.interface\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m21\u001b[0m - \u001b[31m\u001b[1mPortAudio library not found\u001b[0m\n", + "\u001b[32m2025-01-13 08:34:21.370\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36moutetts.version.v1.interface\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m22\u001b[0m - \u001b[33m\u001b[1mFailed to import sounddevice. 
Audio playback is disabled.\u001b[0m\n" + ] + } + ], + "source": [ + "from outetts.wav_tokenizer.decoder import WavTokenizer\n", + "from outetts.wav_tokenizer.encoder.utils import convert_audio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "543a-ZmC7xjE", + "outputId": "7b8f7b74-991f-4680-c930-39511544f3af" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mounted at /content/drive\n" + ] + } + ], + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EVyBedbQUM3F" + }, + "outputs": [], + "source": [ + "\n", + "\n", + "import os\n", + "import torch\n", + "import time\n", + "import numpy as np\n", + "import torchaudio\n", + "#from snac import SNAC\n", + "from tqdm import tqdm\n", + "import huggingface_hub\n", + "import shutil\n", + "import soundfile as sf\n", + "from torch.utils.data import DataLoader, Dataset\n", + "from transformers import AdamW, get_linear_schedule_with_warmup, DataCollatorWithPadding\n", + "from datasets import load_dataset, concatenate_datasets, Audio, load_from_disk, interleave_datasets,Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Z8LFkziTgFRf" + }, + "outputs": [], + "source": [ + "import torchaudio\n", + "import torch\n", + "import torchaudio.functional as F\n", + "import inflect\n", + "import re\n", + "import uroman as ur\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 17, + "referenced_widgets": [ + "70f2b1a35f414c798a8300c74e1d1be0", + "02848bb5b7494a4fa7fa9a05aa4ac2bc" + ] + }, + "id": "DN19SQCOUc6m", + "outputId": "f602c414-4208-4605-d57b-a0d4a7ce0fff" + }, + "outputs": [ + { + "data": { + 
"application/vnd.jupyter.widget-view+json": { + "model_id": "70f2b1a35f414c798a8300c74e1d1be0", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(HTML(value='
\",\n", + " \"<|igbo|>\",\n", + " \"<|yoruba|>\",])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-i0n61YJ0uTc" + }, + "outputs": [], + "source": [ + "#tokenizer(\"<|yoruba|>\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OJk2i5urKIec", + "outputId": "56ba5483-516f-422d-e36b-4022e579bd35" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tokenizer.eos_token_id" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6BPy5GpEKGP_" + }, + "outputs": [], + "source": [ + "tokenizer.pad_token_id=0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "N90blgKsHJo6", + "outputId": "846de4c2-95fc-4c1a-99db-afb99367ff6d" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "52186" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(tokenizer)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1yjGTRLMWI26" + }, + "outputs": [], + "source": [ + "model=torch.compile(model.to(device))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QOxVQVeZWL1d", + "outputId": "6a51f7c3-ae45-46e0-a661-b1277b89cde6" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "731.510784" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.get_memory_footprint()/ 1e6" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zqe1ZczmWP1b", + "outputId": 
"8c777d86-4110-4205-8df7-6ac5c94102b4" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "365753280" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.num_parameters()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gRdr07gcLN7H" + }, + "outputs": [], + "source": [ + "train_data=pd.read_csv(\"/content/drive/MyDrive/naij_tokenized/final_all_lang.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "id": "t9rddLyYLgnh", + "outputId": "cb29fdcc-ea2f-400b-d674-5641518038f0" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "train_data" + }, + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0ttslength
00<|im_start|>\\n<|text_start|>awako<|text_sep|>w...579
11<|im_start|>\\n<|text_start|>ririn<|text_sep|>i...603
22<|im_start|>\\n<|text_start|>akanti<|text_sep|>...346
33<|im_start|>\\n<|text_start|>a<|text_sep|>maa<|...450
44<|im_start|>\\n<|text_start|>enikeni<|text_sep|...345
............
1929271995<|im_start|>\\n<|text_start|>jide<|text_sep|>yo...345
1929272996<|im_start|>\\n<|text_start|>mi<|text_sep|>o<|t...233
1929273997<|im_start|>\\n<|text_start|>sola<|text_sep|>fe...277
1929274998<|im_start|>\\n<|text_start|>beeni<|text_sep|>m...250
1929275999<|im_start|>\\n<|text_start|>obe<|text_sep|>ele...277
\n", + "

1929276 rows × 3 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " Unnamed: 0 tts length\n", + "0 0 <|im_start|>\\n<|text_start|>awako<|text_sep|>w... 579\n", + "1 1 <|im_start|>\\n<|text_start|>ririn<|text_sep|>i... 603\n", + "2 2 <|im_start|>\\n<|text_start|>akanti<|text_sep|>... 346\n", + "3 3 <|im_start|>\\n<|text_start|>a<|text_sep|>maa<|... 450\n", + "4 4 <|im_start|>\\n<|text_start|>enikeni<|text_sep|... 345\n", + "... ... ... ...\n", + "1929271 995 <|im_start|>\\n<|text_start|>jide<|text_sep|>yo... 345\n", + "1929272 996 <|im_start|>\\n<|text_start|>mi<|text_sep|>o<|t... 233\n", + "1929273 997 <|im_start|>\\n<|text_start|>sola<|text_sep|>fe... 277\n", + "1929274 998 <|im_start|>\\n<|text_start|>beeni<|text_sep|>m... 250\n", + "1929275 999 <|im_start|>\\n<|text_start|>obe<|text_sep|>ele... 277\n", + "\n", + "[1929276 rows x 3 columns]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "1oYlD-iTIFXw", + "outputId": "f0d3b529-d6ab-4570-8ea2-7c3f34b25081" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(1929276, 3)" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_data.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mR8OImavy1Z5", + "outputId": "1482fcfc-41f6-4529-af5c-aa848821ebda" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "3462" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_data[\"length\"].max()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AFTfQ4RHIyw8", + "outputId": "01e8262a-4981-4cfd-da18-c4f72e99afa6" + }, + 
"outputs": [ + { + "data": { + "text/plain": [ + "(1929276, 3)" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_data.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nDWFPH4YI4q-" + }, + "outputs": [], + "source": [ + "#train_data=train_data.reset_index(drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "t0QWUWIlI-U9" + }, + "outputs": [], + "source": [ + "\n", + "from datasets import Dataset\n", + "train_dataset=Dataset.from_pandas(train_data[[\"tts\"]])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QepGS-a8qSJL" + }, + "outputs": [], + "source": [ + "train_dataset=train_dataset.shuffle()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ToMeB3qx6i_V" + }, + "outputs": [], + "source": [ + "train_dataset=train_dataset.shuffle()" + ] + }, + { + "cell_type": "code", + "source": [ + "train_dataset=train_dataset.shuffle()" + ], + "metadata": { + "id": "t_pjjZL194-5" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "train_dataset=train_dataset.shuffle()" + ], + "metadata": { + "id": "mDN0r319953f" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Eoxv09xaGsM_" + }, + "outputs": [], + "source": [ + "from torch.utils.data import DataLoader, Dataset\n", + "class YarnDataset(Dataset):\n", + " def __init__(self,dataset):\n", + " self.ds = dataset\n", + " super().__init__()\n", + "\n", + " def __len__(self):\n", + " return len(self.ds)\n", + "\n", + "\n", + " def __getitem__(self, idx):\n", + " prompt=self.ds[idx][\"tts\"]\n", + " #print(prompt)\n", + " return tokenizer(prompt,)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PdiQt7_Ctlbb" + }, + "outputs": [], + 
"source": [ + "yarn_dataset = YarnDataset(train_dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QJBKk_oXQ3HP" + }, + "outputs": [], + "source": [ + "batch_size=4\n", + "learning_rate=1e-3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qPkuxPZIO1dP" + }, + "outputs": [], + "source": [ + "# Initialize data collator\n", + "data_collator = DataCollatorWithPadding(tokenizer=tokenizer)\n", + "\n", + "# Create DataLoader with collate_fn using data collator\n", + "dataloader = DataLoader(\n", + " yarn_dataset,\n", + " batch_size=batch_size,\n", + " collate_fn=data_collator,shuffle=True # Automatically handles padding\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XHZZVQBQRaXi", + "outputId": "03404215-aa7d-4672-b24b-b8341a747c51" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1929276" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(yarn_dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mpqTRQC50jEy", + "outputId": "e99d7e6f-3334-45b0-f658-6e6a960a4798" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'input_ids': [1, 198, 49152, 14765, 49158, 721, 518, 49158, 10374, 49158, 270, 101, 49158, 93, 49158, 5568, 518, 49158, 3250, 49153, 198, 52184, 198, 49154, 198, 14765, 51291, 49156, 49315, 49378, 49378, 49277, 50451, 49725, 49876, 50336, 49760, 50640, 50226, 49810, 50526, 50135, 49840, 50293, 50282, 50438, 50770, 49707, 50289, 50587, 50532, 50213, 50182, 50464, 50607, 50532, 50258, 50864, 50126, 50648, 49579, 50280, 50519, 49326, 50382, 49839, 50140, 50382, 49958, 50980, 49874, 50473, 50098, 50187, 50135, 50154, 50481, 49710, 49825, 50025, 49595, 49433, 49315, 49324, 49612, 49612, 49612, 
49612, 49403, 49378, 50716, 50021, 50726, 50936, 49280, 50774, 49785, 50481, 50469, 50755, 50334, 50070, 50420, 49961, 50269, 50465, 49945, 50283, 49526, 49157, 198, 721, 518, 51237, 49156, 50445, 50343, 49918, 50379, 50003, 49859, 49258, 50792, 49542, 50467, 50190, 50103, 50485, 50193, 50904, 50443, 50653, 49583, 49772, 49667, 50329, 50269, 49715, 50646, 49659, 50228, 49665, 49207, 49739, 50023, 49389, 50360, 49817, 49276, 49581, 49307, 49196, 50079, 49573, 49855, 49191, 49157, 198, 10374, 51201, 49156, 50878, 49461, 50234, 49292, 49210, 49857, 49695, 49452, 49289, 49195, 50586, 49553, 49746, 49157, 198, 270, 101, 44, 108, 100, 79, 32, 30, 34, 32, 108, 46, 49156, 49906, 49269, 50053, 49216, 49865, 50574, 49336, 50777, 49906, 50269, 49673, 50004, 49359, 49805, 50380, 49157, 198, 93, 51219, 49156, 49942, 49497, 49216, 50689, 50854, 50931, 50395, 50226, 50370, 50251, 50468, 50410, 50344, 50556, 49540, 50280, 50480, 50565, 50190, 50364, 50943, 50040, 49866, 50782, 50945, 49930, 50941, 49157, 198, 5568, 518, 51221, 49156, 50167, 49832, 49821, 50893, 50012, 49669, 50338, 49667, 50427, 49317, 49159, 50157, 49474, 50075, 49638, 50143, 49456, 50846, 49586, 49790, 50067, 49341, 50615, 50072, 50017, 50434, 50777, 50194, 50771, 49157, 198, 3250, 51191, 49156, 50037, 50593, 50243, 50090, 50338, 50440, 49157, 198, 49155, 198, 2, 198], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "yarn_dataset[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "87LfDjTFyj6y", + "outputId": "f98d4b12-13cb-4ae1-898b-a9080a084e28" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "482319" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(dataloader)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_NJ4gP3_2Cp-", + "outputId": "a3ddfd13-1e34-4686-eb8a-cdcd4655bac7" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "942.029296875" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "482319/512" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dUtacXKt2Iw1", + "outputId": "d800f7b0-6785-4e46-8ddd-522acf58b7ef" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1250.0" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "640000/512" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "M340iQMK2PiF", + "outputId": "f61ad845-1472-4cbe-b02a-63e0271aa6d8" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "942.029296875" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "1929276/(512*4)" + ] 
+ }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xfpWBncA2dK9", + "outputId": "27fe6dd5-34ff-40bc-92fc-e3540ffe3857" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "942.029296875" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "482319/512" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1Ez7HQd7GFEA" + }, + "outputs": [], + "source": [ + "from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts\n", + "from torch.optim.lr_scheduler import LambdaLR\n", + "from transformers import get_linear_schedule_with_warmup,get_cosine_schedule_with_warmup,get_constant_schedule_with_warmup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kBWFROqbzO3h" + }, + "outputs": [], + "source": [ + "def get_lr_lambda(step):\n", + " if step < lr_warmup_steps:\n", + " # Linear warmup\n", + " return step / lr_warmup_steps\n", + " elif step >=(num_decay_start):\n", + " return 1-(step-num_decay_start)/(num_training_steps-num_decay_start)\n", + " else:\n", + " # Constant learning rate\n", + " return 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "18ObtWm1Ryr3" + }, + "outputs": [], + "source": [ + "#0.2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AVI3iLgUbYnW", + "outputId": "9e2b09ad-b2e6-4699-b834-c75674b927bc" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/transformers/optimization.py:591: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. 
Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "OptimizedModule(\n", + " (_orig_mod): LlamaForCausalLM(\n", + " (model): LlamaModel(\n", + " (embed_tokens): Embedding(53248, 960)\n", + " (layers): ModuleList(\n", + " (0-31): 32 x LlamaDecoderLayer(\n", + " (self_attn): LlamaSdpaAttention(\n", + " (q_proj): Linear(in_features=960, out_features=960, bias=False)\n", + " (k_proj): Linear(in_features=960, out_features=320, bias=False)\n", + " (v_proj): Linear(in_features=960, out_features=320, bias=False)\n", + " (o_proj): Linear(in_features=960, out_features=960, bias=False)\n", + " (rotary_emb): LlamaRotaryEmbedding()\n", + " )\n", + " (mlp): LlamaMLP(\n", + " (gate_proj): Linear(in_features=960, out_features=2560, bias=False)\n", + " (up_proj): Linear(in_features=960, out_features=2560, bias=False)\n", + " (down_proj): Linear(in_features=2560, out_features=960, bias=False)\n", + " (act_fn): SiLU()\n", + " )\n", + " (input_layernorm): LlamaRMSNorm((960,), eps=1e-05)\n", + " (post_attention_layernorm): LlamaRMSNorm((960,), eps=1e-05)\n", + " )\n", + " )\n", + " (norm): LlamaRMSNorm((960,), eps=1e-05)\n", + " (rotary_emb): LlamaRotaryEmbedding()\n", + " )\n", + " (lm_head): Linear(in_features=960, out_features=53248, bias=False)\n", + " )\n", + ")" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "num_epochs=2\n", + "optimizer = AdamW(model.parameters(), lr=learning_rate, betas=(0.9, 0.95),weight_decay=0.01)\n", + "lr_warmup_steps=200\n", + "\n", + "num_training_steps=1255*num_epochs\n", + "num_decay_start=50#num_training_steps#-20\n", + "#scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=T_0, T_mult=T_mult, eta_min=eta_min)\n", + "#scheduler = # Create LambdaLR scheduler\n", + "scheduler = 
get_constant_schedule_with_warmup(optimizer,num_warmup_steps=lr_warmup_steps)#LambdaLR(optimizer, lr_lambda=get_lr_lambda) #get_constant_schedule_with_warmup(optimizer,num_warmup_steps=10)#\n", + "global_step = 0\n", + "accumulation_steps = int(512/batch_size)#32\n", + "model.train()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "j9vN0iRPYbRa" + }, + "outputs": [], + "source": [ + "new_checkpoint=\"saheedniyi/YarnGPT-local\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 17, + "referenced_widgets": [ + "3d687f99d4564f4e9efc1f988f5d6799", + "5625d8053fb64e00a587464d8800a25c" + ] + }, + "id": "V0k8jG6Q2iMP", + "outputId": "c8665e1c-e876-48de-cd97-82a25086ff0e" + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3d687f99d4564f4e9efc1f988f5d6799", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(HTML(value='
:1: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n", + " checkpoint=torch.load(\"/content/drive/MyDrive/YarnGPT_naij/final_{epoch}epoch.pt\")\n" + ] + }, + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: '/content/drive/MyDrive/YarnGPT_naij/final_{epoch}epoch.pt'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mcheckpoint\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"/content/drive/MyDrive/YarnGPT_naij/final_{epoch}epoch.pt\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m 
\u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_state_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcheckpoint\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'optimizer_state_dict'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;31m#model.load_state_dict(checkpoint['model_state_dict'])\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/serialization.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(f, map_location, pickle_module, weights_only, mmap, **pickle_load_args)\u001b[0m\n\u001b[1;32m 1317\u001b[0m \u001b[0mpickle_load_args\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"encoding\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"utf-8\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1318\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1319\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0m_open_file_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"rb\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mopened_file\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1320\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_is_zipfile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopened_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1321\u001b[0m \u001b[0;31m# The zipfile reader is going to advance the current file position.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/serialization.py\u001b[0m in \u001b[0;36m_open_file_like\u001b[0;34m(name_or_buffer, mode)\u001b[0m\n\u001b[1;32m 657\u001b[0m \u001b[0;32mdef\u001b[0m 
\u001b[0m_open_file_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 658\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_is_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 659\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_open_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 660\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 661\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m\"w\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/serialization.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, name, mode)\u001b[0m\n\u001b[1;32m 638\u001b[0m \u001b[0;32mclass\u001b[0m \u001b[0m_open_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_opener\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 639\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 640\u001b[0;31m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 641\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 642\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__exit__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '/content/drive/MyDrive/YarnGPT_naij/final_{epoch}epoch.pt'" + ] + } + ], + "source": [ + "\n", + "checkpoint=torch.load(\"/content/drive/MyDrive/YarnGPT_naij/final_1epoch.pt\")\n", + "optimizer.load_state_dict(checkpoint['optimizer_state_dict'])\n", + "#model.load_state_dict(checkpoint['model_state_dict'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eP0TyvltVUpO" + }, + "outputs": [], + "source": [ + "device" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JVpdMTfeKJzL", + "outputId": "53e282a6-80cf-4ee7-a40b-ac94d02a9399" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "482319" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(dataloader)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "background_save": true, + "base_uri": "https://localhost:8080/" + }, + "id": "G1qPorNycNvM", + "outputId": "7d92557d-2086-4769-aa16-db4f40f3db4b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'loss': '2.3553764820098877', 'num_iter': 512, 'lr': 5e-06, 'time': '518.3966403007507 Seconds', 'norm': 0.37109375}\n", + "{'loss': '2.3886661529541016', 'num_iter': 1024, 'lr': 1e-05, 'time': '8.709006786346436 Seconds', 'norm': 0.408203125}\n", + 
"{'loss': '2.3773303031921387', 'num_iter': 1536, 'lr': 1.5e-05, 'time': '7.984138488769531 Seconds', 'norm': 0.390625}\n", + "{'loss': '2.393421173095703', 'num_iter': 2048, 'lr': 2e-05, 'time': '7.874379634857178 Seconds', 'norm': 0.345703125}\n", + "{'loss': '2.3772716522216797', 'num_iter': 2560, 'lr': 2.5e-05, 'time': '8.141849756240845 Seconds', 'norm': 0.34765625}\n", + "{'loss': '2.2777481079101562', 'num_iter': 3072, 'lr': 3e-05, 'time': '8.402084350585938 Seconds', 'norm': 0.376953125}\n", + "{'loss': '2.374420166015625', 'num_iter': 3584, 'lr': 3.5000000000000004e-05, 'time': '8.258236646652222 Seconds', 'norm': 0.34375}\n", + "{'loss': '2.426032066345215', 'num_iter': 4096, 'lr': 4e-05, 'time': '7.905863523483276 Seconds', 'norm': 0.3515625}\n", + "{'loss': '2.38438081741333', 'num_iter': 4608, 'lr': 4.4999999999999996e-05, 'time': '8.373307228088379 Seconds', 'norm': 0.322265625}\n", + "{'loss': '2.4124197959899902', 'num_iter': 5120, 'lr': 5e-05, 'time': '8.07342004776001 Seconds', 'norm': 0.31640625}\n", + "{'loss': '2.353393077850342', 'num_iter': 5632, 'lr': 5.5e-05, 'time': '8.879180669784546 Seconds', 'norm': 0.26953125}\n", + "{'loss': '2.435251235961914', 'num_iter': 6144, 'lr': 6e-05, 'time': '7.773240566253662 Seconds', 'norm': 0.279296875}\n", + "{'loss': '2.3781023025512695', 'num_iter': 6656, 'lr': 6.500000000000001e-05, 'time': '8.137864828109741 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.3647618293762207', 'num_iter': 7168, 'lr': 7.000000000000001e-05, 'time': '8.527109384536743 Seconds', 'norm': 0.22265625}\n", + "{'loss': '2.4120516777038574', 'num_iter': 7680, 'lr': 7.5e-05, 'time': '7.987094879150391 Seconds', 'norm': 0.1982421875}\n", + "{'loss': '2.3994290828704834', 'num_iter': 8192, 'lr': 8e-05, 'time': '8.160203218460083 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.3718833923339844', 'num_iter': 8704, 'lr': 8.5e-05, 'time': '8.707377672195435 Seconds', 'norm': 0.1728515625}\n", + "{'loss': '2.3764662742614746', 
'num_iter': 9216, 'lr': 8.999999999999999e-05, 'time': '8.56162166595459 Seconds', 'norm': 0.1669921875}\n", + "{'loss': '2.3861515522003174', 'num_iter': 9728, 'lr': 9.5e-05, 'time': '8.259408950805664 Seconds', 'norm': 0.16796875}\n", + "{'loss': '2.361534357070923', 'num_iter': 10240, 'lr': 0.0001, 'time': '8.57894253730774 Seconds', 'norm': 0.17578125}\n", + "{'loss': '2.3970224857330322', 'num_iter': 10752, 'lr': 0.000105, 'time': '8.104854822158813 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.3602302074432373', 'num_iter': 11264, 'lr': 0.00011, 'time': '8.286742687225342 Seconds', 'norm': 0.1708984375}\n", + "{'loss': '2.32814621925354', 'num_iter': 11776, 'lr': 0.000115, 'time': '8.555330038070679 Seconds', 'norm': 0.197265625}\n", + "{'loss': '2.3199210166931152', 'num_iter': 12288, 'lr': 0.00012, 'time': '8.61067271232605 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.3607125282287598', 'num_iter': 12800, 'lr': 0.000125, 'time': '8.1532621383667 Seconds', 'norm': 0.1708984375}\n", + "{'loss': '2.4452242851257324', 'num_iter': 13312, 'lr': 0.00013000000000000002, 'time': '7.863337755203247 Seconds', 'norm': 0.1796875}\n", + "{'loss': '2.411501407623291', 'num_iter': 13824, 'lr': 0.000135, 'time': '8.207067966461182 Seconds', 'norm': 0.162109375}\n", + "{'loss': '2.358992099761963', 'num_iter': 14336, 'lr': 0.00014000000000000001, 'time': '8.41010856628418 Seconds', 'norm': 0.1494140625}\n", + "{'loss': '2.3445119857788086', 'num_iter': 14848, 'lr': 0.000145, 'time': '8.07404613494873 Seconds', 'norm': 0.1435546875}\n", + "{'loss': '2.3470990657806396', 'num_iter': 15360, 'lr': 0.00015, 'time': '8.536539316177368 Seconds', 'norm': 0.154296875}\n", + "{'loss': '2.401120185852051', 'num_iter': 15872, 'lr': 0.000155, 'time': '7.948678731918335 Seconds', 'norm': 0.138671875}\n", + "{'loss': '2.3401360511779785', 'num_iter': 16384, 'lr': 0.00016, 'time': '8.829581260681152 Seconds', 'norm': 0.12451171875}\n", + "{'loss': '2.349322557449341', 'num_iter': 
16896, 'lr': 0.000165, 'time': '8.222765922546387 Seconds', 'norm': 0.1435546875}\n", + "{'loss': '2.3633759021759033', 'num_iter': 17408, 'lr': 0.00017, 'time': '8.066200494766235 Seconds', 'norm': 0.138671875}\n", + "{'loss': '2.3978312015533447', 'num_iter': 17920, 'lr': 0.000175, 'time': '7.996991395950317 Seconds', 'norm': 0.1298828125}\n", + "{'loss': '2.347712993621826', 'num_iter': 18432, 'lr': 0.00017999999999999998, 'time': '8.294510841369629 Seconds', 'norm': 0.12451171875}\n", + "{'loss': '2.358757495880127', 'num_iter': 18944, 'lr': 0.000185, 'time': '8.351856231689453 Seconds', 'norm': 0.12109375}\n", + "{'loss': '2.366154909133911', 'num_iter': 19456, 'lr': 0.00019, 'time': '7.99259352684021 Seconds', 'norm': 0.130859375}\n", + "{'loss': '2.449955701828003', 'num_iter': 19968, 'lr': 0.00019500000000000002, 'time': '8.415942907333374 Seconds', 'norm': 0.125}\n", + "{'loss': '2.3736064434051514', 'num_iter': 20480, 'lr': 0.0002, 'time': '8.19329571723938 Seconds', 'norm': 0.12158203125}\n", + "{'loss': '2.3238604068756104', 'num_iter': 20992, 'lr': 0.000205, 'time': '8.75000524520874 Seconds', 'norm': 0.123046875}\n", + "{'loss': '2.380065441131592', 'num_iter': 21504, 'lr': 0.00021, 'time': '8.150588512420654 Seconds', 'norm': 0.1240234375}\n", + "{'loss': '2.3814148902893066', 'num_iter': 22016, 'lr': 0.000215, 'time': '8.162990093231201 Seconds', 'norm': 0.12890625}\n", + "{'loss': '2.3275983333587646', 'num_iter': 22528, 'lr': 0.00022, 'time': '8.741149663925171 Seconds', 'norm': 0.126953125}\n", + "{'loss': '2.397125482559204', 'num_iter': 23040, 'lr': 0.00022500000000000002, 'time': '8.343623399734497 Seconds', 'norm': 0.119140625}\n", + "{'loss': '2.338527202606201', 'num_iter': 23552, 'lr': 0.00023, 'time': '8.187989234924316 Seconds', 'norm': 0.1181640625}\n", + "{'loss': '2.30912446975708', 'num_iter': 24064, 'lr': 0.000235, 'time': '8.868049621582031 Seconds', 'norm': 0.1123046875}\n", + "{'loss': '2.339061975479126', 'num_iter': 24576, 
'lr': 0.00024, 'time': '8.476134300231934 Seconds', 'norm': 0.11572265625}\n", + "{'loss': '2.443168878555298', 'num_iter': 25088, 'lr': 0.000245, 'time': '7.777587652206421 Seconds', 'norm': 0.1259765625}\n", + "{'loss': '2.3255555629730225', 'num_iter': 25600, 'lr': 0.00025, 'time': '8.258908748626709 Seconds', 'norm': 0.1181640625}\n", + "{'loss': '2.411543369293213', 'num_iter': 26112, 'lr': 0.000255, 'time': '8.900785684585571 Seconds', 'norm': 0.11474609375}\n", + "{'loss': '2.3854050636291504', 'num_iter': 26624, 'lr': 0.00026000000000000003, 'time': '8.534614562988281 Seconds', 'norm': 0.11865234375}\n", + "{'loss': '2.331355333328247', 'num_iter': 27136, 'lr': 0.00026500000000000004, 'time': '8.386263608932495 Seconds', 'norm': 0.12353515625}\n", + "{'loss': '2.3668413162231445', 'num_iter': 27648, 'lr': 0.00027, 'time': '8.525294303894043 Seconds', 'norm': 0.111328125}\n", + "{'loss': '2.4455020427703857', 'num_iter': 28160, 'lr': 0.000275, 'time': '7.845079183578491 Seconds', 'norm': 0.115234375}\n", + "{'loss': '2.4223060607910156', 'num_iter': 28672, 'lr': 0.00028000000000000003, 'time': '7.894446134567261 Seconds', 'norm': 0.126953125}\n", + "{'loss': '2.3918445110321045', 'num_iter': 29184, 'lr': 0.000285, 'time': '8.117273092269897 Seconds', 'norm': 0.1162109375}\n", + "{'loss': '2.3779983520507812', 'num_iter': 29696, 'lr': 0.00029, 'time': '8.186065196990967 Seconds', 'norm': 0.1162109375}\n", + "{'loss': '2.398768424987793', 'num_iter': 30208, 'lr': 0.000295, 'time': '7.911287307739258 Seconds', 'norm': 0.1201171875}\n", + "{'loss': '2.3646914958953857', 'num_iter': 30720, 'lr': 0.0003, 'time': '8.44401240348816 Seconds', 'norm': 0.11669921875}\n", + "{'loss': '2.342487335205078', 'num_iter': 31232, 'lr': 0.000305, 'time': '8.41776728630066 Seconds', 'norm': 0.12890625}\n", + "{'loss': '2.39149808883667', 'num_iter': 31744, 'lr': 0.00031, 'time': '8.103184223175049 Seconds', 'norm': 0.11865234375}\n", + "{'loss': '2.329709053039551', 'num_iter': 
32256, 'lr': 0.000315, 'time': '8.342815160751343 Seconds', 'norm': 0.12451171875}\n", + "{'loss': '2.2598955631256104', 'num_iter': 32768, 'lr': 0.00032, 'time': '8.689735412597656 Seconds', 'norm': 0.11181640625}\n", + "{'loss': '2.354614019393921', 'num_iter': 33280, 'lr': 0.00032500000000000004, 'time': '13.412319898605347 Seconds', 'norm': 0.12890625}\n", + "{'loss': '2.3664662837982178', 'num_iter': 33792, 'lr': 0.00033, 'time': '11.629522800445557 Seconds', 'norm': 0.11328125}\n", + "{'loss': '2.390644073486328', 'num_iter': 34304, 'lr': 0.000335, 'time': '8.799498796463013 Seconds', 'norm': 0.1259765625}\n", + "{'loss': '2.380173444747925', 'num_iter': 34816, 'lr': 0.00034, 'time': '8.459708213806152 Seconds', 'norm': 0.1171875}\n", + "{'loss': '2.344499111175537', 'num_iter': 35328, 'lr': 0.000345, 'time': '8.396592617034912 Seconds', 'norm': 0.115234375}\n", + "{'loss': '2.3183677196502686', 'num_iter': 35840, 'lr': 0.00035, 'time': '8.558419466018677 Seconds', 'norm': 0.1181640625}\n", + "{'loss': '2.3765032291412354', 'num_iter': 36352, 'lr': 0.000355, 'time': '8.514953374862671 Seconds', 'norm': 0.12158203125}\n", + "{'loss': '2.403637170791626', 'num_iter': 36864, 'lr': 0.00035999999999999997, 'time': '8.656627178192139 Seconds', 'norm': 0.1181640625}\n", + "{'loss': '2.305107593536377', 'num_iter': 37376, 'lr': 0.000365, 'time': '8.978270530700684 Seconds', 'norm': 0.126953125}\n", + "{'loss': '2.3270010948181152', 'num_iter': 37888, 'lr': 0.00037, 'time': '8.611132383346558 Seconds', 'norm': 0.12255859375}\n", + "{'loss': '2.4043121337890625', 'num_iter': 38400, 'lr': 0.000375, 'time': '8.12365198135376 Seconds', 'norm': 0.11669921875}\n", + "{'loss': '2.3827450275421143', 'num_iter': 38912, 'lr': 0.00038, 'time': '8.3145272731781 Seconds', 'norm': 0.1181640625}\n", + "{'loss': '2.455231189727783', 'num_iter': 39424, 'lr': 0.00038500000000000003, 'time': '8.034704685211182 Seconds', 'norm': 0.130859375}\n", + "{'loss': '2.401585340499878', 
'num_iter': 39936, 'lr': 0.00039000000000000005, 'time': '8.298332691192627 Seconds', 'norm': 0.1435546875}\n", + "{'loss': '2.282731056213379', 'num_iter': 40448, 'lr': 0.000395, 'time': '8.60951018333435 Seconds', 'norm': 0.12255859375}\n", + "{'loss': '2.353754997253418', 'num_iter': 40960, 'lr': 0.0004, 'time': '8.257336854934692 Seconds', 'norm': 0.13671875}\n", + "{'loss': '2.3511581420898438', 'num_iter': 41472, 'lr': 0.00040500000000000003, 'time': '8.564049482345581 Seconds', 'norm': 0.12109375}\n", + "{'loss': '2.41495418548584', 'num_iter': 41984, 'lr': 0.00041, 'time': '8.119805574417114 Seconds', 'norm': 0.142578125}\n", + "{'loss': '2.4552862644195557', 'num_iter': 42496, 'lr': 0.000415, 'time': '7.849672317504883 Seconds', 'norm': 0.1357421875}\n", + "{'loss': '2.35587477684021', 'num_iter': 43008, 'lr': 0.00042, 'time': '8.433326005935669 Seconds', 'norm': 0.125}\n", + "{'loss': '2.354590654373169', 'num_iter': 43520, 'lr': 0.000425, 'time': '8.200602531433105 Seconds', 'norm': 0.130859375}\n", + "{'loss': '2.2961995601654053', 'num_iter': 44032, 'lr': 0.00043, 'time': '9.298598527908325 Seconds', 'norm': 0.1416015625}\n", + "{'loss': '2.3308427333831787', 'num_iter': 44544, 'lr': 0.000435, 'time': '8.310254335403442 Seconds', 'norm': 0.1298828125}\n", + "{'loss': '2.417625904083252', 'num_iter': 45056, 'lr': 0.00044, 'time': '8.514857053756714 Seconds', 'norm': 0.1474609375}\n", + "{'loss': '2.386002540588379', 'num_iter': 45568, 'lr': 0.00044500000000000003, 'time': '8.684640645980835 Seconds', 'norm': 0.138671875}\n", + "{'loss': '2.357957363128662', 'num_iter': 46080, 'lr': 0.00045000000000000004, 'time': '8.349209785461426 Seconds', 'norm': 0.14453125}\n", + "{'loss': '2.3467557430267334', 'num_iter': 46592, 'lr': 0.000455, 'time': '8.250343322753906 Seconds', 'norm': 0.1279296875}\n", + "{'loss': '2.419018507003784', 'num_iter': 47104, 'lr': 0.00046, 'time': '7.75583815574646 Seconds', 'norm': 0.15625}\n", + "{'loss': '2.3793187141418457', 
'num_iter': 47616, 'lr': 0.000465, 'time': '8.037800073623657 Seconds', 'norm': 0.1318359375}\n", + "{'loss': '2.355764150619507', 'num_iter': 48128, 'lr': 0.00047, 'time': '8.193324089050293 Seconds', 'norm': 0.1484375}\n", + "{'loss': '2.386695623397827', 'num_iter': 48640, 'lr': 0.000475, 'time': '8.205627918243408 Seconds', 'norm': 0.134765625}\n", + "{'loss': '2.3478472232818604', 'num_iter': 49152, 'lr': 0.00048, 'time': '8.32002305984497 Seconds', 'norm': 0.140625}\n", + "{'loss': '2.406860589981079', 'num_iter': 49664, 'lr': 0.00048499999999999997, 'time': '7.975932598114014 Seconds', 'norm': 0.1494140625}\n", + "{'loss': '2.3558359146118164', 'num_iter': 50176, 'lr': 0.00049, 'time': '8.38038969039917 Seconds', 'norm': 0.1376953125}\n", + "{'loss': '2.3090360164642334', 'num_iter': 50688, 'lr': 0.000495, 'time': '8.853670597076416 Seconds', 'norm': 0.1513671875}\n", + "{'loss': '2.355339527130127', 'num_iter': 51200, 'lr': 0.0005, 'time': '8.346829175949097 Seconds', 'norm': 0.1474609375}\n", + "{'loss': '2.356957197189331', 'num_iter': 51712, 'lr': 0.000505, 'time': '7.844237327575684 Seconds', 'norm': 0.150390625}\n", + "{'loss': '2.4549560546875', 'num_iter': 52224, 'lr': 0.00051, 'time': '8.24063515663147 Seconds', 'norm': 0.154296875}\n", + "{'loss': '2.3761038780212402', 'num_iter': 52736, 'lr': 0.000515, 'time': '8.501140356063843 Seconds', 'norm': 0.171875}\n", + "{'loss': '2.342712163925171', 'num_iter': 53248, 'lr': 0.0005200000000000001, 'time': '8.617476463317871 Seconds', 'norm': 0.1298828125}\n", + "{'loss': '2.3526930809020996', 'num_iter': 53760, 'lr': 0.0005250000000000001, 'time': '8.536534070968628 Seconds', 'norm': 0.1533203125}\n", + "{'loss': '2.416839838027954', 'num_iter': 54272, 'lr': 0.0005300000000000001, 'time': '8.195648193359375 Seconds', 'norm': 0.140625}\n", + "{'loss': '2.3539321422576904', 'num_iter': 54784, 'lr': 0.000535, 'time': '8.520131826400757 Seconds', 'norm': 0.1552734375}\n", + "{'loss': '2.3183279037475586', 
'num_iter': 55296, 'lr': 0.00054, 'time': '8.82524561882019 Seconds', 'norm': 0.1611328125}\n", + "{'loss': '2.322287082672119', 'num_iter': 55808, 'lr': 0.000545, 'time': '9.179141998291016 Seconds', 'norm': 0.1630859375}\n", + "{'loss': '2.35573673248291', 'num_iter': 56320, 'lr': 0.00055, 'time': '8.099822282791138 Seconds', 'norm': 0.14453125}\n", + "{'loss': '2.3199551105499268', 'num_iter': 56832, 'lr': 0.000555, 'time': '8.51465392112732 Seconds', 'norm': 0.1650390625}\n", + "{'loss': '2.3757176399230957', 'num_iter': 57344, 'lr': 0.0005600000000000001, 'time': '8.225489377975464 Seconds', 'norm': 0.13671875}\n", + "{'loss': '2.389883279800415', 'num_iter': 57856, 'lr': 0.000565, 'time': '8.298118829727173 Seconds', 'norm': 0.146484375}\n", + "{'loss': '2.4201955795288086', 'num_iter': 58368, 'lr': 0.00057, 'time': '8.417352437973022 Seconds', 'norm': 0.1494140625}\n", + "{'loss': '2.3758251667022705', 'num_iter': 58880, 'lr': 0.000575, 'time': '8.668560981750488 Seconds', 'norm': 0.1484375}\n", + "{'loss': '2.3664801120758057', 'num_iter': 59392, 'lr': 0.00058, 'time': '8.312018156051636 Seconds', 'norm': 0.1416015625}\n", + "{'loss': '2.445268392562866', 'num_iter': 59904, 'lr': 0.000585, 'time': '7.928400993347168 Seconds', 'norm': 0.17578125}\n", + "{'loss': '2.4117512702941895', 'num_iter': 60416, 'lr': 0.00059, 'time': '8.192112684249878 Seconds', 'norm': 0.15234375}\n", + "{'loss': '2.3946962356567383', 'num_iter': 60928, 'lr': 0.0005949999999999999, 'time': '8.116294384002686 Seconds', 'norm': 0.15625}\n", + "{'loss': '2.414203405380249', 'num_iter': 61440, 'lr': 0.0006, 'time': '8.06369686126709 Seconds', 'norm': 0.1630859375}\n", + "{'loss': '2.26425838470459', 'num_iter': 61952, 'lr': 0.000605, 'time': '8.723063468933105 Seconds', 'norm': 0.171875}\n", + "{'loss': '2.2576560974121094', 'num_iter': 62464, 'lr': 0.00061, 'time': '8.730216979980469 Seconds', 'norm': 0.150390625}\n", + "{'loss': '2.406400680541992', 'num_iter': 62976, 'lr': 0.000615, 
'time': '8.161418437957764 Seconds', 'norm': 0.1884765625}\n", + "{'loss': '2.350969076156616', 'num_iter': 63488, 'lr': 0.00062, 'time': '8.821264028549194 Seconds', 'norm': 0.150390625}\n", + "{'loss': '2.3164260387420654', 'num_iter': 64000, 'lr': 0.000625, 'time': '8.436145305633545 Seconds', 'norm': 0.17578125}\n", + "{'loss': '2.3948323726654053', 'num_iter': 64512, 'lr': 0.00063, 'time': '8.28288459777832 Seconds', 'norm': 0.1845703125}\n", + "{'loss': '2.3741116523742676', 'num_iter': 65024, 'lr': 0.000635, 'time': '8.511958599090576 Seconds', 'norm': 0.1611328125}\n", + "{'loss': '2.283215045928955', 'num_iter': 65536, 'lr': 0.00064, 'time': '8.543206930160522 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.367689371109009', 'num_iter': 66048, 'lr': 0.0006450000000000001, 'time': '13.543059349060059 Seconds', 'norm': 0.1650390625}\n", + "{'loss': '2.338901996612549', 'num_iter': 66560, 'lr': 0.0006500000000000001, 'time': '9.350599765777588 Seconds', 'norm': 0.1884765625}\n", + "{'loss': '2.412482500076294', 'num_iter': 67072, 'lr': 0.0006550000000000001, 'time': '9.60750937461853 Seconds', 'norm': 0.1728515625}\n", + "{'loss': '2.3718104362487793', 'num_iter': 67584, 'lr': 0.00066, 'time': '8.743152856826782 Seconds', 'norm': 0.1591796875}\n", + "{'loss': '2.3709359169006348', 'num_iter': 68096, 'lr': 0.000665, 'time': '8.349513530731201 Seconds', 'norm': 0.16015625}\n", + "{'loss': '2.3730757236480713', 'num_iter': 68608, 'lr': 0.00067, 'time': '8.790771484375 Seconds', 'norm': 0.1640625}\n", + "{'loss': '2.3677053451538086', 'num_iter': 69120, 'lr': 0.000675, 'time': '8.449764728546143 Seconds', 'norm': 0.1767578125}\n", + "{'loss': '2.3063783645629883', 'num_iter': 69632, 'lr': 0.00068, 'time': '8.788265466690063 Seconds', 'norm': 0.15625}\n", + "{'loss': '2.377204418182373', 'num_iter': 70144, 'lr': 0.0006850000000000001, 'time': '8.464972257614136 Seconds', 'norm': 0.1796875}\n", + "{'loss': '2.349269151687622', 'num_iter': 70656, 'lr': 0.00069, 
'time': '8.85305380821228 Seconds', 'norm': 0.1572265625}\n", + "{'loss': '2.3833532333374023', 'num_iter': 71168, 'lr': 0.000695, 'time': '8.16352915763855 Seconds', 'norm': 0.2314453125}\n", + "{'loss': '2.3449063301086426', 'num_iter': 71680, 'lr': 0.0007, 'time': '8.155242919921875 Seconds', 'norm': 0.162109375}\n", + "{'loss': '2.3762624263763428', 'num_iter': 72192, 'lr': 0.000705, 'time': '7.953240633010864 Seconds', 'norm': 0.1943359375}\n", + "{'loss': '2.3810596466064453', 'num_iter': 72704, 'lr': 0.00071, 'time': '8.464589595794678 Seconds', 'norm': 0.15625}\n", + "{'loss': '2.3786823749542236', 'num_iter': 73216, 'lr': 0.000715, 'time': '8.562390327453613 Seconds', 'norm': 0.185546875}\n", + "{'loss': '2.3624539375305176', 'num_iter': 73728, 'lr': 0.0007199999999999999, 'time': '8.15932321548462 Seconds', 'norm': 0.1787109375}\n", + "{'loss': '2.3338747024536133', 'num_iter': 74240, 'lr': 0.000725, 'time': '8.17479920387268 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.3889710903167725', 'num_iter': 74752, 'lr': 0.00073, 'time': '8.568461179733276 Seconds', 'norm': 0.1982421875}\n", + "{'loss': '2.4609615802764893', 'num_iter': 75264, 'lr': 0.000735, 'time': '7.998297214508057 Seconds', 'norm': 0.1728515625}\n", + "{'loss': '2.3587160110473633', 'num_iter': 75776, 'lr': 0.00074, 'time': '8.368019104003906 Seconds', 'norm': 0.1982421875}\n", + "{'loss': '2.4108667373657227', 'num_iter': 76288, 'lr': 0.000745, 'time': '8.769832134246826 Seconds', 'norm': 0.1962890625}\n", + "{'loss': '2.421757698059082', 'num_iter': 76800, 'lr': 0.00075, 'time': '7.9297332763671875 Seconds', 'norm': 0.1787109375}\n", + "{'loss': '2.376873731613159', 'num_iter': 77312, 'lr': 0.000755, 'time': '8.204802513122559 Seconds', 'norm': 0.255859375}\n", + "{'loss': '2.3318161964416504', 'num_iter': 77824, 'lr': 0.00076, 'time': '8.489221096038818 Seconds', 'norm': 0.224609375}\n", + "{'loss': '2.3669848442077637', 'num_iter': 78336, 'lr': 0.0007650000000000001, 'time': 
'10.954768419265747 Seconds', 'norm': 0.2060546875}\n", + "{'loss': '2.349841356277466', 'num_iter': 78848, 'lr': 0.0007700000000000001, 'time': '8.276170492172241 Seconds', 'norm': 0.185546875}\n", + "{'loss': '2.4127888679504395', 'num_iter': 79360, 'lr': 0.0007750000000000001, 'time': '8.089359998703003 Seconds', 'norm': 0.201171875}\n", + "{'loss': '2.3530919551849365', 'num_iter': 79872, 'lr': 0.0007800000000000001, 'time': '8.18125319480896 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.392930030822754', 'num_iter': 80384, 'lr': 0.000785, 'time': '8.027729272842407 Seconds', 'norm': 0.17578125}\n", + "{'loss': '2.32802414894104', 'num_iter': 80896, 'lr': 0.00079, 'time': '8.3948974609375 Seconds', 'norm': 0.1787109375}\n", + "{'loss': '2.3464791774749756', 'num_iter': 81408, 'lr': 0.000795, 'time': '8.842905521392822 Seconds', 'norm': 0.1943359375}\n", + "{'loss': '2.4307701587677', 'num_iter': 81920, 'lr': 0.0008, 'time': '8.085870504379272 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.3958780765533447', 'num_iter': 82432, 'lr': 0.000805, 'time': '8.237189531326294 Seconds', 'norm': 0.1953125}\n", + "{'loss': '2.401933431625366', 'num_iter': 82944, 'lr': 0.0008100000000000001, 'time': '7.952329158782959 Seconds', 'norm': 0.18359375}\n", + "{'loss': '2.4208853244781494', 'num_iter': 83456, 'lr': 0.000815, 'time': '8.02254343032837 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.328188896179199', 'num_iter': 83968, 'lr': 0.00082, 'time': '8.682481527328491 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.3519139289855957', 'num_iter': 84480, 'lr': 0.000825, 'time': '8.804353713989258 Seconds', 'norm': 0.2333984375}\n", + "{'loss': '2.3379065990448', 'num_iter': 84992, 'lr': 0.00083, 'time': '8.357772588729858 Seconds', 'norm': 0.15625}\n", + "{'loss': '2.38081955909729', 'num_iter': 85504, 'lr': 0.000835, 'time': '8.997489929199219 Seconds', 'norm': 0.185546875}\n", + "{'loss': '2.38285756111145', 'num_iter': 86016, 'lr': 0.00084, 'time': 
'8.883950233459473 Seconds', 'norm': 0.201171875}\n", + "{'loss': '2.3775012493133545', 'num_iter': 86528, 'lr': 0.0008449999999999999, 'time': '8.475273370742798 Seconds', 'norm': 0.19140625}\n", + "{'loss': '2.3875529766082764', 'num_iter': 87040, 'lr': 0.00085, 'time': '8.420514345169067 Seconds', 'norm': 0.197265625}\n", + "{'loss': '2.3699862957000732', 'num_iter': 87552, 'lr': 0.000855, 'time': '8.80019211769104 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.379849433898926', 'num_iter': 88064, 'lr': 0.00086, 'time': '8.469244241714478 Seconds', 'norm': 0.23046875}\n", + "{'loss': '2.368663787841797', 'num_iter': 88576, 'lr': 0.000865, 'time': '8.865850687026978 Seconds', 'norm': 0.173828125}\n", + "{'loss': '2.324857234954834', 'num_iter': 89088, 'lr': 0.00087, 'time': '8.564958095550537 Seconds', 'norm': 0.177734375}\n", + "{'loss': '2.402688503265381', 'num_iter': 89600, 'lr': 0.000875, 'time': '8.380212545394897 Seconds', 'norm': 0.2080078125}\n", + "{'loss': '2.4041521549224854', 'num_iter': 90112, 'lr': 0.00088, 'time': '8.555177927017212 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.399150848388672', 'num_iter': 90624, 'lr': 0.000885, 'time': '8.415316343307495 Seconds', 'norm': 0.17578125}\n", + "{'loss': '2.3180184364318848', 'num_iter': 91136, 'lr': 0.0008900000000000001, 'time': '8.698083877563477 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.324018716812134', 'num_iter': 91648, 'lr': 0.0008950000000000001, 'time': '8.570551872253418 Seconds', 'norm': 0.1923828125}\n", + "{'loss': '2.3361947536468506', 'num_iter': 92160, 'lr': 0.0009000000000000001, 'time': '8.687859296798706 Seconds', 'norm': 0.1953125}\n", + "{'loss': '2.3590660095214844', 'num_iter': 92672, 'lr': 0.0009050000000000001, 'time': '8.587313652038574 Seconds', 'norm': 0.24609375}\n", + "{'loss': '2.4278576374053955', 'num_iter': 93184, 'lr': 0.00091, 'time': '8.3777494430542 Seconds', 'norm': 0.2158203125}\n", + "{'loss': '2.410914421081543', 'num_iter': 93696, 'lr': 
0.000915, 'time': '8.557882308959961 Seconds', 'norm': 0.1689453125}\n", + "{'loss': '2.352102279663086', 'num_iter': 94208, 'lr': 0.00092, 'time': '8.737375974655151 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.437932252883911', 'num_iter': 94720, 'lr': 0.000925, 'time': '8.788272857666016 Seconds', 'norm': 0.1953125}\n", + "{'loss': '2.363410472869873', 'num_iter': 95232, 'lr': 0.00093, 'time': '8.723515510559082 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.354689359664917', 'num_iter': 95744, 'lr': 0.0009350000000000001, 'time': '8.843135595321655 Seconds', 'norm': 0.2080078125}\n", + "{'loss': '2.3388495445251465', 'num_iter': 96256, 'lr': 0.00094, 'time': '8.968782186508179 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.373152494430542', 'num_iter': 96768, 'lr': 0.000945, 'time': '8.660739183425903 Seconds', 'norm': 0.2265625}\n", + "{'loss': '2.4144287109375', 'num_iter': 97280, 'lr': 0.00095, 'time': '8.257589340209961 Seconds', 'norm': 0.296875}\n", + "{'loss': '2.394932746887207', 'num_iter': 97792, 'lr': 0.000955, 'time': '8.411039352416992 Seconds', 'norm': 0.29296875}\n", + "{'loss': '2.364985227584839', 'num_iter': 98304, 'lr': 0.00096, 'time': '8.48658537864685 Seconds', 'norm': 0.310546875}\n", + "{'loss': '2.3635356426239014', 'num_iter': 98816, 'lr': 0.000965, 'time': '13.614242315292358 Seconds', 'norm': 0.3046875}\n", + "{'loss': '2.4077939987182617', 'num_iter': 99328, 'lr': 0.0009699999999999999, 'time': '8.797057628631592 Seconds', 'norm': 0.26171875}\n", + "{'loss': '2.451826810836792', 'num_iter': 99840, 'lr': 0.000975, 'time': '8.936829328536987 Seconds', 'norm': 0.26171875}\n", + "{'loss': '2.3354451656341553', 'num_iter': 100352, 'lr': 0.00098, 'time': '8.79259181022644 Seconds', 'norm': 0.2265625}\n", + "{'loss': '2.384429931640625', 'num_iter': 100864, 'lr': 0.000985, 'time': '8.493739128112793 Seconds', 'norm': 0.259765625}\n", + "{'loss': '2.4137513637542725', 'num_iter': 101376, 'lr': 0.00099, 'time': 
'8.348110914230347 Seconds', 'norm': 0.2578125}\n", + "{'loss': '2.4221231937408447', 'num_iter': 101888, 'lr': 0.000995, 'time': '7.8091583251953125 Seconds', 'norm': 0.30859375}\n", + "{'loss': '2.283956289291382', 'num_iter': 102400, 'lr': 0.001, 'time': '8.540755033493042 Seconds', 'norm': 0.26171875}\n", + "{'loss': '2.369210958480835', 'num_iter': 102912, 'lr': 0.001, 'time': '8.566282510757446 Seconds', 'norm': 0.236328125}\n", + "{'loss': '2.3665859699249268', 'num_iter': 103424, 'lr': 0.001, 'time': '8.518738746643066 Seconds', 'norm': 0.2431640625}\n", + "{'loss': '2.358678102493286', 'num_iter': 103936, 'lr': 0.001, 'time': '8.357083559036255 Seconds', 'norm': 0.26171875}\n", + "{'loss': '2.398306131362915', 'num_iter': 104448, 'lr': 0.001, 'time': '8.289208173751831 Seconds', 'norm': 0.2294921875}\n", + "{'loss': '2.412965774536133', 'num_iter': 104960, 'lr': 0.001, 'time': '8.162434577941895 Seconds', 'norm': 0.21875}\n", + "{'loss': '2.374194622039795', 'num_iter': 105472, 'lr': 0.001, 'time': '9.230849504470825 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.4285941123962402', 'num_iter': 105984, 'lr': 0.001, 'time': '8.201667547225952 Seconds', 'norm': 0.2470703125}\n", + "{'loss': '2.3540616035461426', 'num_iter': 106496, 'lr': 0.001, 'time': '8.346999645233154 Seconds', 'norm': 0.2275390625}\n", + "{'loss': '2.410151481628418', 'num_iter': 107008, 'lr': 0.001, 'time': '7.925924062728882 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.350382089614868', 'num_iter': 107520, 'lr': 0.001, 'time': '8.304706335067749 Seconds', 'norm': 0.216796875}\n", + "{'loss': '2.348229169845581', 'num_iter': 108032, 'lr': 0.001, 'time': '8.805323600769043 Seconds', 'norm': 0.2158203125}\n", + "{'loss': '2.388207197189331', 'num_iter': 108544, 'lr': 0.001, 'time': '8.578596830368042 Seconds', 'norm': 0.259765625}\n", + "{'loss': '2.381629705429077', 'num_iter': 109056, 'lr': 0.001, 'time': '8.356505870819092 Seconds', 'norm': 0.2177734375}\n", + "{'loss': 
'2.3828892707824707', 'num_iter': 109568, 'lr': 0.001, 'time': '8.564954996109009 Seconds', 'norm': 0.203125}\n", + "{'loss': '2.3729209899902344', 'num_iter': 110080, 'lr': 0.001, 'time': '8.12981128692627 Seconds', 'norm': 0.2099609375}\n", + "{'loss': '2.367323875427246', 'num_iter': 110592, 'lr': 0.001, 'time': '8.300622701644897 Seconds', 'norm': 0.2294921875}\n", + "{'loss': '2.4512875080108643', 'num_iter': 111104, 'lr': 0.001, 'time': '8.38793659210205 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.423461675643921', 'num_iter': 111616, 'lr': 0.001, 'time': '8.16628909111023 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.365154266357422', 'num_iter': 112128, 'lr': 0.001, 'time': '8.354376792907715 Seconds', 'norm': 0.2060546875}\n", + "{'loss': '2.348111391067505', 'num_iter': 112640, 'lr': 0.001, 'time': '8.686992645263672 Seconds', 'norm': 0.1953125}\n", + "{'loss': '2.3756349086761475', 'num_iter': 113152, 'lr': 0.001, 'time': '8.304920196533203 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.3925533294677734', 'num_iter': 113664, 'lr': 0.001, 'time': '8.424681425094604 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.3529117107391357', 'num_iter': 114176, 'lr': 0.001, 'time': '8.123831033706665 Seconds', 'norm': 0.1728515625}\n", + "{'loss': '2.3032734394073486', 'num_iter': 114688, 'lr': 0.001, 'time': '8.51890754699707 Seconds', 'norm': 0.19140625}\n", + "{'loss': '2.4193785190582275', 'num_iter': 115200, 'lr': 0.001, 'time': '7.907999515533447 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.344939947128296', 'num_iter': 115712, 'lr': 0.001, 'time': '8.558756589889526 Seconds', 'norm': 0.1962890625}\n", + "{'loss': '2.370755672454834', 'num_iter': 116224, 'lr': 0.001, 'time': '8.544370651245117 Seconds', 'norm': 0.201171875}\n", + "{'loss': '2.377723217010498', 'num_iter': 116736, 'lr': 0.001, 'time': '8.407794713973999 Seconds', 'norm': 0.18359375}\n", + "{'loss': '2.316657781600952', 'num_iter': 117248, 'lr': 0.001, 'time': '8.972618818283081 
Seconds', 'norm': 0.2265625}\n", + "{'loss': '2.4240846633911133', 'num_iter': 117760, 'lr': 0.001, 'time': '8.14678406715393 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.4044954776763916', 'num_iter': 118272, 'lr': 0.001, 'time': '8.106743335723877 Seconds', 'norm': 0.1591796875}\n", + "{'loss': '2.344862222671509', 'num_iter': 118784, 'lr': 0.001, 'time': '8.248379468917847 Seconds', 'norm': 0.1943359375}\n", + "{'loss': '2.4241511821746826', 'num_iter': 119296, 'lr': 0.001, 'time': '8.432803630828857 Seconds', 'norm': 0.181640625}\n", + "{'loss': '2.3621268272399902', 'num_iter': 119808, 'lr': 0.001, 'time': '8.17098069190979 Seconds', 'norm': 0.19140625}\n", + "{'loss': '2.3872365951538086', 'num_iter': 120320, 'lr': 0.001, 'time': '8.527016639709473 Seconds', 'norm': 0.18359375}\n", + "{'loss': '2.3484396934509277', 'num_iter': 120832, 'lr': 0.001, 'time': '8.368358612060547 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.35953950881958', 'num_iter': 121344, 'lr': 0.001, 'time': '8.75240683555603 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.3780829906463623', 'num_iter': 121856, 'lr': 0.001, 'time': '7.965952396392822 Seconds', 'norm': 0.21484375}\n", + "{'loss': '2.4046552181243896', 'num_iter': 122368, 'lr': 0.001, 'time': '7.968393087387085 Seconds', 'norm': 0.25}\n", + "{'loss': '2.4121322631835938', 'num_iter': 122880, 'lr': 0.001, 'time': '10.597578287124634 Seconds', 'norm': 0.26171875}\n", + "{'loss': '2.3784353733062744', 'num_iter': 123392, 'lr': 0.001, 'time': '8.244983196258545 Seconds', 'norm': 0.283203125}\n", + "{'loss': '2.323871374130249', 'num_iter': 123904, 'lr': 0.001, 'time': '8.678279161453247 Seconds', 'norm': 0.2255859375}\n", + "{'loss': '2.4406731128692627', 'num_iter': 124416, 'lr': 0.001, 'time': '8.40961766242981 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.324458122253418', 'num_iter': 124928, 'lr': 0.001, 'time': '8.549541234970093 Seconds', 'norm': 0.21875}\n", + "{'loss': '2.3017566204071045', 'num_iter': 
125440, 'lr': 0.001, 'time': '8.60848355293274 Seconds', 'norm': 0.20703125}\n", + "{'loss': '2.3696084022521973', 'num_iter': 125952, 'lr': 0.001, 'time': '8.499809741973877 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.3843212127685547', 'num_iter': 126464, 'lr': 0.001, 'time': '8.224026203155518 Seconds', 'norm': 0.2353515625}\n", + "{'loss': '2.387418031692505', 'num_iter': 126976, 'lr': 0.001, 'time': '8.440656423568726 Seconds', 'norm': 0.19921875}\n", + "{'loss': '2.398210048675537', 'num_iter': 127488, 'lr': 0.001, 'time': '8.452331781387329 Seconds', 'norm': 0.19921875}\n", + "{'loss': '2.362698554992676', 'num_iter': 128000, 'lr': 0.001, 'time': '8.275820016860962 Seconds', 'norm': 0.234375}\n", + "{'loss': '2.375330924987793', 'num_iter': 128512, 'lr': 0.001, 'time': '9.05879259109497 Seconds', 'norm': 0.21484375}\n", + "{'loss': '2.382153272628784', 'num_iter': 129024, 'lr': 0.001, 'time': '8.158925533294678 Seconds', 'norm': 0.22265625}\n", + "{'loss': '2.329805612564087', 'num_iter': 129536, 'lr': 0.001, 'time': '8.21394944190979 Seconds', 'norm': 0.1923828125}\n", + "{'loss': '2.4287467002868652', 'num_iter': 130048, 'lr': 0.001, 'time': '7.8217291831970215 Seconds', 'norm': 0.19140625}\n", + "{'loss': '2.3117587566375732', 'num_iter': 130560, 'lr': 0.001, 'time': '8.823063373565674 Seconds', 'norm': 0.23046875}\n", + "{'loss': '2.4591474533081055', 'num_iter': 131072, 'lr': 0.001, 'time': '8.120489835739136 Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.382108688354492', 'num_iter': 131584, 'lr': 0.001, 'time': '13.287187814712524 Seconds', 'norm': 0.1982421875}\n", + "{'loss': '2.382751941680908', 'num_iter': 132096, 'lr': 0.001, 'time': '8.829148292541504 Seconds', 'norm': 0.263671875}\n", + "{'loss': '2.4449405670166016', 'num_iter': 132608, 'lr': 0.001, 'time': '8.512131452560425 Seconds', 'norm': 0.216796875}\n", + "{'loss': '2.3751182556152344', 'num_iter': 133120, 'lr': 0.001, 'time': '8.991081714630127 Seconds', 'norm': 
0.291015625}\n", + "{'loss': '2.4070489406585693', 'num_iter': 133632, 'lr': 0.001, 'time': '7.952652454376221 Seconds', 'norm': 0.2412109375}\n", + "{'loss': '2.391798734664917', 'num_iter': 134144, 'lr': 0.001, 'time': '8.443435430526733 Seconds', 'norm': 0.185546875}\n", + "{'loss': '2.4219603538513184', 'num_iter': 134656, 'lr': 0.001, 'time': '8.178690433502197 Seconds', 'norm': 0.2294921875}\n", + "{'loss': '2.346550941467285', 'num_iter': 135168, 'lr': 0.001, 'time': '8.336941957473755 Seconds', 'norm': 0.2421875}\n", + "{'loss': '2.4325077533721924', 'num_iter': 135680, 'lr': 0.001, 'time': '8.335406064987183 Seconds', 'norm': 0.21875}\n", + "{'loss': '2.3842790126800537', 'num_iter': 136192, 'lr': 0.001, 'time': '8.842617988586426 Seconds', 'norm': 0.2109375}\n", + "{'loss': '2.3502941131591797', 'num_iter': 136704, 'lr': 0.001, 'time': '8.26529049873352 Seconds', 'norm': 0.1962890625}\n", + "{'loss': '2.364230155944824', 'num_iter': 137216, 'lr': 0.001, 'time': '8.576199293136597 Seconds', 'norm': 0.232421875}\n", + "{'loss': '2.3840322494506836', 'num_iter': 137728, 'lr': 0.001, 'time': '8.738868474960327 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.366302013397217', 'num_iter': 138240, 'lr': 0.001, 'time': '8.898669004440308 Seconds', 'norm': 0.216796875}\n", + "{'loss': '2.316016435623169', 'num_iter': 138752, 'lr': 0.001, 'time': '8.982850790023804 Seconds', 'norm': 0.2255859375}\n", + "{'loss': '2.328399896621704', 'num_iter': 139264, 'lr': 0.001, 'time': '8.698952913284302 Seconds', 'norm': 0.26953125}\n", + "{'loss': '2.3097548484802246', 'num_iter': 139776, 'lr': 0.001, 'time': '9.314266920089722 Seconds', 'norm': 0.171875}\n", + "{'loss': '2.4194869995117188', 'num_iter': 140288, 'lr': 0.001, 'time': '8.258531093597412 Seconds', 'norm': 0.2197265625}\n", + "{'loss': '2.2808616161346436', 'num_iter': 140800, 'lr': 0.001, 'time': '9.373393774032593 Seconds', 'norm': 0.1650390625}\n", + "{'loss': '2.441530227661133', 'num_iter': 141312, 'lr': 
0.001, 'time': '8.108644247055054 Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.4203357696533203', 'num_iter': 141824, 'lr': 0.001, 'time': '8.070127964019775 Seconds', 'norm': 0.2080078125}\n", + "{'loss': '2.366215467453003', 'num_iter': 142336, 'lr': 0.001, 'time': '8.438155889511108 Seconds', 'norm': 0.203125}\n", + "{'loss': '2.409937858581543', 'num_iter': 142848, 'lr': 0.001, 'time': '8.398412227630615 Seconds', 'norm': 0.224609375}\n", + "{'loss': '2.389439344406128', 'num_iter': 143360, 'lr': 0.001, 'time': '8.435163497924805 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.370175838470459', 'num_iter': 143872, 'lr': 0.001, 'time': '8.442939043045044 Seconds', 'norm': 0.2421875}\n", + "{'loss': '2.383334159851074', 'num_iter': 144384, 'lr': 0.001, 'time': '8.447250366210938 Seconds', 'norm': 0.2265625}\n", + "{'loss': '2.3789870738983154', 'num_iter': 144896, 'lr': 0.001, 'time': '8.169019222259521 Seconds', 'norm': 0.2578125}\n", + "{'loss': '2.378997564315796', 'num_iter': 145408, 'lr': 0.001, 'time': '8.60593843460083 Seconds', 'norm': 0.3046875}\n", + "{'loss': '2.39009428024292', 'num_iter': 145920, 'lr': 0.001, 'time': '8.239660024642944 Seconds', 'norm': 0.267578125}\n", + "{'loss': '2.4134621620178223', 'num_iter': 146432, 'lr': 0.001, 'time': '8.078928470611572 Seconds', 'norm': 0.26171875}\n", + "{'loss': '2.400651454925537', 'num_iter': 146944, 'lr': 0.001, 'time': '8.288937091827393 Seconds', 'norm': 0.234375}\n", + "{'loss': '2.436373472213745', 'num_iter': 147456, 'lr': 0.001, 'time': '8.20724606513977 Seconds', 'norm': 0.25}\n", + "{'loss': '2.374776840209961', 'num_iter': 147968, 'lr': 0.001, 'time': '8.152068614959717 Seconds', 'norm': 0.2734375}\n", + "{'loss': '2.3640079498291016', 'num_iter': 148480, 'lr': 0.001, 'time': '8.602315902709961 Seconds', 'norm': 0.2119140625}\n", + "{'loss': '2.280787229537964', 'num_iter': 148992, 'lr': 0.001, 'time': '8.974380016326904 Seconds', 'norm': 0.2021484375}\n", + "{'loss': 
'2.3305742740631104', 'num_iter': 149504, 'lr': 0.001, 'time': '8.470110177993774 Seconds', 'norm': 0.2158203125}\n", + "{'loss': '2.4098970890045166', 'num_iter': 150016, 'lr': 0.001, 'time': '8.090431213378906 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.428356885910034', 'num_iter': 150528, 'lr': 0.001, 'time': '8.086579322814941 Seconds', 'norm': 0.2197265625}\n", + "{'loss': '2.4177095890045166', 'num_iter': 151040, 'lr': 0.001, 'time': '8.110919713973999 Seconds', 'norm': 0.1982421875}\n", + "{'loss': '2.3164119720458984', 'num_iter': 151552, 'lr': 0.001, 'time': '8.77794098854065 Seconds', 'norm': 0.2265625}\n", + "{'loss': '2.4001293182373047', 'num_iter': 152064, 'lr': 0.001, 'time': '7.848429918289185 Seconds', 'norm': 0.244140625}\n", + "{'loss': '2.3741555213928223', 'num_iter': 152576, 'lr': 0.001, 'time': '8.267451763153076 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.3437516689300537', 'num_iter': 153088, 'lr': 0.001, 'time': '8.299741268157959 Seconds', 'norm': 0.22265625}\n", + "{'loss': '2.353262424468994', 'num_iter': 153600, 'lr': 0.001, 'time': '8.548104524612427 Seconds', 'norm': 0.2236328125}\n", + "{'loss': '2.4060401916503906', 'num_iter': 154112, 'lr': 0.001, 'time': '8.478615283966064 Seconds', 'norm': 0.1904296875}\n", + "{'loss': '2.3300604820251465', 'num_iter': 154624, 'lr': 0.001, 'time': '8.470060110092163 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.3785202503204346', 'num_iter': 155136, 'lr': 0.001, 'time': '8.421005487442017 Seconds', 'norm': 0.173828125}\n", + "{'loss': '2.380131483078003', 'num_iter': 155648, 'lr': 0.001, 'time': '8.6744863986969 Seconds', 'norm': 0.2216796875}\n", + "{'loss': '2.4541163444519043', 'num_iter': 156160, 'lr': 0.001, 'time': '8.00025486946106 Seconds', 'norm': 0.234375}\n", + "{'loss': '2.4264039993286133', 'num_iter': 156672, 'lr': 0.001, 'time': '7.949988603591919 Seconds', 'norm': 0.2412109375}\n", + "{'loss': '2.3760228157043457', 'num_iter': 157184, 'lr': 0.001, 'time': 
'8.16675591468811 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.419093132019043', 'num_iter': 157696, 'lr': 0.001, 'time': '8.110333919525146 Seconds', 'norm': 0.251953125}\n", + "{'loss': '2.3473074436187744', 'num_iter': 158208, 'lr': 0.001, 'time': '8.571992635726929 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.3978145122528076', 'num_iter': 158720, 'lr': 0.001, 'time': '8.455940008163452 Seconds', 'norm': 0.2197265625}\n", + "{'loss': '2.3912198543548584', 'num_iter': 159232, 'lr': 0.001, 'time': '8.212659120559692 Seconds', 'norm': 0.2578125}\n", + "{'loss': '2.394611358642578', 'num_iter': 159744, 'lr': 0.001, 'time': '8.391351222991943 Seconds', 'norm': 0.22265625}\n", + "{'loss': '2.296705722808838', 'num_iter': 160256, 'lr': 0.001, 'time': '8.791725397109985 Seconds', 'norm': 0.1787109375}\n", + "{'loss': '2.4715523719787598', 'num_iter': 160768, 'lr': 0.001, 'time': '8.123394250869751 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.3635003566741943', 'num_iter': 161280, 'lr': 0.001, 'time': '8.853474378585815 Seconds', 'norm': 0.1962890625}\n", + "{'loss': '2.3635966777801514', 'num_iter': 161792, 'lr': 0.001, 'time': '8.725740671157837 Seconds', 'norm': 0.1953125}\n", + "{'loss': '2.341452121734619', 'num_iter': 162304, 'lr': 0.001, 'time': '8.323640584945679 Seconds', 'norm': 0.19921875}\n", + "{'loss': '2.330770492553711', 'num_iter': 162816, 'lr': 0.001, 'time': '8.84046721458435 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.3927061557769775', 'num_iter': 163328, 'lr': 0.001, 'time': '8.528711557388306 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.348679542541504', 'num_iter': 163840, 'lr': 0.001, 'time': '8.463924407958984 Seconds', 'norm': 0.1669921875}\n", + "{'loss': '2.410590648651123', 'num_iter': 164352, 'lr': 0.001, 'time': '13.290242433547974 Seconds', 'norm': 0.20703125}\n", + "{'loss': '2.410336971282959', 'num_iter': 164864, 'lr': 0.001, 'time': '8.453669309616089 Seconds', 'norm': 0.265625}\n", + "{'loss': 
'2.4056358337402344', 'num_iter': 165376, 'lr': 0.001, 'time': '9.204848289489746 Seconds', 'norm': 0.3046875}\n", + "{'loss': '2.4024312496185303', 'num_iter': 165888, 'lr': 0.001, 'time': '9.05479645729065 Seconds', 'norm': 0.267578125}\n", + "{'loss': '2.3533430099487305', 'num_iter': 166400, 'lr': 0.001, 'time': '8.526433229446411 Seconds', 'norm': 0.2353515625}\n", + "{'loss': '2.3808906078338623', 'num_iter': 166912, 'lr': 0.001, 'time': '8.373178720474243 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.3636302947998047', 'num_iter': 167424, 'lr': 0.001, 'time': '8.742811441421509 Seconds', 'norm': 0.2119140625}\n", + "{'loss': '2.4313549995422363', 'num_iter': 167936, 'lr': 0.001, 'time': '10.581190824508667 Seconds', 'norm': 0.1962890625}\n", + "{'loss': '2.3715522289276123', 'num_iter': 168448, 'lr': 0.001, 'time': '8.243183851242065 Seconds', 'norm': 0.251953125}\n", + "{'loss': '2.390352725982666', 'num_iter': 168960, 'lr': 0.001, 'time': '8.11393427848816 Seconds', 'norm': 0.240234375}\n", + "{'loss': '2.438720464706421', 'num_iter': 169472, 'lr': 0.001, 'time': '8.0389564037323 Seconds', 'norm': 0.236328125}\n", + "{'loss': '2.407698631286621', 'num_iter': 169984, 'lr': 0.001, 'time': '8.643431663513184 Seconds', 'norm': 0.244140625}\n", + "{'loss': '2.3528716564178467', 'num_iter': 170496, 'lr': 0.001, 'time': '8.375278949737549 Seconds', 'norm': 0.2236328125}\n", + "{'loss': '2.3246371746063232', 'num_iter': 171008, 'lr': 0.001, 'time': '8.527279376983643 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.338951349258423', 'num_iter': 171520, 'lr': 0.001, 'time': '8.57494330406189 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.4132702350616455', 'num_iter': 172032, 'lr': 0.001, 'time': '8.255114555358887 Seconds', 'norm': 0.279296875}\n", + "{'loss': '2.347482681274414', 'num_iter': 172544, 'lr': 0.001, 'time': '8.837162256240845 Seconds', 'norm': 0.189453125}\n", + "{'loss': '2.3248636722564697', 'num_iter': 173056, 'lr': 0.001, 'time': 
'8.481815576553345 Seconds', 'norm': 0.1884765625}\n", + "{'loss': '2.3607559204101562', 'num_iter': 173568, 'lr': 0.001, 'time': '8.499971389770508 Seconds', 'norm': 0.185546875}\n", + "{'loss': '2.3352715969085693', 'num_iter': 174080, 'lr': 0.001, 'time': '8.579814434051514 Seconds', 'norm': 0.169921875}\n", + "{'loss': '2.371962785720825', 'num_iter': 174592, 'lr': 0.001, 'time': '8.568901538848877 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.3810174465179443', 'num_iter': 175104, 'lr': 0.001, 'time': '8.14989161491394 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.409205675125122', 'num_iter': 175616, 'lr': 0.001, 'time': '8.016071081161499 Seconds', 'norm': 0.181640625}\n", + "{'loss': '2.3557281494140625', 'num_iter': 176128, 'lr': 0.001, 'time': '8.10802698135376 Seconds', 'norm': 0.1708984375}\n", + "{'loss': '2.4115004539489746', 'num_iter': 176640, 'lr': 0.001, 'time': '8.01994776725769 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.404536247253418', 'num_iter': 177152, 'lr': 0.001, 'time': '8.481751680374146 Seconds', 'norm': 0.1884765625}\n", + "{'loss': '2.3743655681610107', 'num_iter': 177664, 'lr': 0.001, 'time': '8.365893602371216 Seconds', 'norm': 0.181640625}\n", + "{'loss': '2.3779876232147217', 'num_iter': 178176, 'lr': 0.001, 'time': '8.483413219451904 Seconds', 'norm': 0.18359375}\n", + "{'loss': '2.30751633644104', 'num_iter': 178688, 'lr': 0.001, 'time': '8.686514854431152 Seconds', 'norm': 0.1689453125}\n", + "{'loss': '2.3443827629089355', 'num_iter': 179200, 'lr': 0.001, 'time': '8.228882551193237 Seconds', 'norm': 0.2294921875}\n", + "{'loss': '2.3815975189208984', 'num_iter': 179712, 'lr': 0.001, 'time': '8.253013610839844 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.4312551021575928', 'num_iter': 180224, 'lr': 0.001, 'time': '7.904991626739502 Seconds', 'norm': 0.1884765625}\n", + "{'loss': '2.3476133346557617', 'num_iter': 180736, 'lr': 0.001, 'time': '8.588699340820312 Seconds', 'norm': 0.19921875}\n", + 
"{'loss': '2.407395839691162', 'num_iter': 181248, 'lr': 0.001, 'time': '8.374473094940186 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.2928924560546875', 'num_iter': 181760, 'lr': 0.001, 'time': '8.66974663734436 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.3799545764923096', 'num_iter': 182272, 'lr': 0.001, 'time': '8.678651571273804 Seconds', 'norm': 0.19921875}\n", + "{'loss': '2.419018030166626', 'num_iter': 182784, 'lr': 0.001, 'time': '8.178403854370117 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.402024507522583', 'num_iter': 183296, 'lr': 0.001, 'time': '8.218433141708374 Seconds', 'norm': 0.1728515625}\n", + "{'loss': '2.308976650238037', 'num_iter': 183808, 'lr': 0.001, 'time': '8.492849588394165 Seconds', 'norm': 0.279296875}\n", + "{'loss': '2.3735179901123047', 'num_iter': 184320, 'lr': 0.001, 'time': '8.180338859558105 Seconds', 'norm': 0.2431640625}\n", + "{'loss': '2.443040370941162', 'num_iter': 184832, 'lr': 0.001, 'time': '8.396362543106079 Seconds', 'norm': 0.23046875}\n", + "{'loss': '2.3716039657592773', 'num_iter': 185344, 'lr': 0.001, 'time': '8.364355325698853 Seconds', 'norm': 0.251953125}\n", + "{'loss': '2.3963816165924072', 'num_iter': 185856, 'lr': 0.001, 'time': '8.191945314407349 Seconds', 'norm': 0.21484375}\n", + "{'loss': '2.4441351890563965', 'num_iter': 186368, 'lr': 0.001, 'time': '8.596657514572144 Seconds', 'norm': 0.22265625}\n", + "{'loss': '2.3792989253997803', 'num_iter': 186880, 'lr': 0.001, 'time': '8.697709560394287 Seconds', 'norm': 0.2060546875}\n", + "{'loss': '2.319247007369995', 'num_iter': 187392, 'lr': 0.001, 'time': '9.042456865310669 Seconds', 'norm': 0.259765625}\n", + "{'loss': '2.317769765853882', 'num_iter': 187904, 'lr': 0.001, 'time': '8.529827356338501 Seconds', 'norm': 0.255859375}\n", + "{'loss': '2.3265223503112793', 'num_iter': 188416, 'lr': 0.001, 'time': '8.988086223602295 Seconds', 'norm': 0.2412109375}\n", + "{'loss': '2.3581628799438477', 'num_iter': 188928, 'lr': 0.001, 
'time': '8.864873170852661 Seconds', 'norm': 0.2080078125}\n", + "{'loss': '2.3540351390838623', 'num_iter': 189440, 'lr': 0.001, 'time': '9.287997245788574 Seconds', 'norm': 0.2236328125}\n", + "{'loss': '2.352262020111084', 'num_iter': 189952, 'lr': 0.001, 'time': '8.305871963500977 Seconds', 'norm': 0.181640625}\n", + "{'loss': '2.397305965423584', 'num_iter': 190464, 'lr': 0.001, 'time': '8.774291753768921 Seconds', 'norm': 0.265625}\n", + "{'loss': '2.379282236099243', 'num_iter': 190976, 'lr': 0.001, 'time': '8.71970272064209 Seconds', 'norm': 0.234375}\n", + "{'loss': '2.3912370204925537', 'num_iter': 191488, 'lr': 0.001, 'time': '8.56525731086731 Seconds', 'norm': 0.2119140625}\n", + "{'loss': '2.4019556045532227', 'num_iter': 192000, 'lr': 0.001, 'time': '8.555302143096924 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.4382483959198', 'num_iter': 192512, 'lr': 0.001, 'time': '8.539350271224976 Seconds', 'norm': 0.2060546875}\n", + "{'loss': '2.368314027786255', 'num_iter': 193024, 'lr': 0.001, 'time': '8.356388092041016 Seconds', 'norm': 0.2392578125}\n", + "{'loss': '2.303382396697998', 'num_iter': 193536, 'lr': 0.001, 'time': '8.817384958267212 Seconds', 'norm': 0.265625}\n", + "{'loss': '2.4346792697906494', 'num_iter': 194048, 'lr': 0.001, 'time': '8.135556936264038 Seconds', 'norm': 0.2431640625}\n", + "{'loss': '2.3610384464263916', 'num_iter': 194560, 'lr': 0.001, 'time': '8.586567401885986 Seconds', 'norm': 0.1611328125}\n", + "{'loss': '2.358102321624756', 'num_iter': 195072, 'lr': 0.001, 'time': '8.900879621505737 Seconds', 'norm': 0.23046875}\n", + "{'loss': '2.3662548065185547', 'num_iter': 195584, 'lr': 0.001, 'time': '8.590677261352539 Seconds', 'norm': 0.1904296875}\n", + "{'loss': '2.401867389678955', 'num_iter': 196096, 'lr': 0.001, 'time': '8.482840061187744 Seconds', 'norm': 0.1962890625}\n", + "{'loss': '2.376189947128296', 'num_iter': 196608, 'lr': 0.001, 'time': '8.400667905807495 Seconds', 'norm': 0.201171875}\n", + "{'loss': 
'2.340115547180176', 'num_iter': 197120, 'lr': 0.001, 'time': '14.034735679626465 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.3026089668273926', 'num_iter': 197632, 'lr': 0.001, 'time': '8.940040349960327 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.4754436016082764', 'num_iter': 198144, 'lr': 0.001, 'time': '8.47199559211731 Seconds', 'norm': 0.1923828125}\n", + "{'loss': '2.323779821395874', 'num_iter': 198656, 'lr': 0.001, 'time': '9.111096382141113 Seconds', 'norm': 0.1669921875}\n", + "{'loss': '2.3296773433685303', 'num_iter': 199168, 'lr': 0.001, 'time': '8.707337617874146 Seconds', 'norm': 0.17578125}\n", + "{'loss': '2.402021884918213', 'num_iter': 199680, 'lr': 0.001, 'time': '8.420790672302246 Seconds', 'norm': 0.171875}\n", + "{'loss': '2.3557069301605225', 'num_iter': 200192, 'lr': 0.001, 'time': '8.448639631271362 Seconds', 'norm': 0.1904296875}\n", + "{'loss': '2.373485803604126', 'num_iter': 200704, 'lr': 0.001, 'time': '8.372979640960693 Seconds', 'norm': 0.16796875}\n", + "{'loss': '2.345262050628662', 'num_iter': 201216, 'lr': 0.001, 'time': '8.477368831634521 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.340141534805298', 'num_iter': 201728, 'lr': 0.001, 'time': '8.401488304138184 Seconds', 'norm': 0.181640625}\n", + "{'loss': '2.3137013912200928', 'num_iter': 202240, 'lr': 0.001, 'time': '9.203272342681885 Seconds', 'norm': 0.1904296875}\n", + "{'loss': '2.4149842262268066', 'num_iter': 202752, 'lr': 0.001, 'time': '8.790534973144531 Seconds', 'norm': 0.2333984375}\n", + "{'loss': '2.4090964794158936', 'num_iter': 203264, 'lr': 0.001, 'time': '7.9481377601623535 Seconds', 'norm': 0.2373046875}\n", + "{'loss': '2.413677453994751', 'num_iter': 203776, 'lr': 0.001, 'time': '8.145129680633545 Seconds', 'norm': 0.251953125}\n", + "{'loss': '2.38226580619812', 'num_iter': 204288, 'lr': 0.001, 'time': '8.24802303314209 Seconds', 'norm': 0.283203125}\n", + "{'loss': '2.345913887023926', 'num_iter': 204800, 'lr': 0.001, 'time': 
'8.365803718566895 Seconds', 'norm': 0.22265625}\n", + "{'loss': '2.3469247817993164', 'num_iter': 205312, 'lr': 0.001, 'time': '8.045371294021606 Seconds', 'norm': 0.2197265625}\n", + "{'loss': '2.3835318088531494', 'num_iter': 205824, 'lr': 0.001, 'time': '8.170892000198364 Seconds', 'norm': 0.271484375}\n", + "{'loss': '2.448836088180542', 'num_iter': 206336, 'lr': 0.001, 'time': '8.012107849121094 Seconds', 'norm': 0.21875}\n", + "{'loss': '2.3233399391174316', 'num_iter': 206848, 'lr': 0.001, 'time': '8.744713544845581 Seconds', 'norm': 0.236328125}\n", + "{'loss': '2.3320868015289307', 'num_iter': 207360, 'lr': 0.001, 'time': '8.70252513885498 Seconds', 'norm': 0.25390625}\n", + "{'loss': '2.3801751136779785', 'num_iter': 207872, 'lr': 0.001, 'time': '8.338006973266602 Seconds', 'norm': 0.24609375}\n", + "{'loss': '2.355252265930176', 'num_iter': 208384, 'lr': 0.001, 'time': '8.790690422058105 Seconds', 'norm': 0.17578125}\n", + "{'loss': '2.386415719985962', 'num_iter': 208896, 'lr': 0.001, 'time': '8.765786409378052 Seconds', 'norm': 0.255859375}\n", + "{'loss': '2.398970365524292', 'num_iter': 209408, 'lr': 0.001, 'time': '8.377013683319092 Seconds', 'norm': 0.251953125}\n", + "{'loss': '2.4278042316436768', 'num_iter': 209920, 'lr': 0.001, 'time': '8.138808488845825 Seconds', 'norm': 0.224609375}\n", + "{'loss': '2.3264434337615967', 'num_iter': 210432, 'lr': 0.001, 'time': '8.604609489440918 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.3022260665893555', 'num_iter': 210944, 'lr': 0.001, 'time': '8.813130617141724 Seconds', 'norm': 0.1904296875}\n", + "{'loss': '2.3174846172332764', 'num_iter': 211456, 'lr': 0.001, 'time': '8.536523818969727 Seconds', 'norm': 0.185546875}\n", + "{'loss': '2.3424057960510254', 'num_iter': 211968, 'lr': 0.001, 'time': '8.058176755905151 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.4122111797332764', 'num_iter': 212480, 'lr': 0.001, 'time': '8.526128053665161 Seconds', 'norm': 0.201171875}\n", + "{'loss': 
'2.3980116844177246', 'num_iter': 212992, 'lr': 0.001, 'time': '8.307657718658447 Seconds', 'norm': 0.19921875}\n", + "{'loss': '2.3523271083831787', 'num_iter': 213504, 'lr': 0.001, 'time': '10.703664064407349 Seconds', 'norm': 0.1611328125}\n", + "{'loss': '2.360304117202759', 'num_iter': 214016, 'lr': 0.001, 'time': '8.352770805358887 Seconds', 'norm': 0.1923828125}\n", + "{'loss': '2.3762311935424805', 'num_iter': 214528, 'lr': 0.001, 'time': '8.728522777557373 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.3424596786499023', 'num_iter': 215040, 'lr': 0.001, 'time': '8.548719882965088 Seconds', 'norm': 0.1787109375}\n", + "{'loss': '2.3331634998321533', 'num_iter': 215552, 'lr': 0.001, 'time': '8.587762832641602 Seconds', 'norm': 0.177734375}\n", + "{'loss': '2.3859400749206543', 'num_iter': 216064, 'lr': 0.001, 'time': '8.286732912063599 Seconds', 'norm': 0.1787109375}\n", + "{'loss': '2.3784117698669434', 'num_iter': 216576, 'lr': 0.001, 'time': '8.418378591537476 Seconds', 'norm': 0.189453125}\n", + "{'loss': '2.438253164291382', 'num_iter': 217088, 'lr': 0.001, 'time': '8.145642518997192 Seconds', 'norm': 0.1748046875}\n", + "{'loss': '2.3865485191345215', 'num_iter': 217600, 'lr': 0.001, 'time': '8.378966331481934 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.4209232330322266', 'num_iter': 218112, 'lr': 0.001, 'time': '8.249592542648315 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.4072771072387695', 'num_iter': 218624, 'lr': 0.001, 'time': '8.111175537109375 Seconds', 'norm': 0.224609375}\n", + "{'loss': '2.40582275390625', 'num_iter': 219136, 'lr': 0.001, 'time': '8.505855321884155 Seconds', 'norm': 0.2451171875}\n", + "{'loss': '2.31947922706604', 'num_iter': 219648, 'lr': 0.001, 'time': '8.685045003890991 Seconds', 'norm': 0.224609375}\n", + "{'loss': '2.4264163970947266', 'num_iter': 220160, 'lr': 0.001, 'time': '8.074942827224731 Seconds', 'norm': 0.25}\n", + "{'loss': '2.402250051498413', 'num_iter': 220672, 'lr': 0.001, 'time': 
'8.235146760940552 Seconds', 'norm': 0.240234375}\n", + "{'loss': '2.308009147644043', 'num_iter': 221184, 'lr': 0.001, 'time': '8.296538591384888 Seconds', 'norm': 0.220703125}\n", + "{'loss': '2.430100679397583', 'num_iter': 221696, 'lr': 0.001, 'time': '7.859760999679565 Seconds', 'norm': 0.310546875}\n", + "{'loss': '2.4234437942504883', 'num_iter': 222208, 'lr': 0.001, 'time': '8.362849950790405 Seconds', 'norm': 0.357421875}\n", + "{'loss': '2.3611183166503906', 'num_iter': 222720, 'lr': 0.001, 'time': '8.58720850944519 Seconds', 'norm': 0.2451171875}\n", + "{'loss': '2.3180902004241943', 'num_iter': 223232, 'lr': 0.001, 'time': '8.51003885269165 Seconds', 'norm': 0.21484375}\n", + "{'loss': '2.4069008827209473', 'num_iter': 223744, 'lr': 0.001, 'time': '8.100350379943848 Seconds', 'norm': 0.255859375}\n", + "{'loss': '2.4283080101013184', 'num_iter': 224256, 'lr': 0.001, 'time': '8.16063928604126 Seconds', 'norm': 0.2197265625}\n", + "{'loss': '2.4220941066741943', 'num_iter': 224768, 'lr': 0.001, 'time': '8.39595913887024 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.393131971359253', 'num_iter': 225280, 'lr': 0.001, 'time': '8.766296625137329 Seconds', 'norm': 0.2109375}\n", + "{'loss': '2.373231887817383', 'num_iter': 225792, 'lr': 0.001, 'time': '8.511415004730225 Seconds', 'norm': 0.19921875}\n", + "{'loss': '2.353454828262329', 'num_iter': 226304, 'lr': 0.001, 'time': '8.447685718536377 Seconds', 'norm': 0.2451171875}\n", + "{'loss': '2.3797378540039062', 'num_iter': 226816, 'lr': 0.001, 'time': '8.553575992584229 Seconds', 'norm': 0.1708984375}\n", + "{'loss': '2.368077039718628', 'num_iter': 227328, 'lr': 0.001, 'time': '8.239809274673462 Seconds', 'norm': 0.255859375}\n", + "{'loss': '2.3728744983673096', 'num_iter': 227840, 'lr': 0.001, 'time': '8.670446634292603 Seconds', 'norm': 0.1884765625}\n", + "{'loss': '2.3388493061065674', 'num_iter': 228352, 'lr': 0.001, 'time': '8.67458462715149 Seconds', 'norm': 0.216796875}\n", + "{'loss': 
'2.4088003635406494', 'num_iter': 228864, 'lr': 0.001, 'time': '8.202964305877686 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.3256852626800537', 'num_iter': 229376, 'lr': 0.001, 'time': '8.201792240142822 Seconds', 'norm': 0.1982421875}\n", + "{'loss': '2.4867920875549316', 'num_iter': 229888, 'lr': 0.001, 'time': '13.32987642288208 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.365682363510132', 'num_iter': 230400, 'lr': 0.001, 'time': '9.235367059707642 Seconds', 'norm': 0.1796875}\n", + "{'loss': '2.3717684745788574', 'num_iter': 230912, 'lr': 0.001, 'time': '8.923941373825073 Seconds', 'norm': 0.1767578125}\n", + "{'loss': '2.3342442512512207', 'num_iter': 231424, 'lr': 0.001, 'time': '9.293139219284058 Seconds', 'norm': 0.181640625}\n", + "{'loss': '2.3567512035369873', 'num_iter': 231936, 'lr': 0.001, 'time': '8.658114671707153 Seconds', 'norm': 0.189453125}\n", + "{'loss': '2.426687717437744', 'num_iter': 232448, 'lr': 0.001, 'time': '8.210061073303223 Seconds', 'norm': 0.19140625}\n", + "{'loss': '2.4500954151153564', 'num_iter': 232960, 'lr': 0.001, 'time': '8.300801038742065 Seconds', 'norm': 0.20703125}\n", + "{'loss': '2.389975070953369', 'num_iter': 233472, 'lr': 0.001, 'time': '8.208280563354492 Seconds', 'norm': 0.18359375}\n", + "{'loss': '2.3227341175079346', 'num_iter': 233984, 'lr': 0.001, 'time': '8.524590492248535 Seconds', 'norm': 0.19140625}\n", + "{'loss': '2.3813316822052', 'num_iter': 234496, 'lr': 0.001, 'time': '8.820230960845947 Seconds', 'norm': 0.20703125}\n", + "{'loss': '2.358562469482422', 'num_iter': 235008, 'lr': 0.001, 'time': '8.657799243927002 Seconds', 'norm': 0.2197265625}\n", + "{'loss': '2.3874619007110596', 'num_iter': 235520, 'lr': 0.001, 'time': '8.56637454032898 Seconds', 'norm': 0.2197265625}\n", + "{'loss': '2.4278903007507324', 'num_iter': 236032, 'lr': 0.001, 'time': '8.347804546356201 Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.374258279800415', 'num_iter': 236544, 'lr': 0.001, 'time': 
'8.379116773605347 Seconds', 'norm': 0.2431640625}\n", + "{'loss': '2.3390471935272217', 'num_iter': 237056, 'lr': 0.001, 'time': '8.6199049949646 Seconds', 'norm': 0.2451171875}\n", + "{'loss': '2.3644862174987793', 'num_iter': 237568, 'lr': 0.001, 'time': '8.466325283050537 Seconds', 'norm': 0.240234375}\n", + "{'loss': '2.434051990509033', 'num_iter': 238080, 'lr': 0.001, 'time': '8.230094194412231 Seconds', 'norm': 0.2060546875}\n", + "{'loss': '2.4160258769989014', 'num_iter': 238592, 'lr': 0.001, 'time': '8.302940368652344 Seconds', 'norm': 0.2490234375}\n", + "{'loss': '2.46889591217041', 'num_iter': 239104, 'lr': 0.001, 'time': '7.781325817108154 Seconds', 'norm': 0.2734375}\n", + "{'loss': '2.3158419132232666', 'num_iter': 239616, 'lr': 0.001, 'time': '8.933854818344116 Seconds', 'norm': 0.2294921875}\n", + "{'loss': '2.4122138023376465', 'num_iter': 240128, 'lr': 0.001, 'time': '8.912975549697876 Seconds', 'norm': 0.2333984375}\n", + "{'loss': '2.38907790184021', 'num_iter': 240640, 'lr': 0.001, 'time': '8.302465438842773 Seconds', 'norm': 0.234375}\n", + "{'loss': '2.3494873046875', 'num_iter': 241152, 'lr': 0.001, 'time': '8.485373973846436 Seconds', 'norm': 0.197265625}\n", + "{'loss': '2.4071221351623535', 'num_iter': 241664, 'lr': 0.001, 'time': '8.365293264389038 Seconds', 'norm': 0.2060546875}\n", + "{'loss': '2.412865400314331', 'num_iter': 242176, 'lr': 0.001, 'time': '8.297960042953491 Seconds', 'norm': 0.220703125}\n", + "{'loss': '2.4540276527404785', 'num_iter': 242688, 'lr': 0.001, 'time': '7.873640298843384 Seconds', 'norm': 0.2177734375}\n", + "{'loss': '2.383798599243164', 'num_iter': 243200, 'lr': 0.001, 'time': '8.313431024551392 Seconds', 'norm': 0.201171875}\n", + "{'loss': '2.3730578422546387', 'num_iter': 243712, 'lr': 0.001, 'time': '8.123436689376831 Seconds', 'norm': 0.2060546875}\n", + "{'loss': '2.3854212760925293', 'num_iter': 244224, 'lr': 0.001, 'time': '8.498823881149292 Seconds', 'norm': 0.208984375}\n", + "{'loss': 
'2.4542348384857178', 'num_iter': 244736, 'lr': 0.001, 'time': '8.176527738571167 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.3994383811950684', 'num_iter': 245248, 'lr': 0.001, 'time': '7.953600645065308 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.3758578300476074', 'num_iter': 245760, 'lr': 0.001, 'time': '8.407753705978394 Seconds', 'norm': 0.232421875}\n", + "{'loss': '2.4466841220855713', 'num_iter': 246272, 'lr': 0.001, 'time': '7.725236892700195 Seconds', 'norm': 0.236328125}\n", + "{'loss': '2.2792766094207764', 'num_iter': 246784, 'lr': 0.001, 'time': '8.631680011749268 Seconds', 'norm': 0.2236328125}\n", + "{'loss': '2.323840379714966', 'num_iter': 247296, 'lr': 0.001, 'time': '8.767044067382812 Seconds', 'norm': 0.1748046875}\n", + "{'loss': '2.3303678035736084', 'num_iter': 247808, 'lr': 0.001, 'time': '8.194946050643921 Seconds', 'norm': 0.2490234375}\n", + "{'loss': '2.400864839553833', 'num_iter': 248320, 'lr': 0.001, 'time': '8.68299913406372 Seconds', 'norm': 0.21875}\n", + "{'loss': '2.339550256729126', 'num_iter': 248832, 'lr': 0.001, 'time': '9.624387502670288 Seconds', 'norm': 0.2216796875}\n", + "{'loss': '2.351720094680786', 'num_iter': 249344, 'lr': 0.001, 'time': '9.027736902236938 Seconds', 'norm': 0.197265625}\n", + "{'loss': '2.4474985599517822', 'num_iter': 249856, 'lr': 0.001, 'time': '8.032051086425781 Seconds', 'norm': 0.2080078125}\n", + "{'loss': '2.331218719482422', 'num_iter': 250368, 'lr': 0.001, 'time': '9.139688968658447 Seconds', 'norm': 0.2158203125}\n", + "{'loss': '2.45076322555542', 'num_iter': 250880, 'lr': 0.001, 'time': '8.007093906402588 Seconds', 'norm': 0.259765625}\n", + "{'loss': '2.342646598815918', 'num_iter': 251392, 'lr': 0.001, 'time': '8.487046480178833 Seconds', 'norm': 0.2119140625}\n", + "{'loss': '2.3847901821136475', 'num_iter': 251904, 'lr': 0.001, 'time': '7.976288557052612 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.347527503967285', 'num_iter': 252416, 'lr': 0.001, 'time': 
'8.655533790588379 Seconds', 'norm': 0.19140625}\n", + "{'loss': '2.3850958347320557', 'num_iter': 252928, 'lr': 0.001, 'time': '8.582412242889404 Seconds', 'norm': 0.216796875}\n", + "{'loss': '2.4238131046295166', 'num_iter': 253440, 'lr': 0.001, 'time': '8.276405334472656 Seconds', 'norm': 0.23046875}\n", + "{'loss': '2.3791065216064453', 'num_iter': 253952, 'lr': 0.001, 'time': '8.284763813018799 Seconds', 'norm': 0.2275390625}\n", + "{'loss': '2.3143136501312256', 'num_iter': 254464, 'lr': 0.001, 'time': '9.208547115325928 Seconds', 'norm': 0.19921875}\n", + "{'loss': '2.358023166656494', 'num_iter': 254976, 'lr': 0.001, 'time': '8.734349966049194 Seconds', 'norm': 0.2216796875}\n", + "{'loss': '2.3771891593933105', 'num_iter': 255488, 'lr': 0.001, 'time': '8.75161099433899 Seconds', 'norm': 0.2294921875}\n", + "{'loss': '2.430690050125122', 'num_iter': 256000, 'lr': 0.001, 'time': '8.25423550605774 Seconds', 'norm': 0.265625}\n", + "{'loss': '2.3713862895965576', 'num_iter': 256512, 'lr': 0.001, 'time': '8.19586443901062 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.4567673206329346', 'num_iter': 257024, 'lr': 0.001, 'time': '8.514960289001465 Seconds', 'norm': 0.265625}\n", + "{'loss': '2.35611891746521', 'num_iter': 257536, 'lr': 0.001, 'time': '8.559271812438965 Seconds', 'norm': 0.2890625}\n", + "{'loss': '2.3605380058288574', 'num_iter': 258048, 'lr': 0.001, 'time': '8.779946565628052 Seconds', 'norm': 0.232421875}\n", + "{'loss': '2.3493683338165283', 'num_iter': 258560, 'lr': 0.001, 'time': '10.725444078445435 Seconds', 'norm': 0.21484375}\n", + "{'loss': '2.4130163192749023', 'num_iter': 259072, 'lr': 0.001, 'time': '8.22253131866455 Seconds', 'norm': 0.20703125}\n", + "{'loss': '2.3438992500305176', 'num_iter': 259584, 'lr': 0.001, 'time': '8.735976457595825 Seconds', 'norm': 0.2412109375}\n", + "{'loss': '2.3967232704162598', 'num_iter': 260096, 'lr': 0.001, 'time': '8.319757461547852 Seconds', 'norm': 0.1884765625}\n", + "{'loss': 
'2.351228952407837', 'num_iter': 260608, 'lr': 0.001, 'time': '8.48915958404541 Seconds', 'norm': 0.2470703125}\n", + "{'loss': '2.448064088821411', 'num_iter': 261120, 'lr': 0.001, 'time': '8.303987741470337 Seconds', 'norm': 0.201171875}\n", + "{'loss': '2.3546414375305176', 'num_iter': 261632, 'lr': 0.001, 'time': '8.227036714553833 Seconds', 'norm': 0.244140625}\n", + "{'loss': '2.4015486240386963', 'num_iter': 262144, 'lr': 0.001, 'time': '7.9424262046813965 Seconds', 'norm': 0.23828125}\n", + "{'loss': '2.392242193222046', 'num_iter': 262656, 'lr': 0.001, 'time': '13.544525384902954 Seconds', 'norm': 0.22265625}\n", + "{'loss': '2.4435842037200928', 'num_iter': 263168, 'lr': 0.001, 'time': '7.95257568359375 Seconds', 'norm': 0.2216796875}\n", + "{'loss': '2.361112356185913', 'num_iter': 263680, 'lr': 0.001, 'time': '8.616785049438477 Seconds', 'norm': 0.265625}\n", + "{'loss': '2.2966091632843018', 'num_iter': 264192, 'lr': 0.001, 'time': '8.633754968643188 Seconds', 'norm': 0.1982421875}\n", + "{'loss': '2.448230028152466', 'num_iter': 264704, 'lr': 0.001, 'time': '8.326794624328613 Seconds', 'norm': 0.220703125}\n", + "{'loss': '2.3304977416992188', 'num_iter': 265216, 'lr': 0.001, 'time': '8.988875150680542 Seconds', 'norm': 0.19140625}\n", + "{'loss': '2.385537624359131', 'num_iter': 265728, 'lr': 0.001, 'time': '9.46163535118103 Seconds', 'norm': 0.203125}\n", + "{'loss': '2.4812631607055664', 'num_iter': 266240, 'lr': 0.001, 'time': '9.105252504348755 Seconds', 'norm': 0.2431640625}\n", + "{'loss': '2.3992249965667725', 'num_iter': 266752, 'lr': 0.001, 'time': '9.222147703170776 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.388855218887329', 'num_iter': 267264, 'lr': 0.001, 'time': '9.151604652404785 Seconds', 'norm': 0.2412109375}\n", + "{'loss': '2.3794920444488525', 'num_iter': 267776, 'lr': 0.001, 'time': '8.322026014328003 Seconds', 'norm': 0.189453125}\n", + "{'loss': '2.4025790691375732', 'num_iter': 268288, 'lr': 0.001, 'time': 
'8.177959680557251 Seconds', 'norm': 0.251953125}\n", + "{'loss': '2.4039461612701416', 'num_iter': 268800, 'lr': 0.001, 'time': '8.373990297317505 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.386751651763916', 'num_iter': 269312, 'lr': 0.001, 'time': '8.64305567741394 Seconds', 'norm': 0.20703125}\n", + "{'loss': '2.400869607925415', 'num_iter': 269824, 'lr': 0.001, 'time': '8.28970718383789 Seconds', 'norm': 0.19921875}\n", + "{'loss': '2.333707571029663', 'num_iter': 270336, 'lr': 0.001, 'time': '8.682121992111206 Seconds', 'norm': 0.2099609375}\n", + "{'loss': '2.3656578063964844', 'num_iter': 270848, 'lr': 0.001, 'time': '8.243608236312866 Seconds', 'norm': 0.216796875}\n", + "{'loss': '2.364166259765625', 'num_iter': 271360, 'lr': 0.001, 'time': '8.175023794174194 Seconds', 'norm': 0.2080078125}\n", + "{'loss': '2.449799060821533', 'num_iter': 271872, 'lr': 0.001, 'time': '7.973111867904663 Seconds', 'norm': 0.1904296875}\n", + "{'loss': '2.316122055053711', 'num_iter': 272384, 'lr': 0.001, 'time': '9.077145338058472 Seconds', 'norm': 0.181640625}\n", + "{'loss': '2.3976595401763916', 'num_iter': 272896, 'lr': 0.001, 'time': '8.83397626876831 Seconds', 'norm': 0.20703125}\n", + "{'loss': '2.3197543621063232', 'num_iter': 273408, 'lr': 0.001, 'time': '9.112494945526123 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.417259693145752', 'num_iter': 273920, 'lr': 0.001, 'time': '8.590454578399658 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.383967638015747', 'num_iter': 274432, 'lr': 0.001, 'time': '8.932982921600342 Seconds', 'norm': 0.1904296875}\n", + "{'loss': '2.4591479301452637', 'num_iter': 274944, 'lr': 0.001, 'time': '8.903688907623291 Seconds', 'norm': 0.21875}\n", + "{'loss': '2.328021764755249', 'num_iter': 275456, 'lr': 0.001, 'time': '8.986218214035034 Seconds', 'norm': 0.23828125}\n", + "{'loss': '2.3581700325012207', 'num_iter': 275968, 'lr': 0.001, 'time': '8.372138738632202 Seconds', 'norm': 0.2314453125}\n", + "{'loss': 
'2.3342649936676025', 'num_iter': 276480, 'lr': 0.001, 'time': '8.779387712478638 Seconds', 'norm': 0.216796875}\n", + "{'loss': '2.383107900619507', 'num_iter': 276992, 'lr': 0.001, 'time': '8.067545413970947 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.4403553009033203', 'num_iter': 277504, 'lr': 0.001, 'time': '8.369643211364746 Seconds', 'norm': 0.2314453125}\n", + "{'loss': '2.3732123374938965', 'num_iter': 278016, 'lr': 0.001, 'time': '8.51713752746582 Seconds', 'norm': 0.232421875}\n", + "{'loss': '2.3884003162384033', 'num_iter': 278528, 'lr': 0.001, 'time': '8.13892650604248 Seconds', 'norm': 0.1962890625}\n", + "{'loss': '2.421957492828369', 'num_iter': 279040, 'lr': 0.001, 'time': '8.445558071136475 Seconds', 'norm': 0.271484375}\n", + "{'loss': '2.4311976432800293', 'num_iter': 279552, 'lr': 0.001, 'time': '8.224004030227661 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.4092817306518555', 'num_iter': 280064, 'lr': 0.001, 'time': '8.05395221710205 Seconds', 'norm': 0.1884765625}\n", + "{'loss': '2.401502847671509', 'num_iter': 280576, 'lr': 0.001, 'time': '8.084429264068604 Seconds', 'norm': 0.2236328125}\n", + "{'loss': '2.386718511581421', 'num_iter': 281088, 'lr': 0.001, 'time': '8.723917007446289 Seconds', 'norm': 0.20703125}\n", + "{'loss': '2.363805055618286', 'num_iter': 281600, 'lr': 0.001, 'time': '8.550983667373657 Seconds', 'norm': 0.1787109375}\n", + "{'loss': '2.330687999725342', 'num_iter': 282112, 'lr': 0.001, 'time': '8.887413263320923 Seconds', 'norm': 0.21484375}\n", + "{'loss': '2.370398998260498', 'num_iter': 282624, 'lr': 0.001, 'time': '8.903722524642944 Seconds', 'norm': 0.21875}\n", + "{'loss': '2.3747034072875977', 'num_iter': 283136, 'lr': 0.001, 'time': '8.498511791229248 Seconds', 'norm': 0.19921875}\n", + "{'loss': '2.4405148029327393', 'num_iter': 283648, 'lr': 0.001, 'time': '9.101063251495361 Seconds', 'norm': 0.234375}\n", + "{'loss': '2.3930764198303223', 'num_iter': 284160, 'lr': 0.001, 'time': 
'8.708468914031982 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.4270787239074707', 'num_iter': 284672, 'lr': 0.001, 'time': '8.44163179397583 Seconds', 'norm': 0.20703125}\n", + "{'loss': '2.39479923248291', 'num_iter': 285184, 'lr': 0.001, 'time': '8.261706590652466 Seconds', 'norm': 0.2158203125}\n", + "{'loss': '2.3038883209228516', 'num_iter': 285696, 'lr': 0.001, 'time': '8.301404237747192 Seconds', 'norm': 0.1943359375}\n", + "{'loss': '2.3959062099456787', 'num_iter': 286208, 'lr': 0.001, 'time': '8.500529289245605 Seconds', 'norm': 0.228515625}\n", + "{'loss': '2.397590398788452', 'num_iter': 286720, 'lr': 0.001, 'time': '8.135626316070557 Seconds', 'norm': 0.25}\n", + "{'loss': '2.425551176071167', 'num_iter': 287232, 'lr': 0.001, 'time': '8.786616802215576 Seconds', 'norm': 0.28125}\n", + "{'loss': '2.4148709774017334', 'num_iter': 287744, 'lr': 0.001, 'time': '8.10781192779541 Seconds', 'norm': 0.2158203125}\n", + "{'loss': '2.3727715015411377', 'num_iter': 288256, 'lr': 0.001, 'time': '8.571322917938232 Seconds', 'norm': 0.2177734375}\n", + "{'loss': '2.3732223510742188', 'num_iter': 288768, 'lr': 0.001, 'time': '8.507109642028809 Seconds', 'norm': 0.2373046875}\n", + "{'loss': '2.411951780319214', 'num_iter': 289280, 'lr': 0.001, 'time': '8.267762422561646 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.385762929916382', 'num_iter': 289792, 'lr': 0.001, 'time': '8.282331466674805 Seconds', 'norm': 0.21484375}\n", + "{'loss': '2.375361204147339', 'num_iter': 290304, 'lr': 0.001, 'time': '8.38056492805481 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.4230048656463623', 'num_iter': 290816, 'lr': 0.001, 'time': '8.52833604812622 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.3321714401245117', 'num_iter': 291328, 'lr': 0.001, 'time': '8.47947072982788 Seconds', 'norm': 0.1845703125}\n", + "{'loss': '2.331327438354492', 'num_iter': 291840, 'lr': 0.001, 'time': '8.391510486602783 Seconds', 'norm': 0.19921875}\n", + "{'loss': 
'2.3201982975006104', 'num_iter': 292352, 'lr': 0.001, 'time': '8.988374471664429 Seconds', 'norm': 0.1943359375}\n", + "{'loss': '2.3845808506011963', 'num_iter': 292864, 'lr': 0.001, 'time': '8.909928798675537 Seconds', 'norm': 0.2265625}\n", + "{'loss': '2.3504624366760254', 'num_iter': 293376, 'lr': 0.001, 'time': '8.564331531524658 Seconds', 'norm': 0.263671875}\n", + "{'loss': '2.4387266635894775', 'num_iter': 293888, 'lr': 0.001, 'time': '7.995528936386108 Seconds', 'norm': 0.224609375}\n", + "{'loss': '2.3537988662719727', 'num_iter': 294400, 'lr': 0.001, 'time': '8.511414289474487 Seconds', 'norm': 0.1904296875}\n", + "{'loss': '2.3460607528686523', 'num_iter': 294912, 'lr': 0.001, 'time': '8.501054763793945 Seconds', 'norm': 0.216796875}\n", + "{'loss': '2.3315610885620117', 'num_iter': 295424, 'lr': 0.001, 'time': '13.464897871017456 Seconds', 'norm': 0.1884765625}\n", + "{'loss': '2.4139909744262695', 'num_iter': 295936, 'lr': 0.001, 'time': '8.57378101348877 Seconds', 'norm': 0.197265625}\n", + "{'loss': '2.371453285217285', 'num_iter': 296448, 'lr': 0.001, 'time': '8.721199989318848 Seconds', 'norm': 0.216796875}\n", + "{'loss': '2.384549856185913', 'num_iter': 296960, 'lr': 0.001, 'time': '8.604033946990967 Seconds', 'norm': 0.232421875}\n", + "{'loss': '2.3499159812927246', 'num_iter': 297472, 'lr': 0.001, 'time': '8.364481210708618 Seconds', 'norm': 0.173828125}\n", + "{'loss': '2.3874237537384033', 'num_iter': 297984, 'lr': 0.001, 'time': '8.545352935791016 Seconds', 'norm': 0.1943359375}\n", + "{'loss': '2.3864004611968994', 'num_iter': 298496, 'lr': 0.001, 'time': '8.46750259399414 Seconds', 'norm': 0.173828125}\n", + "{'loss': '2.3291914463043213', 'num_iter': 299008, 'lr': 0.001, 'time': '8.513898134231567 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.376094102859497', 'num_iter': 299520, 'lr': 0.001, 'time': '8.477492094039917 Seconds', 'norm': 0.2119140625}\n", + "{'loss': '2.3394899368286133', 'num_iter': 300032, 'lr': 0.001, 'time': 
'8.547684669494629 Seconds', 'norm': 0.1728515625}\n", + "{'loss': '2.415980815887451', 'num_iter': 300544, 'lr': 0.001, 'time': '9.219136953353882 Seconds', 'norm': 0.1943359375}\n", + "{'loss': '2.40695858001709', 'num_iter': 301056, 'lr': 0.001, 'time': '9.282705307006836 Seconds', 'norm': 0.1884765625}\n", + "{'loss': '2.365731954574585', 'num_iter': 301568, 'lr': 0.001, 'time': '9.060192108154297 Seconds', 'norm': 0.17578125}\n", + "{'loss': '2.3108112812042236', 'num_iter': 302080, 'lr': 0.001, 'time': '10.135092496871948 Seconds', 'norm': 0.203125}\n", + "{'loss': '2.334876775741577', 'num_iter': 302592, 'lr': 0.001, 'time': '9.33305835723877 Seconds', 'norm': 0.203125}\n", + "{'loss': '2.4437294006347656', 'num_iter': 303104, 'lr': 0.001, 'time': '10.712315559387207 Seconds', 'norm': 0.2373046875}\n", + "{'loss': '2.3769195079803467', 'num_iter': 303616, 'lr': 0.001, 'time': '8.630734920501709 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.4069664478302', 'num_iter': 304128, 'lr': 0.001, 'time': '8.255667448043823 Seconds', 'norm': 0.21484375}\n", + "{'loss': '2.3335657119750977', 'num_iter': 304640, 'lr': 0.001, 'time': '8.58766508102417 Seconds', 'norm': 0.2158203125}\n", + "{'loss': '2.3624532222747803', 'num_iter': 305152, 'lr': 0.001, 'time': '8.545783996582031 Seconds', 'norm': 0.1689453125}\n", + "{'loss': '2.350822687149048', 'num_iter': 305664, 'lr': 0.001, 'time': '8.43161940574646 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.3336572647094727', 'num_iter': 306176, 'lr': 0.001, 'time': '8.480591058731079 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.4290881156921387', 'num_iter': 306688, 'lr': 0.001, 'time': '8.352237939834595 Seconds', 'norm': 0.1904296875}\n", + "{'loss': '2.4103426933288574', 'num_iter': 307200, 'lr': 0.001, 'time': '8.347373962402344 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.32393479347229', 'num_iter': 307712, 'lr': 0.001, 'time': '8.849784135818481 Seconds', 'norm': 0.189453125}\n", + "{'loss': 
'2.336074113845825', 'num_iter': 308224, 'lr': 0.001, 'time': '8.41114330291748 Seconds', 'norm': 0.1640625}\n", + "{'loss': '2.413184881210327', 'num_iter': 308736, 'lr': 0.001, 'time': '8.145277261734009 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.3413593769073486', 'num_iter': 309248, 'lr': 0.001, 'time': '8.39972186088562 Seconds', 'norm': 0.21875}\n", + "{'loss': '2.3780415058135986', 'num_iter': 309760, 'lr': 0.001, 'time': '8.95822787284851 Seconds', 'norm': 0.1982421875}\n", + "{'loss': '2.4237754344940186', 'num_iter': 310272, 'lr': 0.001, 'time': '8.662708759307861 Seconds', 'norm': 0.1943359375}\n", + "{'loss': '2.3252556324005127', 'num_iter': 310784, 'lr': 0.001, 'time': '9.39649486541748 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.4138176441192627', 'num_iter': 311296, 'lr': 0.001, 'time': '8.096821784973145 Seconds', 'norm': 0.21875}\n", + "{'loss': '2.3686509132385254', 'num_iter': 311808, 'lr': 0.001, 'time': '8.604280710220337 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.4012837409973145', 'num_iter': 312320, 'lr': 0.001, 'time': '8.224432229995728 Seconds', 'norm': 0.201171875}\n", + "{'loss': '2.3629026412963867', 'num_iter': 312832, 'lr': 0.001, 'time': '8.052284240722656 Seconds', 'norm': 0.2275390625}\n", + "{'loss': '2.3418285846710205', 'num_iter': 313344, 'lr': 0.001, 'time': '8.197106838226318 Seconds', 'norm': 0.2080078125}\n", + "{'loss': '2.3784403800964355', 'num_iter': 313856, 'lr': 0.001, 'time': '8.41416335105896 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.3789894580841064', 'num_iter': 314368, 'lr': 0.001, 'time': '8.561839580535889 Seconds', 'norm': 0.232421875}\n", + "{'loss': '2.3847882747650146', 'num_iter': 314880, 'lr': 0.001, 'time': '8.089531898498535 Seconds', 'norm': 0.265625}\n", + "{'loss': '2.3306243419647217', 'num_iter': 315392, 'lr': 0.001, 'time': '8.703095197677612 Seconds', 'norm': 0.2158203125}\n", + "{'loss': '2.386942148208618', 'num_iter': 315904, 'lr': 0.001, 'time': 
'8.010140419006348 Seconds', 'norm': 0.2099609375}\n", + "{'loss': '2.362185478210449', 'num_iter': 316416, 'lr': 0.001, 'time': '8.389809608459473 Seconds', 'norm': 0.189453125}\n", + "{'loss': '2.3671252727508545', 'num_iter': 316928, 'lr': 0.001, 'time': '7.876409530639648 Seconds', 'norm': 0.2099609375}\n", + "{'loss': '2.3849918842315674', 'num_iter': 317440, 'lr': 0.001, 'time': '8.245641231536865 Seconds', 'norm': 0.2255859375}\n", + "{'loss': '2.3321967124938965', 'num_iter': 317952, 'lr': 0.001, 'time': '9.230170726776123 Seconds', 'norm': 0.1787109375}\n", + "{'loss': '2.432971477508545', 'num_iter': 318464, 'lr': 0.001, 'time': '8.417069673538208 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.374561071395874', 'num_iter': 318976, 'lr': 0.001, 'time': '9.239708423614502 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.3547489643096924', 'num_iter': 319488, 'lr': 0.001, 'time': '9.043018817901611 Seconds', 'norm': 0.1767578125}\n", + "{'loss': '2.3750970363616943', 'num_iter': 320000, 'lr': 0.001, 'time': '8.23776388168335 Seconds', 'norm': 0.21875}\n", + "{'loss': '2.412919282913208', 'num_iter': 320512, 'lr': 0.001, 'time': '8.504985094070435 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.462623119354248', 'num_iter': 321024, 'lr': 0.001, 'time': '8.550896883010864 Seconds', 'norm': 0.2734375}\n", + "{'loss': '2.3641278743743896', 'num_iter': 321536, 'lr': 0.001, 'time': '9.006207466125488 Seconds', 'norm': 0.349609375}\n", + "{'loss': '2.3690357208251953', 'num_iter': 322048, 'lr': 0.001, 'time': '8.179299592971802 Seconds', 'norm': 0.3359375}\n", + "{'loss': '2.396116018295288', 'num_iter': 322560, 'lr': 0.001, 'time': '8.18799352645874 Seconds', 'norm': 0.296875}\n", + "{'loss': '2.3939080238342285', 'num_iter': 323072, 'lr': 0.001, 'time': '8.38433575630188 Seconds', 'norm': 0.263671875}\n", + "{'loss': '2.39143705368042', 'num_iter': 323584, 'lr': 0.001, 'time': '8.179182767868042 Seconds', 'norm': 0.2197265625}\n", + "{'loss': 
'2.384146213531494', 'num_iter': 324096, 'lr': 0.001, 'time': '8.347514629364014 Seconds', 'norm': 0.26171875}\n", + "{'loss': '2.370356559753418', 'num_iter': 324608, 'lr': 0.001, 'time': '8.617391109466553 Seconds', 'norm': 0.19140625}\n", + "{'loss': '2.3611085414886475', 'num_iter': 325120, 'lr': 0.001, 'time': '8.676847457885742 Seconds', 'norm': 0.2333984375}\n", + "{'loss': '2.3861262798309326', 'num_iter': 325632, 'lr': 0.001, 'time': '8.24257206916809 Seconds', 'norm': 0.2392578125}\n", + "{'loss': '2.390904426574707', 'num_iter': 326144, 'lr': 0.001, 'time': '8.482982158660889 Seconds', 'norm': 0.24609375}\n", + "{'loss': '2.3834309577941895', 'num_iter': 326656, 'lr': 0.001, 'time': '8.589087009429932 Seconds', 'norm': 0.2392578125}\n", + "{'loss': '2.4065608978271484', 'num_iter': 327168, 'lr': 0.001, 'time': '9.224836826324463 Seconds', 'norm': 0.1953125}\n", + "{'loss': '2.384727954864502', 'num_iter': 327680, 'lr': 0.001, 'time': '8.898816347122192 Seconds', 'norm': 0.2490234375}\n", + "{'loss': '2.3574984073638916', 'num_iter': 328192, 'lr': 0.001, 'time': '14.532771110534668 Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.3504486083984375', 'num_iter': 328704, 'lr': 0.001, 'time': '8.599496841430664 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.317302942276001', 'num_iter': 329216, 'lr': 0.001, 'time': '8.97040605545044 Seconds', 'norm': 0.1748046875}\n", + "{'loss': '2.3568222522735596', 'num_iter': 329728, 'lr': 0.001, 'time': '8.283230066299438 Seconds', 'norm': 0.1953125}\n", + "{'loss': '2.3342983722686768', 'num_iter': 330240, 'lr': 0.001, 'time': '8.545572280883789 Seconds', 'norm': 0.1962890625}\n", + "{'loss': '2.4216320514678955', 'num_iter': 330752, 'lr': 0.001, 'time': '8.191247940063477 Seconds', 'norm': 0.1796875}\n", + "{'loss': '2.3296892642974854', 'num_iter': 331264, 'lr': 0.001, 'time': '8.777395963668823 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.3747518062591553', 'num_iter': 331776, 'lr': 0.001, 'time': 
'8.510385751724243 Seconds', 'norm': 0.1943359375}\n", + "{'loss': '2.377216100692749', 'num_iter': 332288, 'lr': 0.001, 'time': '8.405907154083252 Seconds', 'norm': 0.185546875}\n", + "{'loss': '2.3888943195343018', 'num_iter': 332800, 'lr': 0.001, 'time': '8.263776063919067 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.3225455284118652', 'num_iter': 333312, 'lr': 0.001, 'time': '8.553609132766724 Seconds', 'norm': 0.1923828125}\n", + "{'loss': '2.4023454189300537', 'num_iter': 333824, 'lr': 0.001, 'time': '8.297262907028198 Seconds', 'norm': 0.181640625}\n", + "{'loss': '2.366560697555542', 'num_iter': 334336, 'lr': 0.001, 'time': '8.221718788146973 Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.3812015056610107', 'num_iter': 334848, 'lr': 0.001, 'time': '8.31541633605957 Seconds', 'norm': 0.203125}\n", + "{'loss': '2.3630995750427246', 'num_iter': 335360, 'lr': 0.001, 'time': '9.027503252029419 Seconds', 'norm': 0.17578125}\n", + "{'loss': '2.3661887645721436', 'num_iter': 335872, 'lr': 0.001, 'time': '9.473745822906494 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.391641616821289', 'num_iter': 336384, 'lr': 0.001, 'time': '9.089564800262451 Seconds', 'norm': 0.1630859375}\n", + "{'loss': '2.38344407081604', 'num_iter': 336896, 'lr': 0.001, 'time': '9.183728694915771 Seconds', 'norm': 0.1796875}\n", + "{'loss': '2.2898924350738525', 'num_iter': 337408, 'lr': 0.001, 'time': '8.855298042297363 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.303185224533081', 'num_iter': 337920, 'lr': 0.001, 'time': '9.285278797149658 Seconds', 'norm': 0.171875}\n", + "{'loss': '2.3794479370117188', 'num_iter': 338432, 'lr': 0.001, 'time': '8.335030555725098 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.3647491931915283', 'num_iter': 338944, 'lr': 0.001, 'time': '8.7308349609375 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.3814191818237305', 'num_iter': 339456, 'lr': 0.001, 'time': '8.244513511657715 Seconds', 'norm': 0.2470703125}\n", + "{'loss': 
'2.3537018299102783', 'num_iter': 339968, 'lr': 0.001, 'time': '8.329665660858154 Seconds', 'norm': 0.2353515625}\n", + "{'loss': '2.436612129211426', 'num_iter': 340480, 'lr': 0.001, 'time': '7.838789701461792 Seconds', 'norm': 0.1943359375}\n", + "{'loss': '2.406677484512329', 'num_iter': 340992, 'lr': 0.001, 'time': '8.378950834274292 Seconds', 'norm': 0.1953125}\n", + "{'loss': '2.3736791610717773', 'num_iter': 341504, 'lr': 0.001, 'time': '8.615100860595703 Seconds', 'norm': 0.2119140625}\n", + "{'loss': '2.351391553878784', 'num_iter': 342016, 'lr': 0.001, 'time': '8.552705526351929 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.3695170879364014', 'num_iter': 342528, 'lr': 0.001, 'time': '8.173922777175903 Seconds', 'norm': 0.1787109375}\n", + "{'loss': '2.332104444503784', 'num_iter': 343040, 'lr': 0.001, 'time': '8.60319471359253 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.38838267326355', 'num_iter': 343552, 'lr': 0.001, 'time': '8.821454048156738 Seconds', 'norm': 0.240234375}\n", + "{'loss': '2.373359441757202', 'num_iter': 344064, 'lr': 0.001, 'time': '8.222538471221924 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.4171457290649414', 'num_iter': 344576, 'lr': 0.001, 'time': '8.07952618598938 Seconds', 'norm': 0.2080078125}\n", + "{'loss': '2.3699748516082764', 'num_iter': 345088, 'lr': 0.001, 'time': '8.936387300491333 Seconds', 'norm': 0.296875}\n", + "{'loss': '2.358299732208252', 'num_iter': 345600, 'lr': 0.001, 'time': '8.713745355606079 Seconds', 'norm': 0.185546875}\n", + "{'loss': '2.419468402862549', 'num_iter': 346112, 'lr': 0.001, 'time': '8.473394393920898 Seconds', 'norm': 0.265625}\n", + "{'loss': '2.3081276416778564', 'num_iter': 346624, 'lr': 0.001, 'time': '8.564984560012817 Seconds', 'norm': 0.2373046875}\n", + "{'loss': '2.4399352073669434', 'num_iter': 347136, 'lr': 0.001, 'time': '8.338621139526367 Seconds', 'norm': 0.228515625}\n", + "{'loss': '2.3844845294952393', 'num_iter': 347648, 'lr': 0.001, 'time': 
'8.925156831741333 Seconds', 'norm': 0.255859375}\n", + "{'loss': '2.337373733520508', 'num_iter': 348160, 'lr': 0.001, 'time': '11.065615892410278 Seconds', 'norm': 0.279296875}\n", + "{'loss': '2.384406566619873', 'num_iter': 348672, 'lr': 0.001, 'time': '8.35679292678833 Seconds', 'norm': 0.2236328125}\n", + "{'loss': '2.3246729373931885', 'num_iter': 349184, 'lr': 0.001, 'time': '8.617259979248047 Seconds', 'norm': 0.2578125}\n", + "{'loss': '2.4116621017456055', 'num_iter': 349696, 'lr': 0.001, 'time': '8.234044075012207 Seconds', 'norm': 0.232421875}\n", + "{'loss': '2.43892240524292', 'num_iter': 350208, 'lr': 0.001, 'time': '8.18798279762268 Seconds', 'norm': 0.26953125}\n", + "{'loss': '2.382889747619629', 'num_iter': 350720, 'lr': 0.001, 'time': '8.998388051986694 Seconds', 'norm': 0.23828125}\n", + "{'loss': '2.3797965049743652', 'num_iter': 351232, 'lr': 0.001, 'time': '8.581696510314941 Seconds', 'norm': 0.1884765625}\n", + "{'loss': '2.3697423934936523', 'num_iter': 351744, 'lr': 0.001, 'time': '8.287113666534424 Seconds', 'norm': 0.2275390625}\n", + "{'loss': '2.351867914199829', 'num_iter': 352256, 'lr': 0.001, 'time': '8.495772361755371 Seconds', 'norm': 0.2421875}\n", + "{'loss': '2.366380453109741', 'num_iter': 352768, 'lr': 0.001, 'time': '8.623859405517578 Seconds', 'norm': 0.2373046875}\n", + "{'loss': '2.37353777885437', 'num_iter': 353280, 'lr': 0.001, 'time': '8.35682463645935 Seconds', 'norm': 0.25}\n", + "{'loss': '2.3585216999053955', 'num_iter': 353792, 'lr': 0.001, 'time': '8.519386768341064 Seconds', 'norm': 0.25}\n", + "{'loss': '2.4199321269989014', 'num_iter': 354304, 'lr': 0.001, 'time': '8.774182319641113 Seconds', 'norm': 0.251953125}\n", + "{'loss': '2.4429666996002197', 'num_iter': 354816, 'lr': 0.001, 'time': '8.21086597442627 Seconds', 'norm': 0.2421875}\n", + "{'loss': '2.3902385234832764', 'num_iter': 355328, 'lr': 0.001, 'time': '8.021050691604614 Seconds', 'norm': 0.2265625}\n", + "{'loss': '2.3950181007385254', 
'num_iter': 355840, 'lr': 0.001, 'time': '8.270747423171997 Seconds', 'norm': 0.263671875}\n", + "{'loss': '2.3550150394439697', 'num_iter': 356352, 'lr': 0.001, 'time': '8.47206735610962 Seconds', 'norm': 0.2216796875}\n", + "{'loss': '2.3788392543792725', 'num_iter': 356864, 'lr': 0.001, 'time': '8.259821891784668 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.3775596618652344', 'num_iter': 357376, 'lr': 0.001, 'time': '8.305903196334839 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.369220018386841', 'num_iter': 357888, 'lr': 0.001, 'time': '8.522042751312256 Seconds', 'norm': 0.1796875}\n", + "{'loss': '2.4088871479034424', 'num_iter': 358400, 'lr': 0.001, 'time': '8.272263526916504 Seconds', 'norm': 0.2060546875}\n", + "{'loss': '2.3787238597869873', 'num_iter': 358912, 'lr': 0.001, 'time': '8.617843866348267 Seconds', 'norm': 0.208984375}\n", + "{'loss': '2.3562023639678955', 'num_iter': 359424, 'lr': 0.001, 'time': '8.11997389793396 Seconds', 'norm': 0.1953125}\n", + "{'loss': '2.432711601257324', 'num_iter': 359936, 'lr': 0.001, 'time': '7.785167694091797 Seconds', 'norm': 0.181640625}\n", + "{'loss': '2.375709056854248', 'num_iter': 360448, 'lr': 0.001, 'time': '8.270187377929688 Seconds', 'norm': 0.220703125}\n", + "{'loss': '2.3795032501220703', 'num_iter': 360960, 'lr': 0.001, 'time': '13.668138265609741 Seconds', 'norm': 0.2275390625}\n", + "{'loss': '2.3727798461914062', 'num_iter': 361472, 'lr': 0.001, 'time': '8.04228138923645 Seconds', 'norm': 0.203125}\n", + "{'loss': '2.3385231494903564', 'num_iter': 361984, 'lr': 0.001, 'time': '8.922245025634766 Seconds', 'norm': 0.201171875}\n", + "{'loss': '2.342078924179077', 'num_iter': 362496, 'lr': 0.001, 'time': '8.031728982925415 Seconds', 'norm': 0.1953125}\n", + "{'loss': '2.4810705184936523', 'num_iter': 363008, 'lr': 0.001, 'time': '8.214014768600464 Seconds', 'norm': 0.240234375}\n", + "{'loss': '2.382781505584717', 'num_iter': 363520, 'lr': 0.001, 'time': '8.55629014968872 Seconds', 'norm': 
0.1953125}\n", + "{'loss': '2.3820607662200928', 'num_iter': 364032, 'lr': 0.001, 'time': '9.42013168334961 Seconds', 'norm': 0.2255859375}\n", + "{'loss': '2.3718576431274414', 'num_iter': 364544, 'lr': 0.001, 'time': '9.06444239616394 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.44164776802063', 'num_iter': 365056, 'lr': 0.001, 'time': '8.535747766494751 Seconds', 'norm': 0.240234375}\n", + "{'loss': '2.3684909343719482', 'num_iter': 365568, 'lr': 0.001, 'time': '8.982326984405518 Seconds', 'norm': 0.2158203125}\n", + "{'loss': '2.429417133331299', 'num_iter': 366080, 'lr': 0.001, 'time': '8.441758871078491 Seconds', 'norm': 0.2275390625}\n", + "{'loss': '2.3917396068573', 'num_iter': 366592, 'lr': 0.001, 'time': '7.958219766616821 Seconds', 'norm': 0.1962890625}\n", + "{'loss': '2.379910469055176', 'num_iter': 367104, 'lr': 0.001, 'time': '8.3921217918396 Seconds', 'norm': 0.1923828125}\n", + "{'loss': '2.3722870349884033', 'num_iter': 367616, 'lr': 0.001, 'time': '8.030806541442871 Seconds', 'norm': 0.189453125}\n", + "{'loss': '2.4068033695220947', 'num_iter': 368128, 'lr': 0.001, 'time': '7.7724456787109375 Seconds', 'norm': 0.2109375}\n", + "{'loss': '2.3436288833618164', 'num_iter': 368640, 'lr': 0.001, 'time': '8.435116291046143 Seconds', 'norm': 0.2275390625}\n", + "{'loss': '2.355989933013916', 'num_iter': 369152, 'lr': 0.001, 'time': '8.557190179824829 Seconds', 'norm': 0.22265625}\n", + "{'loss': '2.351763963699341', 'num_iter': 369664, 'lr': 0.001, 'time': '8.677687883377075 Seconds', 'norm': 0.251953125}\n", + "{'loss': '2.3739657402038574', 'num_iter': 370176, 'lr': 0.001, 'time': '8.869253396987915 Seconds', 'norm': 0.240234375}\n", + "{'loss': '2.4143691062927246', 'num_iter': 370688, 'lr': 0.001, 'time': '8.308026313781738 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.371335029602051', 'num_iter': 371200, 'lr': 0.001, 'time': '8.56001615524292 Seconds', 'norm': 0.20703125}\n", + "{'loss': '2.3410964012145996', 'num_iter': 371712, 'lr': 
0.001, 'time': '8.421749114990234 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.38055419921875', 'num_iter': 372224, 'lr': 0.001, 'time': '8.846316814422607 Seconds', 'norm': 0.220703125}\n", + "{'loss': '2.3950140476226807', 'num_iter': 372736, 'lr': 0.001, 'time': '8.569184064865112 Seconds', 'norm': 0.185546875}\n", + "{'loss': '2.3742284774780273', 'num_iter': 373248, 'lr': 0.001, 'time': '8.637691736221313 Seconds', 'norm': 0.2080078125}\n", + "{'loss': '2.4101107120513916', 'num_iter': 373760, 'lr': 0.001, 'time': '8.273067235946655 Seconds', 'norm': 0.20703125}\n", + "{'loss': '2.3644638061523438', 'num_iter': 374272, 'lr': 0.001, 'time': '8.434312105178833 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.360276937484741', 'num_iter': 374784, 'lr': 0.001, 'time': '8.523102521896362 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.4336140155792236', 'num_iter': 375296, 'lr': 0.001, 'time': '7.960329055786133 Seconds', 'norm': 0.1904296875}\n", + "{'loss': '2.3407957553863525', 'num_iter': 375808, 'lr': 0.001, 'time': '8.771158933639526 Seconds', 'norm': 0.2060546875}\n", + "{'loss': '2.3324272632598877', 'num_iter': 376320, 'lr': 0.001, 'time': '8.51830792427063 Seconds', 'norm': 0.208984375}\n", + "{'loss': '2.4212708473205566', 'num_iter': 376832, 'lr': 0.001, 'time': '8.271643877029419 Seconds', 'norm': 0.2099609375}\n", + "{'loss': '2.335904598236084', 'num_iter': 377344, 'lr': 0.001, 'time': '8.55228042602539 Seconds', 'norm': 0.1962890625}\n", + "{'loss': '2.377575159072876', 'num_iter': 377856, 'lr': 0.001, 'time': '8.381027460098267 Seconds', 'norm': 0.1923828125}\n", + "{'loss': '2.4403796195983887', 'num_iter': 378368, 'lr': 0.001, 'time': '8.233609199523926 Seconds', 'norm': 0.1953125}\n", + "{'loss': '2.4046976566314697', 'num_iter': 378880, 'lr': 0.001, 'time': '7.982059955596924 Seconds', 'norm': 0.2099609375}\n", + "{'loss': '2.3839974403381348', 'num_iter': 379392, 'lr': 0.001, 'time': '8.369484901428223 Seconds', 'norm': 
0.2001953125}\n", + "{'loss': '2.406503915786743', 'num_iter': 379904, 'lr': 0.001, 'time': '8.074047327041626 Seconds', 'norm': 0.201171875}\n", + "{'loss': '2.3363699913024902', 'num_iter': 380416, 'lr': 0.001, 'time': '8.557210683822632 Seconds', 'norm': 0.2216796875}\n", + "{'loss': '2.386509895324707', 'num_iter': 380928, 'lr': 0.001, 'time': '8.391390562057495 Seconds', 'norm': 0.228515625}\n", + "{'loss': '2.36122465133667', 'num_iter': 381440, 'lr': 0.001, 'time': '8.752220869064331 Seconds', 'norm': 0.2373046875}\n", + "{'loss': '2.3688385486602783', 'num_iter': 381952, 'lr': 0.001, 'time': '8.670387268066406 Seconds', 'norm': 0.1982421875}\n", + "{'loss': '2.421541690826416', 'num_iter': 382464, 'lr': 0.001, 'time': '8.160013675689697 Seconds', 'norm': 0.20703125}\n", + "{'loss': '2.389247179031372', 'num_iter': 382976, 'lr': 0.001, 'time': '8.189028263092041 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.4452877044677734', 'num_iter': 383488, 'lr': 0.001, 'time': '7.765945196151733 Seconds', 'norm': 0.216796875}\n", + "{'loss': '2.3463711738586426', 'num_iter': 384000, 'lr': 0.001, 'time': '8.840980768203735 Seconds', 'norm': 0.2392578125}\n", + "{'loss': '2.357616662979126', 'num_iter': 384512, 'lr': 0.001, 'time': '8.521970748901367 Seconds', 'norm': 0.30078125}\n", + "{'loss': '2.334329128265381', 'num_iter': 385024, 'lr': 0.001, 'time': '8.811458110809326 Seconds', 'norm': 0.1943359375}\n", + "{'loss': '2.3956098556518555', 'num_iter': 385536, 'lr': 0.001, 'time': '8.636063814163208 Seconds', 'norm': 0.2060546875}\n", + "{'loss': '2.3428094387054443', 'num_iter': 386048, 'lr': 0.001, 'time': '8.456270217895508 Seconds', 'norm': 0.25390625}\n", + "{'loss': '2.318286657333374', 'num_iter': 386560, 'lr': 0.001, 'time': '8.511307716369629 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.384647846221924', 'num_iter': 387072, 'lr': 0.001, 'time': '8.350618124008179 Seconds', 'norm': 0.236328125}\n", + "{'loss': '2.399871349334717', 'num_iter': 387584, 'lr': 
0.001, 'time': '8.15837836265564 Seconds', 'norm': 0.23828125}\n", + "{'loss': '2.339489221572876', 'num_iter': 388096, 'lr': 0.001, 'time': '8.442745208740234 Seconds', 'norm': 0.201171875}\n", + "{'loss': '2.3802051544189453', 'num_iter': 388608, 'lr': 0.001, 'time': '7.9479265213012695 Seconds', 'norm': 0.216796875}\n", + "{'loss': '2.3842201232910156', 'num_iter': 389120, 'lr': 0.001, 'time': '8.213179349899292 Seconds', 'norm': 0.248046875}\n", + "{'loss': '2.4042162895202637', 'num_iter': 389632, 'lr': 0.001, 'time': '7.915804624557495 Seconds', 'norm': 0.251953125}\n", + "{'loss': '2.43757963180542', 'num_iter': 390144, 'lr': 0.001, 'time': '8.4265878200531 Seconds', 'norm': 0.22265625}\n", + "{'loss': '2.3156750202178955', 'num_iter': 390656, 'lr': 0.001, 'time': '10.18436861038208 Seconds', 'norm': 0.2109375}\n", + "{'loss': '2.3905978202819824', 'num_iter': 391168, 'lr': 0.001, 'time': '9.37023377418518 Seconds', 'norm': 0.1943359375}\n", + "{'loss': '2.3778533935546875', 'num_iter': 391680, 'lr': 0.001, 'time': '8.913928508758545 Seconds', 'norm': 0.2265625}\n", + "{'loss': '2.3408162593841553', 'num_iter': 392192, 'lr': 0.001, 'time': '8.736698627471924 Seconds', 'norm': 0.181640625}\n", + "{'loss': '2.3683762550354004', 'num_iter': 392704, 'lr': 0.001, 'time': '8.647998094558716 Seconds', 'norm': 0.1904296875}\n", + "{'loss': '2.4251418113708496', 'num_iter': 393216, 'lr': 0.001, 'time': '10.712963581085205 Seconds', 'norm': 0.1796875}\n", + "{'loss': '2.330533504486084', 'num_iter': 393728, 'lr': 0.001, 'time': '14.784240245819092 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.363538980484009', 'num_iter': 394240, 'lr': 0.001, 'time': '8.426096677780151 Seconds', 'norm': 0.177734375}\n", + "{'loss': '2.392524242401123', 'num_iter': 394752, 'lr': 0.001, 'time': '8.191072702407837 Seconds', 'norm': 0.23046875}\n", + "{'loss': '2.350720167160034', 'num_iter': 395264, 'lr': 0.001, 'time': '8.569383382797241 Seconds', 'norm': 0.1826171875}\n", + 
"{'loss': '2.3651092052459717', 'num_iter': 395776, 'lr': 0.001, 'time': '8.714524030685425 Seconds', 'norm': 0.2216796875}\n", + "{'loss': '2.3899641036987305', 'num_iter': 396288, 'lr': 0.001, 'time': '8.801914691925049 Seconds', 'norm': 0.173828125}\n", + "{'loss': '2.3921427726745605', 'num_iter': 396800, 'lr': 0.001, 'time': '8.537614583969116 Seconds', 'norm': 0.1748046875}\n", + "{'loss': '2.3790297508239746', 'num_iter': 397312, 'lr': 0.001, 'time': '8.342019319534302 Seconds', 'norm': 0.1689453125}\n", + "{'loss': '2.371678113937378', 'num_iter': 397824, 'lr': 0.001, 'time': '8.455769777297974 Seconds', 'norm': 0.1923828125}\n", + "{'loss': '2.4778335094451904', 'num_iter': 398336, 'lr': 0.001, 'time': '8.45968246459961 Seconds', 'norm': 0.1748046875}\n", + "{'loss': '2.388458251953125', 'num_iter': 398848, 'lr': 0.001, 'time': '9.321236371994019 Seconds', 'norm': 0.2197265625}\n", + "{'loss': '2.401665210723877', 'num_iter': 399360, 'lr': 0.001, 'time': '9.645813226699829 Seconds', 'norm': 0.203125}\n", + "{'loss': '2.3733885288238525', 'num_iter': 399872, 'lr': 0.001, 'time': '8.501755714416504 Seconds', 'norm': 0.1845703125}\n", + "{'loss': '2.3640685081481934', 'num_iter': 400384, 'lr': 0.001, 'time': '8.565459489822388 Seconds', 'norm': 0.2119140625}\n", + "{'loss': '2.354379415512085', 'num_iter': 400896, 'lr': 0.001, 'time': '8.731095790863037 Seconds', 'norm': 0.2353515625}\n", + "{'loss': '2.372793197631836', 'num_iter': 401408, 'lr': 0.001, 'time': '8.688451051712036 Seconds', 'norm': 0.19140625}\n", + "{'loss': '2.4384188652038574', 'num_iter': 401920, 'lr': 0.001, 'time': '8.044995546340942 Seconds', 'norm': 0.185546875}\n", + "{'loss': '2.375757932662964', 'num_iter': 402432, 'lr': 0.001, 'time': '8.037137031555176 Seconds', 'norm': 0.208984375}\n", + "{'loss': '2.4391725063323975', 'num_iter': 402944, 'lr': 0.001, 'time': '8.47510814666748 Seconds', 'norm': 0.2333984375}\n", + "{'loss': '2.2843031883239746', 'num_iter': 403456, 'lr': 0.001, 
'time': '8.804652690887451 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.390533924102783', 'num_iter': 403968, 'lr': 0.001, 'time': '8.942518472671509 Seconds', 'norm': 0.20703125}\n", + "{'loss': '2.4129254817962646', 'num_iter': 404480, 'lr': 0.001, 'time': '8.33024787902832 Seconds', 'norm': 0.251953125}\n", + "{'loss': '2.376084327697754', 'num_iter': 404992, 'lr': 0.001, 'time': '8.258531332015991 Seconds', 'norm': 0.2578125}\n", + "{'loss': '2.392695426940918', 'num_iter': 405504, 'lr': 0.001, 'time': '8.466645240783691 Seconds', 'norm': 0.24609375}\n", + "{'loss': '2.3728458881378174', 'num_iter': 406016, 'lr': 0.001, 'time': '8.738937139511108 Seconds', 'norm': 0.2099609375}\n", + "{'loss': '2.4293134212493896', 'num_iter': 406528, 'lr': 0.001, 'time': '8.360280513763428 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.3796472549438477', 'num_iter': 407040, 'lr': 0.001, 'time': '8.566019535064697 Seconds', 'norm': 0.2470703125}\n", + "{'loss': '2.3470354080200195', 'num_iter': 407552, 'lr': 0.001, 'time': '8.829320192337036 Seconds', 'norm': 0.3125}\n", + "{'loss': '2.2670655250549316', 'num_iter': 408064, 'lr': 0.001, 'time': '9.193796396255493 Seconds', 'norm': 0.2294921875}\n", + "{'loss': '2.329683542251587', 'num_iter': 408576, 'lr': 0.001, 'time': '9.20411205291748 Seconds', 'norm': 0.2392578125}\n", + "{'loss': '2.4627790451049805', 'num_iter': 409088, 'lr': 0.001, 'time': '8.138136863708496 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.3983895778656006', 'num_iter': 409600, 'lr': 0.001, 'time': '8.655596494674683 Seconds', 'norm': 0.232421875}\n", + "{'loss': '2.4052984714508057', 'num_iter': 410112, 'lr': 0.001, 'time': '8.46367597579956 Seconds', 'norm': 0.19140625}\n", + "{'loss': '2.4126346111297607', 'num_iter': 410624, 'lr': 0.001, 'time': '8.578564405441284 Seconds', 'norm': 0.19921875}\n", + "{'loss': '2.4048898220062256', 'num_iter': 411136, 'lr': 0.001, 'time': '8.381089448928833 Seconds', 'norm': 0.1982421875}\n", + "{'loss': 
'2.352526903152466', 'num_iter': 411648, 'lr': 0.001, 'time': '9.173415422439575 Seconds', 'norm': 0.2373046875}\n", + "{'loss': '2.339158058166504', 'num_iter': 412160, 'lr': 0.001, 'time': '8.372507333755493 Seconds', 'norm': 0.201171875}\n", + "{'loss': '2.333714723587036', 'num_iter': 412672, 'lr': 0.001, 'time': '8.694286346435547 Seconds', 'norm': 0.18359375}\n", + "{'loss': '2.4139726161956787', 'num_iter': 413184, 'lr': 0.001, 'time': '7.8678436279296875 Seconds', 'norm': 0.203125}\n", + "{'loss': '2.4106993675231934', 'num_iter': 413696, 'lr': 0.001, 'time': '8.296418190002441 Seconds', 'norm': 0.228515625}\n", + "{'loss': '2.3760986328125', 'num_iter': 414208, 'lr': 0.001, 'time': '8.5747230052948 Seconds', 'norm': 0.2470703125}\n", + "{'loss': '2.3639795780181885', 'num_iter': 414720, 'lr': 0.001, 'time': '8.687668085098267 Seconds', 'norm': 0.224609375}\n", + "{'loss': '2.3750815391540527', 'num_iter': 415232, 'lr': 0.001, 'time': '8.3549964427948 Seconds', 'norm': 0.2451171875}\n", + "{'loss': '2.3527708053588867', 'num_iter': 415744, 'lr': 0.001, 'time': '8.190661191940308 Seconds', 'norm': 0.19921875}\n", + "{'loss': '2.3750665187835693', 'num_iter': 416256, 'lr': 0.001, 'time': '8.488169193267822 Seconds', 'norm': 0.28125}\n", + "{'loss': '2.39205002784729', 'num_iter': 416768, 'lr': 0.001, 'time': '8.937549114227295 Seconds', 'norm': 0.283203125}\n", + "{'loss': '2.354696035385132', 'num_iter': 417280, 'lr': 0.001, 'time': '9.636016130447388 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.3929049968719482', 'num_iter': 417792, 'lr': 0.001, 'time': '8.373519659042358 Seconds', 'norm': 0.2275390625}\n", + "{'loss': '2.3752613067626953', 'num_iter': 418304, 'lr': 0.001, 'time': '8.45774793624878 Seconds', 'norm': 0.2236328125}\n", + "{'loss': '2.426490306854248', 'num_iter': 418816, 'lr': 0.001, 'time': '7.966102600097656 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.3632943630218506', 'num_iter': 419328, 'lr': 0.001, 'time': '8.249633312225342 
Seconds', 'norm': 0.1904296875}\n", + "{'loss': '2.404209613800049', 'num_iter': 419840, 'lr': 0.001, 'time': '8.491206884384155 Seconds', 'norm': 0.2119140625}\n", + "{'loss': '2.451598644256592', 'num_iter': 420352, 'lr': 0.001, 'time': '7.860686540603638 Seconds', 'norm': 0.23828125}\n", + "{'loss': '2.3927338123321533', 'num_iter': 420864, 'lr': 0.001, 'time': '8.063232183456421 Seconds', 'norm': 0.2353515625}\n", + "{'loss': '2.437558650970459', 'num_iter': 421376, 'lr': 0.001, 'time': '8.264594316482544 Seconds', 'norm': 0.23046875}\n", + "{'loss': '2.3560588359832764', 'num_iter': 421888, 'lr': 0.001, 'time': '8.32688021659851 Seconds', 'norm': 0.259765625}\n", + "{'loss': '2.4289298057556152', 'num_iter': 422400, 'lr': 0.001, 'time': '8.207964658737183 Seconds', 'norm': 0.26171875}\n", + "{'loss': '2.356796979904175', 'num_iter': 422912, 'lr': 0.001, 'time': '8.145513534545898 Seconds', 'norm': 0.2578125}\n", + "{'loss': '2.4113709926605225', 'num_iter': 423424, 'lr': 0.001, 'time': '8.084244966506958 Seconds', 'norm': 0.2080078125}\n", + "{'loss': '2.3472721576690674', 'num_iter': 423936, 'lr': 0.001, 'time': '8.54978895187378 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.323650360107422', 'num_iter': 424448, 'lr': 0.001, 'time': '8.50110149383545 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.3212735652923584', 'num_iter': 424960, 'lr': 0.001, 'time': '8.67921757698059 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.313404083251953', 'num_iter': 425472, 'lr': 0.001, 'time': '8.622750997543335 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.3760454654693604', 'num_iter': 425984, 'lr': 0.001, 'time': '8.961697816848755 Seconds', 'norm': 0.236328125}\n", + "{'loss': '2.360391855239868', 'num_iter': 426496, 'lr': 0.001, 'time': '14.816408634185791 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.368701696395874', 'num_iter': 427008, 'lr': 0.001, 'time': '8.420179843902588 Seconds', 'norm': 0.2060546875}\n", + "{'loss': '2.3470919132232666', 
'num_iter': 427520, 'lr': 0.001, 'time': '8.416290521621704 Seconds', 'norm': 0.2236328125}\n", + "{'loss': '2.429783344268799', 'num_iter': 428032, 'lr': 0.001, 'time': '8.16409969329834 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.3814609050750732', 'num_iter': 428544, 'lr': 0.001, 'time': '8.370206594467163 Seconds', 'norm': 0.1748046875}\n", + "{'loss': '2.4116275310516357', 'num_iter': 429056, 'lr': 0.001, 'time': '8.035132646560669 Seconds', 'norm': 0.173828125}\n", + "{'loss': '2.3713903427124023', 'num_iter': 429568, 'lr': 0.001, 'time': '8.298620700836182 Seconds', 'norm': 0.2197265625}\n", + "{'loss': '2.3307623863220215', 'num_iter': 430080, 'lr': 0.001, 'time': '8.47738790512085 Seconds', 'norm': 0.1640625}\n", + "{'loss': '2.4084393978118896', 'num_iter': 430592, 'lr': 0.001, 'time': '8.024546384811401 Seconds', 'norm': 0.16796875}\n", + "{'loss': '2.3230910301208496', 'num_iter': 431104, 'lr': 0.001, 'time': '8.706751585006714 Seconds', 'norm': 0.1884765625}\n", + "{'loss': '2.3433265686035156', 'num_iter': 431616, 'lr': 0.001, 'time': '8.390235900878906 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.4173598289489746', 'num_iter': 432128, 'lr': 0.001, 'time': '7.890843629837036 Seconds', 'norm': 0.173828125}\n", + "{'loss': '2.3870465755462646', 'num_iter': 432640, 'lr': 0.001, 'time': '8.019165992736816 Seconds', 'norm': 0.203125}\n", + "{'loss': '2.3554000854492188', 'num_iter': 433152, 'lr': 0.001, 'time': '8.79294753074646 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.382319688796997', 'num_iter': 433664, 'lr': 0.001, 'time': '8.212408065795898 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.387840747833252', 'num_iter': 434176, 'lr': 0.001, 'time': '9.062758445739746 Seconds', 'norm': 0.255859375}\n", + "{'loss': '2.359999656677246', 'num_iter': 434688, 'lr': 0.001, 'time': '9.097012281417847 Seconds', 'norm': 0.19921875}\n", + "{'loss': '2.3492836952209473', 'num_iter': 435200, 'lr': 0.001, 'time': '8.802151441574097 Seconds', 
'norm': 0.193359375}\n", + "{'loss': '2.335460662841797', 'num_iter': 435712, 'lr': 0.001, 'time': '8.49456000328064 Seconds', 'norm': 0.2109375}\n", + "{'loss': '2.426262140274048', 'num_iter': 436224, 'lr': 0.001, 'time': '7.820612668991089 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.371767997741699', 'num_iter': 436736, 'lr': 0.001, 'time': '8.385245084762573 Seconds', 'norm': 0.24609375}\n", + "{'loss': '2.4270973205566406', 'num_iter': 437248, 'lr': 0.001, 'time': '8.645596265792847 Seconds', 'norm': 0.21484375}\n", + "{'loss': '2.4100735187530518', 'num_iter': 437760, 'lr': 0.001, 'time': '8.411604166030884 Seconds', 'norm': 0.228515625}\n", + "{'loss': '2.4299685955047607', 'num_iter': 438272, 'lr': 0.001, 'time': '8.397337913513184 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.3664865493774414', 'num_iter': 438784, 'lr': 0.001, 'time': '11.202840328216553 Seconds', 'norm': 0.267578125}\n", + "{'loss': '2.4003891944885254', 'num_iter': 439296, 'lr': 0.001, 'time': '8.450385808944702 Seconds', 'norm': 0.18359375}\n", + "{'loss': '2.4564998149871826', 'num_iter': 439808, 'lr': 0.001, 'time': '8.00091004371643 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.362941026687622', 'num_iter': 440320, 'lr': 0.001, 'time': '8.44153904914856 Seconds', 'norm': 0.255859375}\n", + "{'loss': '2.297790050506592', 'num_iter': 440832, 'lr': 0.001, 'time': '9.09417724609375 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.4719252586364746', 'num_iter': 441344, 'lr': 0.001, 'time': '7.951310396194458 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.3687024116516113', 'num_iter': 441856, 'lr': 0.001, 'time': '8.233053207397461 Seconds', 'norm': 0.232421875}\n", + "{'loss': '2.399784564971924', 'num_iter': 442368, 'lr': 0.001, 'time': '7.965995788574219 Seconds', 'norm': 0.216796875}\n", + "{'loss': '2.3668437004089355', 'num_iter': 442880, 'lr': 0.001, 'time': '9.1836576461792 Seconds', 'norm': 0.2734375}\n", + "{'loss': '2.4053683280944824', 'num_iter': 443392, 
'lr': 0.001, 'time': '8.996765851974487 Seconds', 'norm': 0.251953125}\n", + "{'loss': '2.337644100189209', 'num_iter': 443904, 'lr': 0.001, 'time': '8.723351001739502 Seconds', 'norm': 0.23046875}\n", + "{'loss': '2.3764424324035645', 'num_iter': 444416, 'lr': 0.001, 'time': '8.493955373764038 Seconds', 'norm': 0.181640625}\n", + "{'loss': '2.357375383377075', 'num_iter': 444928, 'lr': 0.001, 'time': '8.587746620178223 Seconds', 'norm': 0.234375}\n", + "{'loss': '2.3794615268707275', 'num_iter': 445440, 'lr': 0.001, 'time': '8.793220281600952 Seconds', 'norm': 0.234375}\n", + "{'loss': '2.3730883598327637', 'num_iter': 445952, 'lr': 0.001, 'time': '8.136606454849243 Seconds', 'norm': 0.2177734375}\n", + "{'loss': '2.3420424461364746', 'num_iter': 446464, 'lr': 0.001, 'time': '8.530357360839844 Seconds', 'norm': 0.244140625}\n", + "{'loss': '2.4283790588378906', 'num_iter': 446976, 'lr': 0.001, 'time': '8.089701414108276 Seconds', 'norm': 0.2294921875}\n", + "{'loss': '2.364197254180908', 'num_iter': 447488, 'lr': 0.001, 'time': '8.731513261795044 Seconds', 'norm': 0.201171875}\n", + "{'loss': '2.399249792098999', 'num_iter': 448000, 'lr': 0.001, 'time': '8.21769905090332 Seconds', 'norm': 0.1904296875}\n", + "{'loss': '2.4335882663726807', 'num_iter': 448512, 'lr': 0.001, 'time': '8.377237796783447 Seconds', 'norm': 0.201171875}\n", + "{'loss': '2.4006102085113525', 'num_iter': 449024, 'lr': 0.001, 'time': '8.082024097442627 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.408008098602295', 'num_iter': 449536, 'lr': 0.001, 'time': '8.571084022521973 Seconds', 'norm': 0.2314453125}\n", + "{'loss': '2.2652509212493896', 'num_iter': 450048, 'lr': 0.001, 'time': '8.913176536560059 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.3373382091522217', 'num_iter': 450560, 'lr': 0.001, 'time': '8.795184850692749 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.411929130554199', 'num_iter': 451072, 'lr': 0.001, 'time': '8.65637493133545 Seconds', 'norm': 0.1875}\n", + 
"{'loss': '2.356407642364502', 'num_iter': 451584, 'lr': 0.001, 'time': '8.356616497039795 Seconds', 'norm': 0.1923828125}\n", + "{'loss': '2.387885570526123', 'num_iter': 452096, 'lr': 0.001, 'time': '8.915687799453735 Seconds', 'norm': 0.2236328125}\n", + "{'loss': '2.3866195678710938', 'num_iter': 452608, 'lr': 0.001, 'time': '8.876915693283081 Seconds', 'norm': 0.216796875}\n", + "{'loss': '2.4586849212646484', 'num_iter': 453120, 'lr': 0.001, 'time': '7.948384761810303 Seconds', 'norm': 0.20703125}\n", + "{'loss': '2.4246647357940674', 'num_iter': 453632, 'lr': 0.001, 'time': '7.839402675628662 Seconds', 'norm': 0.1767578125}\n", + "{'loss': '2.329982042312622', 'num_iter': 454144, 'lr': 0.001, 'time': '8.43845820426941 Seconds', 'norm': 0.197265625}\n", + "{'loss': '2.3129801750183105', 'num_iter': 454656, 'lr': 0.001, 'time': '8.744166612625122 Seconds', 'norm': 0.2080078125}\n", + "{'loss': '2.4189136028289795', 'num_iter': 455168, 'lr': 0.001, 'time': '7.886698246002197 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.3915693759918213', 'num_iter': 455680, 'lr': 0.001, 'time': '8.073156833648682 Seconds', 'norm': 0.1669921875}\n", + "{'loss': '2.3864991664886475', 'num_iter': 456192, 'lr': 0.001, 'time': '8.216456651687622 Seconds', 'norm': 0.201171875}\n", + "{'loss': '2.376561403274536', 'num_iter': 456704, 'lr': 0.001, 'time': '8.266462326049805 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.3458783626556396', 'num_iter': 457216, 'lr': 0.001, 'time': '8.130363464355469 Seconds', 'norm': 0.2216796875}\n", + "{'loss': '2.3506593704223633', 'num_iter': 457728, 'lr': 0.001, 'time': '8.144657135009766 Seconds', 'norm': 0.2158203125}\n", + "{'loss': '2.4056694507598877', 'num_iter': 458240, 'lr': 0.001, 'time': '8.644380331039429 Seconds', 'norm': 0.228515625}\n", + "{'loss': '2.3171944618225098', 'num_iter': 458752, 'lr': 0.001, 'time': '8.691620111465454 Seconds', 'norm': 0.2236328125}\n", + "{'loss': '2.379934310913086', 'num_iter': 459264, 'lr': 
0.001, 'time': '16.02401328086853 Seconds', 'norm': 0.1884765625}\n", + "{'loss': '2.368543863296509', 'num_iter': 459776, 'lr': 0.001, 'time': '8.232171535491943 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.418208122253418', 'num_iter': 460288, 'lr': 0.001, 'time': '8.799515962600708 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.381540298461914', 'num_iter': 460800, 'lr': 0.001, 'time': '9.329339027404785 Seconds', 'norm': 0.1943359375}\n", + "{'loss': '2.3536040782928467', 'num_iter': 461312, 'lr': 0.001, 'time': '9.533224821090698 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.3677797317504883', 'num_iter': 461824, 'lr': 0.001, 'time': '8.597682237625122 Seconds', 'norm': 0.1953125}\n", + "{'loss': '2.43990159034729', 'num_iter': 462336, 'lr': 0.001, 'time': '8.578311681747437 Seconds', 'norm': 0.185546875}\n", + "{'loss': '2.441256284713745', 'num_iter': 462848, 'lr': 0.001, 'time': '8.23148488998413 Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.3965487480163574', 'num_iter': 463360, 'lr': 0.001, 'time': '8.193925857543945 Seconds', 'norm': 0.2333984375}\n", + "{'loss': '2.3727118968963623', 'num_iter': 463872, 'lr': 0.001, 'time': '8.5469331741333 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.4100823402404785', 'num_iter': 464384, 'lr': 0.001, 'time': '8.237367630004883 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.3725318908691406', 'num_iter': 464896, 'lr': 0.001, 'time': '8.214614152908325 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.3863420486450195', 'num_iter': 465408, 'lr': 0.001, 'time': '8.207295179367065 Seconds', 'norm': 0.208984375}\n", + "{'loss': '2.409487724304199', 'num_iter': 465920, 'lr': 0.001, 'time': '7.918585300445557 Seconds', 'norm': 0.208984375}\n", + "{'loss': '2.3612053394317627', 'num_iter': 466432, 'lr': 0.001, 'time': '8.13570261001587 Seconds', 'norm': 0.2216796875}\n", + "{'loss': '2.415224552154541', 'num_iter': 466944, 'lr': 0.001, 'time': '8.22749376296997 Seconds', 'norm': 0.2333984375}\n", + "{'loss': 
'2.3340158462524414', 'num_iter': 467456, 'lr': 0.001, 'time': '8.539586305618286 Seconds', 'norm': 0.203125}\n", + "{'loss': '2.3257997035980225', 'num_iter': 467968, 'lr': 0.001, 'time': '8.659716367721558 Seconds', 'norm': 0.2099609375}\n", + "{'loss': '2.3839058876037598', 'num_iter': 468480, 'lr': 0.001, 'time': '8.563130617141724 Seconds', 'norm': 0.2080078125}\n", + "{'loss': '2.36264967918396', 'num_iter': 468992, 'lr': 0.001, 'time': '8.844634056091309 Seconds', 'norm': 0.244140625}\n", + "{'loss': '2.3602776527404785', 'num_iter': 469504, 'lr': 0.001, 'time': '9.018509864807129 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.406189203262329', 'num_iter': 470016, 'lr': 0.001, 'time': '8.937938690185547 Seconds', 'norm': 0.22265625}\n", + "{'loss': '2.402311325073242', 'num_iter': 470528, 'lr': 0.001, 'time': '8.953642845153809 Seconds', 'norm': 0.251953125}\n", + "{'loss': '2.3344388008117676', 'num_iter': 471040, 'lr': 0.001, 'time': '9.162652015686035 Seconds', 'norm': 0.1982421875}\n", + "{'loss': '2.352252960205078', 'num_iter': 471552, 'lr': 0.001, 'time': '8.538694620132446 Seconds', 'norm': 0.1953125}\n", + "{'loss': '2.3593435287475586', 'num_iter': 472064, 'lr': 0.001, 'time': '8.479600191116333 Seconds', 'norm': 0.1923828125}\n", + "{'loss': '2.382215738296509', 'num_iter': 472576, 'lr': 0.001, 'time': '8.135757446289062 Seconds', 'norm': 0.1884765625}\n", + "{'loss': '2.3404409885406494', 'num_iter': 473088, 'lr': 0.001, 'time': '8.302355289459229 Seconds', 'norm': 0.1982421875}\n", + "{'loss': '2.390848398208618', 'num_iter': 473600, 'lr': 0.001, 'time': '8.376328468322754 Seconds', 'norm': 0.1962890625}\n", + "{'loss': '2.348055362701416', 'num_iter': 474112, 'lr': 0.001, 'time': '8.287508487701416 Seconds', 'norm': 0.216796875}\n", + "{'loss': '2.354001522064209', 'num_iter': 474624, 'lr': 0.001, 'time': '8.470701217651367 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.3449935913085938', 'num_iter': 475136, 'lr': 0.001, 'time': 
'8.9324951171875 Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.333287239074707', 'num_iter': 475648, 'lr': 0.001, 'time': '8.595960140228271 Seconds', 'norm': 0.185546875}\n", + "{'loss': '2.3774924278259277', 'num_iter': 476160, 'lr': 0.001, 'time': '8.404988050460815 Seconds', 'norm': 0.1962890625}\n", + "{'loss': '2.3467748165130615', 'num_iter': 476672, 'lr': 0.001, 'time': '8.202607154846191 Seconds', 'norm': 0.2333984375}\n", + "{'loss': '2.3311097621917725', 'num_iter': 477184, 'lr': 0.001, 'time': '8.554301738739014 Seconds', 'norm': 0.240234375}\n", + "{'loss': '2.3121092319488525', 'num_iter': 477696, 'lr': 0.001, 'time': '8.477089881896973 Seconds', 'norm': 0.2890625}\n", + "{'loss': '2.3786628246307373', 'num_iter': 478208, 'lr': 0.001, 'time': '8.489261388778687 Seconds', 'norm': 0.25390625}\n", + "{'loss': '2.337364673614502', 'num_iter': 478720, 'lr': 0.001, 'time': '9.139958143234253 Seconds', 'norm': 0.234375}\n", + "{'loss': '2.4064409732818604', 'num_iter': 479232, 'lr': 0.001, 'time': '9.389977216720581 Seconds', 'norm': 0.2578125}\n", + "{'loss': '2.3612942695617676', 'num_iter': 479744, 'lr': 0.001, 'time': '9.034945011138916 Seconds', 'norm': 0.224609375}\n", + "{'loss': '2.3026418685913086', 'num_iter': 480256, 'lr': 0.001, 'time': '8.54091215133667 Seconds', 'norm': 0.2294921875}\n", + "{'loss': '2.348909616470337', 'num_iter': 480768, 'lr': 0.001, 'time': '8.438228368759155 Seconds', 'norm': 0.1953125}\n", + "{'loss': '2.4111037254333496', 'num_iter': 481280, 'lr': 0.001, 'time': '8.249490022659302 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.3276188373565674', 'num_iter': 481792, 'lr': 0.001, 'time': '9.018401145935059 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.4113802909851074', 'num_iter': 482304, 'lr': 0.001, 'time': '8.449106693267822 Seconds', 'norm': 0.21484375}\n", + "{'loss': '2.3467493057250977', 'num_iter': 482816, 'lr': 0.001, 'time': '8.386046171188354 Seconds', 'norm': 0.2197265625}\n", + "{'loss': 
'2.362426996231079', 'num_iter': 483328, 'lr': 0.001, 'time': '11.147133827209473 Seconds', 'norm': 0.16015625}\n", + "{'loss': '2.4662275314331055', 'num_iter': 483840, 'lr': 0.001, 'time': '7.901785373687744 Seconds', 'norm': 0.203125}\n", + "{'loss': '2.4295666217803955', 'num_iter': 484352, 'lr': 0.001, 'time': '8.287553071975708 Seconds', 'norm': 0.1943359375}\n", + "{'loss': '2.3434576988220215', 'num_iter': 484864, 'lr': 0.001, 'time': '8.194469451904297 Seconds', 'norm': 0.1943359375}\n", + "{'loss': '2.341464042663574', 'num_iter': 485376, 'lr': 0.001, 'time': '8.370094060897827 Seconds', 'norm': 0.171875}\n", + "{'loss': '2.3726272583007812', 'num_iter': 485888, 'lr': 0.001, 'time': '8.955700159072876 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.3876588344573975', 'num_iter': 486400, 'lr': 0.001, 'time': '8.19834303855896 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.4076972007751465', 'num_iter': 486912, 'lr': 0.001, 'time': '8.247403621673584 Seconds', 'norm': 0.2099609375}\n", + "{'loss': '2.4125359058380127', 'num_iter': 487424, 'lr': 0.001, 'time': '8.285757541656494 Seconds', 'norm': 0.19140625}\n", + "{'loss': '2.397003650665283', 'num_iter': 487936, 'lr': 0.001, 'time': '8.908341646194458 Seconds', 'norm': 0.18359375}\n", + "{'loss': '2.377776861190796', 'num_iter': 488448, 'lr': 0.001, 'time': '9.041271924972534 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.392592430114746', 'num_iter': 488960, 'lr': 0.001, 'time': '8.676827430725098 Seconds', 'norm': 0.2158203125}\n", + "{'loss': '2.471567392349243', 'num_iter': 489472, 'lr': 0.001, 'time': '8.05391788482666 Seconds', 'norm': 0.1904296875}\n", + "{'loss': '2.3785459995269775', 'num_iter': 489984, 'lr': 0.001, 'time': '8.382973909378052 Seconds', 'norm': 0.2060546875}\n", + "{'loss': '2.3001551628112793', 'num_iter': 490496, 'lr': 0.001, 'time': '8.051852941513062 Seconds', 'norm': 0.1982421875}\n", + "{'loss': '2.3762669563293457', 'num_iter': 491008, 'lr': 0.001, 'time': 
'8.449697971343994 Seconds', 'norm': 0.166015625}\n", + "{'loss': '2.363896608352661', 'num_iter': 491520, 'lr': 0.001, 'time': '8.567900657653809 Seconds', 'norm': 0.2119140625}\n", + "{'loss': '2.366039991378784', 'num_iter': 492032, 'lr': 0.001, 'time': '13.601678371429443 Seconds', 'norm': 0.283203125}\n", + "{'loss': '2.39947247505188', 'num_iter': 492544, 'lr': 0.001, 'time': '8.35796046257019 Seconds', 'norm': 0.291015625}\n", + "{'loss': '2.418290853500366', 'num_iter': 493056, 'lr': 0.001, 'time': '8.084415912628174 Seconds', 'norm': 0.30078125}\n", + "{'loss': '2.359290599822998', 'num_iter': 493568, 'lr': 0.001, 'time': '8.67256474494934 Seconds', 'norm': 0.2294921875}\n", + "{'loss': '2.378432512283325', 'num_iter': 494080, 'lr': 0.001, 'time': '8.188647031784058 Seconds', 'norm': 0.265625}\n", + "{'loss': '2.420287847518921', 'num_iter': 494592, 'lr': 0.001, 'time': '8.16368842124939 Seconds', 'norm': 0.2265625}\n", + "{'loss': '2.3036482334136963', 'num_iter': 495104, 'lr': 0.001, 'time': '9.02479600906372 Seconds', 'norm': 0.1787109375}\n", + "{'loss': '2.4056458473205566', 'num_iter': 495616, 'lr': 0.001, 'time': '8.140930891036987 Seconds', 'norm': 0.24609375}\n", + "{'loss': '2.388606548309326', 'num_iter': 496128, 'lr': 0.001, 'time': '8.198585510253906 Seconds', 'norm': 0.189453125}\n", + "{'loss': '2.411092519760132', 'num_iter': 496640, 'lr': 0.001, 'time': '8.870475053787231 Seconds', 'norm': 0.20703125}\n", + "{'loss': '2.3758797645568848', 'num_iter': 497152, 'lr': 0.001, 'time': '8.580946445465088 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.360112428665161', 'num_iter': 497664, 'lr': 0.001, 'time': '9.028778791427612 Seconds', 'norm': 0.1962890625}\n", + "{'loss': '2.392457962036133', 'num_iter': 498176, 'lr': 0.001, 'time': '9.043829917907715 Seconds', 'norm': 0.2099609375}\n", + "{'loss': '2.401127338409424', 'num_iter': 498688, 'lr': 0.001, 'time': '8.383143186569214 Seconds', 'norm': 0.224609375}\n", + "{'loss': '2.4024276733398438', 
'num_iter': 499200, 'lr': 0.001, 'time': '8.350994110107422 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.323856830596924', 'num_iter': 499712, 'lr': 0.001, 'time': '8.615326166152954 Seconds', 'norm': 0.2119140625}\n", + "{'loss': '2.39193058013916', 'num_iter': 500224, 'lr': 0.001, 'time': '8.265206336975098 Seconds', 'norm': 0.19921875}\n", + "{'loss': '2.371000289916992', 'num_iter': 500736, 'lr': 0.001, 'time': '8.38897442817688 Seconds', 'norm': 0.1904296875}\n", + "{'loss': '2.293903112411499', 'num_iter': 501248, 'lr': 0.001, 'time': '8.841327428817749 Seconds', 'norm': 0.2373046875}\n", + "{'loss': '2.3905701637268066', 'num_iter': 501760, 'lr': 0.001, 'time': '8.056654691696167 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.3754734992980957', 'num_iter': 502272, 'lr': 0.001, 'time': '8.494338750839233 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.4096264839172363', 'num_iter': 502784, 'lr': 0.001, 'time': '8.464124917984009 Seconds', 'norm': 0.236328125}\n", + "{'loss': '2.3774335384368896', 'num_iter': 503296, 'lr': 0.001, 'time': '8.483112573623657 Seconds', 'norm': 0.244140625}\n", + "{'loss': '2.3121869564056396', 'num_iter': 503808, 'lr': 0.001, 'time': '8.581482887268066 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.3369736671447754', 'num_iter': 504320, 'lr': 0.001, 'time': '8.920711040496826 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.3857884407043457', 'num_iter': 504832, 'lr': 0.001, 'time': '8.898962020874023 Seconds', 'norm': 0.181640625}\n", + "{'loss': '2.418686628341675', 'num_iter': 505344, 'lr': 0.001, 'time': '8.683974027633667 Seconds', 'norm': 0.1796875}\n", + "{'loss': '2.310661554336548', 'num_iter': 505856, 'lr': 0.001, 'time': '9.940396070480347 Seconds', 'norm': 0.1591796875}\n", + "{'loss': '2.409733533859253', 'num_iter': 506368, 'lr': 0.001, 'time': '9.417585611343384 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.3392553329467773', 'num_iter': 506880, 'lr': 0.001, 'time': '8.4288809299469 Seconds', 
'norm': 0.212890625}\n", + "{'loss': '2.435041666030884', 'num_iter': 507392, 'lr': 0.001, 'time': '8.152979850769043 Seconds', 'norm': 0.2080078125}\n", + "{'loss': '2.3447811603546143', 'num_iter': 507904, 'lr': 0.001, 'time': '8.511025667190552 Seconds', 'norm': 0.2099609375}\n", + "{'loss': '2.3672096729278564', 'num_iter': 508416, 'lr': 0.001, 'time': '8.543021440505981 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.3263514041900635', 'num_iter': 508928, 'lr': 0.001, 'time': '8.853461027145386 Seconds', 'norm': 0.1796875}\n", + "{'loss': '2.3519973754882812', 'num_iter': 509440, 'lr': 0.001, 'time': '8.602471351623535 Seconds', 'norm': 0.1962890625}\n", + "{'loss': '2.3760907649993896', 'num_iter': 509952, 'lr': 0.001, 'time': '8.4967622756958 Seconds', 'norm': 0.20703125}\n", + "{'loss': '2.3209567070007324', 'num_iter': 510464, 'lr': 0.001, 'time': '8.946675300598145 Seconds', 'norm': 0.2314453125}\n", + "{'loss': '2.3991098403930664', 'num_iter': 510976, 'lr': 0.001, 'time': '8.399133920669556 Seconds', 'norm': 0.208984375}\n", + "{'loss': '2.3704538345336914', 'num_iter': 511488, 'lr': 0.001, 'time': '8.12042498588562 Seconds', 'norm': 0.203125}\n", + "{'loss': '2.374629020690918', 'num_iter': 512000, 'lr': 0.001, 'time': '8.60893177986145 Seconds', 'norm': 0.19140625}\n", + "{'loss': '2.3634965419769287', 'num_iter': 512512, 'lr': 0.001, 'time': '8.509299278259277 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.369128942489624', 'num_iter': 513024, 'lr': 0.001, 'time': '8.855254650115967 Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.3511173725128174', 'num_iter': 513536, 'lr': 0.001, 'time': '9.62635326385498 Seconds', 'norm': 0.2099609375}\n", + "{'loss': '2.4101855754852295', 'num_iter': 514048, 'lr': 0.001, 'time': '8.5211820602417 Seconds', 'norm': 0.2216796875}\n", + "{'loss': '2.4070825576782227', 'num_iter': 514560, 'lr': 0.001, 'time': '9.119117975234985 Seconds', 'norm': 0.20703125}\n", + "{'loss': '2.4129981994628906', 'num_iter': 
515072, 'lr': 0.001, 'time': '8.65252947807312 Seconds', 'norm': 0.3125}\n", + "{'loss': '2.346876621246338', 'num_iter': 515584, 'lr': 0.001, 'time': '8.852932214736938 Seconds', 'norm': 0.255859375}\n", + "{'loss': '2.3596718311309814', 'num_iter': 516096, 'lr': 0.001, 'time': '8.366950035095215 Seconds', 'norm': 0.1953125}\n", + "{'loss': '2.3780057430267334', 'num_iter': 516608, 'lr': 0.001, 'time': '8.282512664794922 Seconds', 'norm': 0.2373046875}\n", + "{'loss': '2.401291847229004', 'num_iter': 517120, 'lr': 0.001, 'time': '8.261131286621094 Seconds', 'norm': 0.2099609375}\n", + "{'loss': '2.4375083446502686', 'num_iter': 517632, 'lr': 0.001, 'time': '8.39326024055481 Seconds', 'norm': 0.2412109375}\n", + "{'loss': '2.336082696914673', 'num_iter': 518144, 'lr': 0.001, 'time': '8.69446611404419 Seconds', 'norm': 0.2373046875}\n", + "{'loss': '2.420104742050171', 'num_iter': 518656, 'lr': 0.001, 'time': '8.330755710601807 Seconds', 'norm': 0.2255859375}\n", + "{'loss': '2.3193917274475098', 'num_iter': 519168, 'lr': 0.001, 'time': '8.25812554359436 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.3978116512298584', 'num_iter': 519680, 'lr': 0.001, 'time': '8.219613075256348 Seconds', 'norm': 0.1943359375}\n", + "{'loss': '2.3465352058410645', 'num_iter': 520192, 'lr': 0.001, 'time': '8.543850183486938 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.358715295791626', 'num_iter': 520704, 'lr': 0.001, 'time': '8.325904846191406 Seconds', 'norm': 0.19140625}\n", + "{'loss': '2.3471877574920654', 'num_iter': 521216, 'lr': 0.001, 'time': '8.214564085006714 Seconds', 'norm': 0.1923828125}\n", + "{'loss': '2.2903223037719727', 'num_iter': 521728, 'lr': 0.001, 'time': '9.3233482837677 Seconds', 'norm': 0.177734375}\n", + "{'loss': '2.411752939224243', 'num_iter': 522240, 'lr': 0.001, 'time': '8.61816668510437 Seconds', 'norm': 0.21484375}\n", + "{'loss': '2.3507497310638428', 'num_iter': 522752, 'lr': 0.001, 'time': '8.917038440704346 Seconds', 'norm': 
0.1923828125}\n", + "{'loss': '2.3592119216918945', 'num_iter': 523264, 'lr': 0.001, 'time': '9.550814390182495 Seconds', 'norm': 0.2431640625}\n", + "{'loss': '2.3860673904418945', 'num_iter': 523776, 'lr': 0.001, 'time': '8.83174729347229 Seconds', 'norm': 0.2451171875}\n", + "{'loss': '2.387636184692383', 'num_iter': 524288, 'lr': 0.001, 'time': '8.70369005203247 Seconds', 'norm': 0.201171875}\n", + "{'loss': '2.392026424407959', 'num_iter': 524800, 'lr': 0.001, 'time': '13.869131803512573 Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.331899642944336', 'num_iter': 525312, 'lr': 0.001, 'time': '8.832511901855469 Seconds', 'norm': 0.2119140625}\n", + "{'loss': '2.3543336391448975', 'num_iter': 525824, 'lr': 0.001, 'time': '8.46614670753479 Seconds', 'norm': 0.1962890625}\n", + "{'loss': '2.320516586303711', 'num_iter': 526336, 'lr': 0.001, 'time': '8.482982873916626 Seconds', 'norm': 0.19140625}\n", + "{'loss': '2.430494546890259', 'num_iter': 526848, 'lr': 0.001, 'time': '7.889341831207275 Seconds', 'norm': 0.2177734375}\n", + "{'loss': '2.4082071781158447', 'num_iter': 527360, 'lr': 0.001, 'time': '8.43980884552002 Seconds', 'norm': 0.19140625}\n", + "{'loss': '2.4150171279907227', 'num_iter': 527872, 'lr': 0.001, 'time': '8.003417015075684 Seconds', 'norm': 0.19140625}\n", + "{'loss': '2.40669322013855', 'num_iter': 528384, 'lr': 0.001, 'time': '8.771125555038452 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.3950188159942627', 'num_iter': 528896, 'lr': 0.001, 'time': '10.83291244506836 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.3368732929229736', 'num_iter': 529408, 'lr': 0.001, 'time': '8.541741371154785 Seconds', 'norm': 0.2197265625}\n", + "{'loss': '2.3503334522247314', 'num_iter': 529920, 'lr': 0.001, 'time': '8.551754713058472 Seconds', 'norm': 0.24609375}\n", + "{'loss': '2.416189670562744', 'num_iter': 530432, 'lr': 0.001, 'time': '8.240530252456665 Seconds', 'norm': 0.1953125}\n", + "{'loss': '2.3444883823394775', 'num_iter': 530944, 
'lr': 0.001, 'time': '8.571240901947021 Seconds', 'norm': 0.208984375}\n", + "{'loss': '2.39701509475708', 'num_iter': 531456, 'lr': 0.001, 'time': '9.361477613449097 Seconds', 'norm': 0.21875}\n", + "{'loss': '2.350649356842041', 'num_iter': 531968, 'lr': 0.001, 'time': '9.15995740890503 Seconds', 'norm': 0.2099609375}\n", + "{'loss': '2.3602843284606934', 'num_iter': 532480, 'lr': 0.001, 'time': '10.009470462799072 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.37050724029541', 'num_iter': 532992, 'lr': 0.001, 'time': '8.937512159347534 Seconds', 'norm': 0.1943359375}\n", + "{'loss': '2.3421285152435303', 'num_iter': 533504, 'lr': 0.001, 'time': '8.604907035827637 Seconds', 'norm': 0.18359375}\n", + "{'loss': '2.3568050861358643', 'num_iter': 534016, 'lr': 0.001, 'time': '8.86905837059021 Seconds', 'norm': 0.1962890625}\n", + "{'loss': '2.3571176528930664', 'num_iter': 534528, 'lr': 0.001, 'time': '8.6142737865448 Seconds', 'norm': 0.20703125}\n", + "{'loss': '2.343407392501831', 'num_iter': 535040, 'lr': 0.001, 'time': '9.014743566513062 Seconds', 'norm': 0.1708984375}\n", + "{'loss': '2.3706538677215576', 'num_iter': 535552, 'lr': 0.001, 'time': '8.337255239486694 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.494264602661133', 'num_iter': 536064, 'lr': 0.001, 'time': '8.202021360397339 Seconds', 'norm': 0.1904296875}\n", + "{'loss': '2.3794639110565186', 'num_iter': 536576, 'lr': 0.001, 'time': '8.234781980514526 Seconds', 'norm': 0.2099609375}\n", + "{'loss': '2.3787899017333984', 'num_iter': 537088, 'lr': 0.001, 'time': '8.596692323684692 Seconds', 'norm': 0.1845703125}\n", + "{'loss': '2.3557255268096924', 'num_iter': 537600, 'lr': 0.001, 'time': '8.3247549533844 Seconds', 'norm': 0.1767578125}\n", + "{'loss': '2.343777894973755', 'num_iter': 538112, 'lr': 0.001, 'time': '8.359620571136475 Seconds', 'norm': 0.1982421875}\n", + "{'loss': '2.4125304222106934', 'num_iter': 538624, 'lr': 0.001, 'time': '8.106018781661987 Seconds', 'norm': 0.1875}\n", + 
"{'loss': '2.4094390869140625', 'num_iter': 539136, 'lr': 0.001, 'time': '8.267824649810791 Seconds', 'norm': 0.2578125}\n", + "{'loss': '2.365715742111206', 'num_iter': 539648, 'lr': 0.001, 'time': '8.955572605133057 Seconds', 'norm': 0.28515625}\n", + "{'loss': '2.3388702869415283', 'num_iter': 540160, 'lr': 0.001, 'time': '8.99897050857544 Seconds', 'norm': 0.25}\n", + "{'loss': '2.406221389770508', 'num_iter': 540672, 'lr': 0.001, 'time': '9.394127368927002 Seconds', 'norm': 0.34765625}\n", + "{'loss': '2.415149450302124', 'num_iter': 541184, 'lr': 0.001, 'time': '9.11366581916809 Seconds', 'norm': 0.302734375}\n", + "{'loss': '2.3966617584228516', 'num_iter': 541696, 'lr': 0.001, 'time': '8.94565200805664 Seconds', 'norm': 0.19921875}\n", + "{'loss': '2.339439868927002', 'num_iter': 542208, 'lr': 0.001, 'time': '8.875508785247803 Seconds', 'norm': 0.26171875}\n", + "{'loss': '2.410937786102295', 'num_iter': 542720, 'lr': 0.001, 'time': '7.7604100704193115 Seconds', 'norm': 0.2294921875}\n", + "{'loss': '2.344268321990967', 'num_iter': 543232, 'lr': 0.001, 'time': '8.778756141662598 Seconds', 'norm': 0.2109375}\n", + "{'loss': '2.3661224842071533', 'num_iter': 543744, 'lr': 0.001, 'time': '8.848215341567993 Seconds', 'norm': 0.240234375}\n", + "{'loss': '2.465405225753784', 'num_iter': 544256, 'lr': 0.001, 'time': '8.194020986557007 Seconds', 'norm': 0.224609375}\n", + "{'loss': '2.349555492401123', 'num_iter': 544768, 'lr': 0.001, 'time': '9.065053224563599 Seconds', 'norm': 0.197265625}\n", + "{'loss': '2.3266756534576416', 'num_iter': 545280, 'lr': 0.001, 'time': '8.463916778564453 Seconds', 'norm': 0.216796875}\n", + "{'loss': '2.402050495147705', 'num_iter': 545792, 'lr': 0.001, 'time': '8.247084856033325 Seconds', 'norm': 0.2236328125}\n", + "{'loss': '2.3578617572784424', 'num_iter': 546304, 'lr': 0.001, 'time': '8.428520441055298 Seconds', 'norm': 0.20703125}\n", + "{'loss': '2.427959680557251', 'num_iter': 546816, 'lr': 0.001, 'time': 
'8.165271520614624 Seconds', 'norm': 0.2158203125}\n", + "{'loss': '2.4632697105407715', 'num_iter': 547328, 'lr': 0.001, 'time': '8.314631462097168 Seconds', 'norm': 0.208984375}\n", + "{'loss': '2.3906478881835938', 'num_iter': 547840, 'lr': 0.001, 'time': '8.745691061019897 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.359147071838379', 'num_iter': 548352, 'lr': 0.001, 'time': '8.265934228897095 Seconds', 'norm': 0.1884765625}\n", + "{'loss': '2.329312801361084', 'num_iter': 548864, 'lr': 0.001, 'time': '8.45524787902832 Seconds', 'norm': 0.2099609375}\n", + "{'loss': '2.412430763244629', 'num_iter': 549376, 'lr': 0.001, 'time': '8.118746280670166 Seconds', 'norm': 0.2099609375}\n", + "{'loss': '2.400629758834839', 'num_iter': 549888, 'lr': 0.001, 'time': '8.536701679229736 Seconds', 'norm': 0.1845703125}\n", + "{'loss': '2.34966778755188', 'num_iter': 550400, 'lr': 0.001, 'time': '9.076826810836792 Seconds', 'norm': 0.1953125}\n", + "{'loss': '2.418316125869751', 'num_iter': 550912, 'lr': 0.001, 'time': '8.336113929748535 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.36924147605896', 'num_iter': 551424, 'lr': 0.001, 'time': '8.258720397949219 Seconds', 'norm': 0.177734375}\n", + "{'loss': '2.2986791133880615', 'num_iter': 551936, 'lr': 0.001, 'time': '8.439860105514526 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.401566505432129', 'num_iter': 552448, 'lr': 0.001, 'time': '8.286494255065918 Seconds', 'norm': 0.1953125}\n", + "{'loss': '2.4251410961151123', 'num_iter': 552960, 'lr': 0.001, 'time': '8.11871337890625 Seconds', 'norm': 0.19140625}\n", + "{'loss': '2.379188299179077', 'num_iter': 553472, 'lr': 0.001, 'time': '8.102362632751465 Seconds', 'norm': 0.19921875}\n", + "{'loss': '2.3485360145568848', 'num_iter': 553984, 'lr': 0.001, 'time': '8.301153659820557 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.359606981277466', 'num_iter': 554496, 'lr': 0.001, 'time': '8.484722375869751 Seconds', 'norm': 0.1787109375}\n", + "{'loss': 
'2.3937478065490723', 'num_iter': 555008, 'lr': 0.001, 'time': '8.385018587112427 Seconds', 'norm': 0.2421875}\n", + "{'loss': '2.346998691558838', 'num_iter': 555520, 'lr': 0.001, 'time': '8.276255130767822 Seconds', 'norm': 0.2470703125}\n", + "{'loss': '2.3520264625549316', 'num_iter': 556032, 'lr': 0.001, 'time': '8.379949569702148 Seconds', 'norm': 0.2421875}\n", + "{'loss': '2.3089756965637207', 'num_iter': 556544, 'lr': 0.001, 'time': '8.717106103897095 Seconds', 'norm': 0.19921875}\n", + "{'loss': '2.370455026626587', 'num_iter': 557056, 'lr': 0.001, 'time': '8.910163879394531 Seconds', 'norm': 0.2294921875}\n", + "{'loss': '2.33139705657959', 'num_iter': 557568, 'lr': 0.001, 'time': '13.867101192474365 Seconds', 'norm': 0.2177734375}\n", + "{'loss': '2.386568546295166', 'num_iter': 558080, 'lr': 0.001, 'time': '9.296029567718506 Seconds', 'norm': 0.265625}\n", + "{'loss': '2.388340473175049', 'num_iter': 558592, 'lr': 0.001, 'time': '9.166133880615234 Seconds', 'norm': 0.203125}\n", + "{'loss': '2.313556671142578', 'num_iter': 559104, 'lr': 0.001, 'time': '9.786152839660645 Seconds', 'norm': 0.20703125}\n", + "{'loss': '2.379908323287964', 'num_iter': 559616, 'lr': 0.001, 'time': '9.591379642486572 Seconds', 'norm': 0.2099609375}\n", + "{'loss': '2.4103622436523438', 'num_iter': 560128, 'lr': 0.001, 'time': '8.39347243309021 Seconds', 'norm': 0.244140625}\n", + "{'loss': '2.3503577709198', 'num_iter': 560640, 'lr': 0.001, 'time': '8.441413640975952 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.4097888469696045', 'num_iter': 561152, 'lr': 0.001, 'time': '8.394853353500366 Seconds', 'norm': 0.2353515625}\n", + "{'loss': '2.348428964614868', 'num_iter': 561664, 'lr': 0.001, 'time': '8.320677042007446 Seconds', 'norm': 0.201171875}\n", + "{'loss': '2.4182467460632324', 'num_iter': 562176, 'lr': 0.001, 'time': '8.367506742477417 Seconds', 'norm': 0.228515625}\n", + "{'loss': '2.386864423751831', 'num_iter': 562688, 'lr': 0.001, 'time': '8.076138496398926 
Seconds', 'norm': 0.232421875}\n", + "{'loss': '2.335475206375122', 'num_iter': 563200, 'lr': 0.001, 'time': '8.601854801177979 Seconds', 'norm': 0.19140625}\n", + "{'loss': '2.3614563941955566', 'num_iter': 563712, 'lr': 0.001, 'time': '8.215994119644165 Seconds', 'norm': 0.2099609375}\n", + "{'loss': '2.407736301422119', 'num_iter': 564224, 'lr': 0.001, 'time': '8.159382581710815 Seconds', 'norm': 0.2216796875}\n", + "{'loss': '2.3601784706115723', 'num_iter': 564736, 'lr': 0.001, 'time': '8.558767795562744 Seconds', 'norm': 0.197265625}\n", + "{'loss': '2.4046101570129395', 'num_iter': 565248, 'lr': 0.001, 'time': '8.294793844223022 Seconds', 'norm': 0.2216796875}\n", + "{'loss': '2.3016159534454346', 'num_iter': 565760, 'lr': 0.001, 'time': '8.72349214553833 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.339411973953247', 'num_iter': 566272, 'lr': 0.001, 'time': '8.480000734329224 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.3110368251800537', 'num_iter': 566784, 'lr': 0.001, 'time': '9.044867515563965 Seconds', 'norm': 0.2099609375}\n", + "{'loss': '2.3640358448028564', 'num_iter': 567296, 'lr': 0.001, 'time': '8.769799947738647 Seconds', 'norm': 0.240234375}\n", + "{'loss': '2.3883798122406006', 'num_iter': 567808, 'lr': 0.001, 'time': '9.066588163375854 Seconds', 'norm': 0.169921875}\n", + "{'loss': '2.3822174072265625', 'num_iter': 568320, 'lr': 0.001, 'time': '9.034048080444336 Seconds', 'norm': 0.2119140625}\n", + "{'loss': '2.318671941757202', 'num_iter': 568832, 'lr': 0.001, 'time': '8.589978456497192 Seconds', 'norm': 0.189453125}\n", + "{'loss': '2.3528995513916016', 'num_iter': 569344, 'lr': 0.001, 'time': '8.690327405929565 Seconds', 'norm': 0.1767578125}\n", + "{'loss': '2.3915724754333496', 'num_iter': 569856, 'lr': 0.001, 'time': '8.67330026626587 Seconds', 'norm': 0.189453125}\n", + "{'loss': '2.3851430416107178', 'num_iter': 570368, 'lr': 0.001, 'time': '8.993835926055908 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.4249587059020996', 
'num_iter': 570880, 'lr': 0.001, 'time': '8.622015237808228 Seconds', 'norm': 0.19921875}\n", + "{'loss': '2.2986483573913574', 'num_iter': 571392, 'lr': 0.001, 'time': '9.129920959472656 Seconds', 'norm': 0.1962890625}\n", + "{'loss': '2.3269946575164795', 'num_iter': 571904, 'lr': 0.001, 'time': '8.771793127059937 Seconds', 'norm': 0.197265625}\n", + "{'loss': '2.3768930435180664', 'num_iter': 572416, 'lr': 0.001, 'time': '8.366975784301758 Seconds', 'norm': 0.2119140625}\n", + "{'loss': '2.384307861328125', 'num_iter': 572928, 'lr': 0.001, 'time': '8.440441370010376 Seconds', 'norm': 0.201171875}\n", + "{'loss': '2.365325927734375', 'num_iter': 573440, 'lr': 0.001, 'time': '8.718467473983765 Seconds', 'norm': 0.25390625}\n", + "{'loss': '2.418166399002075', 'num_iter': 573952, 'lr': 0.001, 'time': '10.52768325805664 Seconds', 'norm': 0.263671875}\n", + "{'loss': '2.4196560382843018', 'num_iter': 574464, 'lr': 0.001, 'time': '8.299775838851929 Seconds', 'norm': 0.259765625}\n", + "{'loss': '2.4219486713409424', 'num_iter': 574976, 'lr': 0.001, 'time': '8.10412073135376 Seconds', 'norm': 0.2119140625}\n", + "{'loss': '2.290261745452881', 'num_iter': 575488, 'lr': 0.001, 'time': '8.410136699676514 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.41316556930542', 'num_iter': 576000, 'lr': 0.001, 'time': '8.756744861602783 Seconds', 'norm': 0.185546875}\n", + "{'loss': '2.3451333045959473', 'num_iter': 576512, 'lr': 0.001, 'time': '9.08216404914856 Seconds', 'norm': 0.20703125}\n", + "{'loss': '2.350375175476074', 'num_iter': 577024, 'lr': 0.001, 'time': '9.447808504104614 Seconds', 'norm': 0.216796875}\n", + "{'loss': '2.3843014240264893', 'num_iter': 577536, 'lr': 0.001, 'time': '8.924794912338257 Seconds', 'norm': 0.2060546875}\n", + "{'loss': '2.358842134475708', 'num_iter': 578048, 'lr': 0.001, 'time': '8.657910108566284 Seconds', 'norm': 0.2109375}\n", + "{'loss': '2.460190534591675', 'num_iter': 578560, 'lr': 0.001, 'time': '7.865793466567993 Seconds', 
'norm': 0.2236328125}\n", + "{'loss': '2.35128116607666', 'num_iter': 579072, 'lr': 0.001, 'time': '8.071422815322876 Seconds', 'norm': 0.19140625}\n", + "{'loss': '2.384986162185669', 'num_iter': 579584, 'lr': 0.001, 'time': '8.088286876678467 Seconds', 'norm': 0.1689453125}\n", + "{'loss': '2.3074166774749756', 'num_iter': 580096, 'lr': 0.001, 'time': '8.40653395652771 Seconds', 'norm': 0.2060546875}\n", + "{'loss': '2.3917601108551025', 'num_iter': 580608, 'lr': 0.001, 'time': '8.72053837776184 Seconds', 'norm': 0.1953125}\n", + "{'loss': '2.382786750793457', 'num_iter': 581120, 'lr': 0.001, 'time': '8.115113496780396 Seconds', 'norm': 0.2294921875}\n", + "{'loss': '2.378631830215454', 'num_iter': 581632, 'lr': 0.001, 'time': '8.229153156280518 Seconds', 'norm': 0.2236328125}\n", + "{'loss': '2.3791255950927734', 'num_iter': 582144, 'lr': 0.001, 'time': '8.107864141464233 Seconds', 'norm': 0.2431640625}\n", + "{'loss': '2.3877782821655273', 'num_iter': 582656, 'lr': 0.001, 'time': '8.024527788162231 Seconds', 'norm': 0.1845703125}\n", + "{'loss': '2.3151209354400635', 'num_iter': 583168, 'lr': 0.001, 'time': '8.64098072052002 Seconds', 'norm': 0.1708984375}\n", + "{'loss': '2.362719774246216', 'num_iter': 583680, 'lr': 0.001, 'time': '8.657645463943481 Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.380748748779297', 'num_iter': 584192, 'lr': 0.001, 'time': '8.590943813323975 Seconds', 'norm': 0.20703125}\n", + "{'loss': '2.4285359382629395', 'num_iter': 584704, 'lr': 0.001, 'time': '7.842537879943848 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.387359142303467', 'num_iter': 585216, 'lr': 0.001, 'time': '8.260374307632446 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.3678834438323975', 'num_iter': 585728, 'lr': 0.001, 'time': '8.11666750907898 Seconds', 'norm': 0.1728515625}\n", + "{'loss': '2.374826192855835', 'num_iter': 586240, 'lr': 0.001, 'time': '8.790680885314941 Seconds', 'norm': 0.216796875}\n", + "{'loss': '2.3979573249816895', 'num_iter': 
586752, 'lr': 0.001, 'time': '8.175591468811035 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.3127472400665283', 'num_iter': 587264, 'lr': 0.001, 'time': '9.348510265350342 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.3610973358154297', 'num_iter': 587776, 'lr': 0.001, 'time': '8.46545147895813 Seconds', 'norm': 0.1845703125}\n", + "{'loss': '2.360898017883301', 'num_iter': 588288, 'lr': 0.001, 'time': '8.350005388259888 Seconds', 'norm': 0.1884765625}\n", + "{'loss': '2.344270706176758', 'num_iter': 588800, 'lr': 0.001, 'time': '8.46185040473938 Seconds', 'norm': 0.232421875}\n", + "{'loss': '2.35512638092041', 'num_iter': 589312, 'lr': 0.001, 'time': '8.385868787765503 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.3928940296173096', 'num_iter': 589824, 'lr': 0.001, 'time': '8.690425395965576 Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.3400585651397705', 'num_iter': 590336, 'lr': 0.001, 'time': '14.140808820724487 Seconds', 'norm': 0.228515625}\n", + "{'loss': '2.3961544036865234', 'num_iter': 590848, 'lr': 0.001, 'time': '8.318113803863525 Seconds', 'norm': 0.216796875}\n", + "{'loss': '2.4200711250305176', 'num_iter': 591360, 'lr': 0.001, 'time': '8.047547817230225 Seconds', 'norm': 0.1982421875}\n", + "{'loss': '2.4539577960968018', 'num_iter': 591872, 'lr': 0.001, 'time': '8.174566745758057 Seconds', 'norm': 0.2353515625}\n", + "{'loss': '2.367643117904663', 'num_iter': 592384, 'lr': 0.001, 'time': '8.16866397857666 Seconds', 'norm': 0.2275390625}\n", + "{'loss': '2.349362373352051', 'num_iter': 592896, 'lr': 0.001, 'time': '8.430105924606323 Seconds', 'norm': 0.1708984375}\n", + "{'loss': '2.3128409385681152', 'num_iter': 593408, 'lr': 0.001, 'time': '8.70234990119934 Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.428396463394165', 'num_iter': 593920, 'lr': 0.001, 'time': '8.978009700775146 Seconds', 'norm': 0.177734375}\n", + "{'loss': '2.382199287414551', 'num_iter': 594432, 'lr': 0.001, 'time': '9.32444715499878 Seconds', 'norm': 
0.1982421875}\n", + "{'loss': '2.3814010620117188', 'num_iter': 594944, 'lr': 0.001, 'time': '8.186692953109741 Seconds', 'norm': 0.1796875}\n", + "{'loss': '2.3372552394866943', 'num_iter': 595456, 'lr': 0.001, 'time': '7.981539487838745 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.3621573448181152', 'num_iter': 595968, 'lr': 0.001, 'time': '8.455300331115723 Seconds', 'norm': 0.1953125}\n", + "{'loss': '2.402200698852539', 'num_iter': 596480, 'lr': 0.001, 'time': '8.308866024017334 Seconds', 'norm': 0.1953125}\n", + "{'loss': '2.334552049636841', 'num_iter': 596992, 'lr': 0.001, 'time': '8.644901514053345 Seconds', 'norm': 0.1884765625}\n", + "{'loss': '2.3480000495910645', 'num_iter': 597504, 'lr': 0.001, 'time': '8.136777639389038 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.355496883392334', 'num_iter': 598016, 'lr': 0.001, 'time': '8.678923606872559 Seconds', 'norm': 0.224609375}\n", + "{'loss': '2.3377432823181152', 'num_iter': 598528, 'lr': 0.001, 'time': '8.857391834259033 Seconds', 'norm': 0.1923828125}\n", + "{'loss': '2.33004093170166', 'num_iter': 599040, 'lr': 0.001, 'time': '8.63906717300415 Seconds', 'norm': 0.2236328125}\n", + "{'loss': '2.38208270072937', 'num_iter': 599552, 'lr': 0.001, 'time': '8.163987874984741 Seconds', 'norm': 0.1943359375}\n", + "{'loss': '2.3487648963928223', 'num_iter': 600064, 'lr': 0.001, 'time': '8.366794347763062 Seconds', 'norm': 0.18359375}\n", + "{'loss': '2.3554086685180664', 'num_iter': 600576, 'lr': 0.001, 'time': '8.202547788619995 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.3091366291046143', 'num_iter': 601088, 'lr': 0.001, 'time': '9.333231210708618 Seconds', 'norm': 0.2431640625}\n", + "{'loss': '2.387678384780884', 'num_iter': 601600, 'lr': 0.001, 'time': '8.772621870040894 Seconds', 'norm': 0.220703125}\n", + "{'loss': '2.4188191890716553', 'num_iter': 602112, 'lr': 0.001, 'time': '8.311651945114136 Seconds', 'norm': 0.1904296875}\n", + "{'loss': '2.3478856086730957', 'num_iter': 602624, 
'lr': 0.001, 'time': '8.801607131958008 Seconds', 'norm': 0.17578125}\n", + "{'loss': '2.315805435180664', 'num_iter': 603136, 'lr': 0.001, 'time': '9.011109352111816 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.3993921279907227', 'num_iter': 603648, 'lr': 0.001, 'time': '8.275681495666504 Seconds', 'norm': 0.2216796875}\n", + "{'loss': '2.378356456756592', 'num_iter': 604160, 'lr': 0.001, 'time': '8.467104434967041 Seconds', 'norm': 0.21484375}\n", + "{'loss': '2.3717172145843506', 'num_iter': 604672, 'lr': 0.001, 'time': '8.819077491760254 Seconds', 'norm': 0.2275390625}\n", + "{'loss': '2.406420946121216', 'num_iter': 605184, 'lr': 0.001, 'time': '8.42766809463501 Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.308781623840332', 'num_iter': 605696, 'lr': 0.001, 'time': '9.180200815200806 Seconds', 'norm': 0.21875}\n", + "{'loss': '2.394643783569336', 'num_iter': 606208, 'lr': 0.001, 'time': '8.148174047470093 Seconds', 'norm': 0.1962890625}\n", + "{'loss': '2.383091688156128', 'num_iter': 606720, 'lr': 0.001, 'time': '7.9498536586761475 Seconds', 'norm': 0.189453125}\n", + "{'loss': '2.3648366928100586', 'num_iter': 607232, 'lr': 0.001, 'time': '8.404695749282837 Seconds', 'norm': 0.1845703125}\n", + "{'loss': '2.3548495769500732', 'num_iter': 607744, 'lr': 0.001, 'time': '8.70843243598938 Seconds', 'norm': 0.2275390625}\n", + "{'loss': '2.339198589324951', 'num_iter': 608256, 'lr': 0.001, 'time': '8.902112245559692 Seconds', 'norm': 0.19921875}\n", + "{'loss': '2.4118130207061768', 'num_iter': 608768, 'lr': 0.001, 'time': '8.46761417388916 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.425410270690918', 'num_iter': 609280, 'lr': 0.001, 'time': '7.797924518585205 Seconds', 'norm': 0.181640625}\n", + "{'loss': '2.41405987739563', 'num_iter': 609792, 'lr': 0.001, 'time': '8.31475019454956 Seconds', 'norm': 0.21875}\n", + "{'loss': '2.3168928623199463', 'num_iter': 610304, 'lr': 0.001, 'time': '8.628028631210327 Seconds', 'norm': 0.296875}\n", + 
"{'loss': '2.43030047416687', 'num_iter': 610816, 'lr': 0.001, 'time': '8.341989278793335 Seconds', 'norm': 0.33203125}\n", + "{'loss': '2.327176570892334', 'num_iter': 611328, 'lr': 0.001, 'time': '9.063476324081421 Seconds', 'norm': 0.24609375}\n", + "{'loss': '2.348038911819458', 'num_iter': 611840, 'lr': 0.001, 'time': '9.25037932395935 Seconds', 'norm': 0.2119140625}\n", + "{'loss': '2.384190082550049', 'num_iter': 612352, 'lr': 0.001, 'time': '8.67262578010559 Seconds', 'norm': 0.30859375}\n", + "{'loss': '2.3889453411102295', 'num_iter': 612864, 'lr': 0.001, 'time': '8.494995594024658 Seconds', 'norm': 0.21875}\n", + "{'loss': '2.300053596496582', 'num_iter': 613376, 'lr': 0.001, 'time': '8.412558555603027 Seconds', 'norm': 0.255859375}\n", + "{'loss': '2.386064052581787', 'num_iter': 613888, 'lr': 0.001, 'time': '8.071101665496826 Seconds', 'norm': 0.271484375}\n", + "{'loss': '2.381972551345825', 'num_iter': 614400, 'lr': 0.001, 'time': '8.205599308013916 Seconds', 'norm': 0.2177734375}\n", + "{'loss': '2.410676956176758', 'num_iter': 614912, 'lr': 0.001, 'time': '8.049220085144043 Seconds', 'norm': 0.255859375}\n", + "{'loss': '2.403089761734009', 'num_iter': 615424, 'lr': 0.001, 'time': '8.123032569885254 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.4044899940490723', 'num_iter': 615936, 'lr': 0.001, 'time': '7.929422378540039 Seconds', 'norm': 0.234375}\n", + "{'loss': '2.4092636108398438', 'num_iter': 616448, 'lr': 0.001, 'time': '8.01581358909607 Seconds', 'norm': 0.2255859375}\n", + "{'loss': '2.384211301803589', 'num_iter': 616960, 'lr': 0.001, 'time': '8.500221014022827 Seconds', 'norm': 0.1982421875}\n", + "{'loss': '2.4001681804656982', 'num_iter': 617472, 'lr': 0.001, 'time': '7.688342571258545 Seconds', 'norm': 0.259765625}\n", + "{'loss': '2.4118332862854004', 'num_iter': 617984, 'lr': 0.001, 'time': '7.856211185455322 Seconds', 'norm': 0.2392578125}\n", + "{'loss': '2.4008543491363525', 'num_iter': 618496, 'lr': 0.001, 'time': 
'8.08182430267334 Seconds', 'norm': 0.203125}\n", + "{'loss': '2.482863187789917', 'num_iter': 619008, 'lr': 0.001, 'time': '10.343130826950073 Seconds', 'norm': 0.2275390625}\n", + "{'loss': '2.316138505935669', 'num_iter': 619520, 'lr': 0.001, 'time': '8.784104347229004 Seconds', 'norm': 0.2109375}\n", + "{'loss': '2.3878493309020996', 'num_iter': 620032, 'lr': 0.001, 'time': '9.142448425292969 Seconds', 'norm': 0.181640625}\n", + "{'loss': '2.3631863594055176', 'num_iter': 620544, 'lr': 0.001, 'time': '8.916922569274902 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.386092185974121', 'num_iter': 621056, 'lr': 0.001, 'time': '8.32183051109314 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.3358213901519775', 'num_iter': 621568, 'lr': 0.001, 'time': '9.10470700263977 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.451328754425049', 'num_iter': 622080, 'lr': 0.001, 'time': '8.294664859771729 Seconds', 'norm': 0.216796875}\n", + "{'loss': '2.386120557785034', 'num_iter': 622592, 'lr': 0.001, 'time': '8.177609920501709 Seconds', 'norm': 0.2255859375}\n", + "{'loss': '2.3520267009735107', 'num_iter': 623104, 'lr': 0.001, 'time': '13.353434085845947 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.4219276905059814', 'num_iter': 623616, 'lr': 0.001, 'time': '8.270801782608032 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.423961639404297', 'num_iter': 624128, 'lr': 0.001, 'time': '8.008416652679443 Seconds', 'norm': 0.232421875}\n", + "{'loss': '2.4242100715637207', 'num_iter': 624640, 'lr': 0.001, 'time': '8.155817031860352 Seconds', 'norm': 0.1845703125}\n", + "{'loss': '2.4555981159210205', 'num_iter': 625152, 'lr': 0.001, 'time': '8.128122329711914 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.4281721115112305', 'num_iter': 625664, 'lr': 0.001, 'time': '8.035748958587646 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.369981050491333', 'num_iter': 626176, 'lr': 0.001, 'time': '8.855613946914673 Seconds', 'norm': 0.1982421875}\n", + "{'loss': 
'2.3076047897338867', 'num_iter': 626688, 'lr': 0.001, 'time': '9.062339305877686 Seconds', 'norm': 0.1640625}\n", + "{'loss': '2.3987555503845215', 'num_iter': 627200, 'lr': 0.001, 'time': '8.513155221939087 Seconds', 'norm': 0.189453125}\n", + "{'loss': '2.4323537349700928', 'num_iter': 627712, 'lr': 0.001, 'time': '8.415394067764282 Seconds', 'norm': 0.1689453125}\n", + "{'loss': '2.3409252166748047', 'num_iter': 628224, 'lr': 0.001, 'time': '8.49165940284729 Seconds', 'norm': 0.1787109375}\n", + "{'loss': '2.3550291061401367', 'num_iter': 628736, 'lr': 0.001, 'time': '9.040108442306519 Seconds', 'norm': 0.177734375}\n", + "{'loss': '2.360970973968506', 'num_iter': 629248, 'lr': 0.001, 'time': '9.177815675735474 Seconds', 'norm': 0.1904296875}\n", + "{'loss': '2.402338743209839', 'num_iter': 629760, 'lr': 0.001, 'time': '9.248363256454468 Seconds', 'norm': 0.16796875}\n", + "{'loss': '2.350450038909912', 'num_iter': 630272, 'lr': 0.001, 'time': '9.000217914581299 Seconds', 'norm': 0.169921875}\n", + "{'loss': '2.3485400676727295', 'num_iter': 630784, 'lr': 0.001, 'time': '7.980488538742065 Seconds', 'norm': 0.18359375}\n", + "{'loss': '2.3808484077453613', 'num_iter': 631296, 'lr': 0.001, 'time': '8.351171493530273 Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.3459527492523193', 'num_iter': 631808, 'lr': 0.001, 'time': '8.370725631713867 Seconds', 'norm': 0.1689453125}\n", + "{'loss': '2.416860342025757', 'num_iter': 632320, 'lr': 0.001, 'time': '7.916487693786621 Seconds', 'norm': 0.28125}\n", + "{'loss': '2.3642325401306152', 'num_iter': 632832, 'lr': 0.001, 'time': '8.307975769042969 Seconds', 'norm': 0.2470703125}\n", + "{'loss': '2.3842837810516357', 'num_iter': 633344, 'lr': 0.001, 'time': '8.773618936538696 Seconds', 'norm': 0.287109375}\n", + "{'loss': '2.389166831970215', 'num_iter': 633856, 'lr': 0.001, 'time': '8.093811750411987 Seconds', 'norm': 0.197265625}\n", + "{'loss': '2.353649854660034', 'num_iter': 634368, 'lr': 0.001, 'time': 
'8.572394132614136 Seconds', 'norm': 0.2158203125}\n", + "{'loss': '2.3125908374786377', 'num_iter': 634880, 'lr': 0.001, 'time': '8.746089696884155 Seconds', 'norm': 0.220703125}\n", + "{'loss': '2.4130027294158936', 'num_iter': 635392, 'lr': 0.001, 'time': '8.248673439025879 Seconds', 'norm': 0.1943359375}\n", + "{'loss': '2.3473968505859375', 'num_iter': 635904, 'lr': 0.001, 'time': '8.382711172103882 Seconds', 'norm': 0.216796875}\n", + "{'loss': '2.404981851577759', 'num_iter': 636416, 'lr': 0.001, 'time': '7.738506078720093 Seconds', 'norm': 0.21875}\n", + "{'loss': '2.3809351921081543', 'num_iter': 636928, 'lr': 0.001, 'time': '8.158390760421753 Seconds', 'norm': 0.197265625}\n", + "{'loss': '2.3915021419525146', 'num_iter': 637440, 'lr': 0.001, 'time': '8.937549829483032 Seconds', 'norm': 0.185546875}\n", + "{'loss': '2.382065534591675', 'num_iter': 637952, 'lr': 0.001, 'time': '8.908023357391357 Seconds', 'norm': 0.2177734375}\n", + "{'loss': '2.352999210357666', 'num_iter': 638464, 'lr': 0.001, 'time': '9.0807044506073 Seconds', 'norm': 0.208984375}\n", + "{'loss': '2.3589696884155273', 'num_iter': 638976, 'lr': 0.001, 'time': '8.512571334838867 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.3489813804626465', 'num_iter': 639488, 'lr': 0.001, 'time': '8.395131826400757 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.351274251937866', 'num_iter': 640000, 'lr': 0.001, 'time': '8.690725088119507 Seconds', 'norm': 0.203125}\n", + "{'loss': '2.292980194091797', 'num_iter': 640512, 'lr': 0.001, 'time': '8.796347618103027 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.4013984203338623', 'num_iter': 641024, 'lr': 0.001, 'time': '8.015042304992676 Seconds', 'norm': 0.1708984375}\n", + "{'loss': '2.374925136566162', 'num_iter': 641536, 'lr': 0.001, 'time': '8.02759838104248 Seconds', 'norm': 0.224609375}\n", + "{'loss': '2.359703779220581', 'num_iter': 642048, 'lr': 0.001, 'time': '8.41412901878357 Seconds', 'norm': 0.2021484375}\n", + "{'loss': 
'2.3444433212280273', 'num_iter': 642560, 'lr': 0.001, 'time': '8.478111505508423 Seconds', 'norm': 0.201171875}\n", + "{'loss': '2.3819189071655273', 'num_iter': 643072, 'lr': 0.001, 'time': '8.84437370300293 Seconds', 'norm': 0.244140625}\n", + "{'loss': '2.3917396068573', 'num_iter': 643584, 'lr': 0.001, 'time': '8.611928224563599 Seconds', 'norm': 0.2197265625}\n", + "{'loss': '2.3864684104919434', 'num_iter': 644096, 'lr': 0.001, 'time': '8.089583396911621 Seconds', 'norm': 0.228515625}\n", + "{'loss': '2.3744330406188965', 'num_iter': 644608, 'lr': 0.001, 'time': '8.392117738723755 Seconds', 'norm': 0.23046875}\n", + "{'loss': '2.3959555625915527', 'num_iter': 645120, 'lr': 0.001, 'time': '8.105306386947632 Seconds', 'norm': 0.189453125}\n", + "{'loss': '2.3590407371520996', 'num_iter': 645632, 'lr': 0.001, 'time': '8.829973697662354 Seconds', 'norm': 0.169921875}\n", + "{'loss': '2.3936283588409424', 'num_iter': 646144, 'lr': 0.001, 'time': '8.801921606063843 Seconds', 'norm': 0.2060546875}\n", + "{'loss': '2.4319217205047607', 'num_iter': 646656, 'lr': 0.001, 'time': '9.356077194213867 Seconds', 'norm': 0.1767578125}\n", + "{'loss': '2.370333671569824', 'num_iter': 647168, 'lr': 0.001, 'time': '8.896085262298584 Seconds', 'norm': 0.2119140625}\n", + "{'loss': '2.3910374641418457', 'num_iter': 647680, 'lr': 0.001, 'time': '8.507910966873169 Seconds', 'norm': 0.1845703125}\n", + "{'loss': '2.3300390243530273', 'num_iter': 648192, 'lr': 0.001, 'time': '8.909533023834229 Seconds', 'norm': 0.1728515625}\n", + "{'loss': '2.38446044921875', 'num_iter': 648704, 'lr': 0.001, 'time': '8.545660257339478 Seconds', 'norm': 0.18359375}\n", + "{'loss': '2.3647894859313965', 'num_iter': 649216, 'lr': 0.001, 'time': '8.729231119155884 Seconds', 'norm': 0.1787109375}\n", + "{'loss': '2.3467750549316406', 'num_iter': 649728, 'lr': 0.001, 'time': '8.33518648147583 Seconds', 'norm': 0.2080078125}\n", + "{'loss': '2.393139600753784', 'num_iter': 650240, 'lr': 0.001, 'time': 
'8.298295497894287 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.3532838821411133', 'num_iter': 650752, 'lr': 0.001, 'time': '8.194882154464722 Seconds', 'norm': 0.2578125}\n", + "{'loss': '2.3924577236175537', 'num_iter': 651264, 'lr': 0.001, 'time': '7.988342523574829 Seconds', 'norm': 0.208984375}\n", + "{'loss': '2.3247697353363037', 'num_iter': 651776, 'lr': 0.001, 'time': '8.483417272567749 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.374284029006958', 'num_iter': 652288, 'lr': 0.001, 'time': '8.562801599502563 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.3267228603363037', 'num_iter': 652800, 'lr': 0.001, 'time': '8.958294153213501 Seconds', 'norm': 0.2373046875}\n", + "{'loss': '2.3532586097717285', 'num_iter': 653312, 'lr': 0.001, 'time': '8.830855131149292 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.3467612266540527', 'num_iter': 653824, 'lr': 0.001, 'time': '8.2748281955719 Seconds', 'norm': 0.1923828125}\n", + "{'loss': '2.3345677852630615', 'num_iter': 654336, 'lr': 0.001, 'time': '8.473280668258667 Seconds', 'norm': 0.181640625}\n", + "{'loss': '2.2374520301818848', 'num_iter': 654848, 'lr': 0.001, 'time': '9.528331518173218 Seconds', 'norm': 0.1953125}\n", + "{'loss': '2.3981587886810303', 'num_iter': 655360, 'lr': 0.001, 'time': '8.673738479614258 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.3497040271759033', 'num_iter': 655872, 'lr': 0.001, 'time': '14.328133344650269 Seconds', 'norm': 0.19921875}\n", + "{'loss': '2.339686870574951', 'num_iter': 656384, 'lr': 0.001, 'time': '8.379092693328857 Seconds', 'norm': 0.2265625}\n", + "{'loss': '2.3845388889312744', 'num_iter': 656896, 'lr': 0.001, 'time': '8.012322664260864 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.332637071609497', 'num_iter': 657408, 'lr': 0.001, 'time': '8.846370697021484 Seconds', 'norm': 0.2119140625}\n", + "{'loss': '2.3946821689605713', 'num_iter': 657920, 'lr': 0.001, 'time': '8.433468341827393 Seconds', 'norm': 0.2392578125}\n", + "{'loss': 
'2.399364471435547', 'num_iter': 658432, 'lr': 0.001, 'time': '8.246594667434692 Seconds', 'norm': 0.19140625}\n", + "{'loss': '2.4059174060821533', 'num_iter': 658944, 'lr': 0.001, 'time': '8.012151002883911 Seconds', 'norm': 0.224609375}\n", + "{'loss': '2.4295337200164795', 'num_iter': 659456, 'lr': 0.001, 'time': '8.716046333312988 Seconds', 'norm': 0.265625}\n", + "{'loss': '2.2841134071350098', 'num_iter': 659968, 'lr': 0.001, 'time': '9.00838017463684 Seconds', 'norm': 0.1962890625}\n", + "{'loss': '2.3642125129699707', 'num_iter': 660480, 'lr': 0.001, 'time': '8.37971305847168 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.330617666244507', 'num_iter': 660992, 'lr': 0.001, 'time': '8.442567110061646 Seconds', 'norm': 0.2275390625}\n", + "{'loss': '2.4595072269439697', 'num_iter': 661504, 'lr': 0.001, 'time': '8.055999517440796 Seconds', 'norm': 0.1650390625}\n", + "{'loss': '2.387519359588623', 'num_iter': 662016, 'lr': 0.001, 'time': '8.22522759437561 Seconds', 'norm': 0.251953125}\n", + "{'loss': '2.427163600921631', 'num_iter': 662528, 'lr': 0.001, 'time': '7.9897589683532715 Seconds', 'norm': 0.21875}\n", + "{'loss': '2.3353583812713623', 'num_iter': 663040, 'lr': 0.001, 'time': '8.687860488891602 Seconds', 'norm': 0.1904296875}\n", + "{'loss': '2.3904309272766113', 'num_iter': 663552, 'lr': 0.001, 'time': '9.080407619476318 Seconds', 'norm': 0.21484375}\n", + "{'loss': '2.346625566482544', 'num_iter': 664064, 'lr': 0.001, 'time': '12.215171813964844 Seconds', 'norm': 0.181640625}\n", + "{'loss': '2.4009482860565186', 'num_iter': 664576, 'lr': 0.001, 'time': '8.816033601760864 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.4144814014434814', 'num_iter': 665088, 'lr': 0.001, 'time': '8.993697881698608 Seconds', 'norm': 0.248046875}\n", + "{'loss': '2.38824725151062', 'num_iter': 665600, 'lr': 0.001, 'time': '8.25989055633545 Seconds', 'norm': 0.21875}\n", + "{'loss': '2.337035894393921', 'num_iter': 666112, 'lr': 0.001, 'time': '8.396650075912476 
Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.366887092590332', 'num_iter': 666624, 'lr': 0.001, 'time': '8.011780261993408 Seconds', 'norm': 0.1787109375}\n", + "{'loss': '2.3839101791381836', 'num_iter': 667136, 'lr': 0.001, 'time': '8.311134815216064 Seconds', 'norm': 0.181640625}\n", + "{'loss': '2.3517966270446777', 'num_iter': 667648, 'lr': 0.001, 'time': '8.250505208969116 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.423513412475586', 'num_iter': 668160, 'lr': 0.001, 'time': '7.814132928848267 Seconds', 'norm': 0.2109375}\n", + "{'loss': '2.3465211391448975', 'num_iter': 668672, 'lr': 0.001, 'time': '8.334900379180908 Seconds', 'norm': 0.2275390625}\n", + "{'loss': '2.333303213119507', 'num_iter': 669184, 'lr': 0.001, 'time': '8.226865530014038 Seconds', 'norm': 0.19140625}\n", + "{'loss': '2.3723294734954834', 'num_iter': 669696, 'lr': 0.001, 'time': '8.089698791503906 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.3438475131988525', 'num_iter': 670208, 'lr': 0.001, 'time': '8.545056104660034 Seconds', 'norm': 0.185546875}\n", + "{'loss': '2.3451623916625977', 'num_iter': 670720, 'lr': 0.001, 'time': '8.73672890663147 Seconds', 'norm': 0.2412109375}\n", + "{'loss': '2.367323637008667', 'num_iter': 671232, 'lr': 0.001, 'time': '8.173423051834106 Seconds', 'norm': 0.2314453125}\n", + "{'loss': '2.4064366817474365', 'num_iter': 671744, 'lr': 0.001, 'time': '8.308980703353882 Seconds', 'norm': 0.2734375}\n", + "{'loss': '2.451188087463379', 'num_iter': 672256, 'lr': 0.001, 'time': '8.042839765548706 Seconds', 'norm': 0.234375}\n", + "{'loss': '2.43576979637146', 'num_iter': 672768, 'lr': 0.001, 'time': '7.9308459758758545 Seconds', 'norm': 0.23046875}\n", + "{'loss': '2.4370577335357666', 'num_iter': 673280, 'lr': 0.001, 'time': '8.233245134353638 Seconds', 'norm': 0.248046875}\n", + "{'loss': '2.3719675540924072', 'num_iter': 673792, 'lr': 0.001, 'time': '9.063046932220459 Seconds', 'norm': 0.22265625}\n", + "{'loss': '2.399625062942505', 'num_iter': 
674304, 'lr': 0.001, 'time': '8.83639907836914 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.371668815612793', 'num_iter': 674816, 'lr': 0.001, 'time': '8.811591148376465 Seconds', 'norm': 0.263671875}\n", + "{'loss': '2.3530454635620117', 'num_iter': 675328, 'lr': 0.001, 'time': '8.853727340698242 Seconds', 'norm': 0.2294921875}\n", + "{'loss': '2.3834002017974854', 'num_iter': 675840, 'lr': 0.001, 'time': '8.286590576171875 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.4113717079162598', 'num_iter': 676352, 'lr': 0.001, 'time': '8.373921155929565 Seconds', 'norm': 0.2275390625}\n", + "{'loss': '2.329602003097534', 'num_iter': 676864, 'lr': 0.001, 'time': '8.643926620483398 Seconds', 'norm': 0.2578125}\n", + "{'loss': '2.405308246612549', 'num_iter': 677376, 'lr': 0.001, 'time': '8.348886013031006 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.4455180168151855', 'num_iter': 677888, 'lr': 0.001, 'time': '8.0652334690094 Seconds', 'norm': 0.2158203125}\n", + "{'loss': '2.3772966861724854', 'num_iter': 678400, 'lr': 0.001, 'time': '7.993346691131592 Seconds', 'norm': 0.2158203125}\n", + "{'loss': '2.4025447368621826', 'num_iter': 678912, 'lr': 0.001, 'time': '7.97290825843811 Seconds', 'norm': 0.2412109375}\n", + "{'loss': '2.4037673473358154', 'num_iter': 679424, 'lr': 0.001, 'time': '8.221102237701416 Seconds', 'norm': 0.267578125}\n", + "{'loss': '2.3877718448638916', 'num_iter': 679936, 'lr': 0.001, 'time': '8.353402137756348 Seconds', 'norm': 0.267578125}\n", + "{'loss': '2.450486660003662', 'num_iter': 680448, 'lr': 0.001, 'time': '8.212786436080933 Seconds', 'norm': 0.2265625}\n", + "{'loss': '2.4074106216430664', 'num_iter': 680960, 'lr': 0.001, 'time': '8.362509965896606 Seconds', 'norm': 0.19140625}\n", + "{'loss': '2.390801429748535', 'num_iter': 681472, 'lr': 0.001, 'time': '8.20784616470337 Seconds', 'norm': 0.251953125}\n", + "{'loss': '2.4242875576019287', 'num_iter': 681984, 'lr': 0.001, 'time': '8.406002759933472 Seconds', 'norm': 
0.2138671875}\n", + "{'loss': '2.3238768577575684', 'num_iter': 682496, 'lr': 0.001, 'time': '9.554839372634888 Seconds', 'norm': 0.1923828125}\n", + "{'loss': '2.3097805976867676', 'num_iter': 683008, 'lr': 0.001, 'time': '9.30188250541687 Seconds', 'norm': 0.220703125}\n", + "{'loss': '2.3167126178741455', 'num_iter': 683520, 'lr': 0.001, 'time': '8.562800884246826 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.39569354057312', 'num_iter': 684032, 'lr': 0.001, 'time': '8.208535194396973 Seconds', 'norm': 0.2294921875}\n", + "{'loss': '2.3596465587615967', 'num_iter': 684544, 'lr': 0.001, 'time': '8.599244117736816 Seconds', 'norm': 0.2431640625}\n", + "{'loss': '2.461616039276123', 'num_iter': 685056, 'lr': 0.001, 'time': '8.065354347229004 Seconds', 'norm': 0.2099609375}\n", + "{'loss': '2.4226646423339844', 'num_iter': 685568, 'lr': 0.001, 'time': '7.9741058349609375 Seconds', 'norm': 0.1904296875}\n", + "{'loss': '2.3892011642456055', 'num_iter': 686080, 'lr': 0.001, 'time': '8.27155089378357 Seconds', 'norm': 0.1796875}\n", + "{'loss': '2.414668083190918', 'num_iter': 686592, 'lr': 0.001, 'time': '8.38721251487732 Seconds', 'norm': 0.185546875}\n", + "{'loss': '2.387638568878174', 'num_iter': 687104, 'lr': 0.001, 'time': '8.230915307998657 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.4185614585876465', 'num_iter': 687616, 'lr': 0.001, 'time': '8.015135765075684 Seconds', 'norm': 0.17578125}\n", + "{'loss': '2.4360361099243164', 'num_iter': 688128, 'lr': 0.001, 'time': '7.82700777053833 Seconds', 'norm': 0.21875}\n", + "{'loss': '2.3382253646850586', 'num_iter': 688640, 'lr': 0.001, 'time': '14.422387599945068 Seconds', 'norm': 0.1845703125}\n", + "{'loss': '2.4163129329681396', 'num_iter': 689152, 'lr': 0.001, 'time': '8.031607389450073 Seconds', 'norm': 0.19140625}\n", + "{'loss': '2.417785167694092', 'num_iter': 689664, 'lr': 0.001, 'time': '8.566045999526978 Seconds', 'norm': 0.18359375}\n", + "{'loss': '2.3660600185394287', 'num_iter': 690176, 'lr': 
0.001, 'time': '8.779966354370117 Seconds', 'norm': 0.1953125}\n", + "{'loss': '2.3807523250579834', 'num_iter': 690688, 'lr': 0.001, 'time': '9.497091293334961 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.3878567218780518', 'num_iter': 691200, 'lr': 0.001, 'time': '9.027363777160645 Seconds', 'norm': 0.181640625}\n", + "{'loss': '2.406618595123291', 'num_iter': 691712, 'lr': 0.001, 'time': '8.893636226654053 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.257495641708374', 'num_iter': 692224, 'lr': 0.001, 'time': '8.941604614257812 Seconds', 'norm': 0.17578125}\n", + "{'loss': '2.409153461456299', 'num_iter': 692736, 'lr': 0.001, 'time': '8.427781343460083 Seconds', 'norm': 0.1748046875}\n", + "{'loss': '2.4108755588531494', 'num_iter': 693248, 'lr': 0.001, 'time': '8.164016962051392 Seconds', 'norm': 0.1552734375}\n", + "{'loss': '2.394221067428589', 'num_iter': 693760, 'lr': 0.001, 'time': '8.152847290039062 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.3284800052642822', 'num_iter': 694272, 'lr': 0.001, 'time': '8.540859699249268 Seconds', 'norm': 0.1728515625}\n", + "{'loss': '2.333846092224121', 'num_iter': 694784, 'lr': 0.001, 'time': '8.652411937713623 Seconds', 'norm': 0.1728515625}\n", + "{'loss': '2.3685507774353027', 'num_iter': 695296, 'lr': 0.001, 'time': '8.25937557220459 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.3840270042419434', 'num_iter': 695808, 'lr': 0.001, 'time': '8.16422438621521 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.338435173034668', 'num_iter': 696320, 'lr': 0.001, 'time': '8.365530252456665 Seconds', 'norm': 0.1845703125}\n", + "{'loss': '2.3768246173858643', 'num_iter': 696832, 'lr': 0.001, 'time': '8.208390712738037 Seconds', 'norm': 0.216796875}\n", + "{'loss': '2.346557855606079', 'num_iter': 697344, 'lr': 0.001, 'time': '8.295830965042114 Seconds', 'norm': 0.21484375}\n", + "{'loss': '2.3652188777923584', 'num_iter': 697856, 'lr': 0.001, 'time': '8.390692234039307 Seconds', 'norm': 0.259765625}\n", + 
"{'loss': '2.3218917846679688', 'num_iter': 698368, 'lr': 0.001, 'time': '8.561245918273926 Seconds', 'norm': 0.232421875}\n", + "{'loss': '2.317875385284424', 'num_iter': 698880, 'lr': 0.001, 'time': '8.095212697982788 Seconds', 'norm': 0.208984375}\n", + "{'loss': '2.351576089859009', 'num_iter': 699392, 'lr': 0.001, 'time': '8.75148892402649 Seconds', 'norm': 0.2451171875}\n", + "{'loss': '2.4038450717926025', 'num_iter': 699904, 'lr': 0.001, 'time': '9.00093388557434 Seconds', 'norm': 0.251953125}\n", + "{'loss': '2.3461754322052', 'num_iter': 700416, 'lr': 0.001, 'time': '9.543450117111206 Seconds', 'norm': 0.21484375}\n", + "{'loss': '2.3590645790100098', 'num_iter': 700928, 'lr': 0.001, 'time': '8.214807271957397 Seconds', 'norm': 0.2275390625}\n", + "{'loss': '2.4140963554382324', 'num_iter': 701440, 'lr': 0.001, 'time': '8.104125022888184 Seconds', 'norm': 0.25}\n", + "{'loss': '2.35477876663208', 'num_iter': 701952, 'lr': 0.001, 'time': '8.482100486755371 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.3457119464874268', 'num_iter': 702464, 'lr': 0.001, 'time': '8.95743989944458 Seconds', 'norm': 0.19921875}\n", + "{'loss': '2.3447766304016113', 'num_iter': 702976, 'lr': 0.001, 'time': '8.580894947052002 Seconds', 'norm': 0.197265625}\n", + "{'loss': '2.2708234786987305', 'num_iter': 703488, 'lr': 0.001, 'time': '9.325986862182617 Seconds', 'norm': 0.2197265625}\n", + "{'loss': '2.354858875274658', 'num_iter': 704000, 'lr': 0.001, 'time': '8.229377508163452 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.427642822265625', 'num_iter': 704512, 'lr': 0.001, 'time': '8.364332437515259 Seconds', 'norm': 0.189453125}\n", + "{'loss': '2.3820040225982666', 'num_iter': 705024, 'lr': 0.001, 'time': '8.516388416290283 Seconds', 'norm': 0.224609375}\n", + "{'loss': '2.272197723388672', 'num_iter': 705536, 'lr': 0.001, 'time': '8.683784484863281 Seconds', 'norm': 0.1923828125}\n", + "{'loss': '2.368396759033203', 'num_iter': 706048, 'lr': 0.001, 'time': 
'8.24307632446289 Seconds', 'norm': 0.1943359375}\n", + "{'loss': '2.304460048675537', 'num_iter': 706560, 'lr': 0.001, 'time': '8.845685482025146 Seconds', 'norm': 0.240234375}\n", + "{'loss': '2.4523932933807373', 'num_iter': 707072, 'lr': 0.001, 'time': '8.514280796051025 Seconds', 'norm': 0.2197265625}\n", + "{'loss': '2.3459036350250244', 'num_iter': 707584, 'lr': 0.001, 'time': '8.994317531585693 Seconds', 'norm': 0.1943359375}\n", + "{'loss': '2.3827950954437256', 'num_iter': 708096, 'lr': 0.001, 'time': '8.18508267402649 Seconds', 'norm': 0.1982421875}\n", + "{'loss': '2.373605966567993', 'num_iter': 708608, 'lr': 0.001, 'time': '9.021398782730103 Seconds', 'norm': 0.1884765625}\n", + "{'loss': '2.3458356857299805', 'num_iter': 709120, 'lr': 0.001, 'time': '11.316767692565918 Seconds', 'norm': 0.201171875}\n", + "{'loss': '2.351620674133301', 'num_iter': 709632, 'lr': 0.001, 'time': '8.785996675491333 Seconds', 'norm': 0.1630859375}\n", + "{'loss': '2.4232828617095947', 'num_iter': 710144, 'lr': 0.001, 'time': '8.139104843139648 Seconds', 'norm': 0.2333984375}\n", + "{'loss': '2.3750877380371094', 'num_iter': 710656, 'lr': 0.001, 'time': '8.511762619018555 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.3744094371795654', 'num_iter': 711168, 'lr': 0.001, 'time': '8.184514999389648 Seconds', 'norm': 0.1904296875}\n", + "{'loss': '2.389399528503418', 'num_iter': 711680, 'lr': 0.001, 'time': '8.176487445831299 Seconds', 'norm': 0.1982421875}\n", + "{'loss': '2.3525607585906982', 'num_iter': 712192, 'lr': 0.001, 'time': '8.202296018600464 Seconds', 'norm': 0.1787109375}\n", + "{'loss': '2.354349374771118', 'num_iter': 712704, 'lr': 0.001, 'time': '8.409164428710938 Seconds', 'norm': 0.16015625}\n", + "{'loss': '2.3285441398620605', 'num_iter': 713216, 'lr': 0.001, 'time': '8.70640516281128 Seconds', 'norm': 0.1787109375}\n", + "{'loss': '2.318723678588867', 'num_iter': 713728, 'lr': 0.001, 'time': '8.705013275146484 Seconds', 'norm': 0.1591796875}\n", + 
"{'loss': '2.336822509765625', 'num_iter': 714240, 'lr': 0.001, 'time': '8.937048435211182 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.4067983627319336', 'num_iter': 714752, 'lr': 0.001, 'time': '8.208343505859375 Seconds', 'norm': 0.197265625}\n", + "{'loss': '2.408275604248047', 'num_iter': 715264, 'lr': 0.001, 'time': '8.623353958129883 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.363598346710205', 'num_iter': 715776, 'lr': 0.001, 'time': '8.31336236000061 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.4100029468536377', 'num_iter': 716288, 'lr': 0.001, 'time': '8.233347177505493 Seconds', 'norm': 0.26171875}\n", + "{'loss': '2.3889777660369873', 'num_iter': 716800, 'lr': 0.001, 'time': '8.50455641746521 Seconds', 'norm': 0.189453125}\n", + "{'loss': '2.364786386489868', 'num_iter': 717312, 'lr': 0.001, 'time': '9.332595825195312 Seconds', 'norm': 0.2275390625}\n", + "{'loss': '2.373316526412964', 'num_iter': 717824, 'lr': 0.001, 'time': '9.273631572723389 Seconds', 'norm': 0.1767578125}\n", + "{'loss': '2.3847739696502686', 'num_iter': 718336, 'lr': 0.001, 'time': '8.284353971481323 Seconds', 'norm': 0.1904296875}\n", + "{'loss': '2.461705207824707', 'num_iter': 718848, 'lr': 0.001, 'time': '8.030536651611328 Seconds', 'norm': 0.2119140625}\n", + "{'loss': '2.3041930198669434', 'num_iter': 719360, 'lr': 0.001, 'time': '8.932831525802612 Seconds', 'norm': 0.236328125}\n", + "{'loss': '2.3417656421661377', 'num_iter': 719872, 'lr': 0.001, 'time': '8.312368154525757 Seconds', 'norm': 0.22265625}\n", + "{'loss': '2.443758726119995', 'num_iter': 720384, 'lr': 0.001, 'time': '8.119982242584229 Seconds', 'norm': 0.2421875}\n", + "{'loss': '2.4424824714660645', 'num_iter': 720896, 'lr': 0.001, 'time': '8.063218593597412 Seconds', 'norm': 0.240234375}\n", + "{'loss': '2.307960033416748', 'num_iter': 721408, 'lr': 0.001, 'time': '13.64086627960205 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.3848776817321777', 'num_iter': 721920, 'lr': 0.001, 'time': 
'8.311319828033447 Seconds', 'norm': 0.2333984375}\n", + "{'loss': '2.4086740016937256', 'num_iter': 722432, 'lr': 0.001, 'time': '7.956038475036621 Seconds', 'norm': 0.263671875}\n", + "{'loss': '2.3606317043304443', 'num_iter': 722944, 'lr': 0.001, 'time': '8.639246940612793 Seconds', 'norm': 0.2373046875}\n", + "{'loss': '2.4137496948242188', 'num_iter': 723456, 'lr': 0.001, 'time': '8.204015016555786 Seconds', 'norm': 0.173828125}\n", + "{'loss': '2.317003011703491', 'num_iter': 723968, 'lr': 0.001, 'time': '8.685787200927734 Seconds', 'norm': 0.236328125}\n", + "{'loss': '2.30420184135437', 'num_iter': 724480, 'lr': 0.001, 'time': '8.622467994689941 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.3825230598449707', 'num_iter': 724992, 'lr': 0.001, 'time': '8.366938829421997 Seconds', 'norm': 0.1640625}\n", + "{'loss': '2.333308696746826', 'num_iter': 725504, 'lr': 0.001, 'time': '8.888290882110596 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.342539072036743', 'num_iter': 726016, 'lr': 0.001, 'time': '9.418739080429077 Seconds', 'norm': 0.173828125}\n", + "{'loss': '2.3932571411132812', 'num_iter': 726528, 'lr': 0.001, 'time': '8.812716245651245 Seconds', 'norm': 0.2119140625}\n", + "{'loss': '2.3493404388427734', 'num_iter': 727040, 'lr': 0.001, 'time': '8.226553201675415 Seconds', 'norm': 0.232421875}\n", + "{'loss': '2.358929395675659', 'num_iter': 727552, 'lr': 0.001, 'time': '7.921343564987183 Seconds', 'norm': 0.1845703125}\n", + "{'loss': '2.3867580890655518', 'num_iter': 728064, 'lr': 0.001, 'time': '7.938372611999512 Seconds', 'norm': 0.2265625}\n", + "{'loss': '2.395355224609375', 'num_iter': 728576, 'lr': 0.001, 'time': '8.669138431549072 Seconds', 'norm': 0.1962890625}\n", + "{'loss': '2.3649954795837402', 'num_iter': 729088, 'lr': 0.001, 'time': '8.9230375289917 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.3724656105041504', 'num_iter': 729600, 'lr': 0.001, 'time': '7.909025192260742 Seconds', 'norm': 0.220703125}\n", + "{'loss': 
'2.4369328022003174', 'num_iter': 730112, 'lr': 0.001, 'time': '7.782778978347778 Seconds', 'norm': 0.236328125}\n", + "{'loss': '2.3988771438598633', 'num_iter': 730624, 'lr': 0.001, 'time': '7.978844404220581 Seconds', 'norm': 0.171875}\n", + "{'loss': '2.3580620288848877', 'num_iter': 731136, 'lr': 0.001, 'time': '8.320365190505981 Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.3697896003723145', 'num_iter': 731648, 'lr': 0.001, 'time': '8.829343557357788 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.3580338954925537', 'num_iter': 732160, 'lr': 0.001, 'time': '8.733550786972046 Seconds', 'norm': 0.2197265625}\n", + "{'loss': '2.377913475036621', 'num_iter': 732672, 'lr': 0.001, 'time': '8.319149255752563 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.331618309020996', 'num_iter': 733184, 'lr': 0.001, 'time': '8.061727046966553 Seconds', 'norm': 0.1923828125}\n", + "{'loss': '2.3210737705230713', 'num_iter': 733696, 'lr': 0.001, 'time': '8.42910385131836 Seconds', 'norm': 0.3125}\n", + "{'loss': '2.425745725631714', 'num_iter': 734208, 'lr': 0.001, 'time': '8.098965406417847 Seconds', 'norm': 0.25390625}\n", + "{'loss': '2.39971923828125', 'num_iter': 734720, 'lr': 0.001, 'time': '8.6471107006073 Seconds', 'norm': 0.1962890625}\n", + "{'loss': '2.3125109672546387', 'num_iter': 735232, 'lr': 0.001, 'time': '9.84987187385559 Seconds', 'norm': 0.255859375}\n", + "{'loss': '2.403399705886841', 'num_iter': 735744, 'lr': 0.001, 'time': '8.827985286712646 Seconds', 'norm': 0.26171875}\n", + "{'loss': '2.3966715335845947', 'num_iter': 736256, 'lr': 0.001, 'time': '8.230252265930176 Seconds', 'norm': 0.265625}\n", + "{'loss': '2.355990171432495', 'num_iter': 736768, 'lr': 0.001, 'time': '8.379397630691528 Seconds', 'norm': 0.2451171875}\n", + "{'loss': '2.384606122970581', 'num_iter': 737280, 'lr': 0.001, 'time': '8.578991174697876 Seconds', 'norm': 0.2392578125}\n", + "{'loss': '2.3989052772521973', 'num_iter': 737792, 'lr': 0.001, 'time': '7.991072654724121 
Seconds', 'norm': 0.185546875}\n", + "{'loss': '2.382087230682373', 'num_iter': 738304, 'lr': 0.001, 'time': '8.38313603401184 Seconds', 'norm': 0.25}\n", + "{'loss': '2.3820204734802246', 'num_iter': 738816, 'lr': 0.001, 'time': '8.548571348190308 Seconds', 'norm': 0.20703125}\n", + "{'loss': '2.3028879165649414', 'num_iter': 739328, 'lr': 0.001, 'time': '9.087586879730225 Seconds', 'norm': 0.2294921875}\n", + "{'loss': '2.326913356781006', 'num_iter': 739840, 'lr': 0.001, 'time': '8.729943037033081 Seconds', 'norm': 0.23046875}\n", + "{'loss': '2.356125593185425', 'num_iter': 740352, 'lr': 0.001, 'time': '8.725183248519897 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.3536503314971924', 'num_iter': 740864, 'lr': 0.001, 'time': '8.565994501113892 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.3541839122772217', 'num_iter': 741376, 'lr': 0.001, 'time': '7.933438301086426 Seconds', 'norm': 0.203125}\n", + "{'loss': '2.37839674949646', 'num_iter': 741888, 'lr': 0.001, 'time': '8.555763721466064 Seconds', 'norm': 0.25390625}\n", + "{'loss': '2.3403372764587402', 'num_iter': 742400, 'lr': 0.001, 'time': '8.398051261901855 Seconds', 'norm': 0.1943359375}\n", + "{'loss': '2.3570215702056885', 'num_iter': 742912, 'lr': 0.001, 'time': '8.16980528831482 Seconds', 'norm': 0.2734375}\n", + "{'loss': '2.4021148681640625', 'num_iter': 743424, 'lr': 0.001, 'time': '8.52931809425354 Seconds', 'norm': 0.20703125}\n", + "{'loss': '2.3736371994018555', 'num_iter': 743936, 'lr': 0.001, 'time': '8.654454231262207 Seconds', 'norm': 0.1728515625}\n", + "{'loss': '2.374248743057251', 'num_iter': 744448, 'lr': 0.001, 'time': '8.960526943206787 Seconds', 'norm': 0.2197265625}\n", + "{'loss': '2.3605589866638184', 'num_iter': 744960, 'lr': 0.001, 'time': '8.694749593734741 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.3621160984039307', 'num_iter': 745472, 'lr': 0.001, 'time': '8.040616512298584 Seconds', 'norm': 0.1796875}\n", + "{'loss': '2.312272548675537', 'num_iter': 
745984, 'lr': 0.001, 'time': '8.523743867874146 Seconds', 'norm': 0.2177734375}\n", + "{'loss': '2.367387533187866', 'num_iter': 746496, 'lr': 0.001, 'time': '8.025292873382568 Seconds', 'norm': 0.1689453125}\n", + "{'loss': '2.405468225479126', 'num_iter': 747008, 'lr': 0.001, 'time': '8.0407555103302 Seconds', 'norm': 0.232421875}\n", + "{'loss': '2.409609794616699', 'num_iter': 747520, 'lr': 0.001, 'time': '7.9649858474731445 Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.3688409328460693', 'num_iter': 748032, 'lr': 0.001, 'time': '8.178140640258789 Seconds', 'norm': 0.189453125}\n", + "{'loss': '2.379575729370117', 'num_iter': 748544, 'lr': 0.001, 'time': '7.950344562530518 Seconds', 'norm': 0.1982421875}\n", + "{'loss': '2.3986763954162598', 'num_iter': 749056, 'lr': 0.001, 'time': '8.658401489257812 Seconds', 'norm': 0.19140625}\n", + "{'loss': '2.345642566680908', 'num_iter': 749568, 'lr': 0.001, 'time': '8.3818519115448 Seconds', 'norm': 0.2177734375}\n", + "{'loss': '2.363121509552002', 'num_iter': 750080, 'lr': 0.001, 'time': '8.38347601890564 Seconds', 'norm': 0.18359375}\n", + "{'loss': '2.3543601036071777', 'num_iter': 750592, 'lr': 0.001, 'time': '8.307822942733765 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.378981590270996', 'num_iter': 751104, 'lr': 0.001, 'time': '8.249279499053955 Seconds', 'norm': 0.1884765625}\n", + "{'loss': '2.3764312267303467', 'num_iter': 751616, 'lr': 0.001, 'time': '8.425573110580444 Seconds', 'norm': 0.2080078125}\n", + "{'loss': '2.3549182415008545', 'num_iter': 752128, 'lr': 0.001, 'time': '8.26303744316101 Seconds', 'norm': 0.1982421875}\n", + "{'loss': '2.3405380249023438', 'num_iter': 752640, 'lr': 0.001, 'time': '8.857691049575806 Seconds', 'norm': 0.2216796875}\n", + "{'loss': '2.384268283843994', 'num_iter': 753152, 'lr': 0.001, 'time': '8.923279523849487 Seconds', 'norm': 0.2265625}\n", + "{'loss': '2.3971126079559326', 'num_iter': 753664, 'lr': 0.001, 'time': '8.650435447692871 Seconds', 'norm': 
0.2177734375}\n", + "{'loss': '2.3343822956085205', 'num_iter': 754176, 'lr': 0.001, 'time': '16.65895938873291 Seconds', 'norm': 0.181640625}\n", + "{'loss': '2.390303611755371', 'num_iter': 754688, 'lr': 0.001, 'time': '8.126669645309448 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.3236641883850098', 'num_iter': 755200, 'lr': 0.001, 'time': '9.06437611579895 Seconds', 'norm': 0.232421875}\n", + "{'loss': '2.365595579147339', 'num_iter': 755712, 'lr': 0.001, 'time': '8.514541864395142 Seconds', 'norm': 0.1953125}\n", + "{'loss': '2.4152071475982666', 'num_iter': 756224, 'lr': 0.001, 'time': '7.981194019317627 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.3619813919067383', 'num_iter': 756736, 'lr': 0.001, 'time': '8.874803304672241 Seconds', 'norm': 0.265625}\n", + "{'loss': '2.3461225032806396', 'num_iter': 757248, 'lr': 0.001, 'time': '8.454149723052979 Seconds', 'norm': 0.21484375}\n", + "{'loss': '2.344787836074829', 'num_iter': 757760, 'lr': 0.001, 'time': '8.37270188331604 Seconds', 'norm': 0.22265625}\n", + "{'loss': '2.4288649559020996', 'num_iter': 758272, 'lr': 0.001, 'time': '8.324777364730835 Seconds', 'norm': 0.185546875}\n", + "{'loss': '2.3976051807403564', 'num_iter': 758784, 'lr': 0.001, 'time': '8.183255672454834 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.3751511573791504', 'num_iter': 759296, 'lr': 0.001, 'time': '8.152550458908081 Seconds', 'norm': 0.1923828125}\n", + "{'loss': '2.390070915222168', 'num_iter': 759808, 'lr': 0.001, 'time': '8.33932375907898 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.4489383697509766', 'num_iter': 760320, 'lr': 0.001, 'time': '7.709362745285034 Seconds', 'norm': 0.2373046875}\n", + "{'loss': '2.369088649749756', 'num_iter': 760832, 'lr': 0.001, 'time': '8.471125841140747 Seconds', 'norm': 0.185546875}\n", + "{'loss': '2.396308183670044', 'num_iter': 761344, 'lr': 0.001, 'time': '9.008852005004883 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.456833839416504', 'num_iter': 761856, 'lr': 
0.001, 'time': '8.92526650428772 Seconds', 'norm': 0.2275390625}\n", + "{'loss': '2.421004295349121', 'num_iter': 762368, 'lr': 0.001, 'time': '8.818985223770142 Seconds', 'norm': 0.25}\n", + "{'loss': '2.3722565174102783', 'num_iter': 762880, 'lr': 0.001, 'time': '9.545832395553589 Seconds', 'norm': 0.1728515625}\n", + "{'loss': '2.434572696685791', 'num_iter': 763392, 'lr': 0.001, 'time': '8.516699075698853 Seconds', 'norm': 0.2333984375}\n", + "{'loss': '2.323272943496704', 'num_iter': 763904, 'lr': 0.001, 'time': '8.421615362167358 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.365391969680786', 'num_iter': 764416, 'lr': 0.001, 'time': '8.464998960494995 Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.3682587146759033', 'num_iter': 764928, 'lr': 0.001, 'time': '8.224392890930176 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.3705270290374756', 'num_iter': 765440, 'lr': 0.001, 'time': '8.421123027801514 Seconds', 'norm': 0.21875}\n", + "{'loss': '2.3268826007843018', 'num_iter': 765952, 'lr': 0.001, 'time': '8.581361055374146 Seconds', 'norm': 0.251953125}\n", + "{'loss': '2.375770092010498', 'num_iter': 766464, 'lr': 0.001, 'time': '8.474016189575195 Seconds', 'norm': 0.232421875}\n", + "{'loss': '2.407719373703003', 'num_iter': 766976, 'lr': 0.001, 'time': '8.113345623016357 Seconds', 'norm': 0.203125}\n", + "{'loss': '2.392106294631958', 'num_iter': 767488, 'lr': 0.001, 'time': '8.216251850128174 Seconds', 'norm': 0.2119140625}\n", + "{'loss': '2.3944501876831055', 'num_iter': 768000, 'lr': 0.001, 'time': '8.246667385101318 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.4022397994995117', 'num_iter': 768512, 'lr': 0.001, 'time': '8.402244091033936 Seconds', 'norm': 0.2158203125}\n", + "{'loss': '2.3760335445404053', 'num_iter': 769024, 'lr': 0.001, 'time': '8.45118761062622 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.382223606109619', 'num_iter': 769536, 'lr': 0.001, 'time': '8.302098989486694 Seconds', 'norm': 0.21484375}\n", + "{'loss': 
'2.44134259223938', 'num_iter': 770048, 'lr': 0.001, 'time': '8.022501230239868 Seconds', 'norm': 0.208984375}\n", + "{'loss': '2.422524929046631', 'num_iter': 770560, 'lr': 0.001, 'time': '8.164295196533203 Seconds', 'norm': 0.2177734375}\n", + "{'loss': '2.326840877532959', 'num_iter': 771072, 'lr': 0.001, 'time': '9.191735029220581 Seconds', 'norm': 0.169921875}\n", + "{'loss': '2.3942060470581055', 'num_iter': 771584, 'lr': 0.001, 'time': '8.963021516799927 Seconds', 'norm': 0.203125}\n", + "{'loss': '2.3671324253082275', 'num_iter': 772096, 'lr': 0.001, 'time': '9.017479658126831 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.4198648929595947', 'num_iter': 772608, 'lr': 0.001, 'time': '8.21358036994934 Seconds', 'norm': 0.236328125}\n", + "{'loss': '2.397974729537964', 'num_iter': 773120, 'lr': 0.001, 'time': '8.482250928878784 Seconds', 'norm': 0.228515625}\n", + "{'loss': '2.2736976146698', 'num_iter': 773632, 'lr': 0.001, 'time': '9.014837980270386 Seconds', 'norm': 0.1923828125}\n", + "{'loss': '2.373763084411621', 'num_iter': 774144, 'lr': 0.001, 'time': '8.637209177017212 Seconds', 'norm': 0.2119140625}\n", + "{'loss': '2.4454782009124756', 'num_iter': 774656, 'lr': 0.001, 'time': '8.354460716247559 Seconds', 'norm': 0.244140625}\n", + "{'loss': '2.37192964553833', 'num_iter': 775168, 'lr': 0.001, 'time': '8.342412948608398 Seconds', 'norm': 0.2470703125}\n", + "{'loss': '2.4010133743286133', 'num_iter': 775680, 'lr': 0.001, 'time': '8.179382562637329 Seconds', 'norm': 0.25}\n", + "{'loss': '2.381502151489258', 'num_iter': 776192, 'lr': 0.001, 'time': '8.515563011169434 Seconds', 'norm': 0.189453125}\n", + "{'loss': '2.3604209423065186', 'num_iter': 776704, 'lr': 0.001, 'time': '8.532121181488037 Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.4262919425964355', 'num_iter': 777216, 'lr': 0.001, 'time': '8.154269456863403 Seconds', 'norm': 0.19921875}\n", + "{'loss': '2.3888309001922607', 'num_iter': 777728, 'lr': 0.001, 'time': '8.270989656448364 
Seconds', 'norm': 0.17578125}\n", + "{'loss': '2.3659980297088623', 'num_iter': 778240, 'lr': 0.001, 'time': '8.23820948600769 Seconds', 'norm': 0.236328125}\n", + "{'loss': '2.339022636413574', 'num_iter': 778752, 'lr': 0.001, 'time': '8.46707820892334 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.322268009185791', 'num_iter': 779264, 'lr': 0.001, 'time': '9.614853382110596 Seconds', 'norm': 0.2265625}\n", + "{'loss': '2.3091750144958496', 'num_iter': 779776, 'lr': 0.001, 'time': '9.514936208724976 Seconds', 'norm': 0.1982421875}\n", + "{'loss': '2.320230007171631', 'num_iter': 780288, 'lr': 0.001, 'time': '9.030786275863647 Seconds', 'norm': 0.15625}\n", + "{'loss': '2.340228319168091', 'num_iter': 780800, 'lr': 0.001, 'time': '9.044099569320679 Seconds', 'norm': 0.16796875}\n", + "{'loss': '2.3585407733917236', 'num_iter': 781312, 'lr': 0.001, 'time': '8.274870872497559 Seconds', 'norm': 0.251953125}\n", + "{'loss': '2.393615961074829', 'num_iter': 781824, 'lr': 0.001, 'time': '8.626776456832886 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.3572030067443848', 'num_iter': 782336, 'lr': 0.001, 'time': '8.257884502410889 Seconds', 'norm': 0.189453125}\n", + "{'loss': '2.3881568908691406', 'num_iter': 782848, 'lr': 0.001, 'time': '8.827781677246094 Seconds', 'norm': 0.181640625}\n", + "{'loss': '2.3094027042388916', 'num_iter': 783360, 'lr': 0.001, 'time': '8.859702825546265 Seconds', 'norm': 0.154296875}\n", + "{'loss': '2.3488481044769287', 'num_iter': 783872, 'lr': 0.001, 'time': '8.464357614517212 Seconds', 'norm': 0.181640625}\n", + "{'loss': '2.389861583709717', 'num_iter': 784384, 'lr': 0.001, 'time': '8.42518949508667 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.36502742767334', 'num_iter': 784896, 'lr': 0.001, 'time': '8.215100288391113 Seconds', 'norm': 0.2216796875}\n", + "{'loss': '2.3794209957122803', 'num_iter': 785408, 'lr': 0.001, 'time': '8.759578227996826 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.3992080688476562', 'num_iter': 785920, 
'lr': 0.001, 'time': '8.023003339767456 Seconds', 'norm': 0.2412109375}\n", + "{'loss': '2.3347768783569336', 'num_iter': 786432, 'lr': 0.001, 'time': '8.501240968704224 Seconds', 'norm': 0.24609375}\n", + "{'loss': '2.3850347995758057', 'num_iter': 786944, 'lr': 0.001, 'time': '13.61018443107605 Seconds', 'norm': 0.2177734375}\n", + "{'loss': '2.416193723678589', 'num_iter': 787456, 'lr': 0.001, 'time': '8.032252788543701 Seconds', 'norm': 0.177734375}\n", + "{'loss': '2.3835291862487793', 'num_iter': 787968, 'lr': 0.001, 'time': '8.536860227584839 Seconds', 'norm': 0.216796875}\n", + "{'loss': '2.361847400665283', 'num_iter': 788480, 'lr': 0.001, 'time': '9.49849534034729 Seconds', 'norm': 0.1865234375}\n", + "{'loss': '2.3317060470581055', 'num_iter': 788992, 'lr': 0.001, 'time': '9.646146774291992 Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.387059211730957', 'num_iter': 789504, 'lr': 0.001, 'time': '9.141222715377808 Seconds', 'norm': 0.259765625}\n", + "{'loss': '2.4352519512176514', 'num_iter': 790016, 'lr': 0.001, 'time': '8.384026527404785 Seconds', 'norm': 0.185546875}\n", + "{'loss': '2.377103567123413', 'num_iter': 790528, 'lr': 0.001, 'time': '8.12414026260376 Seconds', 'norm': 0.166015625}\n", + "{'loss': '2.314842700958252', 'num_iter': 791040, 'lr': 0.001, 'time': '8.509775638580322 Seconds', 'norm': 0.1630859375}\n", + "{'loss': '2.335874080657959', 'num_iter': 791552, 'lr': 0.001, 'time': '8.701306104660034 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.4305436611175537', 'num_iter': 792064, 'lr': 0.001, 'time': '8.028557777404785 Seconds', 'norm': 0.2119140625}\n", + "{'loss': '2.4318575859069824', 'num_iter': 792576, 'lr': 0.001, 'time': '8.062023878097534 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.437730312347412', 'num_iter': 793088, 'lr': 0.001, 'time': '8.275224924087524 Seconds', 'norm': 0.2119140625}\n", + "{'loss': '2.379845380783081', 'num_iter': 793600, 'lr': 0.001, 'time': '8.558674573898315 Seconds', 'norm': 
0.205078125}\n", + "{'loss': '2.4003446102142334', 'num_iter': 794112, 'lr': 0.001, 'time': '8.7104172706604 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.412031412124634', 'num_iter': 794624, 'lr': 0.001, 'time': '8.364750623703003 Seconds', 'norm': 0.173828125}\n", + "{'loss': '2.257524251937866', 'num_iter': 795136, 'lr': 0.001, 'time': '9.307363271713257 Seconds', 'norm': 0.1708984375}\n", + "{'loss': '2.396242380142212', 'num_iter': 795648, 'lr': 0.001, 'time': '8.224562406539917 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.3139827251434326', 'num_iter': 796160, 'lr': 0.001, 'time': '8.84564995765686 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.402904510498047', 'num_iter': 796672, 'lr': 0.001, 'time': '8.161415100097656 Seconds', 'norm': 0.2275390625}\n", + "{'loss': '2.307274580001831', 'num_iter': 797184, 'lr': 0.001, 'time': '8.710855484008789 Seconds', 'norm': 0.23046875}\n", + "{'loss': '2.39093279838562', 'num_iter': 797696, 'lr': 0.001, 'time': '9.198604822158813 Seconds', 'norm': 0.1796875}\n", + "{'loss': '2.3860392570495605', 'num_iter': 798208, 'lr': 0.001, 'time': '8.564092636108398 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.3963725566864014', 'num_iter': 798720, 'lr': 0.001, 'time': '11.040833950042725 Seconds', 'norm': 0.2275390625}\n", + "{'loss': '2.3934688568115234', 'num_iter': 799232, 'lr': 0.001, 'time': '8.104708433151245 Seconds', 'norm': 0.26171875}\n", + "{'loss': '2.4020044803619385', 'num_iter': 799744, 'lr': 0.001, 'time': '8.442531824111938 Seconds', 'norm': 0.263671875}\n", + "{'loss': '2.3381028175354004', 'num_iter': 800256, 'lr': 0.001, 'time': '8.434221029281616 Seconds', 'norm': 0.23828125}\n", + "{'loss': '2.3605990409851074', 'num_iter': 800768, 'lr': 0.001, 'time': '8.304632186889648 Seconds', 'norm': 0.201171875}\n", + "{'loss': '2.3290207386016846', 'num_iter': 801280, 'lr': 0.001, 'time': '8.58152723312378 Seconds', 'norm': 0.1962890625}\n", + "{'loss': '2.4196085929870605', 'num_iter': 801792, 'lr': 
0.001, 'time': '8.1789391040802 Seconds', 'norm': 0.267578125}\n", + "{'loss': '2.3820576667785645', 'num_iter': 802304, 'lr': 0.001, 'time': '8.091378927230835 Seconds', 'norm': 0.2080078125}\n", + "{'loss': '2.3877813816070557', 'num_iter': 802816, 'lr': 0.001, 'time': '8.134432315826416 Seconds', 'norm': 0.1767578125}\n", + "{'loss': '2.417102336883545', 'num_iter': 803328, 'lr': 0.001, 'time': '8.262167692184448 Seconds', 'norm': 0.2275390625}\n", + "{'loss': '2.402839422225952', 'num_iter': 803840, 'lr': 0.001, 'time': '8.455349206924438 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.3800411224365234', 'num_iter': 804352, 'lr': 0.001, 'time': '8.39750337600708 Seconds', 'norm': 0.2236328125}\n", + "{'loss': '2.40425705909729', 'num_iter': 804864, 'lr': 0.001, 'time': '8.065066576004028 Seconds', 'norm': 0.2578125}\n", + "{'loss': '2.4008138179779053', 'num_iter': 805376, 'lr': 0.001, 'time': '8.655503273010254 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.4031410217285156', 'num_iter': 805888, 'lr': 0.001, 'time': '8.881433963775635 Seconds', 'norm': 0.228515625}\n", + "{'loss': '2.390742540359497', 'num_iter': 806400, 'lr': 0.001, 'time': '8.707999229431152 Seconds', 'norm': 0.2294921875}\n", + "{'loss': '2.411825656890869', 'num_iter': 806912, 'lr': 0.001, 'time': '8.85196566581726 Seconds', 'norm': 0.185546875}\n", + "{'loss': '2.3640103340148926', 'num_iter': 807424, 'lr': 0.001, 'time': '8.594155550003052 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.435727834701538', 'num_iter': 807936, 'lr': 0.001, 'time': '8.006418943405151 Seconds', 'norm': 0.1708984375}\n", + "{'loss': '2.3375298976898193', 'num_iter': 808448, 'lr': 0.001, 'time': '8.862213850021362 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.3577218055725098', 'num_iter': 808960, 'lr': 0.001, 'time': '8.348987579345703 Seconds', 'norm': 0.1787109375}\n", + "{'loss': '2.4608328342437744', 'num_iter': 809472, 'lr': 0.001, 'time': '8.179567098617554 Seconds', 'norm': 
0.1826171875}\n", + "{'loss': '2.4652903079986572', 'num_iter': 809984, 'lr': 0.001, 'time': '8.256867408752441 Seconds', 'norm': 0.203125}\n", + "{'loss': '2.3549892902374268', 'num_iter': 810496, 'lr': 0.001, 'time': '8.567514896392822 Seconds', 'norm': 0.1767578125}\n", + "{'loss': '2.3743984699249268', 'num_iter': 811008, 'lr': 0.001, 'time': '8.412676095962524 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.3540031909942627', 'num_iter': 811520, 'lr': 0.001, 'time': '8.509904861450195 Seconds', 'norm': 0.203125}\n", + "{'loss': '2.3654332160949707', 'num_iter': 812032, 'lr': 0.001, 'time': '9.024910688400269 Seconds', 'norm': 0.19921875}\n", + "{'loss': '2.3596818447113037', 'num_iter': 812544, 'lr': 0.001, 'time': '8.459216594696045 Seconds', 'norm': 0.1962890625}\n", + "{'loss': '2.3674545288085938', 'num_iter': 813056, 'lr': 0.001, 'time': '8.811742544174194 Seconds', 'norm': 0.240234375}\n", + "{'loss': '2.3790316581726074', 'num_iter': 813568, 'lr': 0.001, 'time': '8.24921727180481 Seconds', 'norm': 0.2421875}\n", + "{'loss': '2.3246021270751953', 'num_iter': 814080, 'lr': 0.001, 'time': '8.932905435562134 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.3965399265289307', 'num_iter': 814592, 'lr': 0.001, 'time': '8.659034252166748 Seconds', 'norm': 0.1904296875}\n", + "{'loss': '2.3410146236419678', 'num_iter': 815104, 'lr': 0.001, 'time': '8.640002489089966 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.391836404800415', 'num_iter': 815616, 'lr': 0.001, 'time': '8.811690092086792 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.3235061168670654', 'num_iter': 816128, 'lr': 0.001, 'time': '9.04945421218872 Seconds', 'norm': 0.1708984375}\n", + "{'loss': '2.3914248943328857', 'num_iter': 816640, 'lr': 0.001, 'time': '7.9528374671936035 Seconds', 'norm': 0.1796875}\n", + "{'loss': '2.4081735610961914', 'num_iter': 817152, 'lr': 0.001, 'time': '8.15797209739685 Seconds', 'norm': 0.228515625}\n", + "{'loss': '2.3528330326080322', 'num_iter': 817664, 
'lr': 0.001, 'time': '8.16339373588562 Seconds', 'norm': 0.1591796875}\n", + "{'loss': '2.387908697128296', 'num_iter': 818176, 'lr': 0.001, 'time': '8.326883316040039 Seconds', 'norm': 0.240234375}\n", + "{'loss': '2.3904027938842773', 'num_iter': 818688, 'lr': 0.001, 'time': '8.046897649765015 Seconds', 'norm': 0.2216796875}\n", + "{'loss': '2.431567907333374', 'num_iter': 819200, 'lr': 0.001, 'time': '7.909595727920532 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.3610732555389404', 'num_iter': 819712, 'lr': 0.001, 'time': '13.377800703048706 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.3650944232940674', 'num_iter': 820224, 'lr': 0.001, 'time': '8.655376434326172 Seconds', 'norm': 0.224609375}\n", + "{'loss': '2.37011981010437', 'num_iter': 820736, 'lr': 0.001, 'time': '7.918459415435791 Seconds', 'norm': 0.244140625}\n", + "{'loss': '2.3734867572784424', 'num_iter': 821248, 'lr': 0.001, 'time': '8.289844512939453 Seconds', 'norm': 0.2177734375}\n", + "{'loss': '2.313455104827881', 'num_iter': 821760, 'lr': 0.001, 'time': '8.689671993255615 Seconds', 'norm': 0.18359375}\n", + "{'loss': '2.366702079772949', 'num_iter': 822272, 'lr': 0.001, 'time': '8.212247371673584 Seconds', 'norm': 0.22265625}\n", + "{'loss': '2.395612955093384', 'num_iter': 822784, 'lr': 0.001, 'time': '8.27155351638794 Seconds', 'norm': 0.197265625}\n", + "{'loss': '2.361896276473999', 'num_iter': 823296, 'lr': 0.001, 'time': '8.45076847076416 Seconds', 'norm': 0.2080078125}\n", + "{'loss': '2.3900461196899414', 'num_iter': 823808, 'lr': 0.001, 'time': '8.925167322158813 Seconds', 'norm': 0.2197265625}\n", + "{'loss': '2.469900131225586', 'num_iter': 824320, 'lr': 0.001, 'time': '8.902791261672974 Seconds', 'norm': 0.2294921875}\n", + "{'loss': '2.466205596923828', 'num_iter': 824832, 'lr': 0.001, 'time': '8.346994400024414 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.3738057613372803', 'num_iter': 825344, 'lr': 0.001, 'time': '8.702542066574097 Seconds', 'norm': 
0.1826171875}\n", + "{'loss': '2.299743175506592', 'num_iter': 825856, 'lr': 0.001, 'time': '8.815749168395996 Seconds', 'norm': 0.181640625}\n", + "{'loss': '2.452866315841675', 'num_iter': 826368, 'lr': 0.001, 'time': '8.229902029037476 Seconds', 'norm': 0.1982421875}\n", + "{'loss': '2.3249733448028564', 'num_iter': 826880, 'lr': 0.001, 'time': '8.401159763336182 Seconds', 'norm': 0.185546875}\n", + "{'loss': '2.436631679534912', 'num_iter': 827392, 'lr': 0.001, 'time': '7.765510082244873 Seconds', 'norm': 0.1884765625}\n", + "{'loss': '2.4192638397216797', 'num_iter': 827904, 'lr': 0.001, 'time': '7.856826543807983 Seconds', 'norm': 0.2470703125}\n", + "{'loss': '2.4046075344085693', 'num_iter': 828416, 'lr': 0.001, 'time': '8.55822491645813 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.381617307662964', 'num_iter': 828928, 'lr': 0.001, 'time': '8.233508825302124 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.3748064041137695', 'num_iter': 829440, 'lr': 0.001, 'time': '8.233696222305298 Seconds', 'norm': 0.2119140625}\n", + "{'loss': '2.347290515899658', 'num_iter': 829952, 'lr': 0.001, 'time': '8.861613750457764 Seconds', 'norm': 0.23046875}\n", + "{'loss': '2.367659568786621', 'num_iter': 830464, 'lr': 0.001, 'time': '8.653537273406982 Seconds', 'norm': 0.1708984375}\n", + "{'loss': '2.3971714973449707', 'num_iter': 830976, 'lr': 0.001, 'time': '8.232024908065796 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.3571016788482666', 'num_iter': 831488, 'lr': 0.001, 'time': '8.437031984329224 Seconds', 'norm': 0.2158203125}\n", + "{'loss': '2.365427017211914', 'num_iter': 832000, 'lr': 0.001, 'time': '8.470939636230469 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.3459866046905518', 'num_iter': 832512, 'lr': 0.001, 'time': '8.561110734939575 Seconds', 'norm': 0.220703125}\n", + "{'loss': '2.340026378631592', 'num_iter': 833024, 'lr': 0.001, 'time': '8.619605541229248 Seconds', 'norm': 0.1708984375}\n", + "{'loss': '2.3224551677703857', 'num_iter': 
833536, 'lr': 0.001, 'time': '10.029144763946533 Seconds', 'norm': 0.2421875}\n", + "{'loss': '2.3270957469940186', 'num_iter': 834048, 'lr': 0.001, 'time': '8.957895278930664 Seconds', 'norm': 0.21875}\n", + "{'loss': '2.31284761428833', 'num_iter': 834560, 'lr': 0.001, 'time': '8.674668550491333 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.3357772827148438', 'num_iter': 835072, 'lr': 0.001, 'time': '8.870048761367798 Seconds', 'norm': 0.189453125}\n", + "{'loss': '2.3145790100097656', 'num_iter': 835584, 'lr': 0.001, 'time': '8.790587902069092 Seconds', 'norm': 0.203125}\n", + "{'loss': '2.3980395793914795', 'num_iter': 836096, 'lr': 0.001, 'time': '8.39026689529419 Seconds', 'norm': 0.1787109375}\n", + "{'loss': '2.379431962966919', 'num_iter': 836608, 'lr': 0.001, 'time': '8.598976135253906 Seconds', 'norm': 0.203125}\n", + "{'loss': '2.391331434249878', 'num_iter': 837120, 'lr': 0.001, 'time': '8.369424819946289 Seconds', 'norm': 0.2392578125}\n", + "{'loss': '2.466007947921753', 'num_iter': 837632, 'lr': 0.001, 'time': '7.817820072174072 Seconds', 'norm': 0.1943359375}\n", + "{'loss': '2.3833696842193604', 'num_iter': 838144, 'lr': 0.001, 'time': '8.538643836975098 Seconds', 'norm': 0.1943359375}\n", + "{'loss': '2.357712984085083', 'num_iter': 838656, 'lr': 0.001, 'time': '8.857611894607544 Seconds', 'norm': 0.216796875}\n", + "{'loss': '2.3197128772735596', 'num_iter': 839168, 'lr': 0.001, 'time': '8.410500764846802 Seconds', 'norm': 0.173828125}\n", + "{'loss': '2.355614185333252', 'num_iter': 839680, 'lr': 0.001, 'time': '8.265276670455933 Seconds', 'norm': 0.20703125}\n", + "{'loss': '2.41454815864563', 'num_iter': 840192, 'lr': 0.001, 'time': '8.018587112426758 Seconds', 'norm': 0.208984375}\n", + "{'loss': '2.375319719314575', 'num_iter': 840704, 'lr': 0.001, 'time': '8.180247068405151 Seconds', 'norm': 0.271484375}\n", + "{'loss': '2.2976133823394775', 'num_iter': 841216, 'lr': 0.001, 'time': '8.890983819961548 Seconds', 'norm': 0.197265625}\n", 
+ "{'loss': '2.329407215118408', 'num_iter': 841728, 'lr': 0.001, 'time': '9.111454248428345 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.3478198051452637', 'num_iter': 842240, 'lr': 0.001, 'time': '8.646996974945068 Seconds', 'norm': 0.2578125}\n", + "{'loss': '2.3950037956237793', 'num_iter': 842752, 'lr': 0.001, 'time': '9.06429123878479 Seconds', 'norm': 0.177734375}\n", + "{'loss': '2.402510404586792', 'num_iter': 843264, 'lr': 0.001, 'time': '8.287485599517822 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.2829320430755615', 'num_iter': 843776, 'lr': 0.001, 'time': '9.289963960647583 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.313001871109009', 'num_iter': 844288, 'lr': 0.001, 'time': '10.802819728851318 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.3587570190429688', 'num_iter': 844800, 'lr': 0.001, 'time': '8.129775524139404 Seconds', 'norm': 0.21875}\n", + "{'loss': '2.3718669414520264', 'num_iter': 845312, 'lr': 0.001, 'time': '8.484242916107178 Seconds', 'norm': 0.2265625}\n", + "{'loss': '2.421088695526123', 'num_iter': 845824, 'lr': 0.001, 'time': '8.260185718536377 Seconds', 'norm': 0.2236328125}\n", + "{'loss': '2.406956195831299', 'num_iter': 846336, 'lr': 0.001, 'time': '8.28414511680603 Seconds', 'norm': 0.2255859375}\n", + "{'loss': '2.3645875453948975', 'num_iter': 846848, 'lr': 0.001, 'time': '8.48759126663208 Seconds', 'norm': 0.197265625}\n", + "{'loss': '2.314699649810791', 'num_iter': 847360, 'lr': 0.001, 'time': '8.932655572891235 Seconds', 'norm': 0.16796875}\n", + "{'loss': '2.3960394859313965', 'num_iter': 847872, 'lr': 0.001, 'time': '8.137982606887817 Seconds', 'norm': 0.1982421875}\n", + "{'loss': '2.349632978439331', 'num_iter': 848384, 'lr': 0.001, 'time': '8.24105978012085 Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.405681848526001', 'num_iter': 848896, 'lr': 0.001, 'time': '8.206320524215698 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.4043679237365723', 'num_iter': 849408, 'lr': 0.001, 'time': 
'8.692046165466309 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.297464370727539', 'num_iter': 849920, 'lr': 0.001, 'time': '8.693520069122314 Seconds', 'norm': 0.1982421875}\n", + "{'loss': '2.357583999633789', 'num_iter': 850432, 'lr': 0.001, 'time': '8.84159255027771 Seconds', 'norm': 0.19140625}\n", + "{'loss': '2.380152463912964', 'num_iter': 850944, 'lr': 0.001, 'time': '8.556318998336792 Seconds', 'norm': 0.1767578125}\n", + "{'loss': '2.3244118690490723', 'num_iter': 851456, 'lr': 0.001, 'time': '9.170279741287231 Seconds', 'norm': 0.18359375}\n", + "{'loss': '2.371706962585449', 'num_iter': 851968, 'lr': 0.001, 'time': '8.03391695022583 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.3490447998046875', 'num_iter': 852480, 'lr': 0.001, 'time': '14.445624828338623 Seconds', 'norm': 0.2060546875}\n", + "{'loss': '2.418030261993408', 'num_iter': 852992, 'lr': 0.001, 'time': '8.070535898208618 Seconds', 'norm': 0.1767578125}\n", + "{'loss': '2.3791966438293457', 'num_iter': 853504, 'lr': 0.001, 'time': '7.99699068069458 Seconds', 'norm': 0.1962890625}\n", + "{'loss': '2.3647425174713135', 'num_iter': 854016, 'lr': 0.001, 'time': '8.39724326133728 Seconds', 'norm': 0.1953125}\n", + "{'loss': '2.384296417236328', 'num_iter': 854528, 'lr': 0.001, 'time': '8.270471096038818 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.3251872062683105', 'num_iter': 855040, 'lr': 0.001, 'time': '8.594213008880615 Seconds', 'norm': 0.18359375}\n", + "{'loss': '2.3870224952697754', 'num_iter': 855552, 'lr': 0.001, 'time': '8.406900644302368 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.442504644393921', 'num_iter': 856064, 'lr': 0.001, 'time': '7.873719692230225 Seconds', 'norm': 0.21875}\n", + "{'loss': '2.3610081672668457', 'num_iter': 856576, 'lr': 0.001, 'time': '8.21334171295166 Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.3273203372955322', 'num_iter': 857088, 'lr': 0.001, 'time': '8.557069778442383 Seconds', 'norm': 0.201171875}\n", + "{'loss': 
'2.3845415115356445', 'num_iter': 857600, 'lr': 0.001, 'time': '8.38985800743103 Seconds', 'norm': 0.2265625}\n", + "{'loss': '2.4459645748138428', 'num_iter': 858112, 'lr': 0.001, 'time': '8.53351879119873 Seconds', 'norm': 0.220703125}\n", + "{'loss': '2.363759994506836', 'num_iter': 858624, 'lr': 0.001, 'time': '8.208442449569702 Seconds', 'norm': 0.2236328125}\n", + "{'loss': '2.4292874336242676', 'num_iter': 859136, 'lr': 0.001, 'time': '8.79178500175476 Seconds', 'norm': 0.220703125}\n", + "{'loss': '2.3263585567474365', 'num_iter': 859648, 'lr': 0.001, 'time': '9.65163779258728 Seconds', 'norm': 0.2216796875}\n", + "{'loss': '2.467611074447632', 'num_iter': 860160, 'lr': 0.001, 'time': '8.284956693649292 Seconds', 'norm': 0.232421875}\n", + "{'loss': '2.424715280532837', 'num_iter': 860672, 'lr': 0.001, 'time': '8.96426773071289 Seconds', 'norm': 0.2392578125}\n", + "{'loss': '2.386087417602539', 'num_iter': 861184, 'lr': 0.001, 'time': '9.14323616027832 Seconds', 'norm': 0.23828125}\n", + "{'loss': '2.403947591781616', 'num_iter': 861696, 'lr': 0.001, 'time': '8.145796060562134 Seconds', 'norm': 0.21484375}\n", + "{'loss': '2.3912997245788574', 'num_iter': 862208, 'lr': 0.001, 'time': '8.46107029914856 Seconds', 'norm': 0.1708984375}\n", + "{'loss': '2.3930251598358154', 'num_iter': 862720, 'lr': 0.001, 'time': '8.069331407546997 Seconds', 'norm': 0.189453125}\n", + "{'loss': '2.411935567855835', 'num_iter': 863232, 'lr': 0.001, 'time': '7.920963287353516 Seconds', 'norm': 0.189453125}\n", + "{'loss': '2.3245415687561035', 'num_iter': 863744, 'lr': 0.001, 'time': '8.348475694656372 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.354224681854248', 'num_iter': 864256, 'lr': 0.001, 'time': '9.023526430130005 Seconds', 'norm': 0.1796875}\n", + "{'loss': '2.4342801570892334', 'num_iter': 864768, 'lr': 0.001, 'time': '8.369325160980225 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.3029651641845703', 'num_iter': 865280, 'lr': 0.001, 'time': '8.640503168106079 
Seconds', 'norm': 0.185546875}\n", + "{'loss': '2.334979295730591', 'num_iter': 865792, 'lr': 0.001, 'time': '8.702525854110718 Seconds', 'norm': 0.185546875}\n", + "{'loss': '2.388380527496338', 'num_iter': 866304, 'lr': 0.001, 'time': '8.247607946395874 Seconds', 'norm': 0.1845703125}\n", + "{'loss': '2.397393226623535', 'num_iter': 866816, 'lr': 0.001, 'time': '8.07540512084961 Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.3930952548980713', 'num_iter': 867328, 'lr': 0.001, 'time': '8.36198878288269 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.3665733337402344', 'num_iter': 867840, 'lr': 0.001, 'time': '8.42233419418335 Seconds', 'norm': 0.19140625}\n", + "{'loss': '2.3468823432922363', 'num_iter': 868352, 'lr': 0.001, 'time': '8.309447765350342 Seconds', 'norm': 0.208984375}\n", + "{'loss': '2.3932745456695557', 'num_iter': 868864, 'lr': 0.001, 'time': '8.770058155059814 Seconds', 'norm': 0.2099609375}\n", + "{'loss': '2.4171395301818848', 'num_iter': 869376, 'lr': 0.001, 'time': '8.309303998947144 Seconds', 'norm': 0.1845703125}\n", + "{'loss': '2.3657121658325195', 'num_iter': 869888, 'lr': 0.001, 'time': '8.554144620895386 Seconds', 'norm': 0.21875}\n", + "{'loss': '2.3017871379852295', 'num_iter': 870400, 'lr': 0.001, 'time': '8.443048238754272 Seconds', 'norm': 0.25390625}\n", + "{'loss': '2.3011395931243896', 'num_iter': 870912, 'lr': 0.001, 'time': '8.654962539672852 Seconds', 'norm': 0.1845703125}\n", + "{'loss': '2.4297683238983154', 'num_iter': 871424, 'lr': 0.001, 'time': '8.201315641403198 Seconds', 'norm': 0.2314453125}\n", + "{'loss': '2.3571419715881348', 'num_iter': 871936, 'lr': 0.001, 'time': '8.582642078399658 Seconds', 'norm': 0.2294921875}\n", + "{'loss': '2.3527097702026367', 'num_iter': 872448, 'lr': 0.001, 'time': '8.01734972000122 Seconds', 'norm': 0.189453125}\n", + "{'loss': '2.384660243988037', 'num_iter': 872960, 'lr': 0.001, 'time': '8.063130855560303 Seconds', 'norm': 0.1728515625}\n", + "{'loss': '2.4713962078094482', 
'num_iter': 873472, 'lr': 0.001, 'time': '8.284625053405762 Seconds', 'norm': 0.1904296875}\n", + "{'loss': '2.421858072280884', 'num_iter': 873984, 'lr': 0.001, 'time': '8.190171718597412 Seconds', 'norm': 0.212890625}\n", + "{'loss': '2.4259591102600098', 'num_iter': 874496, 'lr': 0.001, 'time': '7.8865251541137695 Seconds', 'norm': 0.1923828125}\n", + "{'loss': '2.379058361053467', 'num_iter': 875008, 'lr': 0.001, 'time': '8.547734498977661 Seconds', 'norm': 0.21484375}\n", + "{'loss': '2.3634679317474365', 'num_iter': 875520, 'lr': 0.001, 'time': '8.267415523529053 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.2839787006378174', 'num_iter': 876032, 'lr': 0.001, 'time': '8.745175838470459 Seconds', 'norm': 0.1669921875}\n", + "{'loss': '2.3671839237213135', 'num_iter': 876544, 'lr': 0.001, 'time': '8.310211181640625 Seconds', 'norm': 0.1796875}\n", + "{'loss': '2.405569314956665', 'num_iter': 877056, 'lr': 0.001, 'time': '8.13987135887146 Seconds', 'norm': 0.2109375}\n", + "{'loss': '2.3551714420318604', 'num_iter': 877568, 'lr': 0.001, 'time': '8.829322099685669 Seconds', 'norm': 0.205078125}\n", + "{'loss': '2.4247286319732666', 'num_iter': 878080, 'lr': 0.001, 'time': '8.782273292541504 Seconds', 'norm': 0.189453125}\n", + "{'loss': '2.4080123901367188', 'num_iter': 878592, 'lr': 0.001, 'time': '8.52376103401184 Seconds', 'norm': 0.2021484375}\n", + "{'loss': '2.4176392555236816', 'num_iter': 879104, 'lr': 0.001, 'time': '8.156224966049194 Seconds', 'norm': 0.220703125}\n", + "{'loss': '2.2853965759277344', 'num_iter': 879616, 'lr': 0.001, 'time': '8.970664024353027 Seconds', 'norm': 0.1923828125}\n", + "{'loss': '2.4122838973999023', 'num_iter': 880128, 'lr': 0.001, 'time': '7.950299501419067 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.363579750061035', 'num_iter': 880640, 'lr': 0.001, 'time': '8.299129724502563 Seconds', 'norm': 0.171875}\n", + "{'loss': '2.3281354904174805', 'num_iter': 881152, 'lr': 0.001, 'time': '8.641669034957886 Seconds', 
'norm': 0.1826171875}\n", + "{'loss': '2.326263904571533', 'num_iter': 881664, 'lr': 0.001, 'time': '8.630057096481323 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.3642940521240234', 'num_iter': 882176, 'lr': 0.001, 'time': '8.202011108398438 Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.4537689685821533', 'num_iter': 882688, 'lr': 0.001, 'time': '7.7811665534973145 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.404231071472168', 'num_iter': 883200, 'lr': 0.001, 'time': '8.922267198562622 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.3902435302734375', 'num_iter': 883712, 'lr': 0.001, 'time': '8.306289911270142 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.4225828647613525', 'num_iter': 884224, 'lr': 0.001, 'time': '8.53778862953186 Seconds', 'norm': 0.173828125}\n", + "{'loss': '2.312952756881714', 'num_iter': 884736, 'lr': 0.001, 'time': '8.517697095870972 Seconds', 'norm': 0.22265625}\n", + "{'loss': '2.3331220149993896', 'num_iter': 885248, 'lr': 0.001, 'time': '14.273312091827393 Seconds', 'norm': 0.20703125}\n", + "{'loss': '2.4118685722351074', 'num_iter': 885760, 'lr': 0.001, 'time': '8.004231691360474 Seconds', 'norm': 0.162109375}\n", + "{'loss': '2.4545533657073975', 'num_iter': 886272, 'lr': 0.001, 'time': '8.544071197509766 Seconds', 'norm': 0.19921875}\n", + "{'loss': '2.3774712085723877', 'num_iter': 886784, 'lr': 0.001, 'time': '9.038161516189575 Seconds', 'norm': 0.1923828125}\n", + "{'loss': '2.4066500663757324', 'num_iter': 887296, 'lr': 0.001, 'time': '9.174620151519775 Seconds', 'norm': 0.197265625}\n", + "{'loss': '2.306225299835205', 'num_iter': 887808, 'lr': 0.001, 'time': '9.546359300613403 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.435718297958374', 'num_iter': 888320, 'lr': 0.001, 'time': '8.836459398269653 Seconds', 'norm': 0.1748046875}\n", + "{'loss': '2.3922226428985596', 'num_iter': 888832, 'lr': 0.001, 'time': '8.180726528167725 Seconds', 'norm': 0.220703125}\n", + "{'loss': '2.3023841381073', 
'num_iter': 889344, 'lr': 0.001, 'time': '10.957780122756958 Seconds', 'norm': 0.20703125}\n", + "{'loss': '2.399470329284668', 'num_iter': 889856, 'lr': 0.001, 'time': '8.294396162033081 Seconds', 'norm': 0.1982421875}\n", + "{'loss': '2.40988826751709', 'num_iter': 890368, 'lr': 0.001, 'time': '8.6176176071167 Seconds', 'norm': 0.181640625}\n", + "{'loss': '2.345062255859375', 'num_iter': 890880, 'lr': 0.001, 'time': '8.739701747894287 Seconds', 'norm': 0.1787109375}\n", + "{'loss': '2.420276403427124', 'num_iter': 891392, 'lr': 0.001, 'time': '8.093956708908081 Seconds', 'norm': 0.2099609375}\n", + "{'loss': '2.3712801933288574', 'num_iter': 891904, 'lr': 0.001, 'time': '8.445114612579346 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.395455837249756', 'num_iter': 892416, 'lr': 0.001, 'time': '8.659630537033081 Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.373633623123169', 'num_iter': 892928, 'lr': 0.001, 'time': '8.59441089630127 Seconds', 'norm': 0.27734375}\n", + "{'loss': '2.4062716960906982', 'num_iter': 893440, 'lr': 0.001, 'time': '8.114684820175171 Seconds', 'norm': 0.240234375}\n", + "{'loss': '2.369568109512329', 'num_iter': 893952, 'lr': 0.001, 'time': '8.73387885093689 Seconds', 'norm': 0.27734375}\n", + "{'loss': '2.3462982177734375', 'num_iter': 894464, 'lr': 0.001, 'time': '8.239887952804565 Seconds', 'norm': 0.1943359375}\n", + "{'loss': '2.3270983695983887', 'num_iter': 894976, 'lr': 0.001, 'time': '8.204057216644287 Seconds', 'norm': 0.1767578125}\n", + "{'loss': '2.3199238777160645', 'num_iter': 895488, 'lr': 0.001, 'time': '8.724114179611206 Seconds', 'norm': 0.220703125}\n", + "{'loss': '2.4623286724090576', 'num_iter': 896000, 'lr': 0.001, 'time': '8.734465837478638 Seconds', 'norm': 0.17578125}\n", + "{'loss': '2.2911605834960938', 'num_iter': 896512, 'lr': 0.001, 'time': '9.74925446510315 Seconds', 'norm': 0.189453125}\n", + "{'loss': '2.3094255924224854', 'num_iter': 897024, 'lr': 0.001, 'time': '9.124629259109497 Seconds', 
'norm': 0.2236328125}\n", + "{'loss': '2.3452560901641846', 'num_iter': 897536, 'lr': 0.001, 'time': '8.553218603134155 Seconds', 'norm': 0.2060546875}\n", + "{'loss': '2.3641698360443115', 'num_iter': 898048, 'lr': 0.001, 'time': '8.036820411682129 Seconds', 'norm': 0.228515625}\n", + "{'loss': '2.3526058197021484', 'num_iter': 898560, 'lr': 0.001, 'time': '8.405598402023315 Seconds', 'norm': 0.2236328125}\n", + "{'loss': '2.3071482181549072', 'num_iter': 899072, 'lr': 0.001, 'time': '8.627587080001831 Seconds', 'norm': 0.1884765625}\n", + "{'loss': '2.3047642707824707', 'num_iter': 899584, 'lr': 0.001, 'time': '8.37437391281128 Seconds', 'norm': 0.1962890625}\n", + "{'loss': '2.3221399784088135', 'num_iter': 900096, 'lr': 0.001, 'time': '8.993763208389282 Seconds', 'norm': 0.2080078125}\n", + "{'loss': '2.3156065940856934', 'num_iter': 900608, 'lr': 0.001, 'time': '9.611604928970337 Seconds', 'norm': 0.2158203125}\n", + "{'loss': '2.382218837738037', 'num_iter': 901120, 'lr': 0.001, 'time': '8.163914918899536 Seconds', 'norm': 0.181640625}\n", + "{'loss': '2.438100576400757', 'num_iter': 901632, 'lr': 0.001, 'time': '8.086852550506592 Seconds', 'norm': 0.2314453125}\n", + "{'loss': '2.4461135864257812', 'num_iter': 902144, 'lr': 0.001, 'time': '7.9185285568237305 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.335604667663574', 'num_iter': 902656, 'lr': 0.001, 'time': '8.364047050476074 Seconds', 'norm': 0.1943359375}\n", + "{'loss': '2.3718466758728027', 'num_iter': 903168, 'lr': 0.001, 'time': '8.708034992218018 Seconds', 'norm': 0.2265625}\n", + "{'loss': '2.3909525871276855', 'num_iter': 903680, 'lr': 0.001, 'time': '8.199344396591187 Seconds', 'norm': 0.26171875}\n", + "{'loss': '2.3544185161590576', 'num_iter': 904192, 'lr': 0.001, 'time': '8.901054382324219 Seconds', 'norm': 0.2099609375}\n", + "{'loss': '2.376430034637451', 'num_iter': 904704, 'lr': 0.001, 'time': '8.851238012313843 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.4198415279388428', 
'num_iter': 905216, 'lr': 0.001, 'time': '8.459168434143066 Seconds', 'norm': 0.234375}\n", + "{'loss': '2.3527960777282715', 'num_iter': 905728, 'lr': 0.001, 'time': '8.078238725662231 Seconds', 'norm': 0.162109375}\n", + "{'loss': '2.3573272228240967', 'num_iter': 906240, 'lr': 0.001, 'time': '8.068448305130005 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.368509531021118', 'num_iter': 906752, 'lr': 0.001, 'time': '8.211101531982422 Seconds', 'norm': 0.1923828125}\n", + "{'loss': '2.3345413208007812', 'num_iter': 907264, 'lr': 0.001, 'time': '8.496539831161499 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.37021803855896', 'num_iter': 907776, 'lr': 0.001, 'time': '8.343792915344238 Seconds', 'norm': 0.1845703125}\n", + "{'loss': '2.3669683933258057', 'num_iter': 908288, 'lr': 0.001, 'time': '8.142228603363037 Seconds', 'norm': 0.1728515625}\n", + "{'loss': '2.374251365661621', 'num_iter': 908800, 'lr': 0.001, 'time': '8.681265592575073 Seconds', 'norm': 0.2265625}\n", + "{'loss': '2.3345797061920166', 'num_iter': 909312, 'lr': 0.001, 'time': '8.43146562576294 Seconds', 'norm': 0.2275390625}\n", + "{'loss': '2.3778271675109863', 'num_iter': 909824, 'lr': 0.001, 'time': '8.341204643249512 Seconds', 'norm': 0.1669921875}\n", + "{'loss': '2.354597806930542', 'num_iter': 910336, 'lr': 0.001, 'time': '8.09926438331604 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.4028189182281494', 'num_iter': 910848, 'lr': 0.001, 'time': '7.942551851272583 Seconds', 'norm': 0.224609375}\n", + "{'loss': '2.3475964069366455', 'num_iter': 911360, 'lr': 0.001, 'time': '8.381775856018066 Seconds', 'norm': 0.216796875}\n", + "{'loss': '2.355555295944214', 'num_iter': 911872, 'lr': 0.001, 'time': '8.949925661087036 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.38680362701416', 'num_iter': 912384, 'lr': 0.001, 'time': '8.746765613555908 Seconds', 'norm': 0.248046875}\n", + "{'loss': '2.3561573028564453', 'num_iter': 912896, 'lr': 0.001, 'time': '8.767604351043701 Seconds', 'norm': 
0.26171875}\n", + "{'loss': '2.3629369735717773', 'num_iter': 913408, 'lr': 0.001, 'time': '9.628791332244873 Seconds', 'norm': 0.263671875}\n", + "{'loss': '2.3990771770477295', 'num_iter': 913920, 'lr': 0.001, 'time': '8.974919557571411 Seconds', 'norm': 0.2412109375}\n", + "{'loss': '2.342597723007202', 'num_iter': 914432, 'lr': 0.001, 'time': '8.902372121810913 Seconds', 'norm': 0.228515625}\n", + "{'loss': '2.347564458847046', 'num_iter': 914944, 'lr': 0.001, 'time': '8.812807321548462 Seconds', 'norm': 0.22265625}\n", + "{'loss': '2.3715360164642334', 'num_iter': 915456, 'lr': 0.001, 'time': '8.622413396835327 Seconds', 'norm': 0.19921875}\n", + "{'loss': '2.3940467834472656', 'num_iter': 915968, 'lr': 0.001, 'time': '8.149163484573364 Seconds', 'norm': 0.259765625}\n", + "{'loss': '2.3751208782196045', 'num_iter': 916480, 'lr': 0.001, 'time': '7.949531316757202 Seconds', 'norm': 0.1767578125}\n", + "{'loss': '2.385967493057251', 'num_iter': 916992, 'lr': 0.001, 'time': '8.040062427520752 Seconds', 'norm': 0.25390625}\n", + "{'loss': '2.3431928157806396', 'num_iter': 917504, 'lr': 0.001, 'time': '8.492937803268433 Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.3572185039520264', 'num_iter': 918016, 'lr': 0.001, 'time': '14.323287725448608 Seconds', 'norm': 0.216796875}\n", + "{'loss': '2.3189656734466553', 'num_iter': 918528, 'lr': 0.001, 'time': '8.526179075241089 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.3758018016815186', 'num_iter': 919040, 'lr': 0.001, 'time': '8.056351900100708 Seconds', 'norm': 0.2138671875}\n", + "{'loss': '2.3652594089508057', 'num_iter': 919552, 'lr': 0.001, 'time': '8.565654993057251 Seconds', 'norm': 0.1728515625}\n", + "{'loss': '2.3590707778930664', 'num_iter': 920064, 'lr': 0.001, 'time': '8.050886869430542 Seconds', 'norm': 0.2236328125}\n", + "{'loss': '2.4174492359161377', 'num_iter': 920576, 'lr': 0.001, 'time': '7.941462516784668 Seconds', 'norm': 0.1904296875}\n", + "{'loss': '2.297471284866333', 'num_iter': 
921088, 'lr': 0.001, 'time': '8.739525079727173 Seconds', 'norm': 0.1875}\n", + "{'loss': '2.438664436340332', 'num_iter': 921600, 'lr': 0.001, 'time': '8.569477081298828 Seconds', 'norm': 0.193359375}\n", + "{'loss': '2.3627471923828125', 'num_iter': 922112, 'lr': 0.001, 'time': '9.467327117919922 Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.334042549133301', 'num_iter': 922624, 'lr': 0.001, 'time': '9.33478331565857 Seconds', 'norm': 0.1826171875}\n", + "{'loss': '2.286079168319702', 'num_iter': 923136, 'lr': 0.001, 'time': '9.182161808013916 Seconds', 'norm': 0.189453125}\n", + "{'loss': '2.3770241737365723', 'num_iter': 923648, 'lr': 0.001, 'time': '8.206654071807861 Seconds', 'norm': 0.2060546875}\n", + "{'loss': '2.4504945278167725', 'num_iter': 924160, 'lr': 0.001, 'time': '8.043625593185425 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.3948819637298584', 'num_iter': 924672, 'lr': 0.001, 'time': '7.9505980014801025 Seconds', 'norm': 0.1806640625}\n", + "{'loss': '2.402768135070801', 'num_iter': 925184, 'lr': 0.001, 'time': '8.220118284225464 Seconds', 'norm': 0.2041015625}\n", + "{'loss': '2.3939926624298096', 'num_iter': 925696, 'lr': 0.001, 'time': '7.870311260223389 Seconds', 'norm': 0.185546875}\n", + "{'loss': '2.364821195602417', 'num_iter': 926208, 'lr': 0.001, 'time': '8.554903507232666 Seconds', 'norm': 0.173828125}\n", + "{'loss': '2.3646743297576904', 'num_iter': 926720, 'lr': 0.001, 'time': '8.473495483398438 Seconds', 'norm': 0.1748046875}\n", + "{'loss': '2.4230446815490723', 'num_iter': 927232, 'lr': 0.001, 'time': '7.780987739562988 Seconds', 'norm': 0.1845703125}\n", + "{'loss': '2.385584592819214', 'num_iter': 927744, 'lr': 0.001, 'time': '8.113358497619629 Seconds', 'norm': 0.1982421875}\n", + "{'loss': '2.353760242462158', 'num_iter': 928256, 'lr': 0.001, 'time': '8.37222695350647 Seconds', 'norm': 0.1708984375}\n", + "{'loss': '2.3519046306610107', 'num_iter': 928768, 'lr': 0.001, 'time': '8.348581790924072 Seconds', 'norm': 
0.1826171875}\n", + "{'loss': '2.3855926990509033', 'num_iter': 929280, 'lr': 0.001, 'time': '8.173563718795776 Seconds', 'norm': 0.189453125}\n", + "{'loss': '2.3533990383148193', 'num_iter': 929792, 'lr': 0.001, 'time': '8.13526463508606 Seconds', 'norm': 0.1982421875}\n", + "{'loss': '2.403102159500122', 'num_iter': 930304, 'lr': 0.001, 'time': '7.9633708000183105 Seconds', 'norm': 0.21484375}\n", + "{'loss': '2.394098997116089', 'num_iter': 930816, 'lr': 0.001, 'time': '8.27505350112915 Seconds', 'norm': 0.2353515625}\n", + "{'loss': '2.3781769275665283', 'num_iter': 931328, 'lr': 0.001, 'time': '9.121376514434814 Seconds', 'norm': 0.2197265625}\n", + "{'loss': '2.4632511138916016', 'num_iter': 931840, 'lr': 0.001, 'time': '8.894826889038086 Seconds', 'norm': 0.2001953125}\n", + "{'loss': '2.3970489501953125', 'num_iter': 932352, 'lr': 0.001, 'time': '8.239832401275635 Seconds', 'norm': 0.224609375}\n", + "{'loss': '2.322509527206421', 'num_iter': 932864, 'lr': 0.001, 'time': '8.412793636322021 Seconds', 'norm': 0.2080078125}\n", + "{'loss': '2.362567901611328', 'num_iter': 933376, 'lr': 0.001, 'time': '8.388514041900635 Seconds', 'norm': 0.2265625}\n" + ] + } + ], + "source": [
+ "def _save_training_checkpoint(path, epoch, loss):\n",
+ "    \"\"\"Save model, optimizer and scheduler state plus bookkeeping to `path`.\n",
+ "\n",
+ "    Reads `model`, `optimizer`, `scheduler` and `global_step` from the notebook\n",
+ "    namespace at call time. `loss` is stored as given; the training loop passes\n",
+ "    the last micro-batch loss, already divided by `accumulation_steps`.\n",
+ "    \"\"\"\n",
+ "    torch.save({\n",
+ "        'epoch': epoch,\n",
+ "        'model_state_dict': model.state_dict(),\n",
+ "        'optimizer_state_dict': optimizer.state_dict(),\n",
+ "        'scheduler_state_dict':scheduler.state_dict(),\n",
+ "        'loss': loss,\n",
+ "        'global_step':global_step\n",
+ "    }, path)\n",
+ "\n",
+ "\n",
+ "# A full checkpoint is written every `checkpoint_every` micro-batches.\n",
+ "checkpoint_every = 2 * 8192\n",
+ "\n",
+ "for epoch in range(num_epochs):\n",
+ "    # Put the model in training mode before the first batch of every epoch.\n",
+ "    # NOTE(review): assumes no earlier cell already did this; repeating it is harmless.\n",
+ "    model.train()\n",
+ "    t0 = time.time()\n",
+ "    loss_accum = 0\n",
+ "    # Number of backward() calls whose gradients optimizer.step() has not applied yet.\n",
+ "    pending_micro_batches = 0\n",
+ "    for i, batch in enumerate(dataloader):\n",
+ "        input_ids = batch['input_ids'].to(device).long()\n",
+ "        attention_mask = batch['attention_mask'].to(device).long()\n",
+ "        with torch.autocast(device_type=\"cuda\", dtype=torch.bfloat16):\n",
+ "            outputs = model(input_ids=input_ids,\n",
+ "                            attention_mask=attention_mask,\n",
+ "                            labels=input_ids)\n",
+ "            loss = outputs.loss\n",
+ "        # Scale each micro-batch so the accumulated gradient is an average over the window.\n",
+ "        loss = loss / accumulation_steps\n",
+ "        loss_accum += loss.detach()\n",
+ "        loss.backward()\n",
+ "        pending_micro_batches += 1\n",
+ "\n",
+ "        if (i + 1) % accumulation_steps == 0:\n",
+ "            norm = torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)\n",
+ "            optimizer.step()\n",
+ "            optimizer.zero_grad(set_to_none=True)\n",
+ "            scheduler.step()\n",
+ "            global_step += 1\n",
+ "            pending_micro_batches = 0\n",
+ "\n",
+ "            dt = time.time() - t0\n",
+ "            logs = {\"loss\": f\"{loss_accum}\", \"step\": global_step, \"num_iter\": batch_size*(i+1), \"lr\": scheduler.get_last_lr()[0], \"time\": f\"{dt} Seconds\", \"norm\": norm.item()}\n",
+ "            print(logs)\n",
+ "            # Append one JSON object per line; json.dump() alone writes the records\n",
+ "            # back to back with no separator, which cannot be parsed afterwards.\n",
+ "            with open(\"/content/drive/MyDrive/YarnGPT_naij/logs.json\", \"a\") as file:\n",
+ "                file.write(json.dumps(logs) + \"\\n\")\n",
+ "            t0 = time.time()\n",
+ "            loss_accum = 0\n",
+ "\n",
+ "        if (i + 1) % checkpoint_every == 0:\n",
+ "            _save_training_checkpoint(f'/content/drive/MyDrive/YarnGPT_naij/{i*batch_size}_{epoch}xtraepoch.pt', epoch, loss)\n",
+ "\n",
+ "    # Apply gradients left over from an incomplete accumulation window, clipped like\n",
+ "    # every other step, then clear them so they cannot leak into the next epoch.\n",
+ "    # The scheduler and global_step are deliberately left untouched, as before.\n",
+ "    if pending_micro_batches:\n",
+ "        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)\n",
+ "        optimizer.step()\n",
+ "        optimizer.zero_grad(set_to_none=True)\n",
+ "    _save_training_checkpoint(f'/content/drive/MyDrive/YarnGPT_naij/final_{epoch}xtraepoch.pt', epoch, loss)\n",
+ "model.push_to_hub(new_checkpoint,private=False,commit_message=f\"final\")"
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6GwywbGrlMKb" + }, + "outputs": [], + "source": [ + "model.push_to_hub(new_checkpoint,private=False,)#commit_message=f\"model {epoch} {(i+1)*batch_size}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8ZhxvZA_w3Xl" + }, + "outputs": [], + "source": [] + } + ], +
"metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "A100", + "machine_shape": "hm", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "02848bb5b7494a4fa7fa9a05aa4ac2bc": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": "center", + "align_self": null, + "border": null, + "bottom": null, + "display": "flex", + "flex": null, + "flex_flow": "column", + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "50%" + } + }, + "04de5b7011464dd182b34a395d877d3d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + 
"align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "07824afeabf1432da02e4eee4a2d26e3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "08ffc708159a44a8a35dffcb62fa1d62": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "09580395c9934b5190bcf3e93460c8a1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + 
"_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0c36380be9a643bf9491e4a10534eb1b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_1d7702ecb38b440a8b4b9ec5d4be9d57", + "IPY_MODEL_29c5bf81e8aa49108806387dbdbb1914", + "IPY_MODEL_a198de7ec54241898928660653e0814c" + ], + "layout": "IPY_MODEL_26b211e65bef4812b05aff4f9daa40d0" + } + }, + "0e2fc3ee86a9478d95cdf2e619452d24": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": 
null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "0e5825f0b53b424eb1a955be2b788498": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "11f977857e664128a6ef3235380c557a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_bc4ddf95ae3d4c80aecd92e1fd339565", + "IPY_MODEL_7ddde911e3da4ba99e8a8edbc8aeae9b", + "IPY_MODEL_39af899a5aee479ba0b448b63b0bea05" + ], + "layout": "IPY_MODEL_6d4a662059fb494ba610d50404c67e7d" + } + }, + "12fc1343184246fab5b3bb814c19ba98": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_09580395c9934b5190bcf3e93460c8a1", + "placeholder": "​", + "style": "IPY_MODEL_dc7d0e01c8b94b9789f24e498492ceb9", + "value": " 111/111 [00:00<00:00, 
9.95kB/s]" + } + }, + "14513671d2c342e59ce2c74d330febc5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ca75bd8ceef74139a8f0230e330646a6", + "placeholder": "​", + "style": "IPY_MODEL_4c5d4ccbc801405c8015b017d57aa10e", + "value": " 532k/532k [00:00<00:00, 2.43MB/s]" + } + }, + "1d7702ecb38b440a8b4b9ec5d4be9d57": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f523aaaa5905455187666a3b07a8975e", + "placeholder": "​", + "style": "IPY_MODEL_488d96077f0642d3ae695cafe5f60aef", + "value": "vocab.json: 100%" + } + }, + "1ee4b3a3268e4e0e92b724e9b7ac1e92": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "24e3faacf723412cbc6fe21cf983b64e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + 
"_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4de4dd4c73444c09a1121edcf9e9252f", + "placeholder": "​", + "style": "IPY_MODEL_e1e6b64b07344a928c269e1ca48f12d1", + "value": "generation_config.json: 100%" + } + }, + "2559835b179f4898b47a9a4939f7ff40": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_57c2dec352b34076a7f7333cb5eca1a7", + "IPY_MODEL_276d3129fbb940bc8389b552fd9564f2", + "IPY_MODEL_c707c9e8a4bf4d1db41e372ff37f7eb6" + ], + "layout": "IPY_MODEL_4bcaae3e55aa4ca99d9c6a1b2606d58d" + } + }, + "26b211e65bef4812b05aff4f9daa40d0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + 
"grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "276d3129fbb940bc8389b552fd9564f2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d2ad95651c5f4cbf8150ff1b93e614a6", + "max": 863, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_9ca99593cabb497d8c0fb50cb63996f6", + "value": 863 + } + }, + "29c5bf81e8aa49108806387dbdbb1914": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3a7b9ba5c6f74db8b61be044ba56b51d", + "max": 800662, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_a0c4a76d148345eea87dfc4fd5a5fc74", + "value": 800662 + } + }, + "2c24aa4da46f4f48b7a1edf6b8d97904": { + "model_module": "@jupyter-widgets/controls", + 
"model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_9d9e0c6e807c427385f75d32747fb8ab", + "IPY_MODEL_3a5cfba8389c45a8baf1cd58008e2daa", + "IPY_MODEL_14513671d2c342e59ce2c74d330febc5" + ], + "layout": "IPY_MODEL_5a8e0e651c944d07bcb22b8db4cc8f8a" + } + }, + "39af899a5aee479ba0b448b63b0bea05": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d5109e3ff8c74285a6bdbb04cff2647d", + "placeholder": "​", + "style": "IPY_MODEL_92f058e4ff014735a45226076f891e5f", + "value": " 466k/466k [00:00<00:00, 2.18MB/s]" + } + }, + "3a5cfba8389c45a8baf1cd58008e2daa": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c47658e211854bbd954b1e796f4ef148", + "max": 532463, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_c4267212b49744c5a44595e19272fee0", + "value": 532463 + 
} + }, + "3a7b9ba5c6f74db8b61be044ba56b51d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3d52fa175e4c4ec29a780af707098f66": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + 
"grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3d687f99d4564f4e9efc1f988f5d6799": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "VBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "VBoxView", + "box_style": "", + "children": [], + "layout": "IPY_MODEL_5625d8053fb64e00a587464d8800a25c" + } + }, + "44d657b3f11f414d8d3b1cd116e982e6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "45981dec8c674296a027470e6a3a03e4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + 
"IPY_MODEL_761ccc19d9c94d429fa0549da226f07a", + "IPY_MODEL_5f34ecd3df7e46e588ddaeae185b9832", + "IPY_MODEL_dfa17938bf884df7a5f772f69d66af20" + ], + "layout": "IPY_MODEL_3d52fa175e4c4ec29a780af707098f66" + } + }, + "45a1827b8ce245b1961e4d52482e580a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4661d7cb90414655b7ea77f615bb99cb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "488d96077f0642d3ae695cafe5f60aef": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "48d9ac86996c4e42a27b772503b281bc": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4bcaae3e55aa4ca99d9c6a1b2606d58d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4c5d4ccbc801405c8015b017d57aa10e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4de4dd4c73444c09a1121edcf9e9252f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + 
"grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "51170a37647f45fcb37e1efdf983e340": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "53f6f92472f345d78c961f735b87c437": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + 
"model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_04de5b7011464dd182b34a395d877d3d", + "placeholder": "​", + "style": "IPY_MODEL_08ffc708159a44a8a35dffcb62fa1d62", + "value": "tokenizer.json: 100%" + } + }, + "553043ca5e4f4da8bdd60eeb9de680a0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_53f6f92472f345d78c961f735b87c437", + "IPY_MODEL_6b08efe2042847b4968ca67c65077c6b", + "IPY_MODEL_fc801301982346af98287e5bad9caab6" + ], + "layout": "IPY_MODEL_b99672ee6bbe4a58b6b92d8ed2760e13" + } + }, + "5625d8053fb64e00a587464d8800a25c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": "center", + "align_self": null, + "border": null, + "bottom": null, + "display": "flex", + "flex": null, + "flex_flow": "column", + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + 
"grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "50%" + } + }, + "57c2dec352b34076a7f7333cb5eca1a7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9b9547914181479395814e460cf6d27c", + "placeholder": "​", + "style": "IPY_MODEL_b20e6228777b470f843ead8709b8d961", + "value": "special_tokens_map.json: 100%" + } + }, + "5a8e0e651c944d07bcb22b8db4cc8f8a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + 
"justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5f34ecd3df7e46e588ddaeae185b9832": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b1fc41f86a3d43e9be40abfc028af5fd", + "max": 731539240, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_88a5eded0bf84c71b0954e2edf977487", + "value": 731539240 + } + }, + "5fdc46a403254a6981f7035a9d353ae5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "662d5001154840a78864c713f8701877": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6b08efe2042847b4968ca67c65077c6b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f8e7c0fc8e174471b3e5c39511b0cdb6", + "max": 4081739, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_0e5825f0b53b424eb1a955be2b788498", + "value": 4081739 + } + }, + "6d4a662059fb494ba610d50404c67e7d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + 
"align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6fe9554203f343dab19deed6b1fb026d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_869992447745421e83b0c2f179aa6a5e", + "IPY_MODEL_b0f76ecd5bf045ed9ebd2ab76c37bbe0", + "IPY_MODEL_f6c22c85dea54ad887a4e193f2928e2a" + ], + "layout": "IPY_MODEL_f4de68e6b3a44fc7bdacd90f27aea914" + } + }, + "70f2b1a35f414c798a8300c74e1d1be0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "VBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "VBoxView", + "box_style": "", + "children": [], + 
"layout": "IPY_MODEL_02848bb5b7494a4fa7fa9a05aa4ac2bc" + } + }, + "72f4119194fe47268df25e7530b700cf": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "761ccc19d9c94d429fa0549da226f07a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_48d9ac86996c4e42a27b772503b281bc", + "placeholder": "​", + "style": "IPY_MODEL_c8f34c306d2d48f59d8dbd5d33ef4603", + "value": "model.safetensors: 100%" + } + }, + 
"7ddde911e3da4ba99e8a8edbc8aeae9b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_662d5001154840a78864c713f8701877", + "max": 466391, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_add1ea4347ec4122aa4a9c246e206591", + "value": 466391 + } + }, + "853d260a8a164307ad56229ee36e1eaf": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"869992447745421e83b0c2f179aa6a5e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8e9923498afe4589bc160b7387835b96", + "placeholder": "​", + "style": "IPY_MODEL_c4305feaa21d4314ab7b59c4ca6eae8d", + "value": "config.json: 100%" + } + }, + "88a5eded0bf84c71b0954e2edf977487": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "89010e1bcd164e60b5d5ccaafe5df3fe": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + 
"justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8e9923498afe4589bc160b7387835b96": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "92f058e4ff014735a45226076f891e5f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "99ef4d508b784f98961a97e108518e60": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9b9547914181479395814e460cf6d27c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + 
"grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9ca99593cabb497d8c0fb50cb63996f6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "9d54e918963e4b41924f771273ef52b6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_72f4119194fe47268df25e7530b700cf", + "max": 111, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_e1157ed76cfe430d81de5eaa291f2956", + "value": 111 + } + }, + "9d9e0c6e807c427385f75d32747fb8ab": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { 
+ "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d55bfc1006b74b11b59711b4acef5cc9", + "placeholder": "​", + "style": "IPY_MODEL_af54aee43338446394aeb1883c2fcef4", + "value": "tokenizer_config.json: 100%" + } + }, + "a01859158d4e43b6a548c6195555cb57": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f4ea5ec051234ba69c9109c8bd8b83dc", + "max": 64546, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_0e2fc3ee86a9478d95cdf2e619452d24", + "value": 64546 + } + }, + "a0c4a76d148345eea87dfc4fd5a5fc74": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "a10a9ecd872540cfa66f14c2d4ee2a58": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_a254409982994c11b03e58c5a60ac8c7", + "IPY_MODEL_a01859158d4e43b6a548c6195555cb57", + "IPY_MODEL_ccf73c00401a4877a6c0f60a9372e9e8" + ], + "layout": "IPY_MODEL_89010e1bcd164e60b5d5ccaafe5df3fe" + } + }, + "a198de7ec54241898928660653e0814c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_acade5a579254accb2b44e49b24ec542", + "placeholder": "​", + "style": "IPY_MODEL_db1f5773877d44458854cdb07cc1f529", + "value": " 801k/801k [00:00<00:00, 1.25MB/s]" + } + }, + "a254409982994c11b03e58c5a60ac8c7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_99ef4d508b784f98961a97e108518e60", + "placeholder": "​", + "style": "IPY_MODEL_45a1827b8ce245b1961e4d52482e580a", + "value": "added_tokens.json: 100%" + } + }, + "acade5a579254accb2b44e49b24ec542": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", 
+ "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "acf633a672ff40d99a14b8d7ab6dc708": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + 
"min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "add1ea4347ec4122aa4a9c246e206591": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "af54aee43338446394aeb1883c2fcef4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b0f76ecd5bf045ed9ebd2ab76c37bbe0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_853d260a8a164307ad56229ee36e1eaf", + "max": 765, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_f0f095420fd54431b6c0125dae96bf5d", + "value": 765 + } + }, + "b1fc41f86a3d43e9be40abfc028af5fd": { + "model_module": 
"@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b20e6228777b470f843ead8709b8d961": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b99672ee6bbe4a58b6b92d8ed2760e13": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bc4ddf95ae3d4c80aecd92e1fd339565": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_acf633a672ff40d99a14b8d7ab6dc708", + "placeholder": "​", + "style": "IPY_MODEL_bdb0c222f5e945e79f2b74ddafe942f5", + "value": "merges.txt: 100%" + } + }, + "bdb0c222f5e945e79f2b74ddafe942f5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + 
"description_width": "" + } + }, + "c4267212b49744c5a44595e19272fee0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "c4305feaa21d4314ab7b59c4ca6eae8d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c47658e211854bbd954b1e796f4ef148": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + 
"object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c707c9e8a4bf4d1db41e372ff37f7eb6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_51170a37647f45fcb37e1efdf983e340", + "placeholder": "​", + "style": "IPY_MODEL_5fdc46a403254a6981f7035a9d353ae5", + "value": " 863/863 [00:00<00:00, 70.2kB/s]" + } + }, + "c8744acaa3e44977a8e29d8c9dcffddf": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": 
null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c8f34c306d2d48f59d8dbd5d33ef4603": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ca75bd8ceef74139a8f0230e330646a6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ccf73c00401a4877a6c0f60a9372e9e8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": 
"HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f783b927dbe94d9dac6bd1af060d47ca", + "placeholder": "​", + "style": "IPY_MODEL_44d657b3f11f414d8d3b1cd116e982e6", + "value": " 64.5k/64.5k [00:00<00:00, 5.21MB/s]" + } + }, + "d2ad95651c5f4cbf8150ff1b93e614a6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d49eb3b2a3ef4d9bb08662e96574a701": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d5109e3ff8c74285a6bdbb04cff2647d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d55bfc1006b74b11b59711b4acef5cc9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + 
"border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "db1f5773877d44458854cdb07cc1f529": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "dc7d0e01c8b94b9789f24e498492ceb9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "dd30e304e6494d01bf391164316088e1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_24e3faacf723412cbc6fe21cf983b64e", + "IPY_MODEL_9d54e918963e4b41924f771273ef52b6", + "IPY_MODEL_12fc1343184246fab5b3bb814c19ba98" + ], + "layout": "IPY_MODEL_f61b4939204746d59ccfb6a10b2e9d9e" + } + }, + "dfa17938bf884df7a5f772f69d66af20": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c8744acaa3e44977a8e29d8c9dcffddf", + "placeholder": "​", + "style": "IPY_MODEL_1ee4b3a3268e4e0e92b724e9b7ac1e92", + "value": " 732M/732M [00:17<00:00, 40.1MB/s]" + } + }, + "e1157ed76cfe430d81de5eaa291f2956": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "e1e6b64b07344a928c269e1ca48f12d1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + 
"description_width": "" + } + }, + "f0f095420fd54431b6c0125dae96bf5d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "f4de68e6b3a44fc7bdacd90f27aea914": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f4ea5ec051234ba69c9109c8bd8b83dc": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": 
"1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f523aaaa5905455187666a3b07a8975e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": 
null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f61b4939204746d59ccfb6a10b2e9d9e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f6c22c85dea54ad887a4e193f2928e2a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_f8ef11f18b3542f6ad72179b8c667c46", + "placeholder": "​", + "style": "IPY_MODEL_d49eb3b2a3ef4d9bb08662e96574a701", + "value": " 765/765 [00:00<00:00, 62.7kB/s]" + } + }, + "f783b927dbe94d9dac6bd1af060d47ca": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f8e7c0fc8e174471b3e5c39511b0cdb6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + 
"grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f8ef11f18b3542f6ad72179b8c667c46": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fc801301982346af98287e5bad9caab6": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4661d7cb90414655b7ea77f615bb99cb", + "placeholder": "​", + "style": "IPY_MODEL_07824afeabf1432da02e4eee4a2d26e3", + "value": " 4.08M/4.08M [00:01<00:00, 3.82MB/s]" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file