updated-policy / logger /trajectory.log
srinjoyd's picture
add logs
1eed1a8
data: {"data":"===== Job started at 2026-04-25 19:57:38 =====","timestamp":"2026-04-25T19:57:38Z"}
data: {"data":"Downloading setuptools (1.0MiB)","timestamp":"2026-04-25T19:57:52.294Z"}
data: {"data":"Downloading pillow (6.8MiB)","timestamp":"2026-04-25T19:57:52.295Z"}
data: {"data":"Downloading nvidia-nvshmem-cu13 (57.6MiB)","timestamp":"2026-04-25T19:57:52.295Z"}
data: {"data":"Downloading nvidia-cusolver (191.6MiB)","timestamp":"2026-04-25T19:57:52.295Z"}
data: {"data":"Downloading hf-xet (4.0MiB)","timestamp":"2026-04-25T19:57:52.295Z"}
data: {"data":"Downloading nvidia-cusparse (139.2MiB)","timestamp":"2026-04-25T19:57:52.295Z"}
data: {"data":"Downloading pandas (10.4MiB)","timestamp":"2026-04-25T19:57:52.295Z"}
data: {"data":"Downloading nvidia-cuda-nvrtc (86.0MiB)","timestamp":"2026-04-25T19:57:52.296Z"}
data: {"data":"Downloading nvidia-nvjitlink (38.8MiB)","timestamp":"2026-04-25T19:57:52.296Z"}
data: {"data":"Downloading uvloop (4.2MiB)","timestamp":"2026-04-25T19:57:52.296Z"}
data: {"data":"Downloading nvidia-nccl-cu13 (187.4MiB)","timestamp":"2026-04-25T19:57:52.296Z"}
data: {"data":"Downloading nvidia-cuda-runtime (2.1MiB)","timestamp":"2026-04-25T19:57:52.297Z"}
data: {"data":"Downloading networkx (2.0MiB)","timestamp":"2026-04-25T19:57:52.297Z"}
data: {"data":"Downloading nvidia-cublas (403.5MiB)","timestamp":"2026-04-25T19:57:52.297Z"}
data: {"data":"Downloading aiohttp (1.7MiB)","timestamp":"2026-04-25T19:57:52.297Z"}
data: {"data":"Downloading nvidia-curand (56.8MiB)","timestamp":"2026-04-25T19:57:52.298Z"}
data: {"data":"Downloading nvidia-cuda-cupti (10.2MiB)","timestamp":"2026-04-25T19:57:52.298Z"}
data: {"data":"Downloading tokenizers (3.1MiB)","timestamp":"2026-04-25T19:57:52.298Z"}
data: {"data":"Downloading sympy (6.0MiB)","timestamp":"2026-04-25T19:57:52.298Z"}
data: {"data":"Downloading pygments (1.2MiB)","timestamp":"2026-04-25T19:57:52.299Z"}
data: {"data":"Downloading triton (179.5MiB)","timestamp":"2026-04-25T19:57:52.299Z"}
data: {"data":"Downloading trackio (1.6MiB)","timestamp":"2026-04-25T19:57:52.300Z"}
data: {"data":"Downloading nvidia-cufile (1.2MiB)","timestamp":"2026-04-25T19:57:52.300Z"}
data: {"data":"Downloading pyarrow (46.6MiB)","timestamp":"2026-04-25T19:57:52.300Z"}
data: {"data":"Downloading nvidia-cusparselt-cu13 (162.0MiB)","timestamp":"2026-04-25T19:57:52.300Z"}
data: {"data":"Downloading nvidia-cufft (204.2MiB)","timestamp":"2026-04-25T19:57:52.300Z"}
data: {"data":"Downloading transformers (9.9MiB)","timestamp":"2026-04-25T19:57:52.301Z"}
data: {"data":"Downloading torch (506.1MiB)","timestamp":"2026-04-25T19:57:52.301Z"}
data: {"data":"Downloading numpy (15.9MiB)","timestamp":"2026-04-25T19:57:52.301Z"}
data: {"data":"Downloading cuda-bindings (6.0MiB)","timestamp":"2026-04-25T19:57:52.301Z"}
data: {"data":"Downloading nvidia-cudnn-cu13 (349.1MiB)","timestamp":"2026-04-25T19:57:52.302Z"}
data: {"data":" Downloaded nvidia-cufile","timestamp":"2026-04-25T19:57:52.891Z"}
data: {"data":" Downloaded aiohttp","timestamp":"2026-04-25T19:57:53.092Z"}
data: {"data":" Downloaded trackio","timestamp":"2026-04-25T19:57:53.136Z"}
data: {"data":" Downloaded nvidia-cuda-runtime","timestamp":"2026-04-25T19:57:53.198Z"}
data: {"data":" Downloaded pygments","timestamp":"2026-04-25T19:57:53.324Z"}
data: {"data":" Downloaded tokenizers","timestamp":"2026-04-25T19:57:53.415Z"}
data: {"data":" Downloaded setuptools","timestamp":"2026-04-25T19:57:53.520Z"}
data: {"data":" Downloaded hf-xet","timestamp":"2026-04-25T19:57:53.612Z"}
data: {"data":" Downloaded networkx","timestamp":"2026-04-25T19:57:53.688Z"}
data: {"data":" Downloaded uvloop","timestamp":"2026-04-25T19:57:53.702Z"}
data: {"data":" Downloaded cuda-bindings","timestamp":"2026-04-25T19:57:54.130Z"}
data: {"data":" Downloaded pillow","timestamp":"2026-04-25T19:57:54.215Z"}
data: {"data":" Downloaded sympy","timestamp":"2026-04-25T19:57:54.570Z"}
data: {"data":" Downloaded nvidia-cuda-cupti","timestamp":"2026-04-25T19:57:54.733Z"}
data: {"data":" Downloaded numpy","timestamp":"2026-04-25T19:57:55.479Z"}
data: {"data":" Downloaded pandas","timestamp":"2026-04-25T19:57:56.503Z"}
data: {"data":" Downloaded transformers","timestamp":"2026-04-25T19:57:56.800Z"}
data: {"data":" Downloaded nvidia-nvjitlink","timestamp":"2026-04-25T19:57:57.075Z"}
data: {"data":" Downloaded nvidia-curand","timestamp":"2026-04-25T19:57:58.175Z"}
data: {"data":" Downloaded nvidia-nvshmem-cu13","timestamp":"2026-04-25T19:57:58.232Z"}
data: {"data":" Downloaded pyarrow","timestamp":"2026-04-25T19:57:58.535Z"}
data: {"data":" Downloaded nvidia-cuda-nvrtc","timestamp":"2026-04-25T19:57:59.905Z"}
data: {"data":" Downloaded nvidia-cusparse","timestamp":"2026-04-25T19:58:02.015Z"}
data: {"data":" Downloaded nvidia-cusparselt-cu13","timestamp":"2026-04-25T19:58:02.624Z"}
data: {"data":" Downloaded nvidia-nccl-cu13","timestamp":"2026-04-25T19:58:03.213Z"}
data: {"data":" Downloaded nvidia-cusolver","timestamp":"2026-04-25T19:58:03.282Z"}
data: {"data":" Downloaded triton","timestamp":"2026-04-25T19:58:03.328Z"}
data: {"data":" Downloaded nvidia-cufft","timestamp":"2026-04-25T19:58:03.404Z"}
data: {"data":" Downloaded nvidia-cudnn-cu13","timestamp":"2026-04-25T19:58:04.451Z"}
data: {"data":" Downloaded nvidia-cublas","timestamp":"2026-04-25T19:58:04.630Z"}
data: {"data":" Downloaded torch","timestamp":"2026-04-25T19:58:23.747Z"}
data: {"data":"Installed 90 packages in 436ms","timestamp":"2026-04-25T19:58:24.184Z"}
data: {"data":"","timestamp":"2026-04-25T19:58:43.478Z"}
data: {"data":"\rconfig.json: 0%| | 0.00/663 [00:00<?, ?B/s]\u001b[A\rconfig.json: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 663/663 [00:00<00:00, 3.33MB/s]","timestamp":"2026-04-25T19:58:43.478Z"}
data: {"data":"","timestamp":"2026-04-25T19:58:43.579Z"}
data: {"data":"\rmodel.safetensors.index.json: 0%| | 0.00/27.8k [00:00<?, ?B/s]\u001b[A\rmodel.safetensors.index.json: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 27.8k/27.8k [00:00<00:00, 84.4MB/s]","timestamp":"2026-04-25T19:58:43.580Z"}
data: {"data":"","timestamp":"2026-04-25T19:58:43.600Z"}
data: {"data":"\rDownloading (incomplete total...): 0.00B [00:00, ?B/s]\u001b[A","timestamp":"2026-04-25T19:58:43.603Z"}
data: {"data":"\rFetching 4 files: 0%| | 0/4 [00:00<?, ?it/s]\u001b[A","timestamp":"2026-04-25T19:58:43.628Z"}
data: {"data":"\rDownloading (incomplete total...): 0%| | 0.00/3.95G [00:00<?, ?B/s]\u001b[A","timestamp":"2026-04-25T19:58:43.629Z"}
data: {"data":"\rDownloading (incomplete total...): 0%| | 0.00/7.81G [00:00<?, ?B/s]\u001b[A","timestamp":"2026-04-25T19:58:43.632Z"}
data: {"data":"\rDownloading (incomplete total...): 0%| | 0.00/11.7G [00:00<?, ?B/s]\u001b[A","timestamp":"2026-04-25T19:58:43.647Z"}
data: {"data":"\rDownloading (incomplete total...): 0%| | 0.00/15.2G [00:00<?, ?B/s]\u001b[A","timestamp":"2026-04-25T19:58:44.605Z"}
data: {"data":"\rDownloading (incomplete total...): 0%| | 0.00/15.2G [00:01<?, ?B/s]\u001b[A","timestamp":"2026-04-25T19:58:44.605Z"}
data: {"data":"\rDownloading (incomplete total...): 0%| | 0.00/15.2G [00:01<?, ?B/s]\u001b[A","timestamp":"2026-04-25T19:58:45.605Z"}
data: {"data":"\rDownloading (incomplete total...): 6%|β–Œ | 916M/15.2G [00:02<00:15, 916MB/s]\u001b[A","timestamp":"2026-04-25T19:58:45.605Z"}
data: {"data":"\rDownloading (incomplete total...): 6%|β–Œ | 916M/15.2G [00:02<00:15, 916MB/s]\u001b[A","timestamp":"2026-04-25T19:58:46.648Z"}
data: {"data":"\rDownloading (incomplete total...): 19%|β–ˆβ–‰ | 2.87G/15.2G [00:03<00:09, 1.36GB/s]\u001b[A","timestamp":"2026-04-25T19:58:47.813Z"}
data: {"data":"\rDownloading (incomplete total...): 34%|β–ˆβ–ˆβ–ˆβ– | 5.19G/15.2G [00:04<00:05, 1.81GB/s]\u001b[A","timestamp":"2026-04-25T19:58:47.813Z"}
data: {"data":"\rDownloading (incomplete total...): 34%|β–ˆβ–ˆβ–ˆβ– | 5.19G/15.2G [00:04<00:05, 1.81GB/s]\u001b[A","timestamp":"2026-04-25T19:58:47.813Z"}
data: {"data":"\rDownloading (incomplete total...): 34%|β–ˆβ–ˆβ–ˆβ– | 5.19G/15.2G [00:04<00:05, 1.81GB/s]\u001b[A","timestamp":"2026-04-25T19:58:48.814Z"}
data: {"data":"\rDownloading (incomplete total...): 56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 8.46G/15.2G [00:05<00:03, 2.25GB/s]\u001b[A","timestamp":"2026-04-25T19:58:48.814Z"}
data: {"data":"\rDownloading (incomplete total...): 56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 8.53G/15.2G [00:05<00:02, 2.57GB/s]\u001b[A","timestamp":"2026-04-25T19:58:49.824Z"}
data: {"data":"\rDownloading (incomplete total...): 74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 11.2G/15.2G [00:06<00:01, 2.59GB/s]\u001b[A","timestamp":"2026-04-25T19:58:50.807Z"}
data: {"data":"\rFetching 4 files: 25%|β–ˆβ–ˆβ–Œ | 1/4 [00:07<00:21, 7.20s/it]\u001b[A\rFetching 4 files: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:07<00:00, 1.80s/it]","timestamp":"2026-04-25T19:58:50.807Z"}
data: {"data":"","timestamp":"2026-04-25T19:58:50.808Z"}
data: {"data":"\rDownload complete: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 15.2G/15.2G [00:07<00:00, 2.59GB/s] \u001b[A\rDownload complete: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 15.2G/15.2G [00:07<00:00, 2.10GB/s]","timestamp":"2026-04-25T19:58:50.840Z"}
data: {"data":"","timestamp":"2026-04-25T19:58:51.651Z"}
data: {"data":"\rLoading weights: 0%| | 0/339 [00:00<?, ?it/s]\u001b[A","timestamp":"2026-04-25T19:58:52.759Z"}
data: {"data":"\rLoading weights: 15%|β–ˆβ–Œ | 52/339 [00:01<00:06, 46.94it/s]\u001b[A","timestamp":"2026-04-25T19:58:53.854Z"}
data: {"data":"\rLoading weights: 29%|β–ˆβ–ˆβ–‰ | 99/339 [00:02<00:05, 44.68it/s]\u001b[A","timestamp":"2026-04-25T19:58:54.953Z"}
data: {"data":"\rLoading weights: 42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 144/339 [00:03<00:04, 42.95it/s]\u001b[A","timestamp":"2026-04-25T19:58:55.968Z"}
data: {"data":"\rLoading weights: 58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 196/339 [00:04<00:03, 46.17it/s]\u001b[A","timestamp":"2026-04-25T19:58:56.959Z"}
data: {"data":"\rLoading weights: 72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 245/339 [00:05<00:01, 47.14it/s]\u001b[A","timestamp":"2026-04-25T19:58:58.049Z"}
data: {"data":"\rLoading weights: 86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 293/339 [00:06<00:00, 46.05it/s]\u001b[A\rLoading weights: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 339/339 [00:07<00:00, 47.02it/s]","timestamp":"2026-04-25T19:58:58.861Z"}
data: {"data":"","timestamp":"2026-04-25T19:58:58.981Z"}
data: {"data":"\rgeneration_config.json: 0%| | 0.00/243 [00:00<?, ?B/s]\u001b[A\rgeneration_config.json: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 243/243 [00:00<00:00, 1.86MB/s]","timestamp":"2026-04-25T19:58:58.982Z"}
data: {"data":"","timestamp":"2026-04-25T19:58:59.019Z"}
data: {"data":"\rREADME.md: 0%| | 0.00/748 [00:00<?, ?B/s]\u001b[A\rREADME.md: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 748/748 [00:00<00:00, 6.16MB/s]","timestamp":"2026-04-25T19:58:59.019Z"}
data: {"data":"","timestamp":"2026-04-25T19:58:59.236Z"}
data: {"data":"\rdata/train-00000-of-00001.parquet: 0%| | 0.00/30.6M [00:00<?, ?B/s]\u001b[A\rdata/train-00000-of-00001.parquet: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 30.6M/30.6M [00:00<00:00, 146MB/s]","timestamp":"2026-04-25T19:58:59.446Z"}
data: {"data":"","timestamp":"2026-04-25T19:58:59.460Z"}
data: {"data":"\rdata/validation-00000-of-00001.parquet: 0%| | 0.00/648k [00:00<?, ?B/s]\u001b[A\rdata/validation-00000-of-00001.parquet: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 648k/648k [00:00<00:00, 3.09MB/s]","timestamp":"2026-04-25T19:58:59.670Z"}
data: {"data":"","timestamp":"2026-04-25T19:58:59.671Z"}
data: {"data":"\rGenerating train split: 0%| | 0/1335 [00:00<?, ? examples/s]\u001b[A\rGenerating train split: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1335/1335 [00:00<00:00, 19246.80 examples/s]","timestamp":"2026-04-25T19:58:59.741Z"}
data: {"data":"","timestamp":"2026-04-25T19:58:59.741Z"}
data: {"data":"\rGenerating validation split: 0%| | 0/28 [00:00<?, ? examples/s]\u001b[A\rGenerating validation split: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 28/28 [00:00<00:00, 8099.90 examples/s]","timestamp":"2026-04-25T19:58:59.745Z"}
data: {"data":"","timestamp":"2026-04-25T19:58:59.834Z"}
data: {"data":"\rtokenizer_config.json: 0%| | 0.00/7.30k [00:00<?, ?B/s]\u001b[A\rtokenizer_config.json: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 7.30k/7.30k [00:00<00:00, 43.5MB/s]","timestamp":"2026-04-25T19:58:59.835Z"}
data: {"data":"","timestamp":"2026-04-25T19:58:59.958Z"}
data: {"data":"\rvocab.json: 0%| | 0.00/2.78M [00:00<?, ?B/s]\u001b[A\rvocab.json: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 2.78M/2.78M [00:00<00:00, 52.1MB/s]","timestamp":"2026-04-25T19:59:00.011Z"}
data: {"data":"","timestamp":"2026-04-25T19:59:00.050Z"}
data: {"data":"\rmerges.txt: 0%| | 0.00/1.67M [00:00<?, ?B/s]\u001b[A\rmerges.txt: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1.67M/1.67M [00:00<00:00, 26.5MB/s]","timestamp":"2026-04-25T19:59:00.113Z"}
data: {"data":"","timestamp":"2026-04-25T19:59:00.143Z"}
data: {"data":"\rtokenizer.json: 0%| | 0.00/7.03M [00:00<?, ?B/s]\u001b[A\rtokenizer.json: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 7.03M/7.03M [00:00<00:00, 48.2MB/s]","timestamp":"2026-04-25T19:59:00.289Z"}
data: {"data":"","timestamp":"2026-04-25T19:59:02.610Z"}
data: {"data":"\rTokenizing train dataset: 0%| | 0/1335 [00:00<?, ? examples/s]\u001b[A","timestamp":"2026-04-25T19:59:03.622Z"}
data: {"data":"\rTokenizing train dataset: 7%|β–‹ | 97/1335 [00:01<00:12, 95.94 examples/s]\u001b[A","timestamp":"2026-04-25T19:59:04.691Z"}
data: {"data":"\rTokenizing train dataset: 14%|β–ˆβ– | 193/1335 [00:02<00:12, 92.21 examples/s]\u001b[A","timestamp":"2026-04-25T19:59:05.736Z"}
data: {"data":"\rTokenizing train dataset: 22%|β–ˆβ–ˆβ– | 295/1335 [00:03<00:10, 94.69 examples/s]\u001b[A","timestamp":"2026-04-25T19:59:06.882Z"}
data: {"data":"\rTokenizing train dataset: 29%|β–ˆβ–ˆβ–‰ | 393/1335 [00:04<00:10, 90.85 examples/s]\u001b[A","timestamp":"2026-04-25T19:59:07.936Z"}
data: {"data":"\rTokenizing train dataset: 37%|β–ˆβ–ˆβ–ˆβ–‹ | 488/1335 [00:05<00:09, 90.61 examples/s]\u001b[A","timestamp":"2026-04-25T19:59:09.052Z"}
data: {"data":"\rTokenizing train dataset: 44%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 583/1335 [00:06<00:08, 88.70 examples/s]\u001b[A","timestamp":"2026-04-25T19:59:10.097Z"}
data: {"data":"\rTokenizing train dataset: 51%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 677/1335 [00:07<00:07, 89.09 examples/s]\u001b[A","timestamp":"2026-04-25T19:59:11.200Z"}
data: {"data":"\rTokenizing train dataset: 58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 768/1335 [00:08<00:06, 86.97 examples/s]\u001b[A","timestamp":"2026-04-25T19:59:12.277Z"}
data: {"data":"\rTokenizing train dataset: 64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 859/1335 [00:09<00:05, 86.20 examples/s]\u001b[A","timestamp":"2026-04-25T19:59:13.350Z"}
data: {"data":"\rTokenizing train dataset: 71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 950/1335 [00:10<00:04, 85.87 examples/s]\u001b[A","timestamp":"2026-04-25T19:59:16.265Z"}
data: {"data":"\rTokenizing train dataset: 78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 1038/1335 [00:13<00:05, 55.53 examples/s]\u001b[A","timestamp":"2026-04-25T19:59:17.300Z"}
data: {"data":"\rTokenizing train dataset: 84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 1116/1335 [00:14<00:03, 59.82 examples/s]\u001b[A","timestamp":"2026-04-25T19:59:18.349Z"}
data: {"data":"\rTokenizing train dataset: 90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1204/1335 [00:15<00:01, 65.55 examples/s]\u001b[A","timestamp":"2026-04-25T19:59:19.385Z"}
data: {"data":"\rTokenizing train dataset: 96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 1280/1335 [00:16<00:00, 67.52 examples/s]\u001b[A\rTokenizing train dataset: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1335/1335 [00:17<00:00, 75.99 examples/s]","timestamp":"2026-04-25T19:59:20.178Z"}
data: {"data":"[transformers] The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151643}.","timestamp":"2026-04-25T19:59:40.471Z"}
data: {"data":"","timestamp":"2026-04-25T19:59:40.744Z"}
data: {"data":"\r 0%| | 0/501 [00:00<?, ?it/s]\u001b[A","timestamp":"2026-04-25T19:59:43.620Z"}
data: {"data":"\r 0%| | 1/501 [00:02<23:57, 2.88s/it]\u001b[A","timestamp":"2026-04-25T19:59:45.658Z"}
data: {"data":"\r 0%| | 2/501 [00:04<19:49, 2.38s/it]\u001b[A","timestamp":"2026-04-25T19:59:47.698Z"}
data: {"data":"\r 1%| | 3/501 [00:06<18:28, 2.23s/it]\u001b[A","timestamp":"2026-04-25T19:59:49.742Z"}
data: {"data":"\r 1%| | 4/501 [00:08<17:50, 2.15s/it]\u001b[A","timestamp":"2026-04-25T19:59:51.788Z"}
data: {"data":"\r 1%| | 5/501 [00:11<17:29, 2.12s/it]\u001b[A","timestamp":"2026-04-25T19:59:53.829Z"}
data: {"data":"\r 1%| | 6/501 [00:13<17:14, 2.09s/it]\u001b[A","timestamp":"2026-04-25T19:59:56.084Z"}
data: {"data":"\r 1%|▏ | 7/501 [00:15<17:39, 2.14s/it]\u001b[A","timestamp":"2026-04-25T19:59:58.138Z"}
data: {"data":"\r 2%|▏ | 8/501 [00:17<17:22, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:00:00.181Z"}
data: {"data":"\r 2%|▏ | 9/501 [00:19<17:09, 2.09s/it]\u001b[A","timestamp":"2026-04-25T20:00:02.423Z"}
data: {"data":"\r 2%|▏ | 10/501 [00:21<17:30, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:00:02.423Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:00:02.423Z"}
data: {"data":"{'loss': '2.016', 'grad_norm': '0.934', 'learning_rate': '1.964e-05', 'entropy': '1.002', 'num_tokens': '8.094e+04', 'mean_token_accuracy': '0.6603', 'epoch': '0.05988'}","timestamp":"2026-04-25T20:00:02.423Z"}
data: {"data":"\r 2%|▏ | 10/501 [00:21<17:30, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:00:04.669Z"}
data: {"data":"\r 2%|▏ | 11/501 [00:23<17:44, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:00:06.720Z"}
data: {"data":"\r 2%|▏ | 12/501 [00:25<17:24, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:00:08.769Z"}
data: {"data":"\r 3%|β–Ž | 13/501 [00:28<17:09, 2.11s/it]\u001b[A","timestamp":"2026-04-25T20:00:11.025Z"}
data: {"data":"\r 3%|β–Ž | 14/501 [00:30<17:28, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:00:13.075Z"}
data: {"data":"\r 3%|β–Ž | 15/501 [00:32<17:11, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:00:15.325Z"}
data: {"data":"\r 3%|β–Ž | 16/501 [00:34<17:27, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:00:17.373Z"}
data: {"data":"\r 3%|β–Ž | 17/501 [00:36<17:09, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:00:19.627Z"}
data: {"data":"\r 4%|β–Ž | 18/501 [00:38<17:25, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:00:21.672Z"}
data: {"data":"\r 4%|▍ | 19/501 [00:40<17:06, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:00:23.930Z"}
data: {"data":"\r 4%|▍ | 20/501 [00:43<17:22, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:00:23.930Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:00:23.930Z"}
data: {"data":"{'loss': '1.94', 'grad_norm': '0.9047', 'learning_rate': '1.924e-05', 'entropy': '1.054', 'num_tokens': '1.611e+05', 'mean_token_accuracy': '0.6627', 'epoch': '0.1198'}","timestamp":"2026-04-25T20:00:23.930Z"}
data: {"data":"\r 4%|▍ | 20/501 [00:43<17:22, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:00:25.983Z"}
data: {"data":"\r 4%|▍ | 21/501 [00:45<17:03, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:00:28.035Z"}
data: {"data":"\r 4%|▍ | 22/501 [00:47<16:50, 2.11s/it]\u001b[A","timestamp":"2026-04-25T20:00:30.282Z"}
data: {"data":"\r 5%|▍ | 23/501 [00:49<17:07, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:00:32.541Z"}
data: {"data":"\r 5%|▍ | 24/501 [00:51<17:21, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:00:34.797Z"}
data: {"data":"\r 5%|▍ | 25/501 [00:54<17:29, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:00:37.050Z"}
data: {"data":"\r 5%|β–Œ | 26/501 [00:56<17:34, 2.22s/it]\u001b[A","timestamp":"2026-04-25T20:00:39.306Z"}
data: {"data":"\r 5%|β–Œ | 27/501 [00:58<17:37, 2.23s/it]\u001b[A","timestamp":"2026-04-25T20:00:41.564Z"}
data: {"data":"\r 6%|β–Œ | 28/501 [01:00<17:38, 2.24s/it]\u001b[A","timestamp":"2026-04-25T20:00:43.615Z"}
data: {"data":"\r 6%|β–Œ | 29/501 [01:02<17:10, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:00:45.873Z"}
data: {"data":"\r 6%|β–Œ | 30/501 [01:05<17:18, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:00:45.873Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:00:45.873Z"}
data: {"data":"{'loss': '1.85', 'grad_norm': '0.7995', 'learning_rate': '1.884e-05', 'entropy': '1.125', 'num_tokens': '2.41e+05', 'mean_token_accuracy': '0.6653', 'epoch': '0.1796'}","timestamp":"2026-04-25T20:00:45.873Z"}
data: {"data":"\r 6%|β–Œ | 30/501 [01:05<17:18, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:00:48.129Z"}
data: {"data":"\r 6%|β–Œ | 31/501 [01:07<17:23, 2.22s/it]\u001b[A","timestamp":"2026-04-25T20:00:50.388Z"}
data: {"data":"\r 6%|β–‹ | 32/501 [01:09<17:26, 2.23s/it]\u001b[A","timestamp":"2026-04-25T20:00:52.450Z"}
data: {"data":"\r 7%|β–‹ | 33/501 [01:11<17:00, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:00:54.504Z"}
data: {"data":"\r 7%|β–‹ | 34/501 [01:13<16:40, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:00:56.559Z"}
data: {"data":"\r 7%|β–‹ | 35/501 [01:15<16:26, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:00:58.823Z"}
data: {"data":"\r 7%|β–‹ | 36/501 [01:18<16:44, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:01:01.086Z"}
data: {"data":"\r 7%|β–‹ | 37/501 [01:20<16:56, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:01:03.357Z"}
data: {"data":"\r 8%|β–Š | 38/501 [01:22<17:05, 2.22s/it]\u001b[A","timestamp":"2026-04-25T20:01:05.412Z"}
data: {"data":"\r 8%|β–Š | 39/501 [01:24<16:41, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:01:07.670Z"}
data: {"data":"{'loss': '1.787', 'grad_norm': '0.5725', 'learning_rate': '1.844e-05', 'entropy': '1.236', 'num_tokens': '3.214e+05', 'mean_token_accuracy': '0.6671', 'epoch': '0.2395'}","timestamp":"2026-04-25T20:01:07.670Z"}
data: {"data":"\r 8%|β–Š | 40/501 [01:26<16:51, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:01:07.670Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:01:07.670Z"}
data: {"data":"\r 8%|β–Š | 40/501 [01:26<16:51, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:01:09.931Z"}
data: {"data":"\r 8%|β–Š | 41/501 [01:29<16:58, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:01:12.203Z"}
data: {"data":"\r 8%|β–Š | 42/501 [01:31<17:04, 2.23s/it]\u001b[A","timestamp":"2026-04-25T20:01:14.477Z"}
data: {"data":"\r 9%|β–Š | 43/501 [01:33<17:07, 2.24s/it]\u001b[A","timestamp":"2026-04-25T20:01:16.751Z"}
data: {"data":"\r 9%|β–‰ | 44/501 [01:36<17:09, 2.25s/it]\u001b[A","timestamp":"2026-04-25T20:01:18.815Z"}
data: {"data":"\r 9%|β–‰ | 45/501 [01:38<16:41, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:01:21.087Z"}
data: {"data":"\r 9%|β–‰ | 46/501 [01:40<16:49, 2.22s/it]\u001b[A","timestamp":"2026-04-25T20:01:23.143Z"}
data: {"data":"\r 9%|β–‰ | 47/501 [01:42<16:25, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:01:25.408Z"}
data: {"data":"\r 10%|β–‰ | 48/501 [01:44<16:36, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:01:27.467Z"}
data: {"data":"\r 10%|β–‰ | 49/501 [01:46<16:14, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:01:29.532Z"}
data: {"data":"\r 10%|β–‰ | 50/501 [01:48<16:00, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:01:29.533Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:01:29.533Z"}
data: {"data":"{'loss': '1.71', 'grad_norm': '0.4529', 'learning_rate': '1.804e-05', 'entropy': '1.288', 'num_tokens': '4.017e+05', 'mean_token_accuracy': '0.6737', 'epoch': '0.2994'}","timestamp":"2026-04-25T20:01:29.533Z"}
data: {"data":"\r 10%|β–‰ | 50/501 [01:48<16:00, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:01:31.594Z"}
data: {"data":"\r 10%|β–ˆ | 51/501 [01:50<15:49, 2.11s/it]\u001b[A","timestamp":"2026-04-25T20:01:33.856Z"}
data: {"data":"\r 10%|β–ˆ | 52/501 [01:53<16:07, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:01:35.923Z"}
data: {"data":"\r 11%|β–ˆ | 53/501 [01:55<15:53, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:01:38.180Z"}
data: {"data":"\r 11%|β–ˆ | 54/501 [01:57<16:08, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:01:40.440Z"}
data: {"data":"\r 11%|β–ˆ | 55/501 [01:59<16:18, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:01:42.519Z"}
data: {"data":"\r 11%|β–ˆ | 56/501 [02:01<16:01, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:01:44.584Z"}
data: {"data":"\r 11%|β–ˆβ– | 57/501 [02:03<15:46, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:01:46.852Z"}
data: {"data":"\r 12%|β–ˆβ– | 58/501 [02:06<16:02, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:01:48.922Z"}
data: {"data":"\r 12%|β–ˆβ– | 59/501 [02:08<15:46, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:01:50.980Z"}
data: {"data":"\r 12%|β–ˆβ– | 60/501 [02:10<15:33, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:01:50.981Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:01:50.981Z"}
data: {"data":"{'loss': '1.671', 'grad_norm': '0.4426', 'learning_rate': '1.764e-05', 'entropy': '1.369', 'num_tokens': '4.81e+05', 'mean_token_accuracy': '0.6733', 'epoch': '0.3593'}","timestamp":"2026-04-25T20:01:50.981Z"}
data: {"data":"\r 12%|β–ˆβ– | 60/501 [02:10<15:33, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:01:53.242Z"}
data: {"data":"\r 12%|β–ˆβ– | 61/501 [02:12<15:50, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:01:55.296Z"}
data: {"data":"\r 12%|β–ˆβ– | 62/501 [02:14<15:34, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:01:57.563Z"}
data: {"data":"\r 13%|β–ˆβ–Ž | 63/501 [02:16<15:50, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:01:59.836Z"}
data: {"data":"\r 13%|β–ˆβ–Ž | 64/501 [02:19<16:01, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:02:02.104Z"}
data: {"data":"\r 13%|β–ˆβ–Ž | 65/501 [02:21<16:08, 2.22s/it]\u001b[A","timestamp":"2026-04-25T20:02:04.167Z"}
data: {"data":"\r 13%|β–ˆβ–Ž | 66/501 [02:23<15:45, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:02:06.225Z"}
data: {"data":"\r 13%|β–ˆβ–Ž | 67/501 [02:25<15:28, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:02:08.503Z"}
data: {"data":"\r 14%|β–ˆβ–Ž | 68/501 [02:27<15:44, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:02:10.563Z"}
data: {"data":"\r 14%|β–ˆβ– | 69/501 [02:29<15:26, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:02:12.626Z"}
data: {"data":"\r 14%|β–ˆβ– | 70/501 [02:31<15:13, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:02:12.626Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:02:12.626Z"}
data: {"data":"{'loss': '1.621', 'grad_norm': '0.4408', 'learning_rate': '1.725e-05', 'entropy': '1.402', 'num_tokens': '5.606e+05', 'mean_token_accuracy': '0.6754', 'epoch': '0.4192'}","timestamp":"2026-04-25T20:02:12.626Z"}
data: {"data":"\r 14%|β–ˆβ– | 70/501 [02:31<15:13, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:02:14.889Z"}
data: {"data":"\r 14%|β–ˆβ– | 71/501 [02:34<15:30, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:02:17.158Z"}
data: {"data":"\r 14%|β–ˆβ– | 72/501 [02:36<15:41, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:02:19.431Z"}
data: {"data":"\r 15%|β–ˆβ– | 73/501 [02:38<15:49, 2.22s/it]\u001b[A","timestamp":"2026-04-25T20:02:21.495Z"}
data: {"data":"\r 15%|β–ˆβ– | 74/501 [02:40<15:27, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:02:23.556Z"}
data: {"data":"\r 15%|β–ˆβ– | 75/501 [02:42<15:11, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:02:25.825Z"}
data: {"data":"\r 15%|β–ˆβ–Œ | 76/501 [02:45<15:25, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:02:28.100Z"}
data: {"data":"\r 15%|β–ˆβ–Œ | 77/501 [02:47<15:35, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:02:30.362Z"}
data: {"data":"\r 16%|β–ˆβ–Œ | 78/501 [02:49<15:40, 2.22s/it]\u001b[A","timestamp":"2026-04-25T20:02:32.629Z"}
data: {"data":"\r 16%|β–ˆβ–Œ | 79/501 [02:51<15:43, 2.24s/it]\u001b[A","timestamp":"2026-04-25T20:02:34.697Z"}
data: {"data":"\r 16%|β–ˆβ–Œ | 80/501 [02:53<15:20, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:02:34.698Z"}
data: {"data":"{'loss': '1.576', 'grad_norm': '0.3904', 'learning_rate': '1.685e-05', 'entropy': '1.434', 'num_tokens': '6.407e+05', 'mean_token_accuracy': '0.6783', 'epoch': '0.479'}","timestamp":"2026-04-25T20:02:34.698Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:02:34.698Z"}
data: {"data":"\r 16%|β–ˆβ–Œ | 80/501 [02:53<15:20, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:02:36.960Z"}
data: {"data":"\r 16%|β–ˆβ–Œ | 81/501 [02:56<15:27, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:02:39.222Z"}
data: {"data":"\r 16%|β–ˆβ–‹ | 82/501 [02:58<15:32, 2.23s/it]\u001b[A","timestamp":"2026-04-25T20:02:41.484Z"}
data: {"data":"\r 17%|β–ˆβ–‹ | 83/501 [03:00<15:34, 2.24s/it]\u001b[A","timestamp":"2026-04-25T20:02:43.748Z"}
data: {"data":"\r 17%|β–ˆβ–‹ | 84/501 [03:03<15:35, 2.24s/it]\u001b[A","timestamp":"2026-04-25T20:02:45.803Z"}
data: {"data":"\r 17%|β–ˆβ–‹ | 85/501 [03:05<15:10, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:02:48.061Z"}
data: {"data":"\r 17%|β–ˆβ–‹ | 86/501 [03:07<15:16, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:02:50.120Z"}
data: {"data":"\r 17%|β–ˆβ–‹ | 87/501 [03:09<14:55, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:02:52.397Z"}
data: {"data":"\r 18%|β–ˆβ–Š | 88/501 [03:11<15:07, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:02:54.452Z"}
data: {"data":"\r 18%|β–ˆβ–Š | 89/501 [03:13<14:47, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:02:56.713Z"}
data: {"data":"\r 18%|β–ˆβ–Š | 90/501 [03:15<14:58, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:02:56.714Z"}
data: {"data":"{'loss': '1.54', 'grad_norm': '0.4359', 'learning_rate': '1.645e-05', 'entropy': '1.451', 'num_tokens': '7.193e+05', 'mean_token_accuracy': '0.6835', 'epoch': '0.5389'}","timestamp":"2026-04-25T20:02:56.714Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:02:56.714Z"}
data: {"data":"\r 18%|β–ˆβ–Š | 90/501 [03:15<14:58, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:02:58.987Z"}
data: {"data":"\r 18%|β–ˆβ–Š | 91/501 [03:18<15:07, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:03:01.050Z"}
data: {"data":"\r 18%|β–ˆβ–Š | 92/501 [03:20<14:46, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:03:03.118Z"}
data: {"data":"\r 19%|β–ˆβ–Š | 93/501 [03:22<14:32, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:03:05.189Z"}
data: {"data":"\r 19%|β–ˆβ–‰ | 94/501 [03:24<14:22, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:03:07.261Z"}
data: {"data":"\r 19%|β–ˆβ–‰ | 95/501 [03:26<14:14, 2.10s/it]\u001b[A","timestamp":"2026-04-25T20:03:09.323Z"}
data: {"data":"\r 19%|β–ˆβ–‰ | 96/501 [03:28<14:07, 2.09s/it]\u001b[A","timestamp":"2026-04-25T20:03:11.383Z"}
data: {"data":"\r 19%|β–ˆβ–‰ | 97/501 [03:30<14:01, 2.08s/it]\u001b[A","timestamp":"2026-04-25T20:03:13.648Z"}
data: {"data":"\r 20%|β–ˆβ–‰ | 98/501 [03:32<14:21, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:03:15.707Z"}
data: {"data":"\r 20%|β–ˆβ–‰ | 99/501 [03:34<14:09, 2.11s/it]\u001b[A","timestamp":"2026-04-25T20:03:17.779Z"}
data: {"data":"\r 20%|β–ˆβ–‰ | 100/501 [03:37<14:02, 2.10s/it]\u001b[A","timestamp":"2026-04-25T20:03:17.779Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:03:17.779Z"}
data: {"data":"{'loss': '1.461', 'grad_norm': '0.4393', 'learning_rate': '1.605e-05', 'entropy': '1.456', 'num_tokens': '7.998e+05', 'mean_token_accuracy': '0.6981', 'epoch': '0.5988'}","timestamp":"2026-04-25T20:03:17.779Z"}
data: {"data":"\r 20%|β–ˆβ–‰ | 100/501 [03:37<14:02, 2.10s/it]\u001b[A","timestamp":"2026-04-25T20:03:19.846Z"}
data: {"data":"\r 20%|β–ˆβ–ˆ | 101/501 [03:39<13:56, 2.09s/it]\u001b[A","timestamp":"2026-04-25T20:03:22.109Z"}
data: {"data":"\r 20%|β–ˆβ–ˆ | 102/501 [03:41<14:14, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:03:24.372Z"}
data: {"data":"\r 21%|β–ˆβ–ˆ | 103/501 [03:43<14:27, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:03:26.434Z"}
data: {"data":"\r 21%|β–ˆβ–ˆ | 104/501 [03:45<14:11, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:03:28.693Z"}
data: {"data":"\r 21%|β–ˆβ–ˆ | 105/501 [03:47<14:22, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:03:30.957Z"}
data: {"data":"\r 21%|β–ˆβ–ˆ | 106/501 [03:50<14:30, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:03:33.012Z"}
data: {"data":"\r 21%|β–ˆβ–ˆβ– | 107/501 [03:52<14:10, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:03:35.291Z"}
data: {"data":"\r 22%|β–ˆβ–ˆβ– | 108/501 [03:54<14:22, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:03:37.354Z"}
data: {"data":"\r 22%|β–ˆβ–ˆβ– | 109/501 [03:56<14:04, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:03:39.421Z"}
data: {"data":"\r 22%|β–ˆβ–ˆβ– | 110/501 [03:58<13:52, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:03:39.421Z"}
data: {"data":"{'loss': '1.413', 'grad_norm': '0.503', 'learning_rate': '1.565e-05', 'entropy': '1.44', 'num_tokens': '8.798e+05', 'mean_token_accuracy': '0.7075', 'epoch': '0.6587'}","timestamp":"2026-04-25T20:03:39.421Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:03:39.421Z"}
data: {"data":"\r 22%|β–ˆβ–ˆβ– | 110/501 [03:58<13:52, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:03:41.689Z"}
data: {"data":"\r 22%|β–ˆβ–ˆβ– | 111/501 [04:00<14:06, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:03:43.958Z"}
data: {"data":"\r 22%|β–ˆβ–ˆβ– | 112/501 [04:03<14:15, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:03:46.023Z"}
data: {"data":"\r 23%|β–ˆβ–ˆβ–Ž | 113/501 [04:05<13:57, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:03:48.092Z"}
data: {"data":"\r 23%|β–ˆβ–ˆβ–Ž | 114/501 [04:07<13:45, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:03:50.366Z"}
data: {"data":"\r 23%|β–ˆβ–ˆβ–Ž | 115/501 [04:09<13:59, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:03:52.638Z"}
data: {"data":"\r 23%|β–ˆβ–ˆβ–Ž | 116/501 [04:11<14:08, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:03:54.907Z"}
data: {"data":"\r 23%|β–ˆβ–ˆβ–Ž | 117/501 [04:14<14:13, 2.22s/it]\u001b[A","timestamp":"2026-04-25T20:03:57.174Z"}
data: {"data":"\r 24%|β–ˆβ–ˆβ–Ž | 118/501 [04:16<14:16, 2.24s/it]\u001b[A","timestamp":"2026-04-25T20:03:59.238Z"}
data: {"data":"\r 24%|β–ˆβ–ˆβ– | 119/501 [04:18<13:54, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:04:01.295Z"}
data: {"data":"\r 24%|β–ˆβ–ˆβ– | 120/501 [04:20<13:37, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:04:01.296Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:04:01.296Z"}
data: {"data":"{'loss': '1.351', 'grad_norm': '0.6165', 'learning_rate': '1.525e-05', 'entropy': '1.411', 'num_tokens': '9.6e+05', 'mean_token_accuracy': '0.7185', 'epoch': '0.7186'}","timestamp":"2026-04-25T20:04:01.296Z"}
data: {"data":"\r 24%|β–ˆβ–ˆβ– | 120/501 [04:20<13:37, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:04:03.562Z"}
data: {"data":"\r 24%|β–ˆβ–ˆβ– | 121/501 [04:22<13:49, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:04:05.626Z"}
data: {"data":"\r 24%|β–ˆβ–ˆβ– | 122/501 [04:24<13:33, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:04:07.697Z"}
data: {"data":"\r 25%|β–ˆβ–ˆβ– | 123/501 [04:26<13:23, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:04:09.964Z"}
data: {"data":"\r 25%|β–ˆβ–ˆβ– | 124/501 [04:29<13:37, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:04:12.232Z"}
data: {"data":"\r 25%|β–ˆβ–ˆβ– | 125/501 [04:31<13:46, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:04:14.500Z"}
data: {"data":"\r 25%|β–ˆβ–ˆβ–Œ | 126/501 [04:33<13:51, 2.22s/it]\u001b[A","timestamp":"2026-04-25T20:04:16.773Z"}
data: {"data":"\r 25%|β–ˆβ–ˆβ–Œ | 127/501 [04:36<13:55, 2.23s/it]\u001b[A","timestamp":"2026-04-25T20:04:19.045Z"}
data: {"data":"\r 26%|β–ˆβ–ˆβ–Œ | 128/501 [04:38<13:57, 2.25s/it]\u001b[A","timestamp":"2026-04-25T20:04:21.317Z"}
data: {"data":"\r 26%|β–ˆβ–ˆβ–Œ | 129/501 [04:40<13:58, 2.25s/it]\u001b[A","timestamp":"2026-04-25T20:04:23.387Z"}
data: {"data":"\r 26%|β–ˆβ–ˆβ–Œ | 130/501 [04:42<13:35, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:04:23.387Z"}
data: {"data":"{'loss': '1.286', 'grad_norm': '0.6028', 'learning_rate': '1.485e-05', 'entropy': '1.34', 'num_tokens': '1.039e+06', 'mean_token_accuracy': '0.7231', 'epoch': '0.7784'}","timestamp":"2026-04-25T20:04:23.387Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:04:23.387Z"}
data: {"data":"\r 26%|β–ˆβ–ˆβ–Œ | 130/501 [04:42<13:35, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:04:25.442Z"}
data: {"data":"\r 26%|β–ˆβ–ˆβ–Œ | 131/501 [04:44<13:17, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:04:27.707Z"}
data: {"data":"\r 26%|β–ˆβ–ˆβ–‹ | 132/501 [04:46<13:27, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:04:29.979Z"}
data: {"data":"\r 27%|β–ˆβ–ˆβ–‹ | 133/501 [04:49<13:34, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:04:32.049Z"}
data: {"data":"\r 27%|β–ˆβ–ˆβ–‹ | 134/501 [04:51<13:16, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:04:34.120Z"}
data: {"data":"\r 27%|β–ˆβ–ˆβ–‹ | 135/501 [04:53<13:03, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:04:36.398Z"}
data: {"data":"\r 27%|β–ˆβ–ˆβ–‹ | 136/501 [04:55<13:16, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:04:38.468Z"}
data: {"data":"\r 27%|β–ˆβ–ˆβ–‹ | 137/501 [04:57<13:01, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:04:40.536Z"}
data: {"data":"\r 28%|β–ˆβ–ˆβ–Š | 138/501 [04:59<12:51, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:04:42.589Z"}
data: {"data":"\r 28%|β–ˆβ–ˆβ–Š | 139/501 [05:01<12:41, 2.10s/it]\u001b[A","timestamp":"2026-04-25T20:04:44.855Z"}
data: {"data":"\r 28%|β–ˆβ–ˆβ–Š | 140/501 [05:04<12:56, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:04:44.855Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:04:44.855Z"}
data: {"data":"{'loss': '1.206', 'grad_norm': '0.7572', 'learning_rate': '1.445e-05', 'entropy': '1.26', 'num_tokens': '1.119e+06', 'mean_token_accuracy': '0.73', 'epoch': '0.8383'}","timestamp":"2026-04-25T20:04:44.855Z"}
data: {"data":"\r 28%|β–ˆβ–ˆβ–Š | 140/501 [05:04<12:56, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:04:47.115Z"}
data: {"data":"\r 28%|β–ˆβ–ˆβ–Š | 141/501 [05:06<13:06, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:04:49.390Z"}
data: {"data":"\r 28%|β–ˆβ–ˆβ–Š | 142/501 [05:08<13:13, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:04:51.457Z"}
data: {"data":"\r 29%|β–ˆβ–ˆβ–Š | 143/501 [05:10<12:56, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:04:53.519Z"}
data: {"data":"\r 29%|β–ˆβ–ˆβ–Š | 144/501 [05:12<12:42, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:04:55.795Z"}
data: {"data":"\r 29%|β–ˆβ–ˆβ–‰ | 145/501 [05:15<12:55, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:04:58.075Z"}
data: {"data":"\r 29%|β–ˆβ–ˆβ–‰ | 146/501 [05:17<13:04, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:05:00.131Z"}
data: {"data":"\r 29%|β–ˆβ–ˆβ–‰ | 147/501 [05:19<12:45, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:05:02.195Z"}
data: {"data":"\r 30%|β–ˆβ–ˆβ–‰ | 148/501 [05:21<12:33, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:05:04.255Z"}
data: {"data":"\r 30%|β–ˆβ–ˆβ–‰ | 149/501 [05:23<12:23, 2.11s/it]\u001b[A","timestamp":"2026-04-25T20:05:06.322Z"}
data: {"data":"\r 30%|β–ˆβ–ˆβ–‰ | 150/501 [05:25<12:16, 2.10s/it]\u001b[A","timestamp":"2026-04-25T20:05:06.323Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:05:06.323Z"}
data: {"data":"{'loss': '1.088', 'grad_norm': '0.724', 'learning_rate': '1.405e-05', 'entropy': '1.175', 'num_tokens': '1.199e+06', 'mean_token_accuracy': '0.7461', 'epoch': '0.8982'}","timestamp":"2026-04-25T20:05:06.323Z"}
data: {"data":"\r 30%|β–ˆβ–ˆβ–‰ | 150/501 [05:25<12:16, 2.10s/it]\u001b[A","timestamp":"2026-04-25T20:05:08.387Z"}
data: {"data":"\r 30%|β–ˆβ–ˆβ–ˆ | 151/501 [05:27<12:10, 2.09s/it]\u001b[A","timestamp":"2026-04-25T20:05:10.652Z"}
data: {"data":"\r 30%|β–ˆβ–ˆβ–ˆ | 152/501 [05:29<12:27, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:05:12.709Z"}
data: {"data":"\r 31%|β–ˆβ–ˆβ–ˆ | 153/501 [05:31<12:16, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:05:14.968Z"}
data: {"data":"\r 31%|β–ˆβ–ˆβ–ˆ | 154/501 [05:34<12:29, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:05:17.031Z"}
data: {"data":"\r 31%|β–ˆβ–ˆβ–ˆ | 155/501 [05:36<12:17, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:05:19.300Z"}
data: {"data":"\r 31%|β–ˆβ–ˆβ–ˆ | 156/501 [05:38<12:29, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:05:21.360Z"}
data: {"data":"\r 31%|β–ˆβ–ˆβ–ˆβ– | 157/501 [05:40<12:15, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:05:23.413Z"}
data: {"data":"\r 32%|β–ˆβ–ˆβ–ˆβ– | 158/501 [05:42<12:04, 2.11s/it]\u001b[A","timestamp":"2026-04-25T20:05:25.681Z"}
data: {"data":"\r 32%|β–ˆβ–ˆβ–ˆβ– | 159/501 [05:44<12:18, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:05:27.941Z"}
data: {"data":"\r 32%|β–ˆβ–ˆβ–ˆβ– | 160/501 [05:47<12:26, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:05:27.941Z"}
data: {"data":"{'loss': '0.9598', 'grad_norm': '1.037', 'learning_rate': '1.365e-05', 'entropy': '1.044', 'num_tokens': '1.28e+06', 'mean_token_accuracy': '0.7734', 'epoch': '0.9581'}","timestamp":"2026-04-25T20:05:27.941Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:05:27.941Z"}
data: {"data":"\r 32%|β–ˆβ–ˆβ–ˆβ– | 160/501 [05:47<12:26, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:05:30.214Z"}
data: {"data":"\r 32%|β–ˆβ–ˆβ–ˆβ– | 161/501 [05:49<12:32, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:05:32.279Z"}
data: {"data":"\r 32%|β–ˆβ–ˆβ–ˆβ– | 162/501 [05:51<12:15, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:05:34.338Z"}
data: {"data":"\r 33%|β–ˆβ–ˆβ–ˆβ–Ž | 163/501 [05:53<12:02, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:05:36.602Z"}
data: {"data":"\r 33%|β–ˆβ–ˆβ–ˆβ–Ž | 164/501 [05:55<12:12, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:05:38.869Z"}
data: {"data":"\r 33%|β–ˆβ–ˆβ–ˆβ–Ž | 165/501 [05:58<12:20, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:05:40.927Z"}
data: {"data":"\r 33%|β–ˆβ–ˆβ–ˆβ–Ž | 166/501 [06:00<12:03, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:05:42.727Z"}
data: {"data":"\r 33%|β–ˆβ–ˆβ–ˆβ–Ž | 167/501 [06:01<11:25, 2.05s/it]\u001b[A","timestamp":"2026-04-25T20:05:44.815Z"}
data: {"data":"\r 34%|β–ˆβ–ˆβ–ˆβ–Ž | 168/501 [06:04<11:26, 2.06s/it]\u001b[A","timestamp":"2026-04-25T20:05:46.873Z"}
data: {"data":"\r 34%|β–ˆβ–ˆβ–ˆβ–Ž | 169/501 [06:06<11:24, 2.06s/it]\u001b[A","timestamp":"2026-04-25T20:05:49.144Z"}
data: {"data":"\r 34%|β–ˆβ–ˆβ–ˆβ– | 170/501 [06:08<11:43, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:05:49.144Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:05:49.144Z"}
data: {"data":"{'loss': '0.8251', 'grad_norm': '0.9347', 'learning_rate': '1.325e-05', 'entropy': '0.9091', 'num_tokens': '1.36e+06', 'mean_token_accuracy': '0.8098', 'epoch': '1.018'}","timestamp":"2026-04-25T20:05:49.144Z"}
data: {"data":"\r 34%|β–ˆβ–ˆβ–ˆβ– | 170/501 [06:08<11:43, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:05:51.415Z"}
data: {"data":"\r 34%|β–ˆβ–ˆβ–ˆβ– | 171/501 [06:10<11:55, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:05:53.475Z"}
data: {"data":"\r 34%|β–ˆβ–ˆβ–ˆβ– | 172/501 [06:12<11:42, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:05:55.743Z"}
data: {"data":"\r 35%|β–ˆβ–ˆβ–ˆβ– | 173/501 [06:14<11:53, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:05:57.804Z"}
data: {"data":"\r 35%|β–ˆβ–ˆβ–ˆβ– | 174/501 [06:17<11:40, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:06:00.071Z"}
data: {"data":"\r 35%|β–ˆβ–ˆβ–ˆβ– | 175/501 [06:19<11:50, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:06:02.328Z"}
data: {"data":"\r 35%|β–ˆβ–ˆβ–ˆβ–Œ | 176/501 [06:21<11:55, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:06:04.597Z"}
data: {"data":"\r 35%|β–ˆβ–ˆβ–ˆβ–Œ | 177/501 [06:23<12:00, 2.22s/it]\u001b[A","timestamp":"2026-04-25T20:06:06.657Z"}
data: {"data":"\r 36%|β–ˆβ–ˆβ–ˆβ–Œ | 178/501 [06:25<11:42, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:06:08.922Z"}
data: {"data":"\r 36%|β–ˆβ–ˆβ–ˆβ–Œ | 179/501 [06:28<11:48, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:06:10.981Z"}
data: {"data":"{'loss': '0.7125', 'grad_norm': '0.9415', 'learning_rate': '1.285e-05', 'entropy': '0.7939', 'num_tokens': '1.44e+06', 'mean_token_accuracy': '0.8415', 'epoch': '1.078'}","timestamp":"2026-04-25T20:06:10.981Z"}
data: {"data":"\r 36%|β–ˆβ–ˆβ–ˆβ–Œ | 180/501 [06:30<11:32, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:06:10.981Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:06:10.981Z"}
data: {"data":"\r 36%|β–ˆβ–ˆβ–ˆβ–Œ | 180/501 [06:30<11:32, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:06:13.250Z"}
data: {"data":"\r 36%|β–ˆβ–ˆβ–ˆβ–Œ | 181/501 [06:32<11:41, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:06:15.316Z"}
data: {"data":"\r 36%|β–ˆβ–ˆβ–ˆβ–‹ | 182/501 [06:34<11:27, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:06:17.371Z"}
data: {"data":"\r 37%|β–ˆβ–ˆβ–ˆβ–‹ | 183/501 [06:36<11:15, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:06:19.636Z"}
data: {"data":"\r 37%|β–ˆβ–ˆβ–ˆβ–‹ | 184/501 [06:38<11:26, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:06:21.901Z"}
data: {"data":"\r 37%|β–ˆβ–ˆβ–ˆβ–‹ | 185/501 [06:41<11:33, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:06:23.962Z"}
data: {"data":"\r 37%|β–ˆβ–ˆβ–ˆβ–‹ | 186/501 [06:43<11:18, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:06:26.335Z"}
data: {"data":"\r 37%|β–ˆβ–ˆβ–ˆβ–‹ | 187/501 [06:45<11:37, 2.22s/it]\u001b[A","timestamp":"2026-04-25T20:06:28.598Z"}
data: {"data":"\r 38%|β–ˆβ–ˆβ–ˆβ–Š | 188/501 [06:47<11:39, 2.23s/it]\u001b[A","timestamp":"2026-04-25T20:06:30.862Z"}
data: {"data":"\r 38%|β–ˆβ–ˆβ–ˆβ–Š | 189/501 [06:50<11:39, 2.24s/it]\u001b[A","timestamp":"2026-04-25T20:06:33.129Z"}
data: {"data":"\r 38%|β–ˆβ–ˆβ–ˆβ–Š | 190/501 [06:52<11:39, 2.25s/it]\u001b[A","timestamp":"2026-04-25T20:06:33.129Z"}
data: {"data":"{'loss': '0.5916', 'grad_norm': '0.7555', 'learning_rate': '1.246e-05', 'entropy': '0.6653', 'num_tokens': '1.52e+06', 'mean_token_accuracy': '0.8737', 'epoch': '1.138'}","timestamp":"2026-04-25T20:06:33.129Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:06:33.129Z"}
data: {"data":"\r 38%|β–ˆβ–ˆβ–ˆβ–Š | 190/501 [06:52<11:39, 2.25s/it]\u001b[A","timestamp":"2026-04-25T20:06:35.184Z"}
data: {"data":"\r 38%|β–ˆβ–ˆβ–ˆβ–Š | 191/501 [06:54<11:19, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:06:37.443Z"}
data: {"data":"\r 38%|β–ˆβ–ˆβ–ˆβ–Š | 192/501 [06:56<11:23, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:06:39.713Z"}
data: {"data":"\r 39%|β–ˆβ–ˆβ–ˆβ–Š | 193/501 [06:58<11:26, 2.23s/it]\u001b[A","timestamp":"2026-04-25T20:06:41.770Z"}
data: {"data":"\r 39%|β–ˆβ–ˆβ–ˆβ–Š | 194/501 [07:01<11:08, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:06:44.029Z"}
data: {"data":"\r 39%|β–ˆβ–ˆβ–ˆβ–‰ | 195/501 [07:03<11:13, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:06:46.280Z"}
data: {"data":"\r 39%|β–ˆβ–ˆβ–ˆβ–‰ | 196/501 [07:05<11:16, 2.22s/it]\u001b[A","timestamp":"2026-04-25T20:06:48.545Z"}
data: {"data":"\r 39%|β–ˆβ–ˆβ–ˆβ–‰ | 197/501 [07:07<11:18, 2.23s/it]\u001b[A","timestamp":"2026-04-25T20:06:50.614Z"}
data: {"data":"\r 40%|β–ˆβ–ˆβ–ˆβ–‰ | 198/501 [07:09<11:01, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:06:52.679Z"}
data: {"data":"\r 40%|β–ˆβ–ˆβ–ˆβ–‰ | 199/501 [07:11<10:48, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:06:54.741Z"}
data: {"data":"\r 40%|β–ˆβ–ˆβ–ˆβ–‰ | 200/501 [07:13<10:38, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:06:54.741Z"}
data: {"data":"{'loss': '0.5276', 'grad_norm': '0.7433', 'learning_rate': '1.206e-05', 'entropy': '0.5887', 'num_tokens': '1.6e+06', 'mean_token_accuracy': '0.888', 'epoch': '1.198'}","timestamp":"2026-04-25T20:06:54.741Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:06:54.741Z"}
data: {"data":"\r 40%|β–ˆβ–ˆβ–ˆβ–‰ | 200/501 [07:13<10:38, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:06:57.007Z"}
data: {"data":"\r 40%|β–ˆβ–ˆβ–ˆβ–ˆ | 201/501 [07:16<10:49, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:06:59.284Z"}
data: {"data":"\r 40%|β–ˆβ–ˆβ–ˆβ–ˆ | 202/501 [07:18<10:57, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:07:01.346Z"}
data: {"data":"\r 41%|β–ˆβ–ˆβ–ˆβ–ˆ | 203/501 [07:20<10:43, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:07:03.403Z"}
data: {"data":"\r 41%|β–ˆβ–ˆβ–ˆβ–ˆ | 204/501 [07:22<10:31, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:07:05.675Z"}
data: {"data":"\r 41%|β–ˆβ–ˆβ–ˆβ–ˆ | 205/501 [07:24<10:42, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:07:07.742Z"}
data: {"data":"\r 41%|β–ˆβ–ˆβ–ˆβ–ˆ | 206/501 [07:26<10:31, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:07:10.007Z"}
data: {"data":"\r 41%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 207/501 [07:29<10:40, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:07:12.274Z"}
data: {"data":"\r 42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 208/501 [07:31<10:45, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:07:14.333Z"}
data: {"data":"\r 42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 209/501 [07:33<10:30, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:07:16.600Z"}
data: {"data":"\r 42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 210/501 [07:35<10:38, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:07:16.600Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:07:16.600Z"}
data: {"data":"{'loss': '0.4512', 'grad_norm': '0.6285', 'learning_rate': '1.166e-05', 'entropy': '0.508', 'num_tokens': '1.68e+06', 'mean_token_accuracy': '0.904', 'epoch': '1.257'}","timestamp":"2026-04-25T20:07:16.600Z"}
data: {"data":"\r 42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 210/501 [07:35<10:38, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:07:18.662Z"}
data: {"data":"\r 42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 211/501 [07:37<10:24, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:07:20.917Z"}
data: {"data":"\r 42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 212/501 [07:40<10:31, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:07:23.193Z"}
data: {"data":"\r 43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 213/501 [07:42<10:36, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:07:25.459Z"}
data: {"data":"\r 43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 214/501 [07:44<10:39, 2.23s/it]\u001b[A","timestamp":"2026-04-25T20:07:27.720Z"}
data: {"data":"\r 43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 215/501 [07:46<10:39, 2.24s/it]\u001b[A","timestamp":"2026-04-25T20:07:29.989Z"}
data: {"data":"\r 43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 216/501 [07:49<10:40, 2.25s/it]\u001b[A","timestamp":"2026-04-25T20:07:32.049Z"}
data: {"data":"\r 43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 217/501 [07:51<10:22, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:07:34.115Z"}
data: {"data":"\r 44%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 218/501 [07:53<10:09, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:07:36.166Z"}
data: {"data":"\r 44%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 219/501 [07:55<09:58, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:07:38.429Z"}
data: {"data":"\r 44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 220/501 [07:57<10:08, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:07:38.429Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:07:38.429Z"}
data: {"data":"{'loss': '0.4343', 'grad_norm': '0.5901', 'learning_rate': '1.126e-05', 'entropy': '0.4829', 'num_tokens': '1.76e+06', 'mean_token_accuracy': '0.9058', 'epoch': '1.317'}","timestamp":"2026-04-25T20:07:38.429Z"}
data: {"data":"\r 44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 220/501 [07:57<10:08, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:07:40.684Z"}
data: {"data":"\r 44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 221/501 [07:59<10:13, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:07:42.743Z"}
data: {"data":"\r 44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 222/501 [08:01<10:00, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:07:45.007Z"}
data: {"data":"\r 45%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 223/501 [08:04<10:07, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:07:47.278Z"}
data: {"data":"\r 45%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 224/501 [08:06<10:12, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:07:49.536Z"}
data: {"data":"\r 45%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 225/501 [08:08<10:14, 2.23s/it]\u001b[A","timestamp":"2026-04-25T20:07:51.806Z"}
data: {"data":"\r 45%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 226/501 [08:11<10:15, 2.24s/it]\u001b[A","timestamp":"2026-04-25T20:07:53.866Z"}
data: {"data":"\r 45%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 227/501 [08:13<09:58, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:07:55.932Z"}
data: {"data":"\r 46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 228/501 [08:15<09:46, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:07:58.198Z"}
data: {"data":"\r 46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 229/501 [08:17<09:54, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:08:00.463Z"}
data: {"data":"\r 46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 230/501 [08:19<09:58, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:08:00.464Z"}
data: {"data":"{'loss': '0.3473', 'grad_norm': '0.4776', 'learning_rate': '1.086e-05', 'entropy': '0.4007', 'num_tokens': '1.84e+06', 'mean_token_accuracy': '0.9269', 'epoch': '1.377'}","timestamp":"2026-04-25T20:08:00.464Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:08:00.464Z"}
data: {"data":"\r 46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 230/501 [08:19<09:58, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:08:02.723Z"}
data: {"data":"\r 46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 231/501 [08:21<10:00, 2.22s/it]\u001b[A","timestamp":"2026-04-25T20:08:04.998Z"}
data: {"data":"\r 46%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 232/501 [08:24<10:02, 2.24s/it]\u001b[A","timestamp":"2026-04-25T20:08:07.058Z"}
data: {"data":"\r 47%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 233/501 [08:26<09:45, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:08:09.120Z"}
data: {"data":"\r 47%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 234/501 [08:28<09:33, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:08:11.181Z"}
data: {"data":"\r 47%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 235/501 [08:30<09:24, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:08:13.447Z"}
data: {"data":"\r 47%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 236/501 [08:32<09:33, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:08:15.514Z"}
data: {"data":"\r 47%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 237/501 [08:34<09:23, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:08:17.774Z"}
data: {"data":"\r 48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 238/501 [08:37<09:31, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:08:20.046Z"}
data: {"data":"\r 48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 239/501 [08:39<09:37, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:08:22.101Z"}
data: {"data":"\r 48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 240/501 [08:41<09:23, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:08:22.101Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:08:22.101Z"}
data: {"data":"{'loss': '0.3489', 'grad_norm': '0.4805', 'learning_rate': '1.046e-05', 'entropy': '0.3968', 'num_tokens': '1.919e+06', 'mean_token_accuracy': '0.9259', 'epoch': '1.437'}","timestamp":"2026-04-25T20:08:22.102Z"}
data: {"data":"\r 48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 240/501 [08:41<09:23, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:08:24.356Z"}
data: {"data":"\r 48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 241/501 [08:43<09:28, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:08:26.624Z"}
data: {"data":"\r 48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 242/501 [08:45<09:32, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:08:28.685Z"}
data: {"data":"\r 49%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 243/501 [08:47<09:18, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:08:30.745Z"}
data: {"data":"\r 49%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 244/501 [08:50<09:08, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:08:32.802Z"}
data: {"data":"\r 49%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 245/501 [08:52<09:00, 2.11s/it]\u001b[A","timestamp":"2026-04-25T20:08:35.061Z"}
data: {"data":"\r 49%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 246/501 [08:54<09:09, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:08:37.321Z"}
data: {"data":"\r 49%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 247/501 [08:56<09:15, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:08:39.591Z"}
data: {"data":"\r 50%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 248/501 [08:58<09:19, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:08:41.656Z"}
data: {"data":"\r 50%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 249/501 [09:00<09:06, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:08:43.719Z"}
data: {"data":"\r 50%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 250/501 [09:02<08:56, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:08:43.719Z"}
data: {"data":"{'loss': '0.3167', 'grad_norm': '0.4296', 'learning_rate': '1.006e-05', 'entropy': '0.3645', 'num_tokens': '1.999e+06', 'mean_token_accuracy': '0.9326', 'epoch': '1.497'}","timestamp":"2026-04-25T20:08:43.719Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:08:43.719Z"}
data: {"data":"\r 50%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 250/501 [09:02<08:56, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:08:45.770Z"}
data: {"data":"\r 50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 251/501 [09:05<08:47, 2.11s/it]\u001b[A","timestamp":"2026-04-25T20:08:48.044Z"}
data: {"data":"\r 50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 252/501 [09:07<08:57, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:08:50.111Z"}
data: {"data":"\r 50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 253/501 [09:09<08:48, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:08:52.384Z"}
data: {"data":"\r 51%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 254/501 [09:11<08:57, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:08:54.450Z"}
data: {"data":"\r 51%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 255/501 [09:13<08:46, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:08:56.519Z"}
data: {"data":"\r 51%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 256/501 [09:15<08:39, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:08:58.584Z"}
data: {"data":"\r 51%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 257/501 [09:17<08:33, 2.10s/it]\u001b[A","timestamp":"2026-04-25T20:09:00.854Z"}
data: {"data":"\r 51%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 258/501 [09:20<08:43, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:09:02.913Z"}
data: {"data":"\r 52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 259/501 [09:22<08:34, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:09:05.170Z"}
data: {"data":"{'loss': '0.2932', 'grad_norm': '0.8133', 'learning_rate': '9.661e-06', 'entropy': '0.3425', 'num_tokens': '2.08e+06', 'mean_token_accuracy': '0.9361', 'epoch': '1.557'}","timestamp":"2026-04-25T20:09:05.171Z"}
data: {"data":"\r 52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 260/501 [09:24<08:41, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:09:05.171Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:09:05.171Z"}
data: {"data":"\r 52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 260/501 [09:24<08:41, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:09:07.436Z"}
data: {"data":"\r 52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 261/501 [09:26<08:46, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:09:09.491Z"}
data: {"data":"\r 52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 262/501 [09:28<08:34, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:09:11.765Z"}
data: {"data":"\r 52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 263/501 [09:31<08:41, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:09:13.825Z"}
data: {"data":"\r 53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 264/501 [09:33<08:29, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:09:16.095Z"}
data: {"data":"\r 53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 265/501 [09:35<08:36, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:09:18.156Z"}
data: {"data":"\r 53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 266/501 [09:37<08:24, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:09:20.417Z"}
data: {"data":"\r 53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 267/501 [09:39<08:30, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:09:22.473Z"}
data: {"data":"\r 53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 268/501 [09:41<08:19, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:09:24.535Z"}
data: {"data":"\r 54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 269/501 [09:43<08:11, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:09:26.588Z"}
data: {"data":"\r 54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 270/501 [09:45<08:05, 2.10s/it]\u001b[A","timestamp":"2026-04-25T20:09:26.588Z"}
data: {"data":"{'loss': '0.3099', 'grad_norm': '0.3904', 'learning_rate': '9.261e-06', 'entropy': '0.3643', 'num_tokens': '2.16e+06', 'mean_token_accuracy': '0.9327', 'epoch': '1.617'}","timestamp":"2026-04-25T20:09:26.588Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:09:26.588Z"}
data: {"data":"\r 54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 270/501 [09:45<08:05, 2.10s/it]\u001b[A","timestamp":"2026-04-25T20:09:28.860Z"}
data: {"data":"\r 54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 271/501 [09:48<08:14, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:09:30.912Z"}
data: {"data":"\r 54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 272/501 [09:50<08:05, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:09:33.179Z"}
data: {"data":"\r 54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 273/501 [09:52<08:13, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:09:35.239Z"}
data: {"data":"\r 55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 274/501 [09:54<08:04, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:09:37.302Z"}
data: {"data":"\r 55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 275/501 [09:56<07:57, 2.11s/it]\u001b[A","timestamp":"2026-04-25T20:09:39.361Z"}
data: {"data":"\r 55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 276/501 [09:58<07:51, 2.10s/it]\u001b[A","timestamp":"2026-04-25T20:09:41.627Z"}
data: {"data":"\r 55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 277/501 [10:00<08:01, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:09:43.885Z"}
data: {"data":"\r 55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 278/501 [10:03<08:06, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:09:46.141Z"}
data: {"data":"\r 56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 279/501 [10:05<08:09, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:09:48.203Z"}
data: {"data":"\r 56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 280/501 [10:07<07:57, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:09:48.203Z"}
data: {"data":"{'loss': '0.2672', 'grad_norm': '0.5745', 'learning_rate': '8.862e-06', 'entropy': '0.3198', 'num_tokens': '2.24e+06', 'mean_token_accuracy': '0.9451', 'epoch': '1.677'}","timestamp":"2026-04-25T20:09:48.203Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:09:48.204Z"}
data: {"data":"\r 56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 280/501 [10:07<07:57, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:09:50.264Z"}
data: {"data":"\r 56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 281/501 [10:09<07:48, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:09:52.528Z"}
data: {"data":"\r 56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 282/501 [10:11<07:55, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:09:54.584Z"}
data: {"data":"\r 56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 283/501 [10:13<07:45, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:09:56.838Z"}
data: {"data":"\r 57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 284/501 [10:16<07:51, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:09:59.095Z"}
data: {"data":"\r 57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 285/501 [10:18<07:54, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:10:01.140Z"}
data: {"data":"\r 57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 286/501 [10:20<07:42, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:10:03.399Z"}
data: {"data":"\r 57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 287/501 [10:22<07:47, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:10:05.663Z"}
data: {"data":"\r 57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 288/501 [10:24<07:50, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:10:07.725Z"}
data: {"data":"\r 58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 289/501 [10:26<07:38, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:10:09.782Z"}
data: {"data":"\r 58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 290/501 [10:29<07:29, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:10:09.783Z"}
data: {"data":"{'loss': '0.2567', 'grad_norm': '0.3679', 'learning_rate': '8.463e-06', 'entropy': '0.3086', 'num_tokens': '2.32e+06', 'mean_token_accuracy': '0.9474', 'epoch': '1.737'}","timestamp":"2026-04-25T20:10:09.783Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:10:09.783Z"}
data: {"data":"\r 58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 290/501 [10:29<07:29, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:10:11.849Z"}
data: {"data":"\r 58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 291/501 [10:31<07:23, 2.11s/it]\u001b[A","timestamp":"2026-04-25T20:10:13.903Z"}
data: {"data":"\r 58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 292/501 [10:33<07:17, 2.09s/it]\u001b[A","timestamp":"2026-04-25T20:10:16.173Z"}
data: {"data":"\r 58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 293/501 [10:35<07:26, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:10:18.227Z"}
data: {"data":"\r 59%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 294/501 [10:37<07:18, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:10:20.491Z"}
data: {"data":"\r 59%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 295/501 [10:39<07:25, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:10:22.558Z"}
data: {"data":"\r 59%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 296/501 [10:41<07:17, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:10:24.625Z"}
data: {"data":"\r 59%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 297/501 [10:43<07:11, 2.11s/it]\u001b[A","timestamp":"2026-04-25T20:10:26.684Z"}
data: {"data":"\r 59%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 298/501 [10:45<07:05, 2.10s/it]\u001b[A","timestamp":"2026-04-25T20:10:28.948Z"}
data: {"data":"\r 60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 299/501 [10:48<07:13, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:10:31.007Z"}
data: {"data":"{'loss': '0.2163', 'grad_norm': '0.4672', 'learning_rate': '8.064e-06', 'entropy': '0.2649', 'num_tokens': '2.401e+06', 'mean_token_accuracy': '0.9553', 'epoch': '1.796'}","timestamp":"2026-04-25T20:10:31.007Z"}
data: {"data":"\r 60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 300/501 [10:50<07:06, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:10:31.007Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:10:31.008Z"}
data: {"data":"\r 60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 300/501 [10:50<07:06, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:10:33.063Z"}
data: {"data":"\r 60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 301/501 [10:52<07:00, 2.10s/it]\u001b[A","timestamp":"2026-04-25T20:10:35.334Z"}
data: {"data":"\r 60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 302/501 [10:54<07:08, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:10:37.601Z"}
data: {"data":"\r 60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 303/501 [10:56<07:12, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:10:39.658Z"}
data: {"data":"\r 61%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 304/501 [10:58<07:03, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:10:41.922Z"}
data: {"data":"\r 61%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 305/501 [11:01<07:07, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:10:44.191Z"}
data: {"data":"\r 61%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 306/501 [11:03<07:10, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:10:46.254Z"}
data: {"data":"\r 61%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 307/501 [11:05<07:00, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:10:48.314Z"}
data: {"data":"\r 61%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 308/501 [11:07<06:51, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:10:50.583Z"}
data: {"data":"\r 62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 309/501 [11:09<06:57, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:10:52.646Z"}
data: {"data":"\r 62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 310/501 [11:11<06:48, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:10:52.647Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:10:52.647Z"}
data: {"data":"{'loss': '0.2175', 'grad_norm': '0.5738', 'learning_rate': '7.665e-06', 'entropy': '0.2677', 'num_tokens': '2.481e+06', 'mean_token_accuracy': '0.9549', 'epoch': '1.856'}","timestamp":"2026-04-25T20:10:52.647Z"}
data: {"data":"\r 62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 310/501 [11:11<06:48, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:10:54.914Z"}
data: {"data":"\r 62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 311/501 [11:14<06:53, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:10:56.981Z"}
data: {"data":"\r 62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 312/501 [11:16<06:45, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:10:59.246Z"}
data: {"data":"\r 62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 313/501 [11:18<06:50, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:11:01.508Z"}
data: {"data":"\r 63%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 314/501 [11:20<06:52, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:11:03.574Z"}
data: {"data":"\r 63%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 315/501 [11:22<06:42, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:11:05.642Z"}
data: {"data":"\r 63%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 316/501 [11:24<06:34, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:11:07.706Z"}
data: {"data":"\r 63%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 317/501 [11:26<06:28, 2.11s/it]\u001b[A","timestamp":"2026-04-25T20:11:09.772Z"}
data: {"data":"\r 63%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 318/501 [11:29<06:24, 2.10s/it]\u001b[A","timestamp":"2026-04-25T20:11:11.842Z"}
data: {"data":"\r 64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 319/501 [11:31<06:20, 2.09s/it]\u001b[A","timestamp":"2026-04-25T20:11:14.100Z"}
data: {"data":"\r 64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 320/501 [11:33<06:27, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:11:14.100Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:11:14.100Z"}
data: {"data":"{'loss': '0.2033', 'grad_norm': '0.4641', 'learning_rate': '7.265e-06', 'entropy': '0.2481', 'num_tokens': '2.562e+06', 'mean_token_accuracy': '0.9584', 'epoch': '1.916'}","timestamp":"2026-04-25T20:11:14.100Z"}
data: {"data":"\r 64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 320/501 [11:33<06:27, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:11:16.364Z"}
data: {"data":"\r 64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 321/501 [11:35<06:31, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:11:18.421Z"}
data: {"data":"\r 64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 322/501 [11:37<06:23, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:11:20.687Z"}
data: {"data":"\r 64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 323/501 [11:39<06:27, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:11:22.942Z"}
data: {"data":"\r 65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 324/501 [11:42<06:29, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:11:25.207Z"}
data: {"data":"\r 65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 325/501 [11:44<06:30, 2.22s/it]\u001b[A","timestamp":"2026-04-25T20:11:27.468Z"}
data: {"data":"\r 65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 326/501 [11:46<06:30, 2.23s/it]\u001b[A","timestamp":"2026-04-25T20:11:29.533Z"}
data: {"data":"\r 65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 327/501 [11:48<06:19, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:11:31.811Z"}
data: {"data":"\r 65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 328/501 [11:51<06:22, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:11:33.871Z"}
data: {"data":"\r 66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 329/501 [11:53<06:12, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:11:35.936Z"}
data: {"data":"\r 66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 330/501 [11:55<06:05, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:11:35.936Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:11:35.936Z"}
data: {"data":"{'loss': '0.223', 'grad_norm': '0.5205', 'learning_rate': '6.866e-06', 'entropy': '0.2656', 'num_tokens': '2.642e+06', 'mean_token_accuracy': '0.9551', 'epoch': '1.976'}","timestamp":"2026-04-25T20:11:35.936Z"}
data: {"data":"\r 66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 330/501 [11:55<06:05, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:11:38.198Z"}
data: {"data":"\r 66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 331/501 [11:57<06:09, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:11:40.458Z"}
data: {"data":"\r 66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 332/501 [11:59<06:11, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:11:42.708Z"}
data: {"data":"\r 66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 333/501 [12:01<06:12, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:11:44.681Z"}
data: {"data":"\r 67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 334/501 [12:03<05:57, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:11:46.960Z"}
data: {"data":"\r 67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 335/501 [12:06<06:02, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:11:49.010Z"}
data: {"data":"\r 67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 336/501 [12:08<05:53, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:11:51.268Z"}
data: {"data":"\r 67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 337/501 [12:10<05:57, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:11:53.325Z"}
data: {"data":"\r 67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 338/501 [12:12<05:49, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:11:55.599Z"}
data: {"data":"\r 68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 339/501 [12:14<05:53, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:11:57.657Z"}
data: {"data":"\r 68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 340/501 [12:16<05:45, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:11:57.657Z"}
data: {"data":"{'loss': '0.2279', 'grad_norm': '0.4992', 'learning_rate': '6.467e-06', 'entropy': '0.2709', 'num_tokens': '2.72e+06', 'mean_token_accuracy': '0.9543', 'epoch': '2.036'}","timestamp":"2026-04-25T20:11:57.657Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:11:57.657Z"}
data: {"data":"\r 68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 340/501 [12:16<05:45, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:11:59.914Z"}
data: {"data":"\r 68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 341/501 [12:19<05:48, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:12:02.173Z"}
data: {"data":"\r 68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 342/501 [12:21<05:50, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:12:04.441Z"}
data: {"data":"\r 68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 343/501 [12:23<05:51, 2.22s/it]\u001b[A","timestamp":"2026-04-25T20:12:06.503Z"}
data: {"data":"\r 69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 344/501 [12:25<05:41, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:12:08.561Z"}
data: {"data":"\r 69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 345/501 [12:27<05:33, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:12:10.621Z"}
data: {"data":"\r 69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 346/501 [12:29<05:27, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:12:12.885Z"}
data: {"data":"\r 69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 347/501 [12:32<05:32, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:12:14.945Z"}
data: {"data":"\r 69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 348/501 [12:34<05:25, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:12:17.203Z"}
data: {"data":"\r 70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 349/501 [12:36<05:29, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:12:19.469Z"}
data: {"data":"\r 70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 350/501 [12:38<05:31, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:12:19.469Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:12:19.469Z"}
data: {"data":"{'loss': '0.1949', 'grad_norm': '0.6589', 'learning_rate': '6.068e-06', 'entropy': '0.2418', 'num_tokens': '2.8e+06', 'mean_token_accuracy': '0.9595', 'epoch': '2.096'}","timestamp":"2026-04-25T20:12:19.469Z"}
data: {"data":"\r 70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 350/501 [12:38<05:31, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:12:21.724Z"}
data: {"data":"\r 70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 351/501 [12:40<05:32, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:12:23.990Z"}
data: {"data":"\r 70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 352/501 [12:43<05:32, 2.23s/it]\u001b[A","timestamp":"2026-04-25T20:12:26.059Z"}
data: {"data":"\r 70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 353/501 [12:45<05:22, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:12:28.116Z"}
data: {"data":"\r 71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 354/501 [12:47<05:15, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:12:30.391Z"}
data: {"data":"\r 71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 355/501 [12:49<05:18, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:12:32.447Z"}
data: {"data":"\r 71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 356/501 [12:51<05:11, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:12:34.705Z"}
data: {"data":"\r 71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 357/501 [12:53<05:13, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:12:36.972Z"}
data: {"data":"\r 71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 358/501 [12:56<05:15, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:12:39.027Z"}
data: {"data":"\r 72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 359/501 [12:58<05:06, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:12:41.092Z"}
data: {"data":"\r 72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 360/501 [13:00<05:00, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:12:41.093Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:12:41.093Z"}
data: {"data":"{'loss': '0.1919', 'grad_norm': '0.5246', 'learning_rate': '5.669e-06', 'entropy': '0.2304', 'num_tokens': '2.879e+06', 'mean_token_accuracy': '0.9598', 'epoch': '2.156'}","timestamp":"2026-04-25T20:12:41.093Z"}
data: {"data":"\r 72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 360/501 [13:00<05:00, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:12:43.152Z"}
data: {"data":"\r 72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 361/501 [13:02<04:55, 2.11s/it]\u001b[A","timestamp":"2026-04-25T20:12:45.217Z"}
data: {"data":"\r 72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 362/501 [13:04<04:51, 2.10s/it]\u001b[A","timestamp":"2026-04-25T20:12:47.268Z"}
data: {"data":"\r 72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 363/501 [13:06<04:47, 2.08s/it]\u001b[A","timestamp":"2026-04-25T20:12:49.533Z"}
data: {"data":"\r 73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 364/501 [13:08<04:52, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:12:51.598Z"}
data: {"data":"\r 73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 365/501 [13:10<04:47, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:12:53.660Z"}
data: {"data":"\r 73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 366/501 [13:12<04:43, 2.10s/it]\u001b[A","timestamp":"2026-04-25T20:12:55.920Z"}
data: {"data":"\r 73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 367/501 [13:15<04:47, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:12:58.186Z"}
data: {"data":"\r 73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 368/501 [13:17<04:50, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:13:00.445Z"}
data: {"data":"\r 74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 369/501 [13:19<04:51, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:13:02.708Z"}
data: {"data":"\r 74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 370/501 [13:21<04:51, 2.22s/it]\u001b[A","timestamp":"2026-04-25T20:13:02.708Z"}
data: {"data":"{'loss': '0.1926', 'grad_norm': '0.4952', 'learning_rate': '5.269e-06', 'entropy': '0.2281', 'num_tokens': '2.959e+06', 'mean_token_accuracy': '0.9603', 'epoch': '2.216'}","timestamp":"2026-04-25T20:13:02.708Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:13:02.708Z"}
data: {"data":"\r 74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 370/501 [13:21<04:51, 2.22s/it]\u001b[A","timestamp":"2026-04-25T20:13:04.764Z"}
data: {"data":"\r 74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 371/501 [13:24<04:42, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:13:07.033Z"}
data: {"data":"\r 74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 372/501 [13:26<04:44, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:13:09.099Z"}
data: {"data":"\r 74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 373/501 [13:28<04:36, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:13:11.151Z"}
data: {"data":"\r 75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 374/501 [13:30<04:30, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:13:13.417Z"}
data: {"data":"\r 75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 375/501 [13:32<04:33, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:13:15.478Z"}
data: {"data":"\r 75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 376/501 [13:34<04:27, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:13:17.756Z"}
data: {"data":"\r 75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 377/501 [13:37<04:30, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:13:19.822Z"}
data: {"data":"\r 75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 378/501 [13:39<04:23, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:13:21.898Z"}
data: {"data":"\r 76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 379/501 [13:41<04:19, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:13:23.964Z"}
data: {"data":"\r 76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 380/501 [13:43<04:14, 2.11s/it]\u001b[A","timestamp":"2026-04-25T20:13:23.965Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:13:23.965Z"}
data: {"data":"{'loss': '0.179', 'grad_norm': '0.4726', 'learning_rate': '4.87e-06', 'entropy': '0.2133', 'num_tokens': '3.039e+06', 'mean_token_accuracy': '0.9628', 'epoch': '2.275'}","timestamp":"2026-04-25T20:13:23.965Z"}
data: {"data":"\r 76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 380/501 [13:43<04:14, 2.11s/it]\u001b[A","timestamp":"2026-04-25T20:13:26.030Z"}
data: {"data":"\r 76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 381/501 [13:45<04:11, 2.09s/it]\u001b[A","timestamp":"2026-04-25T20:13:28.097Z"}
data: {"data":"\r 76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 382/501 [13:47<04:08, 2.09s/it]\u001b[A","timestamp":"2026-04-25T20:13:30.158Z"}
data: {"data":"\r 76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 383/501 [13:49<04:05, 2.08s/it]\u001b[A","timestamp":"2026-04-25T20:13:32.212Z"}
data: {"data":"\r 77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 384/501 [13:51<04:02, 2.07s/it]\u001b[A","timestamp":"2026-04-25T20:13:34.624Z"}
data: {"data":"\r 77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 385/501 [13:53<04:12, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:13:36.679Z"}
data: {"data":"\r 77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 386/501 [13:55<04:05, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:13:38.948Z"}
data: {"data":"\r 77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 387/501 [13:58<04:08, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:13:41.211Z"}
data: {"data":"\r 77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 388/501 [14:00<04:08, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:13:43.268Z"}
data: {"data":"\r 78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 389/501 [14:02<04:01, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:13:45.527Z"}
data: {"data":"\r 78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 390/501 [14:04<04:02, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:13:45.527Z"}
data: {"data":"{'loss': '0.1914', 'grad_norm': '0.6392', 'learning_rate': '4.471e-06', 'entropy': '0.2287', 'num_tokens': '3.12e+06', 'mean_token_accuracy': '0.9596', 'epoch': '2.335'}","timestamp":"2026-04-25T20:13:45.527Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:13:45.527Z"}
data: {"data":"\r 78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 390/501 [14:04<04:02, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:13:47.797Z"}
data: {"data":"\r 78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 391/501 [14:07<04:03, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:13:50.061Z"}
data: {"data":"\r 78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 392/501 [14:09<04:02, 2.23s/it]\u001b[A","timestamp":"2026-04-25T20:13:52.328Z"}
data: {"data":"\r 78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 393/501 [14:11<04:01, 2.24s/it]\u001b[A","timestamp":"2026-04-25T20:13:54.599Z"}
data: {"data":"\r 79%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 394/501 [14:13<04:00, 2.25s/it]\u001b[A","timestamp":"2026-04-25T20:13:56.663Z"}
data: {"data":"\r 79%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 395/501 [14:15<03:52, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:13:58.725Z"}
data: {"data":"\r 79%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 396/501 [14:17<03:46, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:14:00.793Z"}
data: {"data":"\r 79%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 397/501 [14:20<03:41, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:14:02.860Z"}
data: {"data":"\r 79%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 398/501 [14:22<03:37, 2.11s/it]\u001b[A","timestamp":"2026-04-25T20:14:04.926Z"}
data: {"data":"\r 80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 399/501 [14:24<03:33, 2.10s/it]\u001b[A","timestamp":"2026-04-25T20:14:07.193Z"}
data: {"data":"\r 80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 400/501 [14:26<03:36, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:14:07.193Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:14:07.193Z"}
data: {"data":"{'loss': '0.1782', 'grad_norm': '0.5421', 'learning_rate': '4.072e-06', 'entropy': '0.2141', 'num_tokens': '3.201e+06', 'mean_token_accuracy': '0.962', 'epoch': '2.395'}","timestamp":"2026-04-25T20:14:07.193Z"}
data: {"data":"\r 80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 400/501 [14:26<03:36, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:14:09.459Z"}
data: {"data":"\r 80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 401/501 [14:28<03:38, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:14:11.516Z"}
data: {"data":"\r 80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 402/501 [14:30<03:32, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:14:13.787Z"}
data: {"data":"\r 80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 403/501 [14:33<03:33, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:14:16.057Z"}
data: {"data":"\r 81%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 404/501 [14:35<03:34, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:14:18.110Z"}
data: {"data":"\r 81%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 405/501 [14:37<03:27, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:14:20.380Z"}
data: {"data":"\r 81%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 406/501 [14:39<03:28, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:14:22.451Z"}
data: {"data":"\r 81%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 407/501 [14:41<03:22, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:14:24.523Z"}
data: {"data":"\r 81%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 408/501 [14:43<03:18, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:14:26.591Z"}
data: {"data":"\r 82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 409/501 [14:45<03:14, 2.11s/it]\u001b[A","timestamp":"2026-04-25T20:14:28.646Z"}
data: {"data":"\r 82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 410/501 [14:47<03:10, 2.10s/it]\u001b[A","timestamp":"2026-04-25T20:14:28.647Z"}
data: {"data":"{'loss': '0.1689', 'grad_norm': '0.4012', 'learning_rate': '3.673e-06', 'entropy': '0.2013', 'num_tokens': '3.281e+06', 'mean_token_accuracy': '0.9647', 'epoch': '2.455'}","timestamp":"2026-04-25T20:14:28.647Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:14:28.647Z"}
data: {"data":"\r 82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 410/501 [14:47<03:10, 2.10s/it]\u001b[A","timestamp":"2026-04-25T20:14:30.919Z"}
data: {"data":"\r 82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 411/501 [14:50<03:13, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:14:33.189Z"}
data: {"data":"\r 82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 412/501 [14:52<03:14, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:14:35.250Z"}
data: {"data":"\r 82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 413/501 [14:54<03:09, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:14:37.517Z"}
data: {"data":"\r 83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 414/501 [14:56<03:09, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:14:39.779Z"}
data: {"data":"\r 83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 415/501 [14:59<03:09, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:14:42.040Z"}
data: {"data":"\r 83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 416/501 [15:01<03:08, 2.22s/it]\u001b[A","timestamp":"2026-04-25T20:14:44.300Z"}
data: {"data":"\r 83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 417/501 [15:03<03:07, 2.23s/it]\u001b[A","timestamp":"2026-04-25T20:14:46.572Z"}
data: {"data":"\r 83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 418/501 [15:05<03:06, 2.25s/it]\u001b[A","timestamp":"2026-04-25T20:14:48.844Z"}
data: {"data":"\r 84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 419/501 [15:08<03:04, 2.25s/it]\u001b[A","timestamp":"2026-04-25T20:14:51.112Z"}
data: {"data":"\r 84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 420/501 [15:10<03:02, 2.26s/it]\u001b[A","timestamp":"2026-04-25T20:14:51.112Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:14:51.112Z"}
data: {"data":"{'loss': '0.184', 'grad_norm': '0.455', 'learning_rate': '3.273e-06', 'entropy': '0.2205', 'num_tokens': '3.36e+06', 'mean_token_accuracy': '0.9612', 'epoch': '2.515'}","timestamp":"2026-04-25T20:14:51.112Z"}
data: {"data":"\r 84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 420/501 [15:10<03:02, 2.26s/it]\u001b[A","timestamp":"2026-04-25T20:14:53.368Z"}
data: {"data":"\r 84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 421/501 [15:12<03:00, 2.26s/it]\u001b[A","timestamp":"2026-04-25T20:14:55.436Z"}
data: {"data":"\r 84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 422/501 [15:14<02:53, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:14:57.501Z"}
data: {"data":"\r 84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 423/501 [15:16<02:48, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:14:59.565Z"}
data: {"data":"\r 85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 424/501 [15:18<02:44, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:15:01.636Z"}
data: {"data":"\r 85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 425/501 [15:20<02:40, 2.11s/it]\u001b[A","timestamp":"2026-04-25T20:15:03.700Z"}
data: {"data":"\r 85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 426/501 [15:22<02:37, 2.10s/it]\u001b[A","timestamp":"2026-04-25T20:15:05.967Z"}
data: {"data":"\r 85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 427/501 [15:25<02:39, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:15:08.237Z"}
data: {"data":"\r 85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 428/501 [15:27<02:39, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:15:10.290Z"}
data: {"data":"\r 86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 429/501 [15:29<02:34, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:15:12.558Z"}
data: {"data":"\r 86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 430/501 [15:31<02:34, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:15:12.558Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:15:12.558Z"}
data: {"data":"{'loss': '0.1483', 'grad_norm': '0.4924', 'learning_rate': '2.874e-06', 'entropy': '0.1837', 'num_tokens': '3.441e+06', 'mean_token_accuracy': '0.9692', 'epoch': '2.575'}","timestamp":"2026-04-25T20:15:12.558Z"}
data: {"data":"\r 86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 430/501 [15:31<02:34, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:15:14.827Z"}
data: {"data":"\r 86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 431/501 [15:34<02:34, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:15:16.884Z"}
data: {"data":"\r 86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 432/501 [15:36<02:29, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:15:19.138Z"}
data: {"data":"\r 86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 433/501 [15:38<02:28, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:15:21.403Z"}
data: {"data":"\r 87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 434/501 [15:40<02:28, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:15:23.674Z"}
data: {"data":"\r 87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 435/501 [15:42<02:27, 2.23s/it]\u001b[A","timestamp":"2026-04-25T20:15:25.932Z"}
data: {"data":"\r 87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 436/501 [15:45<02:25, 2.24s/it]\u001b[A","timestamp":"2026-04-25T20:15:28.197Z"}
data: {"data":"\r 87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 437/501 [15:47<02:23, 2.25s/it]\u001b[A","timestamp":"2026-04-25T20:15:30.452Z"}
data: {"data":"\r 87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 438/501 [15:49<02:21, 2.25s/it]\u001b[A","timestamp":"2026-04-25T20:15:32.709Z"}
data: {"data":"\r 88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 439/501 [15:51<02:19, 2.25s/it]\u001b[A","timestamp":"2026-04-25T20:15:34.767Z"}
data: {"data":"\r 88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 440/501 [15:54<02:13, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:15:34.767Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:15:34.767Z"}
data: {"data":"{'loss': '0.1643', 'grad_norm': '0.7521', 'learning_rate': '2.475e-06', 'entropy': '0.1987', 'num_tokens': '3.52e+06', 'mean_token_accuracy': '0.9659', 'epoch': '2.635'}","timestamp":"2026-04-25T20:15:34.767Z"}
data: {"data":"\r 88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 440/501 [15:54<02:13, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:15:37.030Z"}
data: {"data":"\r 88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 441/501 [15:56<02:12, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:15:39.088Z"}
data: {"data":"\r 88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 442/501 [15:58<02:07, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:15:41.356Z"}
data: {"data":"\r 88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 443/501 [16:00<02:07, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:15:43.419Z"}
data: {"data":"\r 89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 444/501 [16:02<02:02, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:15:45.481Z"}
data: {"data":"\r 89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 445/501 [16:04<01:59, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:15:47.738Z"}
data: {"data":"\r 89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 446/501 [16:06<01:59, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:15:50.005Z"}
data: {"data":"\r 89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 447/501 [16:09<01:58, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:15:52.274Z"}
data: {"data":"\r 89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 448/501 [16:11<01:57, 2.22s/it]\u001b[A","timestamp":"2026-04-25T20:15:54.334Z"}
data: {"data":"\r 90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 449/501 [16:13<01:52, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:15:56.594Z"}
data: {"data":"\r 90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 450/501 [16:15<01:52, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:15:56.594Z"}
data: {"data":"{'loss': '0.1631', 'grad_norm': '0.5542', 'learning_rate': '2.076e-06', 'entropy': '0.1994', 'num_tokens': '3.6e+06', 'mean_token_accuracy': '0.9651', 'epoch': '2.695'}","timestamp":"2026-04-25T20:15:56.594Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:15:56.594Z"}
data: {"data":"\r 90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 450/501 [16:15<01:52, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:15:58.659Z"}
data: {"data":"\r 90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 451/501 [16:17<01:47, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:16:00.721Z"}
data: {"data":"\r 90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 452/501 [16:19<01:44, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:16:02.990Z"}
data: {"data":"\r 90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 453/501 [16:22<01:44, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:16:05.050Z"}
data: {"data":"\r 91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 454/501 [16:24<01:40, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:16:07.106Z"}
data: {"data":"\r 91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 455/501 [16:26<01:37, 2.11s/it]\u001b[A","timestamp":"2026-04-25T20:16:09.163Z"}
data: {"data":"\r 91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 456/501 [16:28<01:34, 2.10s/it]\u001b[A","timestamp":"2026-04-25T20:16:11.431Z"}
data: {"data":"\r 91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 457/501 [16:30<01:34, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:16:13.493Z"}
data: {"data":"\r 91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 458/501 [16:32<01:31, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:16:15.904Z"}
data: {"data":"\r 92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 459/501 [16:35<01:32, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:16:17.966Z"}
data: {"data":"\r 92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 460/501 [16:37<01:28, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:16:17.967Z"}
data: {"data":"{'loss': '0.156', 'grad_norm': '0.4535', 'learning_rate': '1.677e-06', 'entropy': '0.1903', 'num_tokens': '3.681e+06', 'mean_token_accuracy': '0.9667', 'epoch': '2.754'}","timestamp":"2026-04-25T20:16:17.967Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:16:17.967Z"}
data: {"data":"\r 92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 460/501 [16:37<01:28, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:16:20.238Z"}
data: {"data":"\r 92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 461/501 [16:39<01:27, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:16:22.301Z"}
data: {"data":"\r 92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 462/501 [16:41<01:24, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:16:24.567Z"}
data: {"data":"\r 92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 463/501 [16:43<01:23, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:16:26.832Z"}
data: {"data":"\r 93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 464/501 [16:46<01:21, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:16:29.094Z"}
data: {"data":"\r 93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 465/501 [16:48<01:20, 2.23s/it]\u001b[A","timestamp":"2026-04-25T20:16:31.349Z"}
data: {"data":"\r 93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 466/501 [16:50<01:18, 2.24s/it]\u001b[A","timestamp":"2026-04-25T20:16:33.405Z"}
data: {"data":"\r 93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 467/501 [16:52<01:14, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:16:35.672Z"}
data: {"data":"\r 93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 468/501 [16:54<01:12, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:16:37.745Z"}
data: {"data":"\r 94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 469/501 [16:57<01:09, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:16:39.814Z"}
data: {"data":"\r 94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 470/501 [16:59<01:06, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:16:39.814Z"}
data: {"data":"{'loss': '0.1569', 'grad_norm': '0.4086', 'learning_rate': '1.277e-06', 'entropy': '0.1883', 'num_tokens': '3.76e+06', 'mean_token_accuracy': '0.9665', 'epoch': '2.814'}","timestamp":"2026-04-25T20:16:39.814Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:16:39.814Z"}
data: {"data":"\r 94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 470/501 [16:59<01:06, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:16:42.073Z"}
data: {"data":"\r 94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 471/501 [17:01<01:05, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:16:44.340Z"}
data: {"data":"\r 94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 472/501 [17:03<01:03, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:16:46.398Z"}
data: {"data":"\r 94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 473/501 [17:05<01:00, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:16:48.672Z"}
data: {"data":"\r 95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 474/501 [17:07<00:59, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:16:50.938Z"}
data: {"data":"\r 95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 475/501 [17:10<00:57, 2.22s/it]\u001b[A","timestamp":"2026-04-25T20:16:53.005Z"}
data: {"data":"\r 95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 476/501 [17:12<00:54, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:16:55.276Z"}
data: {"data":"\r 95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 477/501 [17:14<00:52, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:16:57.341Z"}
data: {"data":"\r 95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 478/501 [17:16<00:49, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:16:59.414Z"}
data: {"data":"\r 96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 479/501 [17:18<00:46, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:17:01.464Z"}
data: {"data":"\r 96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 480/501 [17:20<00:44, 2.11s/it]\u001b[A","timestamp":"2026-04-25T20:17:01.464Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:17:01.464Z"}
data: {"data":"{'loss': '0.1574', 'grad_norm': '0.5595', 'learning_rate': '8.782e-07', 'entropy': '0.1872', 'num_tokens': '3.841e+06', 'mean_token_accuracy': '0.9665', 'epoch': '2.874'}","timestamp":"2026-04-25T20:17:01.464Z"}
data: {"data":"\r 96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 480/501 [17:20<00:44, 2.11s/it]\u001b[A","timestamp":"2026-04-25T20:17:03.733Z"}
data: {"data":"\r 96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 481/501 [17:22<00:43, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:17:06.002Z"}
data: {"data":"\r 96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 482/501 [17:25<00:41, 2.19s/it]\u001b[A","timestamp":"2026-04-25T20:17:08.264Z"}
data: {"data":"\r 96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 483/501 [17:27<00:39, 2.21s/it]\u001b[A","timestamp":"2026-04-25T20:17:10.321Z"}
data: {"data":"\r 97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 484/501 [17:29<00:36, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:17:12.592Z"}
data: {"data":"\r 97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 485/501 [17:31<00:35, 2.20s/it]\u001b[A","timestamp":"2026-04-25T20:17:14.861Z"}
data: {"data":"\r 97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 486/501 [17:34<00:33, 2.22s/it]\u001b[A","timestamp":"2026-04-25T20:17:16.932Z"}
data: {"data":"\r 97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 487/501 [17:36<00:30, 2.17s/it]\u001b[A","timestamp":"2026-04-25T20:17:18.993Z"}
data: {"data":"\r 97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 488/501 [17:38<00:27, 2.14s/it]\u001b[A","timestamp":"2026-04-25T20:17:21.267Z"}
data: {"data":"\r 98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 489/501 [17:40<00:26, 2.18s/it]\u001b[A","timestamp":"2026-04-25T20:17:23.334Z"}
data: {"data":"{'loss': '0.1478', 'grad_norm': '0.4566', 'learning_rate': '4.79e-07', 'entropy': '0.1755', 'num_tokens': '3.921e+06', 'mean_token_accuracy': '0.9688', 'epoch': '2.934'}","timestamp":"2026-04-25T20:17:23.334Z"}
data: {"data":"\r 98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 490/501 [17:42<00:23, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:17:23.334Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:17:23.334Z"}
data: {"data":"\r 98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 490/501 [17:42<00:23, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:17:25.392Z"}
data: {"data":"\r 98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 491/501 [17:44<00:21, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:17:27.451Z"}
data: {"data":"\r 98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 492/501 [17:46<00:18, 2.10s/it]\u001b[A","timestamp":"2026-04-25T20:17:29.519Z"}
data: {"data":"\r 98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 493/501 [17:48<00:16, 2.09s/it]\u001b[A","timestamp":"2026-04-25T20:17:31.580Z"}
data: {"data":"\r 99%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 494/501 [17:50<00:14, 2.08s/it]\u001b[A","timestamp":"2026-04-25T20:17:33.642Z"}
data: {"data":"\r 99%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰| 495/501 [17:52<00:12, 2.08s/it]\u001b[A","timestamp":"2026-04-25T20:17:35.898Z"}
data: {"data":"\r 99%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰| 496/501 [17:55<00:10, 2.13s/it]\u001b[A","timestamp":"2026-04-25T20:17:37.954Z"}
data: {"data":"\r 99%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰| 497/501 [17:57<00:08, 2.11s/it]\u001b[A","timestamp":"2026-04-25T20:17:40.217Z"}
data: {"data":"\r 99%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰| 498/501 [17:59<00:06, 2.15s/it]\u001b[A","timestamp":"2026-04-25T20:17:42.270Z"}
data: {"data":"\r100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰| 499/501 [18:01<00:04, 2.12s/it]\u001b[A","timestamp":"2026-04-25T20:17:44.525Z"}
data: {"data":"\r100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰| 500/501 [18:03<00:02, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:17:44.526Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:17:44.526Z"}
data: {"data":"{'loss': '0.1538', 'grad_norm': '0.4972', 'learning_rate': '7.984e-08', 'entropy': '0.1857', 'num_tokens': '4.001e+06', 'mean_token_accuracy': '0.967', 'epoch': '2.994'}","timestamp":"2026-04-25T20:17:44.526Z"}
data: {"data":"\r100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰| 500/501 [18:03<00:02, 2.16s/it]\u001b[A","timestamp":"2026-04-25T20:17:46.961Z"}
data: {"data":"\r100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 501/501 [18:06<00:00, 2.24s/it]\u001b[A","timestamp":"2026-04-25T20:17:47.267Z"}
data: {"data":"{'train_runtime': '1087', 'train_samples_per_second': '3.686', 'train_steps_per_second': '0.461', 'train_loss': '0.6784', 'entropy': '0.1464', 'num_tokens': '4.008e+06', 'mean_token_accuracy': '0.9756', 'epoch': '3'}","timestamp":"2026-04-25T20:17:47.267Z"}
data: {"data":"\r \r\u001b[A","timestamp":"2026-04-25T20:17:47.267Z"}
data: {"data":"\r100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 501/501 [18:06<00:00, 2.24s/it]\u001b[A\r100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 501/501 [18:06<00:00, 2.17s/it]","timestamp":"2026-04-25T20:17:47.268Z"}
data: {"data":"βœ… Training completed.","timestamp":"2026-04-25T20:17:48.789Z"}
data: {"data":"\rProcessing Files (0 / 0) : | | 0.00B / 0.00B ","timestamp":"2026-04-25T20:17:49.231Z"}
data: {"data":"\rNew Data Upload : | | 0.00B / 0.00B \u001b[A","timestamp":"2026-04-25T20:17:49.353Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:49.354Z"}
data: {"data":"\r .../output/training_args.bin: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 5.71kB / 5.71kB \u001b[A\u001b[A","timestamp":"2026-04-25T20:17:49.354Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:49.354Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:49.354Z"}
data: {"data":"\r /data/output/tokenizer.json : 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 11.4MB / 11.4MB \u001b[A\u001b[A\u001b[A","timestamp":"2026-04-25T20:17:49.354Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:49.354Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:49.354Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:49.354Z"}
data: {"data":"\r ...adapter_model.safetensors: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 20.2MB / 20.2MB \u001b[A\u001b[A\u001b[A\u001b[A","timestamp":"2026-04-25T20:17:49.354Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:49.354Z"}
data: {"data":"\r .../output/training_args.bin: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 5.71kB / 5.71kB \u001b[A\u001b[A","timestamp":"2026-04-25T20:17:49.354Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:49.354Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:49.354Z"}
data: {"data":"\r /data/output/tokenizer.json : 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 11.4MB / 11.4MB \u001b[A\u001b[A\u001b[A","timestamp":"2026-04-25T20:17:49.354Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:49.354Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:49.354Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:49.354Z"}
data: {"data":"\r ...adapter_model.safetensors: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 20.2MB / 20.2MB \u001b[A\u001b[A\u001b[A\u001b[A","timestamp":"2026-04-25T20:17:49.448Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:49.448Z"}
data: {"data":"\r .../output/training_args.bin: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 5.71kB / 5.71kB \u001b[A\u001b[A","timestamp":"2026-04-25T20:17:49.448Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:49.448Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:49.448Z"}
data: {"data":"\r /data/output/tokenizer.json : 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 11.4MB / 11.4MB \u001b[A\u001b[A\u001b[A","timestamp":"2026-04-25T20:17:49.448Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:49.448Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:49.448Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:49.448Z"}
data: {"data":"\r ...adapter_model.safetensors: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 20.2MB / 20.2MB \u001b[A\u001b[A\u001b[A\u001b[A\rProcessing Files (3 / 3) : 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 31.6MB / 31.6MB, 0.00B/s ","timestamp":"2026-04-25T20:17:49.448Z"}
data: {"data":"\rNew Data Upload : | | 0.00B / 0.00B, 0.00B/s ","timestamp":"2026-04-25T20:17:49.448Z"}
data: {"data":"\r .../output/training_args.bin: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 5.71kB / 5.71kB ","timestamp":"2026-04-25T20:17:49.448Z"}
data: {"data":"\r /data/output/tokenizer.json : 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 11.4MB / 11.4MB ","timestamp":"2026-04-25T20:17:49.449Z"}
data: {"data":"\r ...adapter_model.safetensors: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 20.2MB / 20.2MB ","timestamp":"2026-04-25T20:17:49.449Z"}
data: {"data":"πŸ’Ύ Model saved to /data/output.","timestamp":"2026-04-25T20:17:49.781Z"}
data: {"data":"\rProcessing Files (0 / 0) : | | 0.00B / 0.00B ","timestamp":"2026-04-25T20:17:50.124Z"}
data: {"data":"\rNew Data Upload : | | 0.00B / 0.00B \u001b[A","timestamp":"2026-04-25T20:17:50.234Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:50.234Z"}
data: {"data":"\r .../output/training_args.bin: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 5.71kB / 5.71kB \u001b[A\u001b[A","timestamp":"2026-04-25T20:17:50.234Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:50.234Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:50.234Z"}
data: {"data":"\r /data/output/tokenizer.json : 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 11.4MB / 11.4MB \u001b[A\u001b[A\u001b[A","timestamp":"2026-04-25T20:17:50.234Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:50.234Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:50.234Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:50.234Z"}
data: {"data":"\r ...adapter_model.safetensors: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 20.2MB / 20.2MB \u001b[A\u001b[A\u001b[A\u001b[A","timestamp":"2026-04-25T20:17:50.234Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:50.234Z"}
data: {"data":"\r .../output/training_args.bin: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 5.71kB / 5.71kB \u001b[A\u001b[A","timestamp":"2026-04-25T20:17:50.234Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:50.234Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:50.234Z"}
data: {"data":"\r /data/output/tokenizer.json : 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 11.4MB / 11.4MB \u001b[A\u001b[A\u001b[A","timestamp":"2026-04-25T20:17:50.234Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:50.234Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:50.234Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:50.234Z"}
data: {"data":"\r ...adapter_model.safetensors: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 20.2MB / 20.2MB \u001b[A\u001b[A\u001b[A\u001b[A","timestamp":"2026-04-25T20:17:50.334Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:50.334Z"}
data: {"data":"\r .../output/training_args.bin: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 5.71kB / 5.71kB \u001b[A\u001b[A","timestamp":"2026-04-25T20:17:50.334Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:50.334Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:50.334Z"}
data: {"data":"\r /data/output/tokenizer.json : 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 11.4MB / 11.4MB \u001b[A\u001b[A\u001b[A","timestamp":"2026-04-25T20:17:50.334Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:50.334Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:50.335Z"}
data: {"data":"","timestamp":"2026-04-25T20:17:50.335Z"}
data: {"data":"\r ...adapter_model.safetensors: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 20.2MB / 20.2MB \u001b[A\u001b[A\u001b[A\u001b[A\rProcessing Files (3 / 3) : 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 31.6MB / 31.6MB, 0.00B/s ","timestamp":"2026-04-25T20:17:50.335Z"}
data: {"data":"\rNew Data Upload : | | 0.00B / 0.00B, 0.00B/s ","timestamp":"2026-04-25T20:17:50.335Z"}
data: {"data":"\r .../output/training_args.bin: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 5.71kB / 5.71kB ","timestamp":"2026-04-25T20:17:50.335Z"}
data: {"data":"\r /data/output/tokenizer.json : 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 11.4MB / 11.4MB ","timestamp":"2026-04-25T20:17:50.335Z"}
data: {"data":"\r ...adapter_model.safetensors: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 20.2MB / 20.2MB ","timestamp":"2026-04-25T20:17:50.335Z"}
data: {"data":"πŸ€— Model pushed to the Hub in https://huggingface.co/Yaswanth-Bolla/qwen2.5-7b-sre-sft.","timestamp":"2026-04-25T20:17:50.641Z"}